1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/IntrinsicsRISCV.h"
46#include "llvm/Support/Debug.h"
52#include <optional>
53
54using namespace llvm;
55
56#define DEBUG_TYPE "riscv-lower"
57
58STATISTIC(NumTailCalls, "Number of tail calls");
59
60static cl::opt<unsigned> ExtensionMaxWebSize(
61 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
62 cl::desc("Give the maximum size (in number of nodes) of the web of "
63 "instructions that we will consider for VW expansion"),
64 cl::init(18));
65
66static cl::opt<bool>
67 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
68 cl::desc("Allow the formation of VW_W operations (e.g., "
69 "VWADD_W) with splat constants"),
70 cl::init(false));
71
72static cl::opt<unsigned> NumRepeatedDivisors(
73 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
74 cl::desc("Set the minimum number of repetitions of a divisor to allow "
75 "transformation to multiplications by the reciprocal"),
76 cl::init(2));
77
78static cl::opt<int>
79 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
80 cl::desc("Give the maximum number of instructions that we will "
81 "use for creating a floating-point immediate value"),
82 cl::init(2));
83
84static cl::opt<bool>
85 ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
86 cl::desc("Swap add and addi in cases where the add may "
87 "be combined with a shift"),
88 cl::init(true));
89
90RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
91 const RISCVSubtarget &STI)
92 : TargetLowering(TM), Subtarget(STI) {
93
94 RISCVABI::ABI ABI = Subtarget.getTargetABI();
95 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
96
97 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
98 !Subtarget.hasStdExtF()) {
99 errs() << "Hard-float 'f' ABI can't be used for a target that "
100 "doesn't support the F instruction set extension (ignoring "
101 "target-abi)\n";
102 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
103 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
104 !Subtarget.hasStdExtD()) {
105 errs() << "Hard-float 'd' ABI can't be used for a target that "
106 "doesn't support the D instruction set extension (ignoring "
107 "target-abi)\n";
108 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
109 }
110
111 switch (ABI) {
112 default:
113 reportFatalUsageError("Don't know how to lower this ABI");
114 case RISCVABI::ABI_ILP32:
115 case RISCVABI::ABI_ILP32E:
116 case RISCVABI::ABI_LP64E:
117 case RISCVABI::ABI_ILP32F:
118 case RISCVABI::ABI_ILP32D:
119 case RISCVABI::ABI_LP64:
120 case RISCVABI::ABI_LP64F:
121 case RISCVABI::ABI_LP64D:
122 break;
123 }
124
125 MVT XLenVT = Subtarget.getXLenVT();
126
127 // Set up the register classes.
128 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
129
130 if (Subtarget.hasStdExtZfhmin())
131 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
132 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
133 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
134 if (Subtarget.hasStdExtF())
135 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
136 if (Subtarget.hasStdExtD())
137 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
138 if (Subtarget.hasStdExtZhinxmin())
139 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
140 if (Subtarget.hasStdExtZfinx())
141 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
142 if (Subtarget.hasStdExtZdinx()) {
143 if (Subtarget.is64Bit())
144 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
145 else
146 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
147 }
148
149 static const MVT::SimpleValueType BoolVecVTs[] = {
150 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
151 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
152 static const MVT::SimpleValueType IntVecVTs[] = {
153 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
154 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
155 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
156 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
157 MVT::nxv4i64, MVT::nxv8i64};
158 static const MVT::SimpleValueType F16VecVTs[] = {
159 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
160 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
161 static const MVT::SimpleValueType BF16VecVTs[] = {
162 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
163 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
164 static const MVT::SimpleValueType F32VecVTs[] = {
165 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
166 static const MVT::SimpleValueType F64VecVTs[] = {
167 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
168 static const MVT::SimpleValueType VecTupleVTs[] = {
169 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
170 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
171 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
172 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
173 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
174 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
175 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
176 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
177 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
178 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
179 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
180
181 if (Subtarget.hasVInstructions()) {
182 auto addRegClassForRVV = [this](MVT VT) {
183 // Disable the smallest fractional LMUL types if ELEN is less than
184 // RVVBitsPerBlock.
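  // For example, with ELEN=32 (Zve32*) MinElts below is 64/32 = 2, so the
  // MF8 types such as nxv1i8 are skipped and get no register class.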
185 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
186 if (VT.getVectorMinNumElements() < MinElts)
187 return;
188
189 unsigned Size = VT.getSizeInBits().getKnownMinValue();
190 const TargetRegisterClass *RC;
191 if (Size <= RISCV::RVVBitsPerBlock)
192 RC = &RISCV::VRRegClass;
193 else if (Size == 2 * RISCV::RVVBitsPerBlock)
194 RC = &RISCV::VRM2RegClass;
195 else if (Size == 4 * RISCV::RVVBitsPerBlock)
196 RC = &RISCV::VRM4RegClass;
197 else if (Size == 8 * RISCV::RVVBitsPerBlock)
198 RC = &RISCV::VRM8RegClass;
199 else
200 llvm_unreachable("Unexpected size");
201
202 addRegisterClass(VT, RC);
203 };
204
205 for (MVT VT : BoolVecVTs)
206 addRegClassForRVV(VT);
207 for (MVT VT : IntVecVTs) {
208 if (VT.getVectorElementType() == MVT::i64 &&
209 !Subtarget.hasVInstructionsI64())
210 continue;
211 addRegClassForRVV(VT);
212 }
213
214 if (Subtarget.hasVInstructionsF16Minimal() ||
215 Subtarget.hasVendorXAndesVPackFPH())
216 for (MVT VT : F16VecVTs)
217 addRegClassForRVV(VT);
218
219 if (Subtarget.hasVInstructionsBF16Minimal() ||
220 Subtarget.hasVendorXAndesVBFHCvt())
221 for (MVT VT : BF16VecVTs)
222 addRegClassForRVV(VT);
223
224 if (Subtarget.hasVInstructionsF32())
225 for (MVT VT : F32VecVTs)
226 addRegClassForRVV(VT);
227
228 if (Subtarget.hasVInstructionsF64())
229 for (MVT VT : F64VecVTs)
230 addRegClassForRVV(VT);
231
232 if (Subtarget.useRVVForFixedLengthVectors()) {
233 auto addRegClassForFixedVectors = [this](MVT VT) {
234 MVT ContainerVT = getContainerForFixedLengthVector(VT);
235 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
236 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
237 addRegisterClass(VT, TRI.getRegClass(RCID));
238 };
239 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
240 if (useRVVForFixedLengthVectorVT(VT))
241 addRegClassForFixedVectors(VT);
242
243 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
244 if (useRVVForFixedLengthVectorVT(VT))
245 addRegClassForFixedVectors(VT);
246 }
247
248 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
249 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
250 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
251 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
252 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
253 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
254 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
255 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
256 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
257 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
258 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
259 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
260 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
261 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
262 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
263 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
264 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
265 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
266 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
267 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
268 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
269 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
270 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
271 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
272 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
273 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
274 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
275 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
276 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
277 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
278 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
279 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
280 }
281
282 // Compute derived properties from the register classes.
283 computeRegisterProperties(STI.getRegisterInfo());
284
285 setStackPointerRegisterToSaveRestore(RISCV::X2);
286
287 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
288 MVT::i1, Promote);
289 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
290 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
291 MVT::i1, Promote);
292
293 // TODO: add all necessary setOperationAction calls.
294 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Custom);
295
296 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
297 setOperationAction(ISD::BR_CC, XLenVT, Expand);
298 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
300
305 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
308 }
309
310 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
311
312 setOperationAction(ISD::VASTART, MVT::Other, Custom);
313 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
314
315 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
316 !Subtarget.hasVendorXAndesPerf())
318
320
321 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
322 !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
323 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
324 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
325
326 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
327 setOperationAction(ISD::LOAD, MVT::i64, Custom);
328 setOperationAction(ISD::STORE, MVT::i64, Custom);
329 }
330
331 if (Subtarget.is64Bit()) {
333
334 setOperationAction(ISD::LOAD, MVT::i32, Custom);
336 MVT::i32, Custom);
338 if (!Subtarget.hasStdExtZbb())
341 Custom);
343 }
344 if (!Subtarget.hasStdExtZmmul()) {
346 } else if (Subtarget.is64Bit()) {
349 } else {
351 }
352
353 if (!Subtarget.hasStdExtM()) {
355 Expand);
356 } else if (Subtarget.is64Bit()) {
358 {MVT::i8, MVT::i16, MVT::i32}, Custom);
359 }
360
363 Expand);
364
366 Custom);
367
368 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
369 if (Subtarget.is64Bit())
371 } else if (Subtarget.hasVendorXTHeadBb()) {
372 if (Subtarget.is64Bit())
375 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
377 } else {
379 }
380
382 Subtarget.hasREV8Like() ? Legal : Expand);
383
384 if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
385 !Subtarget.is64Bit()) {
387 } else {
388 // Zbkb can use rev8+brev8 to implement bitreverse.
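  // (brev8 reverses the bits within each byte, so pairing it with rev8's
  // byte swap yields a full register-wide bit reversal.)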
390 Subtarget.hasStdExtZbkb() ? Custom : Expand);
391 if (Subtarget.hasStdExtZbkb())
393 }
394
395 if (Subtarget.hasStdExtZbb() ||
396 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
398 Legal);
399 }
400
401 if (Subtarget.hasCTZLike()) {
402 if (Subtarget.is64Bit())
404 } else {
406 }
407
408 if (!Subtarget.hasCPOPLike()) {
409 // TODO: These should be set to LibCall, but this currently breaks
410 // the Linux kernel build. See #101786. Lacks i128 tests, too.
411 if (Subtarget.is64Bit())
413 else
416 }
417
418 if (Subtarget.hasCLZLike()) {
419 // We need the custom lowering to make sure that the resulting sequence
420 // for the 32bit case is efficient on 64bit targets.
421 // Use default promotion for i32 without Zbb.
422 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbb())
424 } else {
426 }
427
428 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
430 } else if (Subtarget.hasShortForwardBranchOpt()) {
431 // We can use PseudoCCSUB to implement ABS.
433 } else if (Subtarget.is64Bit()) {
435 }
436
437 if (!Subtarget.useCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov())
439
440 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
447 }
448
449 static const unsigned FPLegalNodeTypes[] = {
450 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM,
451 ISD::FMAXIMUMNUM, ISD::LRINT, ISD::LLRINT,
452 ISD::LROUND, ISD::LLROUND, ISD::STRICT_LRINT,
457
458 static const ISD::CondCode FPCCToExpand[] = {
462
463 static const unsigned FPOpToExpand[] = {
464 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
465 ISD::FREM};
466
467 static const unsigned FPRndMode[] = {
468 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
469 ISD::FROUNDEVEN};
470
471 static const unsigned ZfhminZfbfminPromoteOps[] = {
472 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM,
473 ISD::FMINIMUMNUM, ISD::FADD, ISD::FSUB,
478 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
479 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
480 ISD::FROUNDEVEN, ISD::FCANONICALIZE};
481
482 if (Subtarget.hasStdExtZfbfmin()) {
483 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
487 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
488 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
490 setOperationAction(ISD::FABS, MVT::bf16, Custom);
491 setOperationAction(ISD::FNEG, MVT::bf16, Custom);
495 }
496
497 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
498 if (Subtarget.hasStdExtZfhOrZhinx()) {
499 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
500 setOperationAction(FPRndMode, MVT::f16,
501 Subtarget.hasStdExtZfa() ? Legal : Custom);
503 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
504 Subtarget.hasStdExtZfa() ? Legal : Custom);
505 if (Subtarget.hasStdExtZfa())
507 } else {
508 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
509 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Promote);
510 for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
513 setOperationAction(Op, MVT::f16, Custom);
514 setOperationAction(ISD::FABS, MVT::f16, Custom);
515 setOperationAction(ISD::FNEG, MVT::f16, Custom);
519 }
520
521 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
522
525 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
528 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
529
531 ISD::FNEARBYINT, MVT::f16,
532 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
533 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
534 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
535 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
536 ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP},
537 MVT::f16, Promote);
538
539 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
540 // complete support for all operations in LegalizeDAG.
545 MVT::f16, Promote);
546
547 // We need to custom promote this.
548 if (Subtarget.is64Bit())
549 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
550 }
551
552 if (Subtarget.hasStdExtFOrZfinx()) {
553 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
554 setOperationAction(FPRndMode, MVT::f32,
555 Subtarget.hasStdExtZfa() ? Legal : Custom);
556 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
559 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
560 setOperationAction(FPOpToExpand, MVT::f32, Expand);
561 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
562 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
563 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
564 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
566 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
567 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
568 Subtarget.isSoftFPABI() ? LibCall : Custom);
569 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
570 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
571 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
572 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);
573
574 if (Subtarget.hasStdExtZfa()) {
576 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
577 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
578 } else {
579 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
580 }
581 }
582
583 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
584 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
585
586 if (Subtarget.hasStdExtDOrZdinx()) {
587 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
588
589 if (!Subtarget.is64Bit())
590 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
591
592 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
593 !Subtarget.is64Bit()) {
594 setOperationAction(ISD::LOAD, MVT::f64, Custom);
595 setOperationAction(ISD::STORE, MVT::f64, Custom);
596 }
597
598 if (Subtarget.hasStdExtZfa()) {
600 setOperationAction(FPRndMode, MVT::f64, Legal);
601 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
602 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
603 } else {
604 if (Subtarget.is64Bit())
605 setOperationAction(FPRndMode, MVT::f64, Custom);
606
607 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
608 }
609
612 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
615 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
616 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
617 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
618 setOperationAction(FPOpToExpand, MVT::f64, Expand);
619 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
620 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
621 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
622 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
624 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
625 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
626 Subtarget.isSoftFPABI() ? LibCall : Custom);
627 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
628 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
629 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Custom);
630 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
631 }
632
633 if (Subtarget.is64Bit()) {
636 MVT::i32, Custom);
637 setOperationAction(ISD::LROUND, MVT::i32, Custom);
638 }
639
640 if (Subtarget.hasStdExtFOrZfinx()) {
642 Custom);
643
644 // f16/bf16 require custom handling.
646 Custom);
648 Custom);
649
651 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
652 setOperationAction(ISD::GET_FPENV, XLenVT, Custom);
653 setOperationAction(ISD::SET_FPENV, XLenVT, Custom);
654 setOperationAction(ISD::RESET_FPENV, MVT::Other, Custom);
655 setOperationAction(ISD::GET_FPMODE, XLenVT, Custom);
656 setOperationAction(ISD::SET_FPMODE, XLenVT, Custom);
657 setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
658 }
659
662 XLenVT, Custom);
663
665
666 if (Subtarget.is64Bit())
668
669 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
670 // Unfortunately this can't be determined just from the ISA naming string.
671 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
672 Subtarget.is64Bit() ? Legal : Custom);
673 setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
674 Subtarget.is64Bit() ? Legal : Custom);
675
676 if (Subtarget.is64Bit()) {
677 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
678 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
679 }
680
681 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
683 if (Subtarget.is64Bit())
685
686 if (Subtarget.hasVendorXMIPSCBOP())
687 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
688 else if (Subtarget.hasStdExtZicbop())
689 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
690
691 if (Subtarget.hasStdExtA()) {
692 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
693 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
695 else
697 } else if (Subtarget.hasForcedAtomics()) {
698 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
699 } else {
701 }
702
703 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
704
706
707 if (getTargetMachine().getTargetTriple().isOSLinux()) {
708 // Custom lowering of llvm.clear_cache.
710 }
711
712 if (Subtarget.hasVInstructions()) {
714
715 setOperationAction(ISD::VSCALE, XLenVT, Custom);
716
717 // RVV intrinsics may have illegal operands.
718 // We also need to custom legalize vmv.x.s.
721 {MVT::i8, MVT::i16}, Custom);
722 if (Subtarget.is64Bit())
724 MVT::i32, Custom);
725 else
727 MVT::i64, Custom);
728
730 MVT::Other, Custom);
731
732 static const unsigned IntegerVPOps[] = {
733 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
734 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
735 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
736 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
737 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
738 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
739 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
740 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
741 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
742 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
743 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
744 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
745 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
746 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
747 ISD::EXPERIMENTAL_VP_SPLAT};
748
749 static const unsigned FloatingPointVPOps[] = {
750 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
751 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
752 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
753 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
754 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
755 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
756 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
757 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
758 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
759 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
760 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
761 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
762 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
763
764 static const unsigned IntegerVecReduceOps[] = {
765 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
766 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
767 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
768
769 static const unsigned FloatingPointVecReduceOps[] = {
770 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
771 ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};
772
773 static const unsigned FloatingPointLibCallOps[] = {
774 ISD::FREM, ISD::FPOW, ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
775 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, ISD::FLOG10};
776
777 if (!Subtarget.is64Bit()) {
778 // We must custom-lower certain vXi64 operations on RV32 due to the vector
779 // element type being illegal.
781 MVT::i64, Custom);
782
783 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
784
785 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
786 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
787 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
788 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
789 MVT::i64, Custom);
790 }
791
792 for (MVT VT : BoolVecVTs) {
793 if (!isTypeLegal(VT))
794 continue;
795
797
798 // Mask VTs are custom-expanded into a series of standard nodes
802 VT, Custom);
803
805 Custom);
806
808 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
809 Expand);
810 setOperationAction(ISD::VP_MERGE, VT, Custom);
811
812 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
813 Custom);
814
815 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
816
818 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
819 Custom);
820
822 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
823 Custom);
824
825 // RVV has native int->float & float->int conversions where the
826 // element type sizes are within one power-of-two of each other. Any
827 // wider distances between type sizes have to be lowered as sequences
828 // which progressively narrow the gap in stages.
833 VT, Custom);
835 Custom);
836
837 // Expand all extending loads to types larger than this, and truncating
838 // stores from types larger than this.
839 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
840 setTruncStoreAction(VT, OtherVT, Expand);
841 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
842 OtherVT, Expand);
843 }
844
845 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
846 ISD::VP_TRUNCATE, ISD::VP_SETCC},
847 VT, Custom);
848
851
853
854 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
855 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
856 setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
857
860 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
861 }
862
863 for (MVT VT : IntVecVTs) {
864 if (!isTypeLegal(VT))
865 continue;
866
869
870 // Vectors implement MULHS/MULHU.
872
873 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
874 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
876
878 Legal);
879
881
882 // Custom-lower extensions and truncations from/to mask types.
884 VT, Custom);
885
886 // RVV has native int->float & float->int conversions where the
887 // element type sizes are within one power-of-two of each other. Any
888 // wider distances between type sizes have to be lowered as sequences
889 // which progressively narrow the gap in stages.
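  // For example, an i8 -> f64 conversion is not a single instruction: the
  // i8 elements are first extended to i32 and the result is then
  // widening-converted (vfwcvt) to f64.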
894 VT, Custom);
896 Custom);
900 VT, Legal);
901
902 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
903 // nodes which truncate by one power of two at a time.
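  // For example, truncating i64 elements to i8 emits three such nodes
  // (i64->i32->i16->i8), each selected as a narrowing shift (vnsrl).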
906 Custom);
907
908 // Custom-lower insert/extract operations to simplify patterns.
910 Custom);
911
912 // Custom-lower reduction operations to set up the corresponding custom
913 // nodes' operands.
914 setOperationAction(IntegerVecReduceOps, VT, Custom);
915
916 setOperationAction(IntegerVPOps, VT, Custom);
917
918 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
919
920 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
921 VT, Custom);
922
924 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
925 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
926 VT, Custom);
927 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
928
931 VT, Custom);
932
935
937
938 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
939 setTruncStoreAction(VT, OtherVT, Expand);
940 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
941 OtherVT, Expand);
942 }
943
946
947 // Splice
949
950 if (Subtarget.hasStdExtZvkb()) {
952 setOperationAction(ISD::VP_BSWAP, VT, Custom);
953 } else {
954 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
956 }
957
958 if (Subtarget.hasStdExtZvbb()) {
960 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
961 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
962 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
963 VT, Custom);
964 } else {
965 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
967 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
968 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
969 VT, Expand);
970
971 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
972 // range of f32.
973 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
974 if (isTypeLegal(FloatVT)) {
976 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
977 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
978 VT, Custom);
979 }
980 }
981
983 }
984
985 for (MVT VT : VecTupleVTs) {
986 if (!isTypeLegal(VT))
987 continue;
988
989 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
990 }
991
992 // Expand various CCs to best match the RVV ISA, which natively supports UNE
993 // but no other unordered comparisons, and supports all ordered comparisons
994 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
995 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
996 // and we pattern-match those back to the "original", swapping operands once
997 // more. This way we catch both operations and both "vf" and "fv" forms with
998 // fewer patterns.
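  // For example, (setogt x, y) is expanded to (setolt y, x); the patterns
  // then select vmflt.vv, or vmfgt.vf when the swapped operand is a scalar
  // splat.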
999 static const ISD::CondCode VFPCCToExpand[] = {
1003 };
1004
1005 // TODO: support more ops.
1006 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1007 ISD::FMINNUM,
1008 ISD::FMAXNUM,
1009 ISD::FMINIMUMNUM,
1010 ISD::FMAXIMUMNUM,
1011 ISD::FADD,
1012 ISD::FSUB,
1013 ISD::FMUL,
1014 ISD::FMA,
1015 ISD::FDIV,
1016 ISD::FSQRT,
1017 ISD::FCEIL,
1018 ISD::FTRUNC,
1019 ISD::FFLOOR,
1020 ISD::FROUND,
1021 ISD::FROUNDEVEN,
1022 ISD::FRINT,
1023 ISD::FNEARBYINT,
1025 ISD::SETCC,
1026 ISD::FMAXIMUM,
1027 ISD::FMINIMUM,
1034 ISD::VECREDUCE_FMIN,
1035 ISD::VECREDUCE_FMAX,
1036 ISD::VECREDUCE_FMINIMUM,
1037 ISD::VECREDUCE_FMAXIMUM};
1038
1039 // TODO: support more vp ops.
1040 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1041 ISD::VP_FADD,
1042 ISD::VP_FSUB,
1043 ISD::VP_FMUL,
1044 ISD::VP_FDIV,
1045 ISD::VP_FMA,
1046 ISD::VP_REDUCE_FMIN,
1047 ISD::VP_REDUCE_FMAX,
1048 ISD::VP_SQRT,
1049 ISD::VP_FMINNUM,
1050 ISD::VP_FMAXNUM,
1051 ISD::VP_FCEIL,
1052 ISD::VP_FFLOOR,
1053 ISD::VP_FROUND,
1054 ISD::VP_FROUNDEVEN,
1055 ISD::VP_FROUNDTOZERO,
1056 ISD::VP_FRINT,
1057 ISD::VP_FNEARBYINT,
1058 ISD::VP_SETCC,
1059 ISD::VP_FMINIMUM,
1060 ISD::VP_FMAXIMUM,
1061 ISD::VP_REDUCE_FMINIMUM,
1062 ISD::VP_REDUCE_FMAXIMUM};
1063
1064 // Sets common operation actions on RVV floating-point vector types.
1065 const auto SetCommonVFPActions = [&](MVT VT) {
1067 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1068 // sizes are within one power-of-two of each other. Therefore conversions
1069 // between vXf16 and vXf64 must be lowered as sequences which convert via
1070 // vXf32.
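  // For example, extending nxv2f16 to nxv2f64 is emitted as two widening
  // converts (f16->f32, then f32->f64), and the f64->f16 round likewise
  // narrows via f32.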
1071 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1072 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1073 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1074 // Custom-lower insert/extract operations to simplify patterns.
1076 Custom);
1077 // Expand various condition codes (explained above).
1078 setCondCodeAction(VFPCCToExpand, VT, Expand);
1079
1081 {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT,
1082 Legal);
1083 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
1084
1085 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1086 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
1088 VT, Custom);
1089
1090 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1091
1092 // Expand FP operations that need libcalls.
1093 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1094
1096
1097 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1098
1099 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
1100 VT, Custom);
1101
1103 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1104 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1105 VT, Custom);
1106 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1107
1110
1113 VT, Custom);
1114
1117
1119 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1120 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1121
1122 setOperationAction(FloatingPointVPOps, VT, Custom);
1123
1125 Custom);
1128 VT, Legal);
1133 VT, Custom);
1134
1136 };
1137
1138 // Sets common extload/truncstore actions on RVV floating-point vector
1139 // types.
1140 const auto SetCommonVFPExtLoadTruncStoreActions =
1141 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1142 for (auto SmallVT : SmallerVTs) {
1143 setTruncStoreAction(VT, SmallVT, Expand);
1144 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1145 }
1146 };
1147
1148 // Sets common actions for f16 and bf16 for when there's only
1149 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
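  // For example, with Zvfhmin but not Zvfh, an f16 vector fadd is widened to
  // f32, added there, and the result narrowed back to f16.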
1150 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1151 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1153 Custom);
1154 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1155 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1156 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1157 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1158 Custom);
1160 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1166 VT, Custom);
1167 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1168 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1169 MVT EltVT = VT.getVectorElementType();
1170 if (isTypeLegal(EltVT))
1171 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1173 VT, Custom);
1174 else
1175 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1176 EltVT, Custom);
1177 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1178 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1179 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1180 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1181 ISD::VP_SCATTER},
1182 VT, Custom);
1183 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1184
1185 setOperationAction(ISD::FNEG, VT, Expand);
1186 setOperationAction(ISD::FABS, VT, Expand);
1188
1189 // Expand FP operations that need libcalls.
1190 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1191
1192 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
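  // nxv32f16/nxv32bf16 already occupy LMUL=8, so the promoted nxv32f32 type
  // (LMUL=16) does not exist; these ops are instead custom-lowered by
  // splitting into two LMUL=4 halves.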
1193 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1194 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1195 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1196 } else {
1197 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1198 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1199 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1200 }
1201 };
1202
1203 if (Subtarget.hasVInstructionsF16()) {
1204 for (MVT VT : F16VecVTs) {
1205 if (!isTypeLegal(VT))
1206 continue;
1207 SetCommonVFPActions(VT);
1208 }
1209 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1210 for (MVT VT : F16VecVTs) {
1211 if (!isTypeLegal(VT))
1212 continue;
1213 SetCommonPromoteToF32Actions(VT);
1214 }
1215 }
1216
1217 if (Subtarget.hasVInstructionsBF16Minimal()) {
1218 for (MVT VT : BF16VecVTs) {
1219 if (!isTypeLegal(VT))
1220 continue;
1221 SetCommonPromoteToF32Actions(VT);
1222 }
1223 }
1224
1225 if (Subtarget.hasVInstructionsF32()) {
1226 for (MVT VT : F32VecVTs) {
1227 if (!isTypeLegal(VT))
1228 continue;
1229 SetCommonVFPActions(VT);
1230 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1231 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1232 }
1233 }
1234
1235 if (Subtarget.hasVInstructionsF64()) {
1236 for (MVT VT : F64VecVTs) {
1237 if (!isTypeLegal(VT))
1238 continue;
1239 SetCommonVFPActions(VT);
1240 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1241 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1242 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1243 }
1244 }
1245
1246 if (Subtarget.useRVVForFixedLengthVectors()) {
1248 if (!useRVVForFixedLengthVectorVT(VT))
1249 continue;
1250
1251 // By default everything must be expanded.
1252 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1253 setOperationAction(Op, VT, Expand);
1254 for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
1255 setTruncStoreAction(VT, OtherVT, Expand);
1256 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
1257 OtherVT, Expand);
1258 }
1259
1260 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1261 // expansion to a build_vector of 0s.
1263
1264 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1266 Custom);
1267
1270 Custom);
1271
1273 VT, Custom);
1274
1276 VT, Custom);
1277
1279
1280 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1281
1283
1285
1288 Custom);
1289
1290 setOperationAction(ISD::BITCAST, VT, Custom);
1291
1293 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1294 Custom);
1295
1297 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1298 Custom);
1299
1301 {
1310 },
1311 VT, Custom);
1313 Custom);
1314
1316
1317 // Operations below differ between mask vectors and other vectors.
1318 if (VT.getVectorElementType() == MVT::i1) {
1319 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1320 ISD::OR, ISD::XOR},
1321 VT, Custom);
1322
1323 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1324 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1325 VT, Custom);
1326
1327 setOperationAction(ISD::VP_MERGE, VT, Custom);
1328
1329 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1330 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1331 continue;
1332 }
1333
1334 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1335 // it before type legalization for i64 vectors on RV32. It will then be
1336 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1337 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1338 // improvements first.
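  // For example, on RV32 a splat of an i64 value reaches the custom lowering
  // as SPLAT_VECTOR_PARTS carrying the low and high i32 halves as operands.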
1339 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1342
1343 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1345 }
1346
1348 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1349
1350 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1351 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1352 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1353 ISD::VP_SCATTER},
1354 VT, Custom);
1355 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1356
1360 VT, Custom);
1361
1364
1366
1367 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1368 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1370
1374 VT, Custom);
1375
1377
1380
1381 // Custom-lower reduction operations to set up the corresponding custom
1382 // nodes' operands.
1383 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1384 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1385 ISD::VECREDUCE_UMIN},
1386 VT, Custom);
1387
1388 setOperationAction(IntegerVPOps, VT, Custom);
1389
1390 if (Subtarget.hasStdExtZvkb())
1392
1393 if (Subtarget.hasStdExtZvbb()) {
1396 VT, Custom);
1397 } else {
1398 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1399 // range of f32.
1400 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1401 if (isTypeLegal(FloatVT))
1404 Custom);
1405 }
1406
1408 }
1409
1411 // There are no extending loads or truncating stores.
1412 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1413 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1414 setTruncStoreAction(VT, InnerVT, Expand);
1415 }
1416
1417 if (!useRVVForFixedLengthVectorVT(VT))
1418 continue;
1419
1420 // By default everything must be expanded.
1421 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1422 setOperationAction(Op, VT, Expand);
1423
1424 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1425 // expansion to a build_vector of 0s.
1427
1432 VT, Custom);
1433 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1434 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1435
1437 VT, Custom);
1438
1439 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1440 ISD::MGATHER, ISD::MSCATTER},
1441 VT, Custom);
1442 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1443 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1444 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1445 VT, Custom);
1446 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1447
1448 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1450 Custom);
1451
1452 if (VT.getVectorElementType() == MVT::f16 &&
1453 !Subtarget.hasVInstructionsF16()) {
1454 setOperationAction(ISD::BITCAST, VT, Custom);
1455 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1457 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1458 Custom);
1459 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1460 Custom);
1461 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1462 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1463 if (Subtarget.hasStdExtZfhmin()) {
1465 } else {
1466 // We need to custom legalize f16 build vectors if Zfhmin isn't
1467 // available.
1469 }
1470 setOperationAction(ISD::FNEG, VT, Expand);
1471 setOperationAction(ISD::FABS, VT, Expand);
1473 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1474 // Don't promote f16 vector operations to f32 if f32 vector type is
1475 // not legal.
1476 // TODO: could split the f16 vector into two vectors and do promotion.
1477 if (!isTypeLegal(F32VecVT))
1478 continue;
1479 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1480 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1481 continue;
1482 }
1483
1484 if (VT.getVectorElementType() == MVT::bf16) {
1485 setOperationAction(ISD::BITCAST, VT, Custom);
1486 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1487 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1488 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1489 if (Subtarget.hasStdExtZfbfmin()) {
1491 } else {
1492 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1493 // available.
1495 }
1497 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1498 Custom);
1499 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1500 // Don't promote bf16 vector operations to f32 if the f32 vector type is
1501 // not legal.
1502 // TODO: could split the bf16 vector into two vectors and do promotion.
1503 if (!isTypeLegal(F32VecVT))
1504 continue;
1505 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1506 // TODO: Promote VP ops to fp32.
1507 continue;
1508 }
1509
1511 Custom);
1512
1514 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1515 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1516 ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::IS_FPCLASS,
1517 ISD::FMAXIMUM, ISD::FMINIMUM},
1518 VT, Custom);
1519
1520 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1521 ISD::FROUNDEVEN, ISD::FRINT, ISD::LRINT,
1522 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
1523 ISD::FNEARBYINT},
1524 VT, Custom);
1525
1526 setCondCodeAction(VFPCCToExpand, VT, Expand);
1527
1530
1531 setOperationAction(ISD::BITCAST, VT, Custom);
1532
1533 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1534
1535 setOperationAction(FloatingPointVPOps, VT, Custom);
1536
1543 VT, Custom);
1544 }
1545
1546 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1547 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1548 if (Subtarget.is64Bit())
1549 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1550 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1551 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1552 if (Subtarget.hasStdExtZfbfmin())
1553 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
1554 if (Subtarget.hasStdExtFOrZfinx())
1555 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1556 if (Subtarget.hasStdExtDOrZdinx())
1557 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1558 }
1559 }
1560
1561 if (Subtarget.hasStdExtA())
1562 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1563
1564 if (Subtarget.hasForcedAtomics()) {
1565 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1567 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1568 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1569 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1570 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1571 XLenVT, LibCall);
1572 }
1573
1574 if (Subtarget.hasVendorXTHeadMemIdx()) {
1575 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1576 setIndexedLoadAction(im, MVT::i8, Legal);
1577 setIndexedStoreAction(im, MVT::i8, Legal);
1578 setIndexedLoadAction(im, MVT::i16, Legal);
1579 setIndexedStoreAction(im, MVT::i16, Legal);
1580 setIndexedLoadAction(im, MVT::i32, Legal);
1581 setIndexedStoreAction(im, MVT::i32, Legal);
1582
1583 if (Subtarget.is64Bit()) {
1584 setIndexedLoadAction(im, MVT::i64, Legal);
1585 setIndexedStoreAction(im, MVT::i64, Legal);
1586 }
1587 }
1588 }
1589
1590 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1594
1598 }
1599
1600 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1601 if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1602 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1603 ISD::PARTIAL_REDUCE_UMLA,
1604 ISD::PARTIAL_REDUCE_SUMLA};
1605 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1606 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1607 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1608 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1609 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
1610
1611 if (Subtarget.useRVVForFixedLengthVectors()) {
1613 if (VT.getVectorElementType() != MVT::i32 ||
1614 !useRVVForFixedLengthVectorVT(VT))
1615 continue;
1616 ElementCount EC = VT.getVectorElementCount();
1617 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1618 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1619 }
1620 }
1621 }
1622
1623 // Customize load and store operation for bf16 if zfh isn't enabled.
1624 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1625 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1626 setOperationAction(ISD::STORE, MVT::bf16, Custom);
1627 }
1628
1629 // Function alignments.
1630 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
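  // With Zca, 2-byte compressed instructions are available, so a 2-byte
  // function alignment suffices; otherwise instructions are 4 bytes.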
1631 setMinFunctionAlignment(FunctionAlignment);
1632 // Set preferred alignments.
1633 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1634 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1635
1641
1642 if (Subtarget.hasStdExtFOrZfinx())
1643 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM, ISD::FMUL});
1644
1645 if (Subtarget.hasStdExtZbb())
1647
1648 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1649 Subtarget.hasVInstructions())
1651
1652 if (Subtarget.hasStdExtZbkb())
1654
1655 if (Subtarget.hasStdExtFOrZfinx())
1658 if (Subtarget.hasVInstructions())
1660 {ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1661 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1662 ISD::SRL, ISD::SHL, ISD::STORE,
1664 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1668 ISD::VSELECT, ISD::VECREDUCE_ADD});
1669
1670 if (Subtarget.hasVendorXTHeadMemPair())
1671 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1672 if (Subtarget.useRVVForFixedLengthVectors())
1673 setTargetDAGCombine(ISD::BITCAST);
1674
1675 // Disable strict node mutation.
1676 IsStrictFPEnabled = true;
1677 EnableExtLdPromotion = true;
1678
1679 // Let the subtarget decide if a predictable select is more expensive than the
1680 // corresponding branch. This information is used in CGP/SelectOpt to decide
1681 // when to convert selects into branches.
1682 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1683
1684 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1685 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1686
1687 MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1688 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1689 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1690
1692 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1693 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1694
1695 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1696 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1697}
1698
1699EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1700 LLVMContext &Context,
1701 EVT VT) const {
1702 if (!VT.isVector())
1703 return getPointerTy(DL);
1704 if (Subtarget.hasVInstructions() &&
1705 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1706 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1707 return VT.changeVectorElementTypeToInteger();
1708}
1709
1710MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1711 return Subtarget.getXLenVT();
1712}
1713
1714// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1715bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1716 unsigned VF,
1717 bool IsScalable) const {
1718 if (!Subtarget.hasVInstructions())
1719 return true;
1720
1721 if (!IsScalable)
1722 return true;
1723
1724 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1725 return true;
1726
1727 // Don't allow VF=1 if those types aren't legal.
1728 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1729 return true;
1730
1731 // VLEN=32 support is incomplete.
1732 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1733 return true;
1734
1735 // The maximum VF is for the smallest element width with LMUL=8.
1736 // VF must be a power of 2.
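  // For example, with RVVBytesPerBlock = 8 this gives MaxVF = 64, i.e. 64
  // i8 lanes per vscale at LMUL=8.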
1737 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
1738 return VF > MaxVF || !isPowerOf2_32(VF);
1739}
1740
1741bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1742 return !Subtarget.hasVInstructions() ||
1743 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1744}
1745
1746bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1747 const CallInst &I,
1748 MachineFunction &MF,
1749 unsigned Intrinsic) const {
1750 auto &DL = I.getDataLayout();
1751
1752 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1753 bool IsUnitStrided, bool UsePtrVal = false) {
1754 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1755 // We can't use ptrVal if the intrinsic can access memory before the
1756 // pointer. This means we can't use it for strided or indexed intrinsics.
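    // For example, a strided access with a negative stride (or an indexed
    // access with negative offsets) can read below the base pointer, so only
    // the address space is recorded in those cases.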
1757 if (UsePtrVal)
1758 Info.ptrVal = I.getArgOperand(PtrOp);
1759 else
1760 Info.fallbackAddressSpace =
1761 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1762 Type *MemTy;
1763 if (IsStore) {
1764 // Store value is the first operand.
1765 MemTy = I.getArgOperand(0)->getType();
1766 } else {
1767 // Use return type. If it's segment load, return type is a struct.
1768 MemTy = I.getType();
1769 if (MemTy->isStructTy())
1770 MemTy = MemTy->getStructElementType(0);
1771 }
1772 if (!IsUnitStrided)
1773 MemTy = MemTy->getScalarType();
1774
1775 Info.memVT = getValueType(DL, MemTy);
1776 if (MemTy->isTargetExtTy()) {
1777 // RISC-V vector tuple type's alignment type should be its element type.
1778 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1779 MemTy = Type::getIntNTy(
1780 MemTy->getContext(),
1781 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1782 ->getZExtValue());
1783 Info.align = DL.getABITypeAlign(MemTy);
1784 } else {
1785 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
1786 }
1787 Info.size = MemoryLocation::UnknownSize;
1788 Info.flags |=
1790 return true;
1791 };
1792
1793 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1795
1797 switch (Intrinsic) {
1798 default:
1799 return false;
1800 case Intrinsic::riscv_masked_atomicrmw_xchg:
1801 case Intrinsic::riscv_masked_atomicrmw_add:
1802 case Intrinsic::riscv_masked_atomicrmw_sub:
1803 case Intrinsic::riscv_masked_atomicrmw_nand:
1804 case Intrinsic::riscv_masked_atomicrmw_max:
1805 case Intrinsic::riscv_masked_atomicrmw_min:
1806 case Intrinsic::riscv_masked_atomicrmw_umax:
1807 case Intrinsic::riscv_masked_atomicrmw_umin:
1808 case Intrinsic::riscv_masked_cmpxchg:
1809 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
1810 // narrow atomic operation. These will be expanded to an LR/SC loop that
1811 // reads/writes to/from an aligned 4 byte location. And, or, shift, etc.
1812 // will be used to modify the appropriate part of the 4 byte data and
1813 // preserve the rest.
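    // For example, an i8 atomicrmw add becomes an LR.W/SC.W loop on the
    // containing aligned word, with the mask selecting just the addressed
    // byte.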
1814 Info.opc = ISD::INTRINSIC_W_CHAIN;
1815 Info.memVT = MVT::i32;
1816 Info.ptrVal = I.getArgOperand(0);
1817 Info.offset = 0;
1818 Info.align = Align(4);
1821 return true;
1822 case Intrinsic::riscv_seg2_load_mask:
1823 case Intrinsic::riscv_seg3_load_mask:
1824 case Intrinsic::riscv_seg4_load_mask:
1825 case Intrinsic::riscv_seg5_load_mask:
1826 case Intrinsic::riscv_seg6_load_mask:
1827 case Intrinsic::riscv_seg7_load_mask:
1828 case Intrinsic::riscv_seg8_load_mask:
1829 case Intrinsic::riscv_sseg2_load_mask:
1830 case Intrinsic::riscv_sseg3_load_mask:
1831 case Intrinsic::riscv_sseg4_load_mask:
1832 case Intrinsic::riscv_sseg5_load_mask:
1833 case Intrinsic::riscv_sseg6_load_mask:
1834 case Intrinsic::riscv_sseg7_load_mask:
1835 case Intrinsic::riscv_sseg8_load_mask:
1836 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1837 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1838 case Intrinsic::riscv_seg2_store_mask:
1839 case Intrinsic::riscv_seg3_store_mask:
1840 case Intrinsic::riscv_seg4_store_mask:
1841 case Intrinsic::riscv_seg5_store_mask:
1842 case Intrinsic::riscv_seg6_store_mask:
1843 case Intrinsic::riscv_seg7_store_mask:
1844 case Intrinsic::riscv_seg8_store_mask:
1845 // Operands are (vec, ..., vec, ptr, mask, vl)
1846 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1847 /*IsStore*/ true,
1848 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1849 case Intrinsic::riscv_sseg2_store_mask:
1850 case Intrinsic::riscv_sseg3_store_mask:
1851 case Intrinsic::riscv_sseg4_store_mask:
1852 case Intrinsic::riscv_sseg5_store_mask:
1853 case Intrinsic::riscv_sseg6_store_mask:
1854 case Intrinsic::riscv_sseg7_store_mask:
1855 case Intrinsic::riscv_sseg8_store_mask:
1856 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
1857 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1858 /*IsStore*/ true,
1859 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1860 case Intrinsic::riscv_vlm:
1861 return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
1862 /*IsStore*/ false,
1863 /*IsUnitStrided*/ true,
1864 /*UsePtrVal*/ true);
1865 case Intrinsic::riscv_vle:
1866 case Intrinsic::riscv_vle_mask:
1867 case Intrinsic::riscv_vleff:
1868 case Intrinsic::riscv_vleff_mask:
1869 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1870 /*IsStore*/ false,
1871 /*IsUnitStrided*/ true,
1872 /*UsePtrVal*/ true);
1873 case Intrinsic::riscv_vsm:
1874 case Intrinsic::riscv_vse:
1875 case Intrinsic::riscv_vse_mask:
1876 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1877 /*IsStore*/ true,
1878 /*IsUnitStrided*/ true,
1879 /*UsePtrVal*/ true);
1880 case Intrinsic::riscv_vlse:
1881 case Intrinsic::riscv_vlse_mask:
1882 case Intrinsic::riscv_vloxei:
1883 case Intrinsic::riscv_vloxei_mask:
1884 case Intrinsic::riscv_vluxei:
1885 case Intrinsic::riscv_vluxei_mask:
1886 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1887 /*IsStore*/ false,
1888 /*IsUnitStrided*/ false);
1889 case Intrinsic::riscv_vsse:
1890 case Intrinsic::riscv_vsse_mask:
1891 case Intrinsic::riscv_vsoxei:
1892 case Intrinsic::riscv_vsoxei_mask:
1893 case Intrinsic::riscv_vsuxei:
1894 case Intrinsic::riscv_vsuxei_mask:
1895 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1896 /*IsStore*/ true,
1897 /*IsUnitStrided*/ false);
1898 case Intrinsic::riscv_vlseg2:
1899 case Intrinsic::riscv_vlseg3:
1900 case Intrinsic::riscv_vlseg4:
1901 case Intrinsic::riscv_vlseg5:
1902 case Intrinsic::riscv_vlseg6:
1903 case Intrinsic::riscv_vlseg7:
1904 case Intrinsic::riscv_vlseg8:
1905 case Intrinsic::riscv_vlseg2ff:
1906 case Intrinsic::riscv_vlseg3ff:
1907 case Intrinsic::riscv_vlseg4ff:
1908 case Intrinsic::riscv_vlseg5ff:
1909 case Intrinsic::riscv_vlseg6ff:
1910 case Intrinsic::riscv_vlseg7ff:
1911 case Intrinsic::riscv_vlseg8ff:
1912 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1913 /*IsStore*/ false,
1914 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1915 case Intrinsic::riscv_vlseg2_mask:
1916 case Intrinsic::riscv_vlseg3_mask:
1917 case Intrinsic::riscv_vlseg4_mask:
1918 case Intrinsic::riscv_vlseg5_mask:
1919 case Intrinsic::riscv_vlseg6_mask:
1920 case Intrinsic::riscv_vlseg7_mask:
1921 case Intrinsic::riscv_vlseg8_mask:
1922 case Intrinsic::riscv_vlseg2ff_mask:
1923 case Intrinsic::riscv_vlseg3ff_mask:
1924 case Intrinsic::riscv_vlseg4ff_mask:
1925 case Intrinsic::riscv_vlseg5ff_mask:
1926 case Intrinsic::riscv_vlseg6ff_mask:
1927 case Intrinsic::riscv_vlseg7ff_mask:
1928 case Intrinsic::riscv_vlseg8ff_mask:
1929 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1930 /*IsStore*/ false,
1931 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1932 case Intrinsic::riscv_vlsseg2:
1933 case Intrinsic::riscv_vlsseg3:
1934 case Intrinsic::riscv_vlsseg4:
1935 case Intrinsic::riscv_vlsseg5:
1936 case Intrinsic::riscv_vlsseg6:
1937 case Intrinsic::riscv_vlsseg7:
1938 case Intrinsic::riscv_vlsseg8:
1939 case Intrinsic::riscv_vloxseg2:
1940 case Intrinsic::riscv_vloxseg3:
1941 case Intrinsic::riscv_vloxseg4:
1942 case Intrinsic::riscv_vloxseg5:
1943 case Intrinsic::riscv_vloxseg6:
1944 case Intrinsic::riscv_vloxseg7:
1945 case Intrinsic::riscv_vloxseg8:
1946 case Intrinsic::riscv_vluxseg2:
1947 case Intrinsic::riscv_vluxseg3:
1948 case Intrinsic::riscv_vluxseg4:
1949 case Intrinsic::riscv_vluxseg5:
1950 case Intrinsic::riscv_vluxseg6:
1951 case Intrinsic::riscv_vluxseg7:
1952 case Intrinsic::riscv_vluxseg8:
1953 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1954 /*IsStore*/ false,
1955 /*IsUnitStrided*/ false);
1956 case Intrinsic::riscv_vlsseg2_mask:
1957 case Intrinsic::riscv_vlsseg3_mask:
1958 case Intrinsic::riscv_vlsseg4_mask:
1959 case Intrinsic::riscv_vlsseg5_mask:
1960 case Intrinsic::riscv_vlsseg6_mask:
1961 case Intrinsic::riscv_vlsseg7_mask:
1962 case Intrinsic::riscv_vlsseg8_mask:
1963 case Intrinsic::riscv_vloxseg2_mask:
1964 case Intrinsic::riscv_vloxseg3_mask:
1965 case Intrinsic::riscv_vloxseg4_mask:
1966 case Intrinsic::riscv_vloxseg5_mask:
1967 case Intrinsic::riscv_vloxseg6_mask:
1968 case Intrinsic::riscv_vloxseg7_mask:
1969 case Intrinsic::riscv_vloxseg8_mask:
1970 case Intrinsic::riscv_vluxseg2_mask:
1971 case Intrinsic::riscv_vluxseg3_mask:
1972 case Intrinsic::riscv_vluxseg4_mask:
1973 case Intrinsic::riscv_vluxseg5_mask:
1974 case Intrinsic::riscv_vluxseg6_mask:
1975 case Intrinsic::riscv_vluxseg7_mask:
1976 case Intrinsic::riscv_vluxseg8_mask:
1977 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1978 /*IsStore*/ false,
1979 /*IsUnitStrided*/ false);
1980 case Intrinsic::riscv_vsseg2:
1981 case Intrinsic::riscv_vsseg3:
1982 case Intrinsic::riscv_vsseg4:
1983 case Intrinsic::riscv_vsseg5:
1984 case Intrinsic::riscv_vsseg6:
1985 case Intrinsic::riscv_vsseg7:
1986 case Intrinsic::riscv_vsseg8:
1987 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1988 /*IsStore*/ true,
1989 /*IsUnitStrided*/ false);
1990 case Intrinsic::riscv_vsseg2_mask:
1991 case Intrinsic::riscv_vsseg3_mask:
1992 case Intrinsic::riscv_vsseg4_mask:
1993 case Intrinsic::riscv_vsseg5_mask:
1994 case Intrinsic::riscv_vsseg6_mask:
1995 case Intrinsic::riscv_vsseg7_mask:
1996 case Intrinsic::riscv_vsseg8_mask:
1997 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1998 /*IsStore*/ true,
1999 /*IsUnitStrided*/ false);
2000 case Intrinsic::riscv_vssseg2:
2001 case Intrinsic::riscv_vssseg3:
2002 case Intrinsic::riscv_vssseg4:
2003 case Intrinsic::riscv_vssseg5:
2004 case Intrinsic::riscv_vssseg6:
2005 case Intrinsic::riscv_vssseg7:
2006 case Intrinsic::riscv_vssseg8:
2007 case Intrinsic::riscv_vsoxseg2:
2008 case Intrinsic::riscv_vsoxseg3:
2009 case Intrinsic::riscv_vsoxseg4:
2010 case Intrinsic::riscv_vsoxseg5:
2011 case Intrinsic::riscv_vsoxseg6:
2012 case Intrinsic::riscv_vsoxseg7:
2013 case Intrinsic::riscv_vsoxseg8:
2014 case Intrinsic::riscv_vsuxseg2:
2015 case Intrinsic::riscv_vsuxseg3:
2016 case Intrinsic::riscv_vsuxseg4:
2017 case Intrinsic::riscv_vsuxseg5:
2018 case Intrinsic::riscv_vsuxseg6:
2019 case Intrinsic::riscv_vsuxseg7:
2020 case Intrinsic::riscv_vsuxseg8:
2021 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2022 /*IsStore*/ true,
2023 /*IsUnitStrided*/ false);
2024 case Intrinsic::riscv_vssseg2_mask:
2025 case Intrinsic::riscv_vssseg3_mask:
2026 case Intrinsic::riscv_vssseg4_mask:
2027 case Intrinsic::riscv_vssseg5_mask:
2028 case Intrinsic::riscv_vssseg6_mask:
2029 case Intrinsic::riscv_vssseg7_mask:
2030 case Intrinsic::riscv_vssseg8_mask:
2031 case Intrinsic::riscv_vsoxseg2_mask:
2032 case Intrinsic::riscv_vsoxseg3_mask:
2033 case Intrinsic::riscv_vsoxseg4_mask:
2034 case Intrinsic::riscv_vsoxseg5_mask:
2035 case Intrinsic::riscv_vsoxseg6_mask:
2036 case Intrinsic::riscv_vsoxseg7_mask:
2037 case Intrinsic::riscv_vsoxseg8_mask:
2038 case Intrinsic::riscv_vsuxseg2_mask:
2039 case Intrinsic::riscv_vsuxseg3_mask:
2040 case Intrinsic::riscv_vsuxseg4_mask:
2041 case Intrinsic::riscv_vsuxseg5_mask:
2042 case Intrinsic::riscv_vsuxseg6_mask:
2043 case Intrinsic::riscv_vsuxseg7_mask:
2044 case Intrinsic::riscv_vsuxseg8_mask:
2045 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2046 /*IsStore*/ true,
2047 /*IsUnitStrided*/ false);
2048 }
2049}
2050
2052 const AddrMode &AM, Type *Ty,
2053 unsigned AS,
2054 Instruction *I) const {
2055 // No global is ever allowed as a base.
2056 if (AM.BaseGV)
2057 return false;
2058
2059 // None of our addressing modes allows a scalable offset
2060 if (AM.ScalableOffset)
2061 return false;
2062
2063 // RVV instructions only support register addressing.
2064 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2065 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2066
2067 // Require a 12-bit signed offset.
2068 if (!isInt<12>(AM.BaseOffs))
2069 return false;
2070
2071 switch (AM.Scale) {
2072 case 0: // "r+i" or just "i", depending on HasBaseReg.
2073 break;
2074 case 1:
2075 if (!AM.HasBaseReg) // allow "r+i".
2076 break;
2077 return false; // disallow "r+r" or "r+r+i".
2078 default:
2079 return false;
2080 }
2081
2082 return true;
2083}
2084
2086 return isInt<12>(Imm);
2087}
2088
2090 return isInt<12>(Imm);
2091}
2092
2093// On RV32, 64-bit integers are split into their high and low parts and held
2094// in two different registers, so the trunc is free since the low register can
2095// just be used.
2096// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2097// isTruncateFree?
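// For example, an i64 add on RV32 produces its result in a lo/hi register
// pair; a subsequent trunc to i32 just uses the lo register, so no
// instruction is emitted.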
2099 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2100 return false;
2101 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2102 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2103 return (SrcBits == 64 && DestBits == 32);
2104}
2105
2107 // We consider i64->i32 free on RV64 since we have good selection of W
2108 // instructions that make promoting operations back to i64 free in many cases.
2109 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2110 !DstVT.isInteger())
2111 return false;
2112 unsigned SrcBits = SrcVT.getSizeInBits();
2113 unsigned DestBits = DstVT.getSizeInBits();
2114 return (SrcBits == 64 && DestBits == 32);
2115}
2116
2118 EVT SrcVT = Val.getValueType();
2119 // The truncate is free if it can be folded into a vnsrl or vnsra.
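// For example, truncating (srl nxv4i32 X, 16) to nxv4i16 can be selected as a
// single vnsrl.wi, so reporting the truncate as free avoids blocking that
// combine.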
2120 if (Subtarget.hasVInstructions() &&
2121 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2122 SrcVT.isVector() && VT2.isVector()) {
2123 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2124 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2125 if (SrcBits == DestBits * 2) {
2126 return true;
2127 }
2128 }
2129 return TargetLowering::isTruncateFree(Val, VT2);
2130}
2131
2133 // Zexts are free if they can be combined with a load.
2134 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2135 // poorly with type legalization of compares preferring sext.
2136 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2137 EVT MemVT = LD->getMemoryVT();
2138 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2139 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2140 LD->getExtensionType() == ISD::ZEXTLOAD))
2141 return true;
2142 }
2143
2144 return TargetLowering::isZExtFree(Val, VT2);
2145}
2146
2148 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2149}
2150
2152 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2153}
2154
2156 return Subtarget.hasCTZLike();
2157}
2158
2160 return Subtarget.hasCLZLike();
2161}
2162
2164 const Instruction &AndI) const {
2165 // We expect to be able to match a bit extraction instruction if the Zbs
2166 // extension is supported and the mask is a power of two. However, we
2167 // conservatively return false if the mask would fit in an ANDI instruction,
2168 // on the basis that it's possible the sinking+duplication of the AND in
2169 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2170 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2171 if (!Subtarget.hasBEXTILike())
2172 return false;
2173 auto *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2174 if (!Mask)
2175 return false;
2176 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2177}
2178
2180 EVT VT = Y.getValueType();
2181
2182 if (VT.isVector())
2183 return false;
2184
2185 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2186 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2187}
2188
2190 EVT VT = Y.getValueType();
2191
2192 if (!VT.isVector())
2193 return hasAndNotCompare(Y);
2194
2195 return Subtarget.hasStdExtZvkb();
2196}
2197
2199 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2200 if (Subtarget.hasStdExtZbs())
2201 return X.getValueType().isScalarInteger();
2202 auto *C = dyn_cast<ConstantSDNode>(Y);
2203 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2204 if (Subtarget.hasVendorXTHeadBs())
2205 return C != nullptr;
2206 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
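// For example, testing bit 3 of X becomes (and X, 8) plus seqz/snez, and 8
// fits the 12-bit ANDI immediate; bit positions above 10 would not.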
2207 return C && C->getAPIntValue().ule(10);
2208}
2209
2211 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2212 SDValue Y) const {
2213 if (SelectOpcode != ISD::VSELECT)
2214 return false;
2215
2216 // Only enable for rvv.
2217 if (!VT.isVector() || !Subtarget.hasVInstructions())
2218 return false;
2219
2220 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2221 return false;
2222
2223 return true;
2224}
2225
2227 Type *Ty) const {
2228 assert(Ty->isIntegerTy());
2229
2230 unsigned BitSize = Ty->getIntegerBitWidth();
2231 if (BitSize > Subtarget.getXLen())
2232 return false;
2233
2234 // Fast path, assume 32-bit immediates are cheap.
2235 int64_t Val = Imm.getSExtValue();
2236 if (isInt<32>(Val))
2237 return true;
2238
2239 // A constant pool entry may be more aligned than the load we're trying to
2240 // replace. If we don't support unaligned scalar mem, prefer the constant
2241 // pool.
2242 // TODO: Can the caller pass down the alignment?
2243 if (!Subtarget.enableUnalignedScalarMem())
2244 return true;
2245
2246 // Prefer to keep the load if it would require many instructions.
2247 // This uses the same threshold we use for constant pools but doesn't
2248 // check useConstantPoolForLargeInts.
2249 // TODO: Should we keep the load only when we're definitely going to emit a
2250 // constant pool?
2251
2253 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2254}
2255
2259 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2260 SelectionDAG &DAG) const {
2261 // One interesting pattern that we'd want to form is 'bit extract':
2262 // ((1 >> Y) & 1) ==/!= 0
2263 // But we also need to be careful not to try to reverse that fold.
2264
2265 // Is this '((1 >> Y) & 1)'?
2266 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2267 return false; // Keep the 'bit extract' pattern.
2268
2269 // Will this be '((1 >> Y) & 1)' after the transform?
2270 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2271 return true; // Do form the 'bit extract' pattern.
2272
2273 // If 'X' is a constant, and we transform, then we will immediately
2274 // try to undo the fold, thus causing endless combine loop.
2275 // So only do the transform if X is not a constant. This matches the default
2276 // implementation of this function.
2277 return !XC;
2278}
2279
2281 unsigned Opc = VecOp.getOpcode();
2282
2283 // Assume target opcodes can't be scalarized.
2284 // TODO - do we have any exceptions?
2285 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2286 return false;
2287
2288 // If the vector op is not supported, try to convert to scalar.
2289 EVT VecVT = VecOp.getValueType();
2291 return true;
2292
2293 // If the vector op is supported, but the scalar op is not, the transform may
2294 // not be worthwhile.
2295 // Permit a vector binary operation to be converted to a scalar binary
2296 // operation that is custom lowered with an illegal type.
2297 EVT ScalarVT = VecVT.getScalarType();
2298 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2299 isOperationCustom(Opc, ScalarVT);
2300}
2301
2303 const GlobalAddressSDNode *GA) const {
2304 // In order to maximise the opportunity for common subexpression elimination,
2305 // keep a separate ADD node for the global address offset instead of folding
2306 // it in the global address node. Later peephole optimisations may choose to
2307 // fold it back in when profitable.
2308 return false;
2309}
2310
2311// Returns 0-31 if the fli instruction is available for the type and this is
2312// legal FP immediate for the type. Returns -1 otherwise.
2314 if (!Subtarget.hasStdExtZfa())
2315 return -1;
2316
2317 bool IsSupportedVT = false;
2318 if (VT == MVT::f16) {
2319 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2320 } else if (VT == MVT::f32) {
2321 IsSupportedVT = true;
2322 } else if (VT == MVT::f64) {
2323 assert(Subtarget.hasStdExtD() && "Expect D extension");
2324 IsSupportedVT = true;
2325 }
2326
2327 if (!IsSupportedVT)
2328 return -1;
2329
2330 return RISCVLoadFPImm::getLoadFPImm(Imm);
2331}
2332
2334 bool ForCodeSize) const {
2335 bool IsLegalVT = false;
2336 if (VT == MVT::f16)
2337 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2338 else if (VT == MVT::f32)
2339 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2340 else if (VT == MVT::f64)
2341 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2342 else if (VT == MVT::bf16)
2343 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2344
2345 if (!IsLegalVT)
2346 return false;
2347
2348 if (getLegalZfaFPImm(Imm, VT) >= 0)
2349 return true;
2350
2351 // Some constants can be produced by fli+fneg.
2352 if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
2353 return true;
2354
2355 // Cannot create a 64-bit floating-point immediate value for RV32.
2356 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2357 // td can handle +0.0 or -0.0 already.
2358 // -0.0 can be created by fmv + fneg.
2359 return Imm.isZero();
2360 }
2361
2362 // Special case: fmv + fneg
2363 if (Imm.isNegZero())
2364 return true;
2365
2366 // Building an integer and then converting requires a fmv at the end of
2367 // the integer sequence. The fmv is not required for Zfinx.
2368 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2369 const int Cost =
2370 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2371 Subtarget.getXLen(), Subtarget);
2372 return Cost <= FPImmCost;
2373}
2374
2375// TODO: This is very conservative.
2377 unsigned Index) const {
2379 return false;
2380
2381 // Extracts from index 0 are just subreg extracts.
2382 if (Index == 0)
2383 return true;
2384
2385 // Only support extracting a fixed from a fixed vector for now.
2386 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2387 return false;
2388
2389 EVT EltVT = ResVT.getVectorElementType();
2390 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2391
2392 // The smallest type we can slide is i8.
2393 // TODO: We can extract index 0 from a mask vector without a slide.
2394 if (EltVT == MVT::i1)
2395 return false;
2396
2397 unsigned ResElts = ResVT.getVectorNumElements();
2398 unsigned SrcElts = SrcVT.getVectorNumElements();
2399
2400 unsigned MinVLen = Subtarget.getRealMinVLen();
2401 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2402
2403 // If we're extracting only data from the first VLEN bits of the source
2404 // then we can always do this with an m1 vslidedown.vx. Restricting the
2405 // Index ensures we can use a vslidedown.vi.
2406 // TODO: We can generalize this when the exact VLEN is known.
2407 if (Index + ResElts <= MinVLMAX && Index < 31)
2408 return true;
2409
2410 // Conservatively only handle extracting half of a vector.
2411 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2412 // the upper half of a vector until we have more test coverage.
2413 // TODO: For sizes which aren't multiples of VLEN, this may not be
2414 // a cheap extract. However, this case is important in practice for
2415 // shuffled extracts of longer vectors. How should we resolve this?
2416 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2417}
2418
2420 CallingConv::ID CC,
2421 EVT VT) const {
2422 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2423 // We might still end up using a GPR but that will be decided based on ABI.
2424 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2425 !Subtarget.hasStdExtZfhminOrZhinxmin())
2426 return MVT::f32;
2427
2428 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2429
2430 return PartVT;
2431}
2432
2433unsigned
2435 std::optional<MVT> RegisterVT) const {
2436 // Pair inline assembly operand
2437 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2438 *RegisterVT == MVT::Untyped)
2439 return 1;
2440
2441 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2442}
2443
2445 CallingConv::ID CC,
2446 EVT VT) const {
2447 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2448 // We might still end up using a GPR but that will be decided based on ABI.
2449 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2450 !Subtarget.hasStdExtZfhminOrZhinxmin())
2451 return 1;
2452
2453 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2454}
2455
2457 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2458 unsigned &NumIntermediates, MVT &RegisterVT) const {
2460 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2461
2462 return NumRegs;
2463}
2464
2465// Changes the condition code and swaps operands if necessary, so the SetCC
2466// operation matches one of the comparisons supported directly by branches
2467// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2468// with 1/-1.
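// For example, (setgt X, -1) is rewritten to (setge X, 0) and (setlt X, 1) to
// (setge 0, X), both of which map directly onto the BGE branch form.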
2470 ISD::CondCode &CC, SelectionDAG &DAG,
2471 const RISCVSubtarget &Subtarget) {
2472 // If this is a single bit test that can't be handled by ANDI, shift the
2473 // bit to be tested to the MSB and perform a signed compare with 0.
2474 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2475 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2476 isa<ConstantSDNode>(LHS.getOperand(1)) &&
2477 // XAndesPerf supports branch on test bit.
2478 !Subtarget.hasVendorXAndesPerf()) {
2479 uint64_t Mask = LHS.getConstantOperandVal(1);
2480 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2481 unsigned ShAmt = 0;
2482 if (isPowerOf2_64(Mask)) {
2483 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2484 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2485 } else {
2486 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2487 }
2488
2489 LHS = LHS.getOperand(0);
2490 if (ShAmt != 0)
2491 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2492 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2493 return;
2494 }
2495 }
2496
2497 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2498 int64_t C = RHSC->getSExtValue();
2499 switch (CC) {
2500 default: break;
2501 case ISD::SETGT:
2502 // Convert X > -1 to X >= 0.
2503 if (C == -1) {
2504 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2505 CC = ISD::SETGE;
2506 return;
2507 }
2508 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2509 C != INT64_MAX && isInt<5>(C + 1)) {
2510 // We have a conditional move instruction for SETGE but not SETGT.
2511 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit signed immediate.
2512 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2513 CC = ISD::SETGE;
2514 return;
2515 }
2516 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
2517 // We have a branch immediate instruction for SETGE but not SETGT.
2518 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2519 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2520 CC = ISD::SETGE;
2521 return;
2522 }
2523 break;
2524 case ISD::SETLT:
2525 // Convert X < 1 to 0 >= X.
2526 if (C == 1) {
2527 RHS = LHS;
2528 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2529 CC = ISD::SETGE;
2530 return;
2531 }
2532 break;
2533 case ISD::SETUGT:
2534 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2535 C != INT64_MAX && isUInt<5>(C + 1)) {
2536 // We have a conditional move instruction for SETUGE but not SETUGT.
2537 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit unsigned immediate.
2538 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2539 CC = ISD::SETUGE;
2540 return;
2541 }
2542 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
2543 // We have a branch immediate instruction for SETUGE but not SETUGT.
2544 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
2545 // immediate.
2546 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2547 CC = ISD::SETUGE;
2548 return;
2549 }
2550 break;
2551 }
2552 }
2553
2554 switch (CC) {
2555 default:
2556 break;
2557 case ISD::SETGT:
2558 case ISD::SETLE:
2559 case ISD::SETUGT:
2560 case ISD::SETULE:
2561 CC = ISD::getSetCCSwappedOperands(CC);
2562 std::swap(LHS, RHS);
2563 break;
2564 }
2565}
2566
2568 if (VT.isRISCVVectorTuple()) {
2569 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2570 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2571 return RISCVVType::LMUL_F8;
2572 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2573 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2574 return RISCVVType::LMUL_F4;
2575 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2576 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2577 return RISCVVType::LMUL_F2;
2578 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2579 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2580 return RISCVVType::LMUL_1;
2581 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2582 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2583 return RISCVVType::LMUL_2;
2584 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2585 return RISCVVType::LMUL_4;
2586 llvm_unreachable("Invalid vector tuple type LMUL.");
2587 }
2588
2589 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2590 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2591 if (VT.getVectorElementType() == MVT::i1)
2592 KnownSize *= 8;
2593
2594 switch (KnownSize) {
2595 default:
2596 llvm_unreachable("Invalid LMUL.");
2597 case 8:
2598 return RISCVVType::LMUL_F8;
2599 case 16:
2600 return RISCVVType::LMUL_F4;
2601 case 32:
2602 return RISCVVType::LMUL_F2;
2603 case 64:
2604 return RISCVVType::LMUL_1;
2605 case 128:
2606 return RISCVVType::LMUL_2;
2607 case 256:
2608 return RISCVVType::LMUL_4;
2609 case 512:
2610 return RISCVVType::LMUL_8;
2611 }
2612}
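// Example: with RISCV::RVVBitsPerBlock == 64, getLMUL(MVT::nxv4i32) sees a
// known minimum size of 128 bits and returns LMUL_2, while
// getLMUL(MVT::nxv1i32) sees 32 bits and returns LMUL_F2.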
2613
2615 switch (LMul) {
2616 default:
2617 llvm_unreachable("Invalid LMUL.");
2618 case RISCVVType::LMUL_F8:
2619 case RISCVVType::LMUL_F4:
2620 case RISCVVType::LMUL_F2:
2621 case RISCVVType::LMUL_1:
2622 return RISCV::VRRegClassID;
2623 case RISCVVType::LMUL_2:
2624 return RISCV::VRM2RegClassID;
2625 case RISCVVType::LMUL_4:
2626 return RISCV::VRM4RegClassID;
2627 case RISCVVType::LMUL_8:
2628 return RISCV::VRM8RegClassID;
2629 }
2630}
2631
2632unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2633 RISCVVType::VLMUL LMUL = getLMUL(VT);
2634 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2635 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2636 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2637 "Unexpected subreg numbering");
2638 return RISCV::sub_vrm1_0 + Index;
2639 }
2640 if (LMUL == RISCVVType::LMUL_2) {
2641 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2642 "Unexpected subreg numbering");
2643 return RISCV::sub_vrm2_0 + Index;
2644 }
2645 if (LMUL == RISCVVType::LMUL_4) {
2646 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2647 "Unexpected subreg numbering");
2648 return RISCV::sub_vrm4_0 + Index;
2649 }
2650 llvm_unreachable("Invalid vector type.");
2651}
2652
2654 if (VT.isRISCVVectorTuple()) {
2655 unsigned NF = VT.getRISCVVectorTupleNumFields();
2656 unsigned RegsPerField =
2657 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2658 (NF * RISCV::RVVBitsPerBlock));
2659 switch (RegsPerField) {
2660 case 1:
2661 if (NF == 2)
2662 return RISCV::VRN2M1RegClassID;
2663 if (NF == 3)
2664 return RISCV::VRN3M1RegClassID;
2665 if (NF == 4)
2666 return RISCV::VRN4M1RegClassID;
2667 if (NF == 5)
2668 return RISCV::VRN5M1RegClassID;
2669 if (NF == 6)
2670 return RISCV::VRN6M1RegClassID;
2671 if (NF == 7)
2672 return RISCV::VRN7M1RegClassID;
2673 if (NF == 8)
2674 return RISCV::VRN8M1RegClassID;
2675 break;
2676 case 2:
2677 if (NF == 2)
2678 return RISCV::VRN2M2RegClassID;
2679 if (NF == 3)
2680 return RISCV::VRN3M2RegClassID;
2681 if (NF == 4)
2682 return RISCV::VRN4M2RegClassID;
2683 break;
2684 case 4:
2685 assert(NF == 2);
2686 return RISCV::VRN2M4RegClassID;
2687 default:
2688 break;
2689 }
2690 llvm_unreachable("Invalid vector tuple type RegClass.");
2691 }
2692
2693 if (VT.getVectorElementType() == MVT::i1)
2694 return RISCV::VRRegClassID;
2695 return getRegClassIDForLMUL(getLMUL(VT));
2696}
2697
2698// Attempt to decompose a subvector insert/extract between VecVT and
2699// SubVecVT via subregister indices. Returns the subregister index that
2700// can perform the subvector insert/extract with the given element index, as
2701// well as the index corresponding to any leftover subvectors that must be
2702// further inserted/extracted within the register class for SubVecVT.
2703std::pair<unsigned, unsigned>
2705 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2706 const RISCVRegisterInfo *TRI) {
2707 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2708 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2709 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2710 "Register classes not ordered");
2711 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2712 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2713
2714 // If VecVT is a vector tuple type, either it is a tuple type with the same
2715 // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2716 if (VecVT.isRISCVVectorTuple()) {
2717 if (VecRegClassID == SubRegClassID)
2718 return {RISCV::NoSubRegister, 0};
2719
2720 assert(SubVecVT.isScalableVector() &&
2721 "Only allow scalable vector subvector.");
2722 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2723 "Invalid vector tuple insert/extract for vector and subvector with "
2724 "different LMUL.");
2725 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2726 }
2727
2728 // Try to compose a subregister index that takes us from the incoming
2729 // LMUL>1 register class down to the outgoing one. At each step we halve
2730 // the LMUL:
2731 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2732 // Note that this is not guaranteed to find a subregister index, such as
2733 // when we are extracting from one VR type to another.
2734 unsigned SubRegIdx = RISCV::NoSubRegister;
2735 for (const unsigned RCID :
2736 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2737 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2738 VecVT = VecVT.getHalfNumVectorElementsVT();
2739 bool IsHi =
2740 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2741 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2742 getSubregIndexByMVT(VecVT, IsHi));
2743 if (IsHi)
2744 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2745 }
2746 return {SubRegIdx, InsertExtractIdx};
2747}
2748
2749// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2750// stores for those types.
2751bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2752 return !Subtarget.useRVVForFixedLengthVectors() ||
2753 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2754}
2755
2757 if (!ScalarTy.isSimple())
2758 return false;
2759 switch (ScalarTy.getSimpleVT().SimpleTy) {
2760 case MVT::iPTR:
2761 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2762 case MVT::i8:
2763 case MVT::i16:
2764 case MVT::i32:
2765 return Subtarget.hasVInstructions();
2766 case MVT::i64:
2767 return Subtarget.hasVInstructionsI64();
2768 case MVT::f16:
2769 return Subtarget.hasVInstructionsF16Minimal();
2770 case MVT::bf16:
2771 return Subtarget.hasVInstructionsBF16Minimal();
2772 case MVT::f32:
2773 return Subtarget.hasVInstructionsF32();
2774 case MVT::f64:
2775 return Subtarget.hasVInstructionsF64();
2776 default:
2777 return false;
2778 }
2779}
2780
2781
2783 return NumRepeatedDivisors;
2784}
2785
2787 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2788 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2789 "Unexpected opcode");
2790 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2791 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2793 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2794 if (!II)
2795 return SDValue();
2796 return Op.getOperand(II->VLOperand + 1 + HasChain);
2797}
2798
2800 const RISCVSubtarget &Subtarget) {
2801 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2802 if (!Subtarget.useRVVForFixedLengthVectors())
2803 return false;
2804
2805 // We only support a set of vector types with a consistent maximum fixed size
2806 // across all supported vector element types to avoid legalization issues.
2807 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2808 // fixed-length vector type we support is 1024 bytes.
2809 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
2810 return false;
2811
2812 unsigned MinVLen = Subtarget.getRealMinVLen();
2813
2814 MVT EltVT = VT.getVectorElementType();
2815
2816 // Don't use RVV for vectors we cannot scalarize if required.
2817 switch (EltVT.SimpleTy) {
2818 // i1 is supported but has different rules.
2819 default:
2820 return false;
2821 case MVT::i1:
2822 // Masks can only use a single register.
2823 if (VT.getVectorNumElements() > MinVLen)
2824 return false;
2825 MinVLen /= 8;
2826 break;
2827 case MVT::i8:
2828 case MVT::i16:
2829 case MVT::i32:
2830 break;
2831 case MVT::i64:
2832 if (!Subtarget.hasVInstructionsI64())
2833 return false;
2834 break;
2835 case MVT::f16:
2836 if (!Subtarget.hasVInstructionsF16Minimal())
2837 return false;
2838 break;
2839 case MVT::bf16:
2840 if (!Subtarget.hasVInstructionsBF16Minimal())
2841 return false;
2842 break;
2843 case MVT::f32:
2844 if (!Subtarget.hasVInstructionsF32())
2845 return false;
2846 break;
2847 case MVT::f64:
2848 if (!Subtarget.hasVInstructionsF64())
2849 return false;
2850 break;
2851 }
2852
2853 // Reject elements larger than ELEN.
2854 if (EltVT.getSizeInBits() > Subtarget.getELen())
2855 return false;
2856
2857 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2858 // Don't use RVV for types that don't fit.
2859 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2860 return false;
2861
2862 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2863 // the base fixed length RVV support in place.
2864 if (!VT.isPow2VectorType())
2865 return false;
2866
2867 return true;
2868}
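// Illustrative examples (assuming VLEN >= 128 and an LMUL cap of 8):
// v32i32 (1024 bits) is accepted since it needs LMUL 8 at VLEN=128, while
// v3i32 is rejected because it is not a power-of-two vector type.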
2869
2870bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2871 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2872}
2873
2874// Return the largest legal scalable vector type that matches VT's element type.
2876 const RISCVSubtarget &Subtarget) {
2877 // This may be called before legal types are setup.
2878 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2879 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2880 "Expected legal fixed length vector!");
2881
2882 unsigned MinVLen = Subtarget.getRealMinVLen();
2883 unsigned MaxELen = Subtarget.getELen();
2884
2885 MVT EltVT = VT.getVectorElementType();
2886 switch (EltVT.SimpleTy) {
2887 default:
2888 llvm_unreachable("unexpected element type for RVV container");
2889 case MVT::i1:
2890 case MVT::i8:
2891 case MVT::i16:
2892 case MVT::i32:
2893 case MVT::i64:
2894 case MVT::bf16:
2895 case MVT::f16:
2896 case MVT::f32:
2897 case MVT::f64: {
2898 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2899 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2900 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2901 unsigned NumElts =
2902 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2903 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2904 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2905 return MVT::getScalableVectorVT(EltVT, NumElts);
2906 }
2907 }
2908}
2909
2911 const RISCVSubtarget &Subtarget) {
2913 Subtarget);
2914}
2915
2917 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2918}
2919
2920// Grow V to consume an entire RVV register.
2922 const RISCVSubtarget &Subtarget) {
2923 assert(VT.isScalableVector() &&
2924 "Expected to convert into a scalable vector!");
2925 assert(V.getValueType().isFixedLengthVector() &&
2926 "Expected a fixed length vector operand!");
2927 SDLoc DL(V);
2928 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
2929}
2930
2931// Shrink V so it's just big enough to maintain a VT's worth of data.
2933 const RISCVSubtarget &Subtarget) {
2935 "Expected to convert into a fixed length vector!");
2936 assert(V.getValueType().isScalableVector() &&
2937 "Expected a scalable vector operand!");
2938 SDLoc DL(V);
2939 return DAG.getExtractSubvector(DL, VT, V, 0);
2940}
2941
2942/// Return the type of the mask type suitable for masking the provided
2943/// vector type. This is simply an i1 element type vector of the same
2944/// (possibly scalable) length.
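/// For example, the mask type for nxv4i32 is nxv4i1, and for v8i16 it is v8i1.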
2945static MVT getMaskTypeFor(MVT VecVT) {
2946 assert(VecVT.isVector());
2948 return MVT::getVectorVT(MVT::i1, EC);
2949}
2950
2951/// Creates an all ones mask suitable for masking a vector of type VecTy with
2952 /// vector length VL.
2953static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2954 SelectionDAG &DAG) {
2955 MVT MaskVT = getMaskTypeFor(VecVT);
2956 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2957}
2958
2959static std::pair<SDValue, SDValue>
2961 const RISCVSubtarget &Subtarget) {
2962 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2963 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2964 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2965 return {Mask, VL};
2966}
2967
2968static std::pair<SDValue, SDValue>
2969getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2970 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2971 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2972 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2973 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2974 return {Mask, VL};
2975}
2976
2977// Gets the two common "VL" operands: an all-ones mask and the vector length.
2978 // VecVT is a vector type, either fixed-length or scalable. If VecVT is
2979 // fixed-length, ContainerVT is the scalable vector type it is contained in;
2980 // if VecVT is scalable, ContainerVT should be the same as VecVT.
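// For a fixed-length vector the VL returned here is simply the constant
// element count; for a scalable vector it is the X0 register, which
// downstream lowering interprets as VLMAX.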
2981static std::pair<SDValue, SDValue>
2982getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2983 const RISCVSubtarget &Subtarget) {
2984 if (VecVT.isFixedLengthVector())
2985 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2986 Subtarget);
2987 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2988 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2989}
2990
2992 SelectionDAG &DAG) const {
2993 assert(VecVT.isScalableVector() && "Expected scalable vector");
2994 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2995 VecVT.getVectorElementCount());
2996}
2997
2998std::pair<unsigned, unsigned>
3000 const RISCVSubtarget &Subtarget) {
3001 assert(VecVT.isScalableVector() && "Expected scalable vector");
3002
3003 unsigned EltSize = VecVT.getScalarSizeInBits();
3004 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
3005
3006 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
3007 unsigned MaxVLMAX =
3008 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
3009
3010 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
3011 unsigned MinVLMAX =
3012 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
3013
3014 return std::make_pair(MinVLMAX, MaxVLMAX);
3015}
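// Worked example (assuming the usual computeVLMAX formula (VLEN / SEW) * LMUL):
// for nxv4i32 (SEW=32, LMUL=2) on a subtarget whose real VLEN lies in
// [128, 512], MinVLMAX is 8 and MaxVLMAX is 32.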
3016
3017// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
3018// of either is (currently) supported. This can get us into an infinite loop
3019// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
3020// as a ..., etc.
3021// Until either (or both) of these can reliably lower any node, reporting that
3022// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
3023// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
3024// which is not desirable.
3026 EVT VT, unsigned DefinedValues) const {
3027 return false;
3028}
3029
3031 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
3032 // implementation-defined.
3033 if (!VT.isVector())
3035 unsigned DLenFactor = Subtarget.getDLenFactor();
3036 unsigned Cost;
3037 if (VT.isScalableVector()) {
3038 unsigned LMul;
3039 bool Fractional;
3040 std::tie(LMul, Fractional) =
3042 if (Fractional)
3043 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
3044 else
3045 Cost = (LMul * DLenFactor);
3046 } else {
3047 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
3048 }
3049 return Cost;
3050}
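// Example: on a subtarget where DLEN is VLEN/2 (DLenFactor == 2), getLMULCost
// returns 4 * 2 = 8 for an LMUL_4 scalable type, whereas an LMUL_F2 fractional
// type costs 2 / 2 = 1.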
3051
3052
3053/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
3054/// may be quadratic in the number of vreg implied by LMUL, and is assumed to
3055/// be by default. VRGatherCostModel reflects available options. Note that
3056 /// the operands (index and possibly mask) are handled separately.
3058 auto LMULCost = getLMULCost(VT);
3059 bool Log2CostModel =
3060 Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
3061 if (Log2CostModel && LMULCost.isValid()) {
3062 unsigned Log = Log2_64(LMULCost.getValue());
3063 if (Log > 0)
3064 return LMULCost * Log;
3065 }
3066 return LMULCost * LMULCost;
3067}
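// Example (assuming DLenFactor == 1): for an LMUL_4 type the default quadratic
// model returns 4 * 4 = 16, while the NLog2N model returns 4 * log2(4) = 8.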
3068
3069/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3070/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3071/// or may track the vrgather.vv cost. It is implementation-dependent.
3075
3076/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3077/// for the type VT. (This does not cover the vslide1up or vslide1down
3078/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3079/// or may track the vrgather.vv cost. It is implementation-dependent.
3083
3084/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3085/// for the type VT. (This does not cover the vslide1up or vslide1down
3086/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3087/// or may track the vrgather.vv cost. It is implementation-dependent.
3091
3093 const RISCVSubtarget &Subtarget) {
3094 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3095 // bf16 conversions are always promoted to f32.
3096 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3097 Op.getValueType() == MVT::bf16) {
3098 bool IsStrict = Op->isStrictFPOpcode();
3099
3100 SDLoc DL(Op);
3101 if (IsStrict) {
3102 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
3103 {Op.getOperand(0), Op.getOperand(1)});
3104 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
3105 {Op.getValueType(), MVT::Other},
3106 {Val.getValue(1), Val.getValue(0),
3107 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
3108 }
3109 return DAG.getNode(
3110 ISD::FP_ROUND, DL, Op.getValueType(),
3111 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
3112 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3113 }
3114
3115 // Other operations are legal.
3116 return Op;
3117}
3118
3120 const RISCVSubtarget &Subtarget) {
3121 // RISC-V FP-to-int conversions saturate to the destination register size, but
3122 // don't produce 0 for nan. We can use a conversion instruction and fix the
3123 // nan case with a compare and a select.
3124 SDValue Src = Op.getOperand(0);
3125
3126 MVT DstVT = Op.getSimpleValueType();
3127 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3128
3129 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3130
3131 if (!DstVT.isVector()) {
3132 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3133 // the result.
3134 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3135 Src.getValueType() == MVT::bf16) {
3136 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3137 }
3138
3139 unsigned Opc;
3140 if (SatVT == DstVT)
3141 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3142 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3143 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3144 else
3145 return SDValue();
3146 // FIXME: Support other SatVTs by clamping before or after the conversion.
3147
3148 SDLoc DL(Op);
3149 SDValue FpToInt = DAG.getNode(
3150 Opc, DL, DstVT, Src,
3152
3153 if (Opc == RISCVISD::FCVT_WU_RV64)
3154 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3155
3156 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3157 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3159 }
3160
3161 // Vectors.
3162
3163 MVT DstEltVT = DstVT.getVectorElementType();
3164 MVT SrcVT = Src.getSimpleValueType();
3165 MVT SrcEltVT = SrcVT.getVectorElementType();
3166 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3167 unsigned DstEltSize = DstEltVT.getSizeInBits();
3168
3169 // Only handle saturating to the destination type.
3170 if (SatVT != DstEltVT)
3171 return SDValue();
3172
3173 MVT DstContainerVT = DstVT;
3174 MVT SrcContainerVT = SrcVT;
3175 if (DstVT.isFixedLengthVector()) {
3176 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3177 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3178 assert(DstContainerVT.getVectorElementCount() ==
3179 SrcContainerVT.getVectorElementCount() &&
3180 "Expected same element count");
3181 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3182 }
3183
3184 SDLoc DL(Op);
3185
3186 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3187
3188 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3189 {Src, Src, DAG.getCondCode(ISD::SETNE),
3190 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3191
3192 // Need to widen by more than 1 step, promote the FP type, then do a widening
3193 // convert.
3194 if (DstEltSize > (2 * SrcEltSize)) {
3195 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3196 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3197 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3198 }
3199
3200 MVT CvtContainerVT = DstContainerVT;
3201 MVT CvtEltVT = DstEltVT;
3202 if (SrcEltSize > (2 * DstEltSize)) {
3203 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3204 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3205 }
3206
3207 unsigned RVVOpc =
3208 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3209 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3210
3211 while (CvtContainerVT != DstContainerVT) {
3212 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3213 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3214 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3215 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3216 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3217 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3218 }
3219
3220 SDValue SplatZero = DAG.getNode(
3221 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3222 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3223 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3224 Res, DAG.getUNDEF(DstContainerVT), VL);
3225
3226 if (DstVT.isFixedLengthVector())
3227 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3228
3229 return Res;
3230}
3231
3233 const RISCVSubtarget &Subtarget) {
3234 bool IsStrict = Op->isStrictFPOpcode();
3235 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3236
3237 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3238 // bf16 conversions are always promoted to f32.
3239 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3240 SrcVal.getValueType() == MVT::bf16) {
3241 SDLoc DL(Op);
3242 if (IsStrict) {
3243 SDValue Ext =
3244 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3245 {Op.getOperand(0), SrcVal});
3246 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3247 {Ext.getValue(1), Ext.getValue(0)});
3248 }
3249 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3250 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3251 }
3252
3253 // Other operations are legal.
3254 return Op;
3255}
3256
3258 switch (Opc) {
3259 case ISD::FROUNDEVEN:
3261 case ISD::VP_FROUNDEVEN:
3262 return RISCVFPRndMode::RNE;
3263 case ISD::FTRUNC:
3264 case ISD::STRICT_FTRUNC:
3265 case ISD::VP_FROUNDTOZERO:
3266 return RISCVFPRndMode::RTZ;
3267 case ISD::FFLOOR:
3268 case ISD::STRICT_FFLOOR:
3269 case ISD::VP_FFLOOR:
3270 return RISCVFPRndMode::RDN;
3271 case ISD::FCEIL:
3272 case ISD::STRICT_FCEIL:
3273 case ISD::VP_FCEIL:
3274 return RISCVFPRndMode::RUP;
3275 case ISD::FROUND:
3276 case ISD::LROUND:
3277 case ISD::LLROUND:
3278 case ISD::STRICT_FROUND:
3279 case ISD::STRICT_LROUND:
3281 case ISD::VP_FROUND:
3282 return RISCVFPRndMode::RMM;
3283 case ISD::FRINT:
3284 case ISD::LRINT:
3285 case ISD::LLRINT:
3286 case ISD::STRICT_FRINT:
3287 case ISD::STRICT_LRINT:
3288 case ISD::STRICT_LLRINT:
3289 case ISD::VP_FRINT:
3290 case ISD::VP_LRINT:
3291 case ISD::VP_LLRINT:
3292 return RISCVFPRndMode::DYN;
3293 }
3294
3296}
3297
3298// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3299// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3300// the integer domain and back. Taking care to avoid converting values that are
3301// nan or already correct.
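// The sequence used below is roughly: take |Src|, compare it against
// 2^(precision-1) (above which every value already has no fractional part) to
// build a mask that excludes NaNs and such large values, convert the remaining
// lanes to integer with the requested rounding mode, convert back to FP, and
// copy the original sign back so that -0.0 is preserved.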
3302static SDValue
3304 const RISCVSubtarget &Subtarget) {
3305 MVT VT = Op.getSimpleValueType();
3306 assert(VT.isVector() && "Unexpected type");
3307
3308 SDLoc DL(Op);
3309
3310 SDValue Src = Op.getOperand(0);
3311
3312 // Freeze the source since we are increasing the number of uses.
3313 Src = DAG.getFreeze(Src);
3314
3315 MVT ContainerVT = VT;
3316 if (VT.isFixedLengthVector()) {
3317 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3318 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3319 }
3320
3321 SDValue Mask, VL;
3322 if (Op->isVPOpcode()) {
3323 Mask = Op.getOperand(1);
3324 if (VT.isFixedLengthVector())
3325 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3326 Subtarget);
3327 VL = Op.getOperand(2);
3328 } else {
3329 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3330 }
3331
3332 // We do the conversion on the absolute value and fix the sign at the end.
3333 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3334
3335 // Determine the largest integer that can be represented exactly. This and
3336 // values larger than it don't have any fractional bits so don't need to
3337 // be converted.
3338 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3339 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3340 APFloat MaxVal = APFloat(FltSem);
3341 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3342 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3343 SDValue MaxValNode =
3344 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3345 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3346 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3347
3348 // If abs(Src) was larger than MaxVal or nan, keep it.
3349 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3350 Mask =
3351 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3352 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3353 Mask, Mask, VL});
3354
3355 // Truncate to integer and convert back to FP.
3356 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3357 MVT XLenVT = Subtarget.getXLenVT();
3358 SDValue Truncated;
3359
3360 switch (Op.getOpcode()) {
3361 default:
3362 llvm_unreachable("Unexpected opcode");
3363 case ISD::FRINT:
3364 case ISD::VP_FRINT:
3365 case ISD::FCEIL:
3366 case ISD::VP_FCEIL:
3367 case ISD::FFLOOR:
3368 case ISD::VP_FFLOOR:
3369 case ISD::FROUND:
3370 case ISD::FROUNDEVEN:
3371 case ISD::VP_FROUND:
3372 case ISD::VP_FROUNDEVEN:
3373 case ISD::VP_FROUNDTOZERO: {
3376 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3377 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3378 break;
3379 }
3380 case ISD::FTRUNC:
3381 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3382 Mask, VL);
3383 break;
3384 case ISD::FNEARBYINT:
3385 case ISD::VP_FNEARBYINT:
3386 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3387 Mask, VL);
3388 break;
3389 }
3390
3391 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3392 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3393 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3394 Mask, VL);
3395
3396 // Restore the original sign so that -0.0 is preserved.
3397 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3398 Src, Src, Mask, VL);
3399
3400 if (!VT.isFixedLengthVector())
3401 return Truncated;
3402
3403 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3404}
3405
3406// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3407 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
3408 // qNaNs and converting the new source to integer and back to FP.
3409static SDValue
3411 const RISCVSubtarget &Subtarget) {
3412 SDLoc DL(Op);
3413 MVT VT = Op.getSimpleValueType();
3414 SDValue Chain = Op.getOperand(0);
3415 SDValue Src = Op.getOperand(1);
3416
3417 MVT ContainerVT = VT;
3418 if (VT.isFixedLengthVector()) {
3419 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3420 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3421 }
3422
3423 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3424
3425 // Freeze the source since we are increasing the number of uses.
3426 Src = DAG.getFreeze(Src);
3427
3428 // Convert sNaN to qNaN by computing x + x for every unordered element x in Src.
3429 MVT MaskVT = Mask.getSimpleValueType();
3430 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3431 DAG.getVTList(MaskVT, MVT::Other),
3432 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3433 DAG.getUNDEF(MaskVT), Mask, VL});
3434 Chain = Unorder.getValue(1);
3435 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3436 DAG.getVTList(ContainerVT, MVT::Other),
3437 {Chain, Src, Src, Src, Unorder, VL});
3438 Chain = Src.getValue(1);
3439
3440 // We do the conversion on the absolute value and fix the sign at the end.
3441 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3442
3443 // Determine the largest integer that can be represented exactly. This and
3444 // values larger than it don't have any fractional bits so don't need to
3445 // be converted.
3446 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3447 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3448 APFloat MaxVal = APFloat(FltSem);
3449 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3450 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3451 SDValue MaxValNode =
3452 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3453 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3454 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3455
3456 // If abs(Src) was larger than MaxVal or nan, keep it.
3457 Mask = DAG.getNode(
3458 RISCVISD::SETCC_VL, DL, MaskVT,
3459 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3460
3461 // Truncate to integer and convert back to FP.
3462 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3463 MVT XLenVT = Subtarget.getXLenVT();
3464 SDValue Truncated;
3465
3466 switch (Op.getOpcode()) {
3467 default:
3468 llvm_unreachable("Unexpected opcode");
3469 case ISD::STRICT_FCEIL:
3470 case ISD::STRICT_FFLOOR:
3471 case ISD::STRICT_FROUND:
3475 Truncated = DAG.getNode(
3476 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3477 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3478 break;
3479 }
3480 case ISD::STRICT_FTRUNC:
3481 Truncated =
3482 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3483 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3484 break;
3486 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3487 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3488 Mask, VL);
3489 break;
3490 }
3491 Chain = Truncated.getValue(1);
3492
3493 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3494 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3495 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3496 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3497 Truncated, Mask, VL);
3498 Chain = Truncated.getValue(1);
3499 }
3500
3501 // Restore the original sign so that -0.0 is preserved.
3502 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3503 Src, Src, Mask, VL);
3504
3505 if (VT.isFixedLengthVector())
3506 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3507 return DAG.getMergeValues({Truncated, Chain}, DL);
3508}
3509
3510static SDValue
3512 const RISCVSubtarget &Subtarget) {
3513 MVT VT = Op.getSimpleValueType();
3514 if (VT.isVector())
3515 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3516
3517 if (DAG.shouldOptForSize())
3518 return SDValue();
3519
3520 SDLoc DL(Op);
3521 SDValue Src = Op.getOperand(0);
3522
3523 // Create an integer the size of the mantissa with the MSB set. This and all
3524 // values larger than it don't have any fractional bits so don't need to be
3525 // converted.
3526 const fltSemantics &FltSem = VT.getFltSemantics();
3527 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3528 APFloat MaxVal = APFloat(FltSem);
3529 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3530 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3531 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3532
3534 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3535 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3536}
3537
3538// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3540 const RISCVSubtarget &Subtarget) {
3541 SDLoc DL(Op);
3542 MVT DstVT = Op.getSimpleValueType();
3543 SDValue Src = Op.getOperand(0);
3544 MVT SrcVT = Src.getSimpleValueType();
3545 assert(SrcVT.isVector() && DstVT.isVector() &&
3546 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3547 "Unexpected type");
3548
3549 MVT DstContainerVT = DstVT;
3550 MVT SrcContainerVT = SrcVT;
3551
3552 if (DstVT.isFixedLengthVector()) {
3553 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3554 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3555 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3556 }
3557
3558 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3559
3560 // [b]f16 -> f32
3561 MVT SrcElemType = SrcVT.getVectorElementType();
3562 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3563 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3564 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3565 }
3566
3567 SDValue Res =
3568 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3569 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3570 Subtarget.getXLenVT()),
3571 VL);
3572
3573 if (!DstVT.isFixedLengthVector())
3574 return Res;
3575
3576 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3577}
3578
3579static SDValue
3580 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3581               const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3582               SDValue Offset, SDValue Mask, SDValue VL,
3583               unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3584   if (Passthru.isUndef())
3585     Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3586 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3587 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3588 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3589}
3590
3591static SDValue
3592getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3593 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3594             SDValue VL,
3595             unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3596   if (Passthru.isUndef())
3597     Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3598 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3599 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3600 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3601}
3602
3603 struct VIDSequence {
3604   int64_t StepNumerator;
3605   unsigned StepDenominator;
3606   int64_t Addend;
3607};
3608
3609static std::optional<APInt> getExactInteger(const APFloat &APF,
3611 // We will use a SINT_TO_FP to materialize this constant so we should use a
3612 // signed APSInt here.
3613 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3614 // We use an arbitrary rounding mode here. If a floating-point is an exact
3615 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3616 // the rounding mode changes the output value, then it is not an exact
3617 // integer.
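  // For example, 3.0 converts to 3 with IsExact == true under any rounding
  // mode, whereas 2.5 yields IsExact == false and is rejected below.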
3618   RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3619   bool IsExact;
3620 // If it is out of signed integer range, it will return an invalid operation.
3621 // If it is not an exact integer, IsExact is false.
3622 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3624 !IsExact)
3625 return std::nullopt;
3626 return ValInt.extractBits(BitWidth, 0);
3627}
3628
3629// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3630// to the (non-zero) step S and start value X. This can be then lowered as the
3631// RVV sequence (VID * S) + X, for example.
3632// The step S is represented as an integer numerator divided by a positive
3633// denominator. Note that the implementation currently only identifies
3634// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3635// cannot detect 2/3, for example.
3636// Note that this method will also match potentially unappealing index
3637 // sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3638// determine whether this is worth generating code for.
3639//
3640// EltSizeInBits is the size of the type that the sequence will be calculated
3641// in, i.e. SEW for build_vectors or XLEN for address calculations.
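// A few illustrative examples of matched sequences:
//   <0, 2, 4, 6> -> StepNumerator 2,  StepDenominator 1, Addend 0
//   <1, 1, 2, 2> -> StepNumerator 1,  StepDenominator 2, Addend 1
//   <5, 4, 3, 2> -> StepNumerator -1, StepDenominator 1, Addend 5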
3642static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3643 unsigned EltSizeInBits) {
3644 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3645   if (!cast<BuildVectorSDNode>(Op)->isConstant())
3646     return std::nullopt;
3647 bool IsInteger = Op.getValueType().isInteger();
3648
3649 std::optional<unsigned> SeqStepDenom;
3650 std::optional<APInt> SeqStepNum;
3651 std::optional<APInt> SeqAddend;
3652 std::optional<std::pair<APInt, unsigned>> PrevElt;
3653 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3654
3655 // First extract the ops into a list of constant integer values. This may not
3656 // be possible for floats if they're not all representable as integers.
3657 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3658 const unsigned OpSize = Op.getScalarValueSizeInBits();
3659 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3660 if (Elt.isUndef()) {
3661 Elts[Idx] = std::nullopt;
3662 continue;
3663 }
3664 if (IsInteger) {
3665 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3666 } else {
3667 auto ExactInteger =
3668 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3669 if (!ExactInteger)
3670 return std::nullopt;
3671 Elts[Idx] = *ExactInteger;
3672 }
3673 }
3674
3675 for (auto [Idx, Elt] : enumerate(Elts)) {
3676 // Assume undef elements match the sequence; we just have to be careful
3677 // when interpolating across them.
3678 if (!Elt)
3679 continue;
3680
3681 if (PrevElt) {
3682 // Calculate the step since the last non-undef element, and ensure
3683 // it's consistent across the entire sequence.
3684 unsigned IdxDiff = Idx - PrevElt->second;
3685 APInt ValDiff = *Elt - PrevElt->first;
3686
3687       // A zero value difference means that we're somewhere in the middle
3688 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3689 // step change before evaluating the sequence.
3690 if (ValDiff == 0)
3691 continue;
3692
3693 int64_t Remainder = ValDiff.srem(IdxDiff);
3694 // Normalize the step if it's greater than 1.
3695 if (Remainder != ValDiff.getSExtValue()) {
3696 // The difference must cleanly divide the element span.
3697 if (Remainder != 0)
3698 return std::nullopt;
3699 ValDiff = ValDiff.sdiv(IdxDiff);
3700 IdxDiff = 1;
3701 }
3702
3703 if (!SeqStepNum)
3704 SeqStepNum = ValDiff;
3705 else if (ValDiff != SeqStepNum)
3706 return std::nullopt;
3707
3708 if (!SeqStepDenom)
3709 SeqStepDenom = IdxDiff;
3710 else if (IdxDiff != *SeqStepDenom)
3711 return std::nullopt;
3712 }
3713
3714 // Record this non-undef element for later.
3715 if (!PrevElt || PrevElt->first != *Elt)
3716 PrevElt = std::make_pair(*Elt, Idx);
3717 }
3718
3719 // We need to have logged a step for this to count as a legal index sequence.
3720 if (!SeqStepNum || !SeqStepDenom)
3721 return std::nullopt;
3722
3723 // Loop back through the sequence and validate elements we might have skipped
3724 // while waiting for a valid step. While doing this, log any sequence addend.
3725 for (auto [Idx, Elt] : enumerate(Elts)) {
3726 if (!Elt)
3727 continue;
3728 APInt ExpectedVal =
3729 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3730 *SeqStepNum)
3731 .sdiv(*SeqStepDenom);
3732
3733 APInt Addend = *Elt - ExpectedVal;
3734 if (!SeqAddend)
3735 SeqAddend = Addend;
3736 else if (Addend != SeqAddend)
3737 return std::nullopt;
3738 }
3739
3740 assert(SeqAddend && "Must have an addend if we have a step");
3741
3742 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3743 SeqAddend->getSExtValue()};
3744}
3745
3746// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3747// and lower it as a VRGATHER_VX_VL from the source vector.
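// For example, (splat_vector (extract_vector_elt %src, %idx)) becomes a single
// VRGATHER_VX_VL of %src with the scalar index %idx, when the index is known
// to be in range for the result type (checked below).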
3748static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3749 SelectionDAG &DAG,
3750 const RISCVSubtarget &Subtarget) {
3751 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3752 return SDValue();
3753 SDValue Src = SplatVal.getOperand(0);
3754 // Don't perform this optimization for i1 vectors, or if the element types are
3755 // different
3756 // FIXME: Support i1 vectors, maybe by promoting to i8?
3757 MVT EltTy = VT.getVectorElementType();
3758 if (EltTy == MVT::i1 ||
3759 !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
3760 return SDValue();
3761 MVT SrcVT = Src.getSimpleValueType();
3762 if (EltTy != SrcVT.getVectorElementType())
3763 return SDValue();
3764 SDValue Idx = SplatVal.getOperand(1);
3765 // The index must be a legal type.
3766 if (Idx.getValueType() != Subtarget.getXLenVT())
3767 return SDValue();
3768
3769 // Check that we know Idx lies within VT
3770 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3771 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3772 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3773 return SDValue();
3774 }
3775
3776 // Convert fixed length vectors to scalable
3777 MVT ContainerVT = VT;
3778 if (VT.isFixedLengthVector())
3779 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3780
3781 MVT SrcContainerVT = SrcVT;
3782 if (SrcVT.isFixedLengthVector()) {
3783 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3784 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3785 }
3786
3787 // Put Vec in a VT sized vector
3788 if (SrcContainerVT.getVectorMinNumElements() <
3789 ContainerVT.getVectorMinNumElements())
3790 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
3791 else
3792 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
3793
3794 // We checked that Idx fits inside VT earlier
3795 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3796 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3797 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3798 if (VT.isFixedLengthVector())
3799 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3800 return Gather;
3801}
3802
3803 static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
3804                                       const RISCVSubtarget &Subtarget) {
3805 MVT VT = Op.getSimpleValueType();
3806 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3807
3808 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3809
3810 SDLoc DL(Op);
3811 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3812
3813 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3814 int64_t StepNumerator = SimpleVID->StepNumerator;
3815 unsigned StepDenominator = SimpleVID->StepDenominator;
3816 int64_t Addend = SimpleVID->Addend;
3817
3818 assert(StepNumerator != 0 && "Invalid step");
3819 bool Negate = false;
3820 int64_t SplatStepVal = StepNumerator;
3821 unsigned StepOpcode = ISD::MUL;
3822 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3823 // anyway as the shift of 63 won't fit in uimm5.
3824 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3825 isPowerOf2_64(std::abs(StepNumerator))) {
3826 Negate = StepNumerator < 0;
3827 StepOpcode = ISD::SHL;
3828 SplatStepVal = Log2_64(std::abs(StepNumerator));
3829 }
3830
3831 // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
3832 // since it's the immediate value many RVV instructions accept. There is
3833     // no vmul.vi instruction so ensure the multiply constant can fit in a
3834     // single addi instruction. For the addend, we allow up to 32 bits.
3835 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3836 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3837 isPowerOf2_32(StepDenominator) &&
3838 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
3839       MVT VIDVT =
3840           VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3841 MVT VIDContainerVT =
3842 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3843 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3844 // Convert right out of the scalable type so we can use standard ISD
3845 // nodes for the rest of the computation. If we used scalable types with
3846 // these, we'd lose the fixed-length vector info and generate worse
3847 // vsetvli code.
3848 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3849 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3850 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3851 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3852 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3853 }
3854 if (StepDenominator != 1) {
3855 SDValue SplatStep =
3856 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3857 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3858 }
3859 if (Addend != 0 || Negate) {
3860 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3861 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3862 VID);
3863 }
3864 if (VT.isFloatingPoint()) {
3865 // TODO: Use vfwcvt to reduce register pressure.
3866 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3867 }
3868 return VID;
3869 }
3870 }
3871
3872 return SDValue();
3873}
3874
3875/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3876/// which constitute a large proportion of the elements. In such cases we can
3877/// splat a vector with the dominant element and make up the shortfall with
3878/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3879/// Note that this includes vectors of 2 elements by association. The
3880/// upper-most element is the "dominant" one, allowing us to use a splat to
3881/// "insert" the upper element, and an insert of the lower element at position
3882/// 0, which improves codegen.
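/// For example, <4 x i32> <A, A, B, A> is lowered as a splat of A followed by
/// a single insert of B at index 2, rather than four independent inserts.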
3883 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3884                                                  const RISCVSubtarget &Subtarget) {
3885 MVT VT = Op.getSimpleValueType();
3886 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3887
3888 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3889
3890 SDLoc DL(Op);
3891 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3892
3893 MVT XLenVT = Subtarget.getXLenVT();
3894 unsigned NumElts = Op.getNumOperands();
3895
3896 SDValue DominantValue;
3897 unsigned MostCommonCount = 0;
3898 DenseMap<SDValue, unsigned> ValueCounts;
3899 unsigned NumUndefElts =
3900 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3901
3902 // Track the number of scalar loads we know we'd be inserting, estimated as
3903 // any non-zero floating-point constant. Other kinds of element are either
3904 // already in registers or are materialized on demand. The threshold at which
3905   // a vector load is more desirable than several scalar materialization and
3906 // vector-insertion instructions is not known.
3907 unsigned NumScalarLoads = 0;
3908
3909 for (SDValue V : Op->op_values()) {
3910 if (V.isUndef())
3911 continue;
3912
3913 unsigned &Count = ValueCounts[V];
3914 if (0 == Count)
3915 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3916 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3917
3918 // Is this value dominant? In case of a tie, prefer the highest element as
3919 // it's cheaper to insert near the beginning of a vector than it is at the
3920 // end.
3921 if (++Count >= MostCommonCount) {
3922 DominantValue = V;
3923 MostCommonCount = Count;
3924 }
3925 }
3926
3927 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3928 unsigned NumDefElts = NumElts - NumUndefElts;
3929 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3930
3931 // Don't perform this optimization when optimizing for size, since
3932 // materializing elements and inserting them tends to cause code bloat.
3933 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3934 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3935 ((MostCommonCount > DominantValueCountThreshold) ||
3936 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3937 // Start by splatting the most common element.
3938 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3939
3940 DenseSet<SDValue> Processed{DominantValue};
3941
3942 // We can handle an insert into the last element (of a splat) via
3943 // v(f)slide1down. This is slightly better than the vslideup insert
3944 // lowering as it avoids the need for a vector group temporary. It
3945 // is also better than using vmerge.vx as it avoids the need to
3946 // materialize the mask in a vector register.
3947 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3948 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3949 LastOp != DominantValue) {
3950 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3951 auto OpCode =
3952 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3953 if (!VT.isFloatingPoint())
3954 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3955 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3956 LastOp, Mask, VL);
3957 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3958 Processed.insert(LastOp);
3959 }
3960
3961 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3962 for (const auto &OpIdx : enumerate(Op->ops())) {
3963 const SDValue &V = OpIdx.value();
3964 if (V.isUndef() || !Processed.insert(V).second)
3965 continue;
3966 if (ValueCounts[V] == 1) {
3967 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
3968 } else {
3969 // Blend in all instances of this value using a VSELECT, using a
3970 // mask where each bit signals whether that element is the one
3971 // we're after.
3972         SmallVector<SDValue> Ops;
3973         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3974 return DAG.getConstant(V == V1, DL, XLenVT);
3975 });
3976 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3977 DAG.getBuildVector(SelMaskTy, DL, Ops),
3978 DAG.getSplatBuildVector(VT, DL, V), Vec);
3979 }
3980 }
3981
3982 return Vec;
3983 }
3984
3985 return SDValue();
3986}
3987
3988 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3989                                            const RISCVSubtarget &Subtarget) {
3990 MVT VT = Op.getSimpleValueType();
3991 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3992
3993 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3994
3995 SDLoc DL(Op);
3996 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3997
3998 MVT XLenVT = Subtarget.getXLenVT();
3999 unsigned NumElts = Op.getNumOperands();
4000
4001 if (VT.getVectorElementType() == MVT::i1) {
4002 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
4003 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
4004 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
4005 }
4006
4007 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
4008 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
4009 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
4010 }
4011
4012 // Lower constant mask BUILD_VECTORs via an integer vector type, in
4013 // scalar integer chunks whose bit-width depends on the number of mask
4014 // bits and XLEN.
4015 // First, determine the most appropriate scalar integer type to use. This
4016 // is at most XLenVT, but may be shrunk to a smaller vector element type
4017 // according to the size of the final vector - use i8 chunks rather than
4018 // XLenVT if we're producing a v8i1. This results in more consistent
4019 // codegen across RV32 and RV64.
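    // For example, v8i1 <1,0,1,1,0,0,1,0> is built as the single i8 constant
    // 0b01001101 (element 0 lands in bit 0) and then bitcast back to v8i1.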
4020 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4021 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4022 // If we have to use more than one INSERT_VECTOR_ELT then this
4023 // optimization is likely to increase code size; avoid performing it in
4024 // such a case. We can use a load from a constant pool in this case.
4025 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4026 return SDValue();
4027 // Now we can create our integer vector type. Note that it may be larger
4028 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4029 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4030 MVT IntegerViaVecVT =
4031 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4032 IntegerViaVecElts);
4033
4034 uint64_t Bits = 0;
4035 unsigned BitPos = 0, IntegerEltIdx = 0;
4036 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4037
4038 for (unsigned I = 0; I < NumElts;) {
4039 SDValue V = Op.getOperand(I);
4040 bool BitValue = !V.isUndef() && V->getAsZExtVal();
4041 Bits |= ((uint64_t)BitValue << BitPos);
4042 ++BitPos;
4043 ++I;
4044
4045 // Once we accumulate enough bits to fill our scalar type or process the
4046 // last element, insert into our vector and clear our accumulated data.
4047 if (I % NumViaIntegerBits == 0 || I == NumElts) {
4048 if (NumViaIntegerBits <= 32)
4049 Bits = SignExtend64<32>(Bits);
4050 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4051 Elts[IntegerEltIdx] = Elt;
4052 Bits = 0;
4053 BitPos = 0;
4054 IntegerEltIdx++;
4055 }
4056 }
4057
4058 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4059
4060 if (NumElts < NumViaIntegerBits) {
4061 // If we're producing a smaller vector than our minimum legal integer
4062 // type, bitcast to the equivalent (known-legal) mask type, and extract
4063 // our final mask.
4064 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4065 Vec = DAG.getBitcast(MVT::v8i1, Vec);
4066 Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4067 } else {
4068 // Else we must have produced an integer type with the same size as the
4069 // mask type; bitcast for the final result.
4070 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4071 Vec = DAG.getBitcast(VT, Vec);
4072 }
4073
4074 return Vec;
4075 }
4076
4077   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4078     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4079 : RISCVISD::VMV_V_X_VL;
4080 if (!VT.isFloatingPoint())
4081 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4082 Splat =
4083 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4084 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4085 }
4086
4087 // Try and match index sequences, which we can lower to the vid instruction
4088 // with optional modifications. An all-undef vector is matched by
4089 // getSplatValue, above.
4090 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4091 return Res;
4092
4093 // For very small build_vectors, use a single scalar insert of a constant.
4094 // TODO: Base this on constant rematerialization cost, not size.
4095 const unsigned EltBitSize = VT.getScalarSizeInBits();
4096   if (VT.getSizeInBits() <= 32 &&
4097       ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
4098 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4099 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4100 "Unexpected sequence type");
4101 // If we can use the original VL with the modified element type, this
4102 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4103 // be moved into InsertVSETVLI?
4104 unsigned ViaVecLen =
4105 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4106 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4107
4108 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4109 uint64_t SplatValue = 0;
4110 // Construct the amalgamated value at this larger vector type.
4111 for (const auto &OpIdx : enumerate(Op->op_values())) {
4112 const auto &SeqV = OpIdx.value();
4113 if (!SeqV.isUndef())
4114 SplatValue |=
4115 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4116 }
4117
4118 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4119     // achieve better constant materialization.
4120 // On RV32, we need to sign-extend to use getSignedConstant.
4121 if (ViaIntVT == MVT::i32)
4122 SplatValue = SignExtend64<32>(SplatValue);
4123
4124 SDValue Vec = DAG.getInsertVectorElt(
4125 DL, DAG.getUNDEF(ViaVecVT),
4126 DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4127 if (ViaVecLen != 1)
4128 Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4129 return DAG.getBitcast(VT, Vec);
4130 }
4131
4132
4133 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4134 // when re-interpreted as a vector with a larger element type. For example,
4135 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4136 // could be instead splat as
4137 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4138 // TODO: This optimization could also work on non-constant splats, but it
4139 // would require bit-manipulation instructions to construct the splat value.
4140 SmallVector<SDValue> Sequence;
4141 const auto *BV = cast<BuildVectorSDNode>(Op);
4142 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4143       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
4144       BV->getRepeatedSequence(Sequence) &&
4145 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4146 unsigned SeqLen = Sequence.size();
4147 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4148 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4149 ViaIntVT == MVT::i64) &&
4150 "Unexpected sequence type");
4151
4152 // If we can use the original VL with the modified element type, this
4153 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4154 // be moved into InsertVSETVLI?
4155 const unsigned RequiredVL = NumElts / SeqLen;
4156 const unsigned ViaVecLen =
4157 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4158 NumElts : RequiredVL;
4159 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4160
4161 unsigned EltIdx = 0;
4162 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4163 uint64_t SplatValue = 0;
4164 // Construct the amalgamated value which can be splatted as this larger
4165 // vector type.
4166 for (const auto &SeqV : Sequence) {
4167 if (!SeqV.isUndef())
4168 SplatValue |=
4169 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4170 EltIdx++;
4171 }
4172
4173 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4174     // achieve better constant materialization.
4175 // On RV32, we need to sign-extend to use getSignedConstant.
4176 if (ViaIntVT == MVT::i32)
4177 SplatValue = SignExtend64<32>(SplatValue);
4178
4179 // Since we can't introduce illegal i64 types at this stage, we can only
4180 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4181 // way we can use RVV instructions to splat.
4182 assert((ViaIntVT.bitsLE(XLenVT) ||
4183 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4184 "Unexpected bitcast sequence");
4185 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4186 SDValue ViaVL =
4187 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4188 MVT ViaContainerVT =
4189 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4190 SDValue Splat =
4191 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4192 DAG.getUNDEF(ViaContainerVT),
4193 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4194 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4195 if (ViaVecLen != RequiredVL)
4196         Splat = DAG.getExtractSubvector(
4197             DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4198 return DAG.getBitcast(VT, Splat);
4199 }
4200 }
4201
4202 // If the number of signbits allows, see if we can lower as a <N x i8>.
4203 // Our main goal here is to reduce LMUL (and thus work) required to
4204 // build the constant, but we will also narrow if the resulting
4205 // narrow vector is known to materialize cheaply.
4206 // TODO: We really should be costing the smaller vector. There are
4207 // profitable cases this misses.
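  // For example, v4i64 <0, 1, -1, 5> has at most 8 significant bits per
  // element, so it can be built as v4i8 <0, 1, -1, 5> and widened to v4i64
  // with a single sign extension (vsext.vf8).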
4208 if (EltBitSize > 8 && VT.isInteger() &&
4209 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4210 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4211 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4212 DL, Op->ops());
4213 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4214 Source, DAG, Subtarget);
4215 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4216 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4217 }
4218
4219 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4220 return Res;
4221
4222 // For constant vectors, use generic constant pool lowering. Otherwise,
4223 // we'd have to materialize constants in GPRs just to move them into the
4224 // vector.
4225 return SDValue();
4226}
4227
4228static unsigned getPACKOpcode(unsigned DestBW,
4229 const RISCVSubtarget &Subtarget) {
4230 switch (DestBW) {
4231 default:
4232 llvm_unreachable("Unsupported pack size");
4233 case 16:
4234 return RISCV::PACKH;
4235 case 32:
4236 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4237 case 64:
4238 assert(Subtarget.is64Bit());
4239 return RISCV::PACK;
4240 }
4241}
4242
4243/// Double the element size of the build vector to reduce the number
4244/// of vslide1down in the build vector chain. In the worst case, this
4245/// trades three scalar operations for 1 vector operation. Scalar
4246/// operations are generally lower latency, and for out-of-order cores
4247/// we also benefit from additional parallelism.
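/// For example, v8i16 [a,b,c,d,e,f,g,h] is rebuilt as the v4i32 build_vector
/// [pack(a,b), pack(c,d), pack(e,f), pack(g,h)] and bitcast back to v8i16,
/// halving the number of vslide1down steps needed.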
4248 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4249                                           const RISCVSubtarget &Subtarget) {
4250 SDLoc DL(Op);
4251 MVT VT = Op.getSimpleValueType();
4252 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4253 MVT ElemVT = VT.getVectorElementType();
4254 if (!ElemVT.isInteger())
4255 return SDValue();
4256
4257 // TODO: Relax these architectural restrictions, possibly with costing
4258 // of the actual instructions required.
4259 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4260 return SDValue();
4261
4262 unsigned NumElts = VT.getVectorNumElements();
4263 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4264 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4265 NumElts % 2 != 0)
4266 return SDValue();
4267
4268 // Produce [B,A] packed into a type twice as wide. Note that all
4269 // scalars are XLenVT, possibly masked (see below).
4270 MVT XLenVT = Subtarget.getXLenVT();
4271 SDValue Mask = DAG.getConstant(
4272 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4273 auto pack = [&](SDValue A, SDValue B) {
4274 // Bias the scheduling of the inserted operations to near the
4275 // definition of the element - this tends to reduce register
4276 // pressure overall.
4277 SDLoc ElemDL(B);
4278 if (Subtarget.hasStdExtZbkb())
4279 // Note that we're relying on the high bits of the result being
4280 // don't care. For PACKW, the result is *sign* extended.
4281 return SDValue(
4282 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4283 ElemDL, XLenVT, A, B),
4284 0);
4285
4286 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4287 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4288 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4289 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4290 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4291                        SDNodeFlags::Disjoint);
4292   };
4293
4294 SmallVector<SDValue> NewOperands;
4295 NewOperands.reserve(NumElts / 2);
4296 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4297 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4298 assert(NumElts == NewOperands.size() * 2);
4299 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4300 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4301 return DAG.getNode(ISD::BITCAST, DL, VT,
4302 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4303}
4304
4305 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4306                                  const RISCVSubtarget &Subtarget) {
4307 MVT VT = Op.getSimpleValueType();
4308 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4309
4310 MVT EltVT = VT.getVectorElementType();
4311 MVT XLenVT = Subtarget.getXLenVT();
4312
4313 SDLoc DL(Op);
4314
4315 // Proper support for f16 requires Zvfh. bf16 always requires special
4316 // handling. We need to cast the scalar to integer and create an integer
4317 // build_vector.
4318 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4319 MVT IVT = VT.changeVectorElementType(MVT::i16);
4320 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4321 for (const auto &[I, U] : enumerate(Op->ops())) {
4322 SDValue Elem = U.get();
4323 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4324 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4325 // Called by LegalizeDAG, we need to use XLenVT operations since we
4326 // can't create illegal types.
4327 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4328 // Manually constant fold so the integer build_vector can be lowered
4329 // better. Waiting for DAGCombine will be too late.
4330 APInt V =
4331 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4332 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4333 } else {
4334 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4335 }
4336 } else {
4337 // Called by scalar type legalizer, we can use i16.
4338 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4339 }
4340 }
4341 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4342 return DAG.getBitcast(VT, Res);
4343 }
4344
4345 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4346       ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4347     return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4348
4349 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4350
4351 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4352
4353 if (VT.getVectorElementType() == MVT::i1) {
4354 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4355 // vector type, we have a legal equivalently-sized i8 type, so we can use
4356 // that.
4357 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4358 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4359
4360 SDValue WideVec;
4361     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4362       // For a splat, perform a scalar truncate before creating the wider
4363 // vector.
4364 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4365 DAG.getConstant(1, DL, Splat.getValueType()));
4366 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4367 } else {
4368 SmallVector<SDValue, 8> Ops(Op->op_values());
4369 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4370 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4371 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4372 }
4373
4374 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4375 }
4376
4377   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4378     if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4379 return Gather;
4380
4381 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4382 // pressure at high LMUL.
4383 if (all_of(Op->ops().drop_front(),
4384 [](const SDUse &U) { return U.get().isUndef(); })) {
4385 unsigned Opc =
4386 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4387 if (!VT.isFloatingPoint())
4388 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4389 Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4390 Splat, VL);
4391 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4392 }
4393
4394 unsigned Opc =
4395 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4396 if (!VT.isFloatingPoint())
4397 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4398 Splat =
4399 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4400 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4401 }
4402
4403 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4404 return Res;
4405
4406 // If we're compiling for an exact VLEN value, we can split our work per
4407 // register in the register group.
4408 if (const auto VLen = Subtarget.getRealVLen();
4409 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4410 MVT ElemVT = VT.getVectorElementType();
4411 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4412 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4413 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4414 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4415 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4416
4417 // The following semantically builds up a fixed length concat_vector
4418 // of the component build_vectors. We eagerly lower to scalable and
4419 // insert_subvector here to avoid DAG combining it back to a large
4420 // build_vector.
4421 SmallVector<SDValue> BuildVectorOps(Op->ops());
4422 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4423 SDValue Vec = DAG.getUNDEF(ContainerVT);
4424 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4425 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4426 SDValue SubBV =
4427 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4428 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4429 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4430 Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4431 }
4432 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4433 }
4434
4435 // If we're about to resort to vslide1down (or stack usage), pack our
4436 // elements into the widest scalar type we can. This will force a VL/VTYPE
4437 // toggle, but reduces the critical path, the number of vslide1down ops
4438 // required, and possibly enables scalar folds of the values.
4439 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4440 return Res;
4441
4442 // For m1 vectors, if we have non-undef values in both halves of our vector,
4443 // split the vector into low and high halves, build them separately, then
4444 // use a vselect to combine them. For long vectors, this cuts the critical
4445 // path of the vslide1down sequence in half, and gives us an opportunity
4446 // to special case each half independently. Note that we don't change the
4447 // length of the sub-vectors here, so if both fallback to the generic
4448 // vslide1down path, we should be able to fold the vselect into the final
4449 // vslidedown (for the undef tail) for the first half w/ masking.
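  // For example, an m1 v8i16 <a,b,c,d,e,f,g,h> is built as
  //   A = <a,b,c,d,undef,undef,undef,undef> and
  //   B = <undef,undef,undef,undef,e,f,g,h>
  // and then combined with a vselect on the mask <1,1,1,1,0,0,0,0>.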
4450 unsigned NumElts = VT.getVectorNumElements();
4451 unsigned NumUndefElts =
4452 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4453 unsigned NumDefElts = NumElts - NumUndefElts;
4454 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4455 ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4456 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4457 SmallVector<SDValue> MaskVals;
4458 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4459 SubVecAOps.reserve(NumElts);
4460 SubVecBOps.reserve(NumElts);
4461 for (const auto &[Idx, U] : enumerate(Op->ops())) {
4462 SDValue Elem = U.get();
4463 if (Idx < NumElts / 2) {
4464 SubVecAOps.push_back(Elem);
4465 SubVecBOps.push_back(UndefElem);
4466 } else {
4467 SubVecAOps.push_back(UndefElem);
4468 SubVecBOps.push_back(Elem);
4469 }
4470 bool SelectMaskVal = (Idx < NumElts / 2);
4471 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4472 }
4473 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4474 MaskVals.size() == NumElts);
4475
4476 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4477 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4478 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4479 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4480 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4481 }
4482
4483 // Cap the cost at a value linear to the number of elements in the vector.
4484 // The default lowering is to use the stack. The vector store + scalar loads
4485 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4486 // being (at least) linear in LMUL. As a result, using the vslidedown
4487   // lowering for every element ends up being VL*LMUL.
4488 // TODO: Should we be directly costing the stack alternative? Doing so might
4489 // give us a more accurate upper bound.
4490 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4491
4492 // TODO: unify with TTI getSlideCost.
4493 InstructionCost PerSlideCost = 1;
4494 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4495 default: break;
4496 case RISCVVType::LMUL_2:
4497 PerSlideCost = 2;
4498 break;
4499 case RISCVVType::LMUL_4:
4500 PerSlideCost = 4;
4501 break;
4502 case RISCVVType::LMUL_8:
4503 PerSlideCost = 8;
4504 break;
4505 }
4506
4507 // TODO: Should we be using the build instseq then cost + evaluate scheme
4508 // we use for integer constants here?
4509 unsigned UndefCount = 0;
4510 for (const SDValue &V : Op->ops()) {
4511 if (V.isUndef()) {
4512 UndefCount++;
4513 continue;
4514 }
4515 if (UndefCount) {
4516 LinearBudget -= PerSlideCost;
4517 UndefCount = 0;
4518 }
4519 LinearBudget -= PerSlideCost;
4520 }
4521 if (UndefCount) {
4522 LinearBudget -= PerSlideCost;
4523 }
4524
4525 if (LinearBudget < 0)
4526 return SDValue();
4527
4528 assert((!VT.isFloatingPoint() ||
4529 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4530 "Illegal type which will result in reserved encoding");
4531
4532 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4533
4534 // General case: splat the first operand and slide other operands down one
4535 // by one to form a vector. Alternatively, if every operand is an
4536 // extraction from element 0 of a vector, we use that vector from the last
4537   // extraction as the start value and slide up instead of slide down, so that
4538   // (1) we can avoid the initial splat and (2) we can turn those vslide1up into
4539 // vslideup of 1 later and eliminate the vector to scalar movement, which is
4540 // something we cannot do with vslide1down/vslidedown.
4541 // Of course, using vslide1up/vslideup might increase the register pressure,
4542 // and that's why we conservatively limit to cases where every operand is an
4543 // extraction from the first element.
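  // For example, building <a,b,c,d> from plain scalars is roughly:
  //   splat a; vslide1down b; vslide1down c; vslide1down d
  // whereas if every element is (extractelt Vx, 0), we start from the source
  // vector of the last element and use vslide1up instead, avoiding the
  // initial scalar-to-vector splat.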
4544 SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
4545 SDValue EVec;
4546 bool SlideUp = false;
4547 auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
4548 SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
4549 if (SlideUp)
4550 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4551 Mask, VL, Policy);
4552 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4553 Mask, VL, Policy);
4554 };
4555
4556 // The reason we don't use all_of here is because we're also capturing EVec
4557 // from the last non-undef operand. If the std::execution_policy of the
4558 // underlying std::all_of is anything but std::sequenced_policy we might
4559 // capture the wrong EVec.
4560 for (SDValue V : Operands) {
4561 using namespace SDPatternMatch;
4562 SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
4563 if (!SlideUp)
4564 break;
4565 }
4566
4567 if (SlideUp) {
4568 MVT EVecContainerVT = EVec.getSimpleValueType();
4569 // Make sure the original vector has scalable vector type.
4570 if (EVecContainerVT.isFixedLengthVector()) {
4571 EVecContainerVT =
4572 getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
4573 EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
4574 }
4575
4576 // Adapt EVec's type into ContainerVT.
4577 if (EVecContainerVT.getVectorMinNumElements() <
4578 ContainerVT.getVectorMinNumElements())
4579 EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
4580 else
4581 EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
4582
4583 // Reverse the elements as we're going to slide up from the last element.
4584 std::reverse(Operands.begin(), Operands.end());
4585 }
4586
4587 SDValue Vec;
4588 UndefCount = 0;
4589 for (SDValue V : Operands) {
4590 if (V.isUndef()) {
4591 UndefCount++;
4592 continue;
4593 }
4594
4595 // Start our sequence with either a TA splat or extract source in the
4596 // hopes that hardware is able to recognize there's no dependency on the
4597 // prior value of our temporary register.
4598 if (!Vec) {
4599 if (SlideUp) {
4600 Vec = EVec;
4601 } else {
4602 Vec = DAG.getSplatVector(VT, DL, V);
4603 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4604 }
4605
4606 UndefCount = 0;
4607 continue;
4608 }
4609
4610 if (UndefCount) {
4611 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4612 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4613 VL);
4614 UndefCount = 0;
4615 }
4616
4617 unsigned Opcode;
4618 if (VT.isFloatingPoint())
4619 Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
4620 else
4621 Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
4622
4623 if (!VT.isFloatingPoint())
4624 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4625 Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4626 V, Mask, VL);
4627 }
4628 if (UndefCount) {
4629 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4630 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4631 VL);
4632 }
4633 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4634}
4635
4636static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4637                                    SDValue Lo, SDValue Hi, SDValue VL,
4638                                    SelectionDAG &DAG) {
4639 if (!Passthru)
4640 Passthru = DAG.getUNDEF(VT);
4641   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4642     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4643 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4644 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4645 // node in order to try and match RVV vector/scalar instructions.
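    // e.g. Lo = 0xFFFFFFFE, Hi = 0xFFFFFFFF: the combined value is -2, and a
    // single vmv.v.x of the sign-extended Lo produces the same splat.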
4646 if ((LoC >> 31) == HiC)
4647 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4648
4649 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
4650     // VL. This can temporarily increase VL if VL is less than VLMAX.
4651 if (LoC == HiC) {
4652 SDValue NewVL;
4653 if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4654 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4655 else
4656 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4657 MVT InterVT =
4658 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4659 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4660 DAG.getUNDEF(InterVT), Lo, NewVL);
4661 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4662 }
4663 }
4664
4665 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4666 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4667 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4668 Hi.getConstantOperandVal(1) == 31)
4669 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4670
4671 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4672 // even if it might be sign extended.
4673 if (Hi.isUndef())
4674 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4675
4676 // Fall back to a stack store and stride x0 vector load.
4677 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4678 Hi, VL);
4679}
4680
4681// Called by type legalization to handle splat of i64 on RV32.
4682// FIXME: We can optimize this when the type has sign or zero bits in one
4683// of the halves.
4684static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4685 SDValue Scalar, SDValue VL,
4686 SelectionDAG &DAG) {
4687 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4688 SDValue Lo, Hi;
4689 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4690 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4691}
4692
4693// This function lowers a splat of a scalar operand Splat with the vector
4694// length VL. It ensures the final sequence is type legal, which is useful when
4695// lowering a splat after type legalization.
4696static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4697 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4698 const RISCVSubtarget &Subtarget) {
4699 bool HasPassthru = Passthru && !Passthru.isUndef();
4700 if (!HasPassthru && !Passthru)
4701 Passthru = DAG.getUNDEF(VT);
4702
4703 MVT EltVT = VT.getVectorElementType();
4704 MVT XLenVT = Subtarget.getXLenVT();
4705
4706 if (VT.isFloatingPoint()) {
4707 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4708 EltVT == MVT::bf16) {
4709 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4710 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4711 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4712 else
4713 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4714 MVT IVT = VT.changeVectorElementType(MVT::i16);
4715 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4716 SDValue Splat =
4717 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4718 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4719 }
4720 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4721 }
4722
4723 // Simplest case is that the operand needs to be promoted to XLenVT.
4724 if (Scalar.getValueType().bitsLE(XLenVT)) {
4725 // If the operand is a constant, sign extend to increase our chances
4726 // of being able to use a .vi instruction. ANY_EXTEND would become a
4727 // a zero extend and the simm5 check in isel would fail.
4728 // FIXME: Should we ignore the upper bits in isel instead?
4729 unsigned ExtOpc =
4730         isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4731     Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4732 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4733 }
4734
4735 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4736 "Unexpected scalar for splat lowering!");
4737
4738 if (isOneConstant(VL) && isNullConstant(Scalar))
4739 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4740 DAG.getConstant(0, DL, XLenVT), VL);
4741
4742 // Otherwise use the more complicated splatting algorithm.
4743 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4744}
4745
4746// This function lowers an insert of a scalar operand Scalar into lane
4747// 0 of the vector regardless of the value of VL. The contents of the
4748// remaining lanes of the result vector are unspecified. VL is assumed
4749// to be non-zero.
4750 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4751                                  const SDLoc &DL, SelectionDAG &DAG,
4752 const RISCVSubtarget &Subtarget) {
4753 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4754
4755 const MVT XLenVT = Subtarget.getXLenVT();
4756 SDValue Passthru = DAG.getUNDEF(VT);
4757
4758 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4759 isNullConstant(Scalar.getOperand(1))) {
4760 SDValue ExtractedVal = Scalar.getOperand(0);
4761 // The element types must be the same.
4762 if (ExtractedVal.getValueType().getVectorElementType() ==
4763 VT.getVectorElementType()) {
4764 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4765 MVT ExtractedContainerVT = ExtractedVT;
4766 if (ExtractedContainerVT.isFixedLengthVector()) {
4767 ExtractedContainerVT = getContainerForFixedLengthVector(
4768 DAG, ExtractedContainerVT, Subtarget);
4769 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4770 ExtractedVal, DAG, Subtarget);
4771 }
4772 if (ExtractedContainerVT.bitsLE(VT))
4773 return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
4774 return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
4775 }
4776 }
4777
4778 if (VT.isFloatingPoint())
4779 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4780 VL);
4781
4782 // Avoid the tricky legalization cases by falling back to using the
4783 // splat code which already handles it gracefully.
4784 if (!Scalar.getValueType().bitsLE(XLenVT))
4785 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4786 DAG.getConstant(1, DL, XLenVT),
4787 VT, DL, DAG, Subtarget);
4788
4789 // If the operand is a constant, sign extend to increase our chances
4790 // of being able to use a .vi instruction. ANY_EXTEND would become a
4791 // a zero extend and the simm5 check in isel would fail.
4792 // FIXME: Should we ignore the upper bits in isel instead?
4793 unsigned ExtOpc =
4794       isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4795   Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4796 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4797 VL);
4798}
4799
4800/// If concat_vector(V1,V2) could be folded away to some existing
4801/// vector source, return it. Note that the source may be larger
4802 /// than the requested concat_vector (i.e. an extract_subvector
4803/// might be required.)
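/// For example, concat_vectors (extract_subvector %src, 0),
/// (extract_subvector %src, N/2) can be folded back to %src itself when %src
/// has N elements.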
4804 static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4805   EVT VT = V1.getValueType();
4806 assert(VT == V2.getValueType() && "argument types must match");
4807   // Both inputs must be extracts.
4808 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4809       V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4810     return SDValue();
4811
4812 // Extracting from the same source.
4813 SDValue Src = V1.getOperand(0);
4814 if (Src != V2.getOperand(0) ||
4815 VT.isScalableVector() != Src.getValueType().isScalableVector())
4816 return SDValue();
4817
4818 // The extracts must extract the two halves of the source.
4819 if (V1.getConstantOperandVal(1) != 0 ||
4821 return SDValue();
4822
4823 return Src;
4824}
4825
4826// Can this shuffle be performed on exactly one (possibly larger) input?
4828
4829 if (V2.isUndef())
4830 return V1;
4831
4832 unsigned NumElts = VT.getVectorNumElements();
4833 // Src needs to have twice the number of elements.
4834 // TODO: Update shuffle lowering to add the extract subvector
4835 if (SDValue Src = foldConcatVector(V1, V2);
4836 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4837 return Src;
4838
4839 return SDValue();
4840}
4841
4842/// Is this shuffle interleaving contiguous elements from one vector into the
4843/// even elements and contiguous elements from another vector into the odd
4844/// elements. \p EvenSrc will contain the element that should be in the first
4845/// even element. \p OddSrc will contain the element that should be in the first
4846/// odd element. These can be the first element in a source or the element half
4847/// way through the source.
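/// For example, with v8i32 operands the mask <0,8,1,9,2,10,3,11> gives
/// EvenSrc = 0 and OddSrc = 8 (the low halves of the two sources), while the
/// unary mask <0,4,1,5,2,6,3,7> gives EvenSrc = 0 and OddSrc = 4 (the two
/// halves of the first source).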
4848static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4849 int &OddSrc, const RISCVSubtarget &Subtarget) {
4850 // We need to be able to widen elements to the next larger integer type or
4851 // use the zip2a instruction at e64.
4852 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
4853 !Subtarget.hasVendorXRivosVizip())
4854 return false;
4855
4856 int Size = Mask.size();
4857 int NumElts = VT.getVectorNumElements();
4858 assert(Size == (int)NumElts && "Unexpected mask size");
4859
4860 SmallVector<unsigned, 2> StartIndexes;
4861 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4862 return false;
4863
4864 EvenSrc = StartIndexes[0];
4865 OddSrc = StartIndexes[1];
4866
4867 // One source should be low half of first vector.
4868 if (EvenSrc != 0 && OddSrc != 0)
4869 return false;
4870
4871   // Subvectors will be extracted either at the start of the two input
4872   // vectors, or at the start and middle of the first vector if it's a unary
4873 // interleave.
4874 // In both cases, HalfNumElts will be extracted.
4875 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4876 // we'll create an illegal extract_subvector.
4877 // FIXME: We could support other values using a slidedown first.
4878 int HalfNumElts = NumElts / 2;
4879 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4880}
4881
4882/// Is this mask representing a masked combination of two slides?
4883 static bool isMaskedSlidePair(ArrayRef<int> Mask,
4884                               std::array<std::pair<int, int>, 2> &SrcInfo) {
4885 if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
4886 return false;
4887
4888 // Avoid matching vselect idioms
4889 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
4890 return false;
4891 // Prefer vslideup as the second instruction, and identity
4892 // only as the initial instruction.
4893 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
4894 SrcInfo[1].second == 0)
4895 std::swap(SrcInfo[0], SrcInfo[1]);
4896 assert(SrcInfo[0].first != -1 && "Must find one slide");
4897 return true;
4898}
4899
4900// Exactly matches the semantics of a previously existing custom matcher
4901// to allow migration to new matcher without changing output.
4902static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
4903 unsigned NumElts) {
4904 if (SrcInfo[1].first == -1)
4905 return true;
4906 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
4907 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
4908}
4909
4910static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
4911 ArrayRef<int> Mask, unsigned Factor,
4912 bool RequiredPolarity) {
4913 int NumElts = Mask.size();
4914 for (const auto &[Idx, M] : enumerate(Mask)) {
4915 if (M < 0)
4916 continue;
4917 int Src = M >= NumElts;
4918 int Diff = (int)Idx - (M % NumElts);
4919 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
4920 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
4921 "Must match exactly one of the two slides");
4922 if (RequiredPolarity != (C == (Idx / Factor) % 2))
4923 return false;
4924 }
4925 return true;
4926}
4927
4928/// Given a shuffle which can be represented as a pair of two slides,
4929/// see if it is a zipeven idiom. Zipeven is:
4930/// vs2: a0 a1 a2 a3
4931/// vs1: b0 b1 b2 b3
4932/// vd: a0 b0 a2 b2
4933static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
4934 ArrayRef<int> Mask, unsigned &Factor) {
4935 Factor = SrcInfo[1].second;
4936 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4937 Mask.size() % Factor == 0 &&
4938 isAlternating(SrcInfo, Mask, Factor, true);
4939}
4940
4941/// Given a shuffle which can be represented as a pair of two slides,
4942/// see if it is a zipodd idiom. Zipodd is:
4943/// vs2: a0 a1 a2 a3
4944/// vs1: b0 b1 b2 b3
4945/// vd: a1 b1 a3 b3
4946/// Note that the operand order is swapped due to the way we canonicalize
4947 /// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
4948static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
4949 ArrayRef<int> Mask, unsigned &Factor) {
4950 Factor = -SrcInfo[1].second;
4951 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4952 Mask.size() % Factor == 0 &&
4953 isAlternating(SrcInfo, Mask, Factor, false);
4954}
4955
4956// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4957// 2, 4, 8 and the integer type Factor-times larger than VT's
4958// element type must be a legal element type.
4959// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4960// -> [p, q, r, s] (Factor=2, Index=1)
4961static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4962 SDValue Src, unsigned Factor,
4963 unsigned Index, SelectionDAG &DAG) {
4964 unsigned EltBits = VT.getScalarSizeInBits();
4965 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4966 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4967 SrcEC.divideCoefficientBy(Factor));
4968 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4969 SrcEC.divideCoefficientBy(Factor));
4970 Src = DAG.getBitcast(WideSrcVT, Src);
4971
4972 unsigned Shift = Index * EltBits;
4973 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4974 DAG.getConstant(Shift, DL, WideSrcVT));
4975 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4976 MVT CastVT = ResVT.changeVectorElementType(VT.getVectorElementType());
4977 Res = DAG.getBitcast(CastVT, Res);
4978 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
4979}
4980
4981/// Match a single source shuffle which is an identity except that some
4982/// particular element is repeated. This can be lowered as a masked
4983/// vrgather.vi/vx. Note that the two source form of this is handled
4984/// by the recursive splitting logic and doesn't need special handling.
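/// For example, the single-source mask <0, 3, 2, 3> is an identity except
/// that element 1 repeats element 3; it lowers to a splat of element 3
/// vselect'ed into V1 under the i1 mask <0, 1, 0, 1>.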
4985static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN,
4986 const RISCVSubtarget &Subtarget,
4987 SelectionDAG &DAG) {
4988
4989 SDLoc DL(SVN);
4990 MVT VT = SVN->getSimpleValueType(0);
4991 SDValue V1 = SVN->getOperand(0);
4992 assert(SVN->getOperand(1).isUndef());
4993 ArrayRef<int> Mask = SVN->getMask();
4994 const unsigned NumElts = VT.getVectorNumElements();
4995 MVT XLenVT = Subtarget.getXLenVT();
4996
4997 std::optional<int> SplatIdx;
4998 for (auto [I, M] : enumerate(Mask)) {
4999 if (M == -1 || I == (unsigned)M)
5000 continue;
5001 if (SplatIdx && *SplatIdx != M)
5002 return SDValue();
5003 SplatIdx = M;
5004 }
5005
5006 if (!SplatIdx)
5007 return SDValue();
5008
5009 SmallVector<SDValue> MaskVals;
5010 for (int MaskIndex : Mask) {
5011 bool SelectMaskVal = MaskIndex == *SplatIdx;
5012 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5013 }
5014 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5015 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5016 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5017 SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
5018 SmallVector<int>(NumElts, *SplatIdx));
5019 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
5020}
5021
5022// Lower the following shuffle to vslidedown.
5023// a)
5024// t49: v8i8 = extract_subvector t13, Constant:i64<0>
5025// t109: v8i8 = extract_subvector t13, Constant:i64<8>
5026// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
5027// b)
5028// t69: v16i16 = extract_subvector t68, Constant:i64<0>
5029// t23: v8i16 = extract_subvector t69, Constant:i64<0>
5030// t29: v4i16 = extract_subvector t23, Constant:i64<4>
5031// t26: v8i16 = extract_subvector t69, Constant:i64<8>
5032// t30: v4i16 = extract_subvector t26, Constant:i64<0>
5033// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
5034static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
5035 SDValue V1, SDValue V2,
5036 ArrayRef<int> Mask,
5037 const RISCVSubtarget &Subtarget,
5038 SelectionDAG &DAG) {
5039 auto findNonEXTRACT_SUBVECTORParent =
5040 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
5041 uint64_t Offset = 0;
5042 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5043 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
5044 // a scalable vector. But we don't want to match the case.
5045 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
5046 Offset += Parent.getConstantOperandVal(1);
5047 Parent = Parent.getOperand(0);
5048 }
5049 return std::make_pair(Parent, Offset);
5050 };
5051
5052 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
5053 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
5054
5055 // Extracting from the same source.
5056 SDValue Src = V1Src;
5057 if (Src != V2Src)
5058 return SDValue();
5059
5060 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
5061 SmallVector<int, 16> NewMask(Mask);
5062 for (size_t i = 0; i != NewMask.size(); ++i) {
5063 if (NewMask[i] == -1)
5064 continue;
5065
5066 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
5067 NewMask[i] = NewMask[i] + V1IndexOffset;
5068 } else {
5069 // Minus NewMask.size() is needed. Otherwise, the b case would be
5070 // <5,6,7,12> instead of <5,6,7,8>.
5071 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
5072 }
5073 }
5074
5075 // First index must be known and non-zero. It will be used as the slidedown
5076 // amount.
5077 if (NewMask[0] <= 0)
5078 return SDValue();
5079
5080 // NewMask must also be consecutive (each index one greater than the last).
5081 for (unsigned i = 1; i != NewMask.size(); ++i)
5082 if (NewMask[i - 1] + 1 != NewMask[i])
5083 return SDValue();
5084
5085 MVT XLenVT = Subtarget.getXLenVT();
5086 MVT SrcVT = Src.getSimpleValueType();
5087 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
5088 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
5089 SDValue Slidedown =
5090 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5091 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
5092 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
5093 return DAG.getExtractSubvector(
5094 DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
5095}
5096
5097// Because vslideup leaves the destination elements at the start intact, we can
5098// use it to perform shuffles that insert subvectors:
5099//
5100// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5101// ->
5102// vsetvli zero, 8, e8, mf2, ta, ma
5103// vslideup.vi v8, v9, 4
5104//
5105// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5106// ->
5107// vsetvli zero, 5, e8, mf2, tu, ma
5108 // vslideup.vi v8, v9, 2
5109static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
5110 SDValue V1, SDValue V2,
5111 ArrayRef<int> Mask,
5112 const RISCVSubtarget &Subtarget,
5113 SelectionDAG &DAG) {
5114 unsigned NumElts = VT.getVectorNumElements();
5115 int NumSubElts, Index;
5116 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
5117 Index))
5118 return SDValue();
5119
5120 bool OpsSwapped = Mask[Index] < (int)NumElts;
5121 SDValue InPlace = OpsSwapped ? V2 : V1;
5122 SDValue ToInsert = OpsSwapped ? V1 : V2;
5123
5124 MVT XLenVT = Subtarget.getXLenVT();
5125 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5126 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
5127 // We slide up by the index that the subvector is being inserted at, and set
5128 // VL to the index + the number of elements being inserted.
5129 unsigned Policy =
5130 RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
5131 // If we're adding a suffix to the in place vector, i.e. inserting right
5132 // up to the very end of it, then we don't actually care about the tail.
5133 if (NumSubElts + Index >= (int)NumElts)
5134 Policy |= RISCVVType::TAIL_AGNOSTIC;
5135
5136 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
5137 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
5138 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
5139
5140 SDValue Res;
5141 // If we're inserting into the lowest elements, use a tail undisturbed
5142 // vmv.v.v.
5143 if (Index == 0)
5144 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
5145 VL);
5146 else
5147 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
5148 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
5149 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5150}
5151
5152/// Match v(f)slide1up/down idioms. These operations involve sliding
5153/// N-1 elements to make room for an inserted scalar at one end.
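/// For example, with V1 a splat of the scalar x and V2 = [d0, d1, d2, d3],
/// the mask <0, 4, 5, 6> matches vslide1up and produces [x, d0, d1, d2].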
5154static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
5155 SDValue V1, SDValue V2,
5156 ArrayRef<int> Mask,
5157 const RISCVSubtarget &Subtarget,
5158 SelectionDAG &DAG) {
5159 bool OpsSwapped = false;
5160 if (!isa<BuildVectorSDNode>(V1)) {
5161 if (!isa<BuildVectorSDNode>(V2))
5162 return SDValue();
5163 std::swap(V1, V2);
5164 OpsSwapped = true;
5165 }
5166 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
5167 if (!Splat)
5168 return SDValue();
5169
5170 // Return true if the mask could describe a slide of Mask.size() - 1
5171 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
5172 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5173 const unsigned S = (Offset > 0) ? 0 : -Offset;
5174 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5175 for (unsigned i = S; i != E; ++i)
5176 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5177 return false;
5178 return true;
5179 };
5180
5181 const unsigned NumElts = VT.getVectorNumElements();
5182 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5183 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5184 return SDValue();
5185
5186 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
5187 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
5188 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5189 return SDValue();
5190
5191 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5192 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5193
5194 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5195 // vslide1{down,up}.vx instead.
5196 if (VT.getVectorElementType() == MVT::bf16 ||
5197 (VT.getVectorElementType() == MVT::f16 &&
5198 !Subtarget.hasVInstructionsF16())) {
5199 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5200 Splat =
5201 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
5202 V2 = DAG.getBitcast(
5203 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
5204 SDValue Vec = DAG.getNode(
5205 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5206 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
5207 Vec = DAG.getBitcast(ContainerVT, Vec);
5208 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5209 }
5210
5211 auto OpCode = IsVSlidedown ?
5212 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5213 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5214 if (!VT.isFloatingPoint())
5215 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
5216 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
5217 DAG.getUNDEF(ContainerVT),
5218 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
5219 Splat, TrueMask, VL);
5220 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5221}
5222
5223/// Match a mask which "spreads" the leading elements of a vector evenly
5224/// across the result. Factor is the spread amount, and Index is the
5225/// offset applied (on success, Index < Factor). This is the inverse
5226/// of a deinterleave with the same Factor and Index. This is analogous
5227/// to an interleave, except that all but one lane is undef.
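/// For example, <-1, 0, -1, 1, -1, 2, -1, 3> is a spread with Factor = 2 and
/// Index = 1.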
5228bool RISCVTargetLowering::isSpreadMask(ArrayRef<int> Mask, unsigned Factor,
5229 unsigned &Index) {
5230 SmallVector<bool> LaneIsUndef(Factor, true);
5231 for (unsigned i = 0; i < Mask.size(); i++)
5232 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5233
5234 bool Found = false;
5235 for (unsigned i = 0; i < Factor; i++) {
5236 if (LaneIsUndef[i])
5237 continue;
5238 if (Found)
5239 return false;
5240 Index = i;
5241 Found = true;
5242 }
5243 if (!Found)
5244 return false;
5245
5246 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5247 unsigned j = i * Factor + Index;
5248 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5249 return false;
5250 }
5251 return true;
5252}
5253
5254static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5255 const SDLoc &DL, SelectionDAG &DAG,
5256 const RISCVSubtarget &Subtarget) {
5257 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5258 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5259 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5260 assert(Op0.getSimpleValueType() == Op1.getSimpleValueType());
5261
5262 MVT VT = Op0.getSimpleValueType();
5263 MVT IntVT = VT.changeVectorElementTypeToInteger();
5264 Op0 = DAG.getBitcast(IntVT, Op0);
5265 Op1 = DAG.getBitcast(IntVT, Op1);
5266
5267 MVT ContainerVT = IntVT;
5268 if (VT.isFixedLengthVector()) {
5269 ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
5270 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
5271 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5272 }
5273
5274 MVT InnerVT = ContainerVT;
5275 auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
5276 if (Op1.isUndef() &&
5277 ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
5278 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5279 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5280 VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
5281 Subtarget.getXLenVT());
5282 Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
5283 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5284 Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
5285 Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
5286 }
5287
5288 SDValue Passthru = DAG.getUNDEF(InnerVT);
5289 SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
5290 if (InnerVT.bitsLT(ContainerVT))
5291 Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
5292 if (IntVT.isFixedLengthVector())
5293 Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
5294 Res = DAG.getBitcast(VT, Res);
5295 return Res;
5296}
5297
5298// Given a vector a, b, c, d return a vector Factor times longer
5299// with Factor-1 undef's between elements. Ex:
5300// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5301// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
5302static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5303 const SDLoc &DL, SelectionDAG &DAG) {
5304
5305 MVT VT = V.getSimpleValueType();
5306 unsigned EltBits = VT.getScalarSizeInBits();
5307 ElementCount EC = VT.getVectorElementCount();
5308 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
5309
5310 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
5311
5312 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
5313 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5314 // allow the SHL to fold away if Index is 0.
5315 if (Index != 0)
5316 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
5317 DAG.getConstant(EltBits * Index, DL, WideVT));
5318 // Make sure to use original element type
5319 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
5320 EC.multiplyCoefficientBy(Factor));
5321 return DAG.getBitcast(ResultVT, Result);
5322}
5323
5324// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5325// to create an interleaved vector of <[vscale x] n*2 x ty>.
5326// This requires that the size of ty is less than the subtarget's maximum ELEN.
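// Conceptually, for 8-bit elements: EvenV = [a0, a1] and OddV = [b0, b1]
// become the i16 vector [a0 + 256*b0, a1 + 256*b1], which bitcasts (little
// endian) to the interleaved i8 vector [a0, b0, a1, b1].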
5327static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
5328 const SDLoc &DL, SelectionDAG &DAG,
5329 const RISCVSubtarget &Subtarget) {
5330
5331 // FIXME: Not only does this optimize the code, it fixes some correctness
5332 // issues because MIR does not have freeze.
5333 if (EvenV.isUndef())
5334 return getWideningSpread(OddV, 2, 1, DL, DAG);
5335 if (OddV.isUndef())
5336 return getWideningSpread(EvenV, 2, 0, DL, DAG);
5337
5338 MVT VecVT = EvenV.getSimpleValueType();
5339 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5340 // Convert fixed vectors to scalable if needed
5341 if (VecContainerVT.isFixedLengthVector()) {
5342 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
5343 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
5344 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
5345 }
5346
5347 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5348
5349 // We're working with a vector of the same size as the resulting
5350 // interleaved vector, but with half the number of elements and
5351 // twice the SEW (Hence the restriction on not using the maximum
5352 // ELEN)
5353 MVT WideVT =
5354 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
5355 VecVT.getVectorElementCount());
5356 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5357 if (WideContainerVT.isFixedLengthVector())
5358 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
5359
5360 // Bitcast the input vectors to integers in case they are FP
5361 VecContainerVT = VecContainerVT.changeTypeToInteger();
5362 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
5363 OddV = DAG.getBitcast(VecContainerVT, OddV);
5364
5365 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
5366 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
5367
5368 SDValue Interleaved;
5369 if (Subtarget.hasStdExtZvbb()) {
5370 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5371 SDValue OffsetVec =
5372 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5373 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5374 OffsetVec, Passthru, Mask, VL);
5375 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5376 Interleaved, EvenV, Passthru, Mask, VL);
5377 } else {
5378 // FIXME: We should freeze the odd vector here. We already handled the case
5379 // of provably undef/poison above.
5380
5381 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5382 // vwaddu.vv
5383 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5384 OddV, Passthru, Mask, VL);
5385
5386 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all ones
5387 SDValue AllOnesVec = DAG.getSplatVector(
5388 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5389 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5390 OddV, AllOnesVec, Passthru, Mask, VL);
5391
5392 // Add the two together so we get
5393 // (OddV * 0xff...ff) + (OddV + EvenV)
5394 // = (OddV * 0x100...00) + EvenV
5395 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5396 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
5397 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5398 Interleaved, OddsMul, Passthru, Mask, VL);
5399 }
5400
5401 // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
5402 MVT ResultContainerVT = MVT::getVectorVT(
5403 VecVT.getVectorElementType(), // Make sure to use original type
5404 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5405 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5406
5407 // Convert back to a fixed vector if needed
5408 MVT ResultVT =
5409 MVT::getVectorVT(VecVT.getVectorElementType(),
5410 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5411 if (ResultVT.isFixedLengthVector())
5412 Interleaved =
5413 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5414
5415 return Interleaved;
5416}
5417
5418// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5419// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
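// If the bit count is not a power of two (e.g. v4i1), the mask is widened to
// the next legal size (v8i1), reversed as a single integer element, and then
// shifted right to drop the gap introduced by the widening.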
5420static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5421 SelectionDAG &DAG,
5422 const RISCVSubtarget &Subtarget) {
5423 SDLoc DL(SVN);
5424 MVT VT = SVN->getSimpleValueType(0);
5425 SDValue V = SVN->getOperand(0);
5426 unsigned NumElts = VT.getVectorNumElements();
5427
5428 assert(VT.getVectorElementType() == MVT::i1);
5429
5430 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5431 SVN->getMask().size()) ||
5432 !SVN->getOperand(1).isUndef())
5433 return SDValue();
5434
5435 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5436 EVT ViaVT = EVT::getVectorVT(
5437 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5438 EVT ViaBitVT =
5439 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5440
5441 // If we don't have zvbb or the larger element type > ELEN, the operation will
5442 // be illegal.
5443 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5444 ViaVT) ||
5445 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5446 return SDValue();
5447
5448 // If the bit vector doesn't fit exactly into the larger element type, we need
5449 // to insert it into the larger vector and then shift up the reversed bits
5450 // afterwards to get rid of the gap introduced.
5451 if (ViaEltSize > NumElts)
5452 V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);
5453
5454 SDValue Res =
5455 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5456
5457 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5458 // element type.
5459 if (ViaEltSize > NumElts)
5460 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5461 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5462
5463 Res = DAG.getBitcast(ViaBitVT, Res);
5464
5465 if (ViaEltSize > NumElts)
5466 Res = DAG.getExtractSubvector(DL, VT, Res, 0);
5467 return Res;
5468}
5469
5470static bool isLegalBitRotate(ArrayRef<int> Mask, EVT VT,
5471 const RISCVSubtarget &Subtarget,
5472 MVT &RotateVT, unsigned &RotateAmt) {
5473 unsigned NumElts = VT.getVectorNumElements();
5474 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5475 unsigned NumSubElts;
5476 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
5477 NumElts, NumSubElts, RotateAmt))
5478 return false;
5479 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5480 NumElts / NumSubElts);
5481
5482 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5483 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5484}
5485
5486// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5487// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5488// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5489static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5490 SelectionDAG &DAG,
5491 const RISCVSubtarget &Subtarget) {
5492 SDLoc DL(SVN);
5493
5494 EVT VT = SVN->getValueType(0);
5495 unsigned RotateAmt;
5496 MVT RotateVT;
5497 if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5498 return SDValue();
5499
5500 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5501
5502 SDValue Rotate;
5503 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5504 // so canonicalize to vrev8.
5505 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5506 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5507 else
5508 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5509 DAG.getConstant(RotateAmt, DL, RotateVT));
5510
5511 return DAG.getBitcast(VT, Rotate);
5512}
5513
5514// If compiling with an exactly known VLEN, see if we can split a
5515// shuffle on m2 or larger into a small number of m1 sized shuffles
5516// which write each destination registers exactly once.
5517static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5518 SelectionDAG &DAG,
5519 const RISCVSubtarget &Subtarget) {
5520 SDLoc DL(SVN);
5521 MVT VT = SVN->getSimpleValueType(0);
5522 SDValue V1 = SVN->getOperand(0);
5523 SDValue V2 = SVN->getOperand(1);
5524 ArrayRef<int> Mask = SVN->getMask();
5525
5526 // If we don't know exact data layout, not much we can do. If this
5527 // is already m1 or smaller, no point in splitting further.
5528 const auto VLen = Subtarget.getRealVLen();
5529 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5530 return SDValue();
5531
5532 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5533 // expansion for.
5534 unsigned RotateAmt;
5535 MVT RotateVT;
5536 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5537 return SDValue();
5538
5539 MVT ElemVT = VT.getVectorElementType();
5540 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5541
5542 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5543 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5544 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5545 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5546 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5547 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5548 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5549 unsigned NumOfDestRegs = NumElts / NumOpElts;
5550 // The following semantically builds up a fixed length concat_vector
5551 // of the component shuffle_vectors. We eagerly lower to scalable here
5552 // to avoid DAG combining it back to a large shuffle_vector again.
5553 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5554 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5555 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5556 Operands;
5557 processShuffleMasks(
5558 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5559 [&]() { Operands.emplace_back(); },
5560 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5561 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
5562 SmallVector<int>(SrcSubMask));
5563 },
5564 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5565 if (NewReg)
5566 Operands.emplace_back();
5567 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
5568 });
5569 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5570 // Note: check that we do not emit too many shuffles here to prevent code
5571 // size explosion.
5572 // TODO: investigate if it can be improved by extra analysis of the masks to
5573 // check if the code is more profitable.
5574 unsigned NumShuffles = std::accumulate(
5575 Operands.begin(), Operands.end(), 0u,
5576 [&](unsigned N,
5577 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5578 if (Data.empty())
5579 return N;
5580 N += Data.size();
5581 for (const auto &P : Data) {
5582 unsigned Idx2 = std::get<1>(P);
5583 ArrayRef<int> Mask = std::get<2>(P);
5584 if (Idx2 != UINT_MAX)
5585 ++N;
5586 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5587 --N;
5588 }
5589 return N;
5590 });
5591 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5592 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5593 return SDValue();
5594 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5595 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
5596 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5597 return SubVec;
5598 };
5599 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5600 ArrayRef<int> Mask) {
5601 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5602 return SubVec;
5603 };
5604 SDValue Vec = DAG.getUNDEF(ContainerVT);
5605 for (auto [I, Data] : enumerate(Operands)) {
5606 if (Data.empty())
5607 continue;
5608 SmallDenseMap<unsigned, SDValue, 4> Values;
5609 for (unsigned I : seq<unsigned>(Data.size())) {
5610 const auto &[Idx1, Idx2, _] = Data[I];
5611 // If the shuffle contains a permutation of an odd number of elements,
5612 // Idx1 might be used already in the first iteration.
5613 //
5614 // Idx1 = shuffle Idx1, Idx2
5615 // Idx1 = shuffle Idx1, Idx3
5616 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
5617 if (!V)
5618 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5619 (Idx1 % NumOfSrcRegs) * NumOpElts);
5620 if (Idx2 != UINT_MAX) {
5621 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
5622 if (!V)
5623 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5624 (Idx2 % NumOfSrcRegs) * NumOpElts);
5625 }
5626 }
5627 SDValue V;
5628 for (const auto &[Idx1, Idx2, Mask] : Data) {
5629 SDValue V1 = Values.at(Idx1);
5630 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5631 V = PerformShuffle(V1, V2, Mask);
5632 Values[Idx1] = V;
5633 }
5634
5635 unsigned InsertIdx = I * NumOpElts;
5636 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5637 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
5638 }
5639 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5640}
5641
5642// Matches a subset of compress masks with a contiguous prefix of output
5643// elements. This could be extended to allow gaps by deciding which
5644// source elements to spuriously demand.
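// For example, <0, 2, 5, -1> is accepted: indices strictly increase, each
// element comes from an equal or higher source position, and undefs only
// appear at the end. Masks such as <1, 0, 2, 3> or <-1, 1, 2, 3> are rejected.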
5645static bool isCompressMask(ArrayRef<int> Mask) {
5646 int Last = -1;
5647 bool SawUndef = false;
5648 for (const auto &[Idx, M] : enumerate(Mask)) {
5649 if (M == -1) {
5650 SawUndef = true;
5651 continue;
5652 }
5653 if (SawUndef)
5654 return false;
5655 if (Idx > (unsigned)M)
5656 return false;
5657 if (M <= Last)
5658 return false;
5659 Last = M;
5660 }
5661 return true;
5662}
5663
5664/// Given a shuffle where the indices are disjoint between the two sources,
5665/// e.g.:
5666///
5667/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5668///
5669/// Merge the two sources into one and do a single source shuffle:
5670///
5671/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5672/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5673///
5674/// A vselect will either be merged into a masked instruction or be lowered as a
5675/// vmerge.vvm, which is cheaper than a vrgather.vv.
5676static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5677 SelectionDAG &DAG,
5678 const RISCVSubtarget &Subtarget) {
5679 MVT VT = SVN->getSimpleValueType(0);
5680 MVT XLenVT = Subtarget.getXLenVT();
5681 SDLoc DL(SVN);
5682
5683 const ArrayRef<int> Mask = SVN->getMask();
5684
5685 // Work out which source each lane will come from.
5686 SmallVector<int, 16> Srcs(Mask.size(), -1);
5687
5688 for (int Idx : Mask) {
5689 if (Idx == -1)
5690 continue;
5691 unsigned SrcIdx = Idx % Mask.size();
5692 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5693 if (Srcs[SrcIdx] == -1)
5694 // Mark this source as using this lane.
5695 Srcs[SrcIdx] = Src;
5696 else if (Srcs[SrcIdx] != Src)
5697 // The other source is using this lane: not disjoint.
5698 return SDValue();
5699 }
5700
5701 SmallVector<SDValue> SelectMaskVals;
5702 for (int Lane : Srcs) {
5703 if (Lane == -1)
5704 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5705 else
5706 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5707 }
5708 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5709 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5710 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5711 SVN->getOperand(0), SVN->getOperand(1));
5712
5713 // Move all indices relative to the first source.
5714 SmallVector<int> NewMask(Mask.size());
5715 for (unsigned I = 0; I < Mask.size(); I++) {
5716 if (Mask[I] == -1)
5717 NewMask[I] = -1;
5718 else
5719 NewMask[I] = Mask[I] % Mask.size();
5720 }
5721
5722 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5723}
5724
5725/// Is this mask local (i.e. elements only move within their local span), and
5726/// repeating (that is, the same rearrangement is being done within each span)?
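/// For example, with Span = 4, <1, 0, 3, 2, 5, 4, 7, 6> is local and
/// repeating, while <1, 0, 3, 2, 4, 5, 6, 7> is local but not repeating.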
5727static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5728 // Require a prefix from the original mask until the consumer code
5729 // is adjusted to rewrite the mask instead of just taking a prefix.
5730 for (auto [I, M] : enumerate(Mask)) {
5731 if (M == -1)
5732 continue;
5733 if ((M / Span) != (int)(I / Span))
5734 return false;
5735 int SpanIdx = I % Span;
5736 int Expected = M % Span;
5737 if (Mask[SpanIdx] != Expected)
5738 return false;
5739 }
5740 return true;
5741}
5742
5743/// Is this mask only using elements from the first span of the input?
5744static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
5745 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
5746}
5747
5748/// Return true for a mask which performs an arbitrary shuffle within the first
5749/// span, and then repeats that same result across all remaining spans. Note
5750/// that this doesn't check if all the inputs come from a single span!
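/// For example, with Span = 4, <2, 0, 3, 1, 2, 0, 3, 1> repeats the first
/// span's result across both spans.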
5751static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
5752 // Require a prefix from the original mask until the consumer code
5753 // is adjusted to rewrite the mask instead of just taking a prefix.
5754 for (auto [I, M] : enumerate(Mask)) {
5755 if (M == -1)
5756 continue;
5757 int SpanIdx = I % Span;
5758 if (Mask[SpanIdx] != M)
5759 return false;
5760 }
5761 return true;
5762}
5763
5764/// Try to widen element type to get a new mask value for a better permutation
5765/// sequence. This doesn't try to inspect the widened mask for profitability;
5766/// we speculate the widened form is equal or better. This has the effect of
5767/// reducing mask constant sizes - allowing cheaper materialization sequences
5768/// - and index sequence sizes - reducing register pressure and materialization
5769/// cost, at the cost of (possibly) an extra VTYPE toggle.
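/// For example, the v8i8 mask <2, 3, 0, 1, 6, 7, 4, 5> widens to the v4i16
/// mask <1, 0, 3, 2>.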
5770static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5771 SDLoc DL(Op);
5772 MVT VT = Op.getSimpleValueType();
5773 MVT ScalarVT = VT.getVectorElementType();
5774 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5775 SDValue V0 = Op.getOperand(0);
5776 SDValue V1 = Op.getOperand(1);
5777 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5778
5779 // Avoid wasted work leading to isTypeLegal check failing below
5780 if (ElementSize > 32)
5781 return SDValue();
5782
5783 SmallVector<int, 8> NewMask;
5784 if (!widenShuffleMaskElts(Mask, NewMask))
5785 return SDValue();
5786
5787 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5788 : MVT::getIntegerVT(ElementSize * 2);
5789 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5790 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5791 return SDValue();
5792 V0 = DAG.getBitcast(NewVT, V0);
5793 V1 = DAG.getBitcast(NewVT, V1);
5794 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5795}
5796
5797static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5798 const RISCVSubtarget &Subtarget) {
5799 SDValue V1 = Op.getOperand(0);
5800 SDValue V2 = Op.getOperand(1);
5801 SDLoc DL(Op);
5802 MVT XLenVT = Subtarget.getXLenVT();
5803 MVT VT = Op.getSimpleValueType();
5804 unsigned NumElts = VT.getVectorNumElements();
5805 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5806
5807 if (VT.getVectorElementType() == MVT::i1) {
5808 // Lower to a vror.vi of a larger element type if possible before we promote
5809 // i1s to i8s.
5810 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5811 return V;
5812 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5813 return V;
5814
5815 // Promote i1 shuffle to i8 shuffle.
5816 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5817 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5818 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5819 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5820 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5821 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5822 ISD::SETNE);
5823 }
5824
5825 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5826
5827 // Store the return value in a single variable instead of structured bindings
5828 // so that we can pass it to GetSlide below, which cannot capture structured
5829 // bindings until C++20.
5830 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5831 auto [TrueMask, VL] = TrueMaskVL;
5832
5833 if (SVN->isSplat()) {
5834 const int Lane = SVN->getSplatIndex();
5835 if (Lane >= 0) {
5836 MVT SVT = VT.getVectorElementType();
5837
5838 // Turn splatted vector load into a strided load with an X0 stride.
5839 SDValue V = V1;
5840 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5841 // with undef.
5842 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5843 int Offset = Lane;
5844 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5845 int OpElements =
5846 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5847 V = V.getOperand(Offset / OpElements);
5848 Offset %= OpElements;
5849 }
5850
5851 // We need to ensure the load isn't atomic or volatile.
5852 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5853 auto *Ld = cast<LoadSDNode>(V);
5854 Offset *= SVT.getStoreSize();
5855 SDValue NewAddr = DAG.getMemBasePlusOffset(
5856 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5857
5858 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5859 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5860 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5861 SDValue IntID =
5862 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5863 SDValue Ops[] = {Ld->getChain(),
5864 IntID,
5865 DAG.getUNDEF(ContainerVT),
5866 NewAddr,
5867 DAG.getRegister(RISCV::X0, XLenVT),
5868 VL};
5869 SDValue NewLoad = DAG.getMemIntrinsicNode(
5870 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5871 DAG.getMachineFunction().getMachineMemOperand(
5872 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5873 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5874 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5875 }
5876
5877 MVT SplatVT = ContainerVT;
5878
5879 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5880 if (SVT == MVT::bf16 ||
5881 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5882 SVT = MVT::i16;
5883 SplatVT = ContainerVT.changeVectorElementType(SVT);
5884 }
5885
5886 // Otherwise use a scalar load and splat. This will give the best
5887 // opportunity to fold a splat into the operation. ISel can turn it into
5888 // the x0 strided load if we aren't able to fold away the select.
5889 if (SVT.isFloatingPoint())
5890 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5891 Ld->getPointerInfo().getWithOffset(Offset),
5892 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
5893 else
5894 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5895 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5896 Ld->getBaseAlign(),
5897 Ld->getMemOperand()->getFlags());
5898 DAG.makeEquivalentMemoryOrdering(Ld, V);
5899
5900 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5901 : RISCVISD::VMV_V_X_VL;
5902 SDValue Splat =
5903 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5904 Splat = DAG.getBitcast(ContainerVT, Splat);
5905 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5906 }
5907
5908 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5909 assert(Lane < (int)NumElts && "Unexpected lane!");
5910 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5911 V1, DAG.getConstant(Lane, DL, XLenVT),
5912 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5913 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5914 }
5915 }
5916
5917 // For exact VLEN m2 or greater, try to split to m1 operations if we
5918 // can split cleanly.
5919 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5920 return V;
5921
5922 ArrayRef<int> Mask = SVN->getMask();
5923
5924 if (SDValue V =
5925 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5926 return V;
5927
5928 if (SDValue V =
5929 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5930 return V;
5931
5932 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5933 // available.
5934 if (Subtarget.hasStdExtZvkb())
5935 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5936 return V;
5937
5938 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
5939 NumElts != 2)
5940 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5941
5942 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5943 // use shift and truncate to perform the shuffle.
5944 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5945 // shift-and-trunc reducing total cost for everything except an mf8 result.
5946 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5947 // to do the entire operation.
5948 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5949 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5950 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5951 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5952 unsigned Index = 0;
5953 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5954 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5955 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
5956 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5957 if (1 < count_if(Mask,
5958 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5959 1 < count_if(Mask, [&Mask](int Idx) {
5960 return Idx >= (int)Mask.size();
5961 })) {
5962 // Narrow each source and concatenate them.
5963 // FIXME: For small LMUL it is better to concatenate first.
5964 MVT EltVT = VT.getVectorElementType();
5965 auto EltCnt = VT.getVectorElementCount();
5966 MVT SubVT =
5967 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
5968
5969 SDValue Lo =
5970 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
5971 SDValue Hi =
5972 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
5973
5974 SDValue Concat =
5975 DAG.getNode(ISD::CONCAT_VECTORS, DL,
5976 SubVT.getDoubleNumVectorElementsVT(), Lo, Hi);
5977 if (Factor == 2)
5978 return Concat;
5979
5980 SDValue Vec = DAG.getUNDEF(VT);
5981 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
5982 }
5983 }
5984 }
5985 }
5986
5987 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
5988 // e64 which can't match above.
5989 unsigned Index = 0;
5990 if (Subtarget.hasVendorXRivosVizip() &&
5991 ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
5992 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5993 unsigned Opc =
5994 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
5995 if (V2.isUndef())
5996 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
5997 if (auto VLEN = Subtarget.getRealVLen();
5998 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
5999 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6000 if (SDValue Src = foldConcatVector(V1, V2)) {
6001 EVT NewVT = VT.getDoubleNumVectorElementsVT();
6002 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
6003 SDValue Res =
6004 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
6005 return DAG.getExtractSubvector(DL, VT, Res, 0);
6006 }
6007 // Deinterleave each source and concatenate them, or concat first, then
6008 // deinterleave.
6009 if (1 < count_if(Mask,
6010 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6011 1 < count_if(Mask,
6012 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
6013
6014 const unsigned EltSize = VT.getScalarSizeInBits();
6015 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
6016 if (NumElts < MinVLMAX) {
6017 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
6018 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
6019 SDValue Res =
6020 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
6021 return DAG.getExtractSubvector(DL, VT, Res, 0);
6022 }
6023
6024 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6025 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6026
6027 MVT SubVT = VT.getHalfNumVectorElementsVT();
6028 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
6029 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
6030 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
6031 }
6032 }
6033
6034 if (SDValue V =
6035 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
6036 return V;
6037
6038 // Detect an interleave shuffle and lower to
6039 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
6040 int EvenSrc, OddSrc;
6041 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
6042 !(NumElts == 2 &&
6043 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
6044 // Extract the halves of the vectors.
6045 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6046
6047 // Recognize if one half is actually undef; the matching above will
6048 // otherwise reuse the even stream for the undef one. This improves
6049 // spread(2) shuffles.
6050 bool LaneIsUndef[2] = { true, true};
6051 for (const auto &[Idx, M] : enumerate(Mask))
6052 LaneIsUndef[Idx % 2] &= (M == -1);
6053
6054 int Size = Mask.size();
6055 SDValue EvenV, OddV;
6056 if (LaneIsUndef[0]) {
6057 EvenV = DAG.getUNDEF(HalfVT);
6058 } else {
6059 assert(EvenSrc >= 0 && "Undef source?");
6060 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
6061 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
6062 }
6063
6064 if (LaneIsUndef[1]) {
6065 OddV = DAG.getUNDEF(HalfVT);
6066 } else {
6067 assert(OddSrc >= 0 && "Undef source?");
6068 OddV = (OddSrc / Size) == 0 ? V1 : V2;
6069 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
6070 }
6071
6072 // Prefer vzip2a if available.
6073 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
6074 if (Subtarget.hasVendorXRivosVizip()) {
6075 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6076 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6077 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6078 }
6079 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6080 }
6081
6082 // Recognize a pattern which can be handled via a pair of vslideup/vslidedown
6083 // instructions (in any combination) with masking on the second instruction.
6084 // Also handles masked slides into an identity source, and single slides
6085 // without masking. Avoid matching bit rotates (which are not also element
6086 // rotates) as slide pairs. This is a performance heuristic, not a
6087 // functional check.
6088 std::array<std::pair<int, int>, 2> SrcInfo;
6089 unsigned RotateAmt;
6090 MVT RotateVT;
6091 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6092 (isElementRotate(SrcInfo, NumElts) ||
6093 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6094 SDValue Sources[2];
6095 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6096 int SrcIdx = Info.first;
6097 assert(SrcIdx == 0 || SrcIdx == 1);
6098 SDValue &Src = Sources[SrcIdx];
6099 if (!Src) {
6100 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6101 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6102 }
6103 return Src;
6104 };
6105 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6106 SDValue Passthru) {
6107 auto [TrueMask, VL] = TrueMaskVL;
6108 SDValue SrcV = GetSourceFor(Src);
6109 int SlideAmt = Src.second;
6110 if (SlideAmt == 0) {
6111 // Should never be second operation
6112 assert(Mask == TrueMask);
6113 return SrcV;
6114 }
6115 if (SlideAmt < 0)
6116 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6117 DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6118 RISCVVType::TAIL_AGNOSTIC);
6119 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6120 DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6121 RISCVVType::TAIL_AGNOSTIC);
6122 };
6123
6124 if (SrcInfo[1].first == -1) {
6125 SDValue Res = DAG.getUNDEF(ContainerVT);
6126 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6127 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6128 }
6129
6130 if (Subtarget.hasVendorXRivosVizip()) {
6131 bool TryWiden = false;
6132 unsigned Factor;
6133 if (isZipEven(SrcInfo, Mask, Factor)) {
6134 if (Factor == 1) {
6135 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6136 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6137 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6138 Subtarget);
6139 }
6140 TryWiden = true;
6141 }
6142 if (isZipOdd(SrcInfo, Mask, Factor)) {
6143 if (Factor == 1) {
6144 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6145 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6146 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6147 Subtarget);
6148 }
6149 TryWiden = true;
6150 }
6151 // If we found a widening opportunity which would let us form a
6152 // zipeven or zipodd, use the generic code to widen the shuffle
6153 // and recurse through this logic.
6154 if (TryWiden)
6155 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6156 return V;
6157 }
6158
6159 // Build the mask. Note that vslideup unconditionally preserves elements
6160 // below the slide amount in the destination, and thus those elements are
6161 // undefined in the mask. If the mask ends up all true (or undef), it
6162 // will be folded away by general logic.
6163 SmallVector<SDValue> MaskVals;
6164 for (const auto &[Idx, M] : enumerate(Mask)) {
6165 if (M < 0 ||
6166 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6167 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6168 continue;
6169 }
6170 int Src = M >= (int)NumElts;
6171 int Diff = (int)Idx - (M % NumElts);
6172 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6173 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6174 "Must match exactly one of the two slides");
6175 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6176 }
6177 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6178 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6179 SDValue SelectMask = convertToScalableVector(
6180 ContainerVT.changeVectorElementType(MVT::i1),
6181 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6182
6183 SDValue Res = DAG.getUNDEF(ContainerVT);
6184 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6185 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6186 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6187 }
6188
6189 // Handle any remaining single source shuffles
6190 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6191 if (V2.isUndef()) {
6192 // We might be able to express the shuffle as a bitrotate. But even if we
6193 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6194 // shifts and a vor will have a higher throughput than a vrgather.
6195 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6196 return V;
6197
6198 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6199 return V;
6200
6201 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6202 // is fully covered in interleave(2) above, so it is ignored here.
6203 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6204 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6205 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6206 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6207 unsigned Index;
6208 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6209 MVT NarrowVT =
6210 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6211 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6212 return getWideningSpread(Src, Factor, Index, DL, DAG);
6213 }
6214 }
6215 }
6216
6217 // If only a prefix of the source elements influences a prefix of the
6218 // destination elements, try to see if we can reduce the required LMUL
6219 unsigned MinVLen = Subtarget.getRealMinVLen();
6220 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6221 if (NumElts > MinVLMAX) {
6222 unsigned MaxIdx = 0;
6223 for (auto [I, M] : enumerate(Mask)) {
6224 if (M == -1)
6225 continue;
6226 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6227 }
6228 unsigned NewNumElts =
6229 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6230 if (NewNumElts != NumElts) {
6231 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6232 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6233 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6234 Mask.take_front(NewNumElts));
6235 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6236 }
6237 }
6238
6239 // Before hitting generic lowering fallbacks, try to widen the mask
6240 // to a wider SEW.
6241 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6242 return V;
6243
6244 // Can we generate a vcompress instead of a vrgather? These scale better
6245 // at high LMUL, at the cost of not being able to fold a following select
6246 // into them. The mask constants are also smaller than the index vector
6247 // constants, and thus easier to materialize.
6248 if (isCompressMask(Mask)) {
6249 SmallVector<SDValue> MaskVals(NumElts,
6250 DAG.getConstant(false, DL, XLenVT));
6251 for (auto Idx : Mask) {
6252 if (Idx == -1)
6253 break;
6254 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6255 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6256 }
6257 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6258 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6259 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6260 DAG.getUNDEF(VT));
6261 }
6262
6263 if (VT.getScalarSizeInBits() == 8 &&
6264 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6265 // On such a vector we're unable to use i8 as the index type.
6266 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6267 // may involve vector splitting if we're already at LMUL=8, or our
6268 // user-supplied maximum fixed-length LMUL.
6269 return SDValue();
6270 }
6271
6272 // Base case for the two operand recursion below - handle the worst case
6273 // single source shuffle.
6274 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6275 MVT IndexVT = VT.changeTypeToInteger();
6276 // Since we can't introduce illegal index types at this stage, use i16 and
6277 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6278 // than XLenVT.
6279 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6280 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6281 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6282 }
6283
6284 // If the mask allows, we can do all the index computation in 16 bits. This
6285 // requires less work and less register pressure at high LMUL, and creates
6286 // smaller constants which may be cheaper to materialize.
6287 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6288 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6289 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6290 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6291 }
6292
6293 MVT IndexContainerVT =
6294 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6295
6296 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6297 SmallVector<SDValue> GatherIndicesLHS;
6298 for (int MaskIndex : Mask) {
6299 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6300 GatherIndicesLHS.push_back(IsLHSIndex
6301 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6302 : DAG.getUNDEF(XLenVT));
6303 }
6304 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6305 LHSIndices =
6306 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6307 // At m1 and less, there's no point trying any of the high LMUL splitting
6308 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6309 if (NumElts <= MinVLMAX) {
6310 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6311 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6312 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6313 }
6314
6315 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6316 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6317 auto [InnerTrueMask, InnerVL] =
6318 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6319 int N =
6320 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6321 assert(isPowerOf2_32(N) && N <= 8);
6322
6323 // If we have a locally repeating mask, then we can reuse the first
6324 // register in the index register group for all registers within the
6325 // source register group. TODO: This generalizes to m2, and m4.
6326 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6327 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6328 SDValue Gather = DAG.getUNDEF(ContainerVT);
6329 for (int i = 0; i < N; i++) {
6330 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6331 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6332 SDValue SubVec =
6333 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6334 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6335 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6336 }
6337 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6338 }
6339
6340 // If we have a shuffle which only uses the first register in our source
6341 // register group, and repeats the same index across all spans, we can
6342 // use a single vrgather (and possibly some register moves).
6343 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6344 // which we can do a linear number of shuffles to form an m1 which
6345 // contains all the output elements.
6346 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6347 isSpanSplatShuffle(Mask, MinVLMAX)) {
6348 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6349 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6350 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6351 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6352 SDValue Gather = DAG.getUNDEF(ContainerVT);
6353 for (int i = 0; i < N; i++)
6354 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6355 M1VT.getVectorMinNumElements() * i);
6356 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6357 }
6358
6359 // If we have a shuffle which only uses the first register in our
6360 // source register group, we can do a linear number of m1 vrgathers
6361 // reusing the same source register (but with different indices)
6362 // TODO: This can be generalized for m2 or m4, or for any shuffle
6363 // for which we can do a vslidedown followed by this expansion.
6364 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6365 SDValue SlideAmt =
6366 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6367 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6368 SDValue Gather = DAG.getUNDEF(ContainerVT);
6369 for (int i = 0; i < N; i++) {
6370 if (i != 0)
6371 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6372 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6373 SlideAmt, TrueMask, VL);
6374 SDValue SubIndex =
6375 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6376 SDValue SubVec =
6377 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6378 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6379 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6380 M1VT.getVectorMinNumElements() * i);
6381 }
6382 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6383 }
6384
6385 // Fallback to generic vrgather if we can't find anything better.
6386 // On many machines, this will be O(LMUL^2)
6387 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6388 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6389 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6390 }
6391
6392 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6393 // merged with a second vrgather.
6394 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6395
6396 // Now construct the mask that will be used by the blended vrgather operation.
6397 // Construct the appropriate indices into each vector.
6398 for (int MaskIndex : Mask) {
6399 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6400 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6401 ? MaskIndex : -1);
6402 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6403 }
6404
6405 // If the mask indices are disjoint between the two sources, we can lower it
6406 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6407 // operands may end up being lowered to something cheaper than a vrgather.vv.
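// Rough illustration: for NumElts = 4 and mask <2,5,0,7>, the shuffle reads
// elements {0,2} of V1 and elements {1,3} of V2. Because those index sets do
// not overlap, a vselect can combine the needed elements into one register,
// and a single vrgather.vv with indices <2,1,0,3> then produces the result.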
6408 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6409 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6410 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6411 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6412 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6413 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6414 return V;
6415
6416 // Before hitting generic lowering fallbacks, try to widen the mask
6417 // to a wider SEW.
6418 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6419 return V;
6420
6421 // Try to pick a profitable operand order.
6422 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6423 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6424
6425 // Recursively invoke lowering for each operand if we had two
6426 // independent single source shuffles, and then combine the result via a
6427 // vselect. Note that the vselect will likely be folded back into the
6428 // second permute (vrgather, or other) by the post-isel combine.
6429 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6430 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6431
6432 SmallVector<SDValue> MaskVals;
6433 for (int MaskIndex : Mask) {
6434 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6435 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6436 }
6437
6438 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6439 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6440 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6441
6442 if (SwapOps)
6443 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6444 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6445}
6446
6447 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6448 // Only support legal VTs for other shuffles for now.
6449 if (!isTypeLegal(VT))
6450 return false;
6451
6452 // Support splats for any type. These should type legalize well.
6453 if (ShuffleVectorSDNode::isSplatMask(M))
6454 return true;
6455
6456 const unsigned NumElts = M.size();
6457 MVT SVT = VT.getSimpleVT();
6458
6459 // Not for i1 vectors.
6460 if (SVT.getScalarType() == MVT::i1)
6461 return false;
6462
6463 std::array<std::pair<int, int>, 2> SrcInfo;
6464 int Dummy1, Dummy2;
6465 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6466 (::isMaskedSlidePair(M, SrcInfo) &&
6467 isElementRotate(SrcInfo, NumElts)) ||
6468 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6469}
6470
6471// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
6472// the exponent.
6473SDValue
6474RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
6475 SelectionDAG &DAG) const {
6476 MVT VT = Op.getSimpleValueType();
6477 unsigned EltSize = VT.getScalarSizeInBits();
6478 SDValue Src = Op.getOperand(0);
6479 SDLoc DL(Op);
6480 MVT ContainerVT = VT;
6481
6482 SDValue Mask, VL;
6483 if (Op->isVPOpcode()) {
6484 Mask = Op.getOperand(1);
6485 if (VT.isFixedLengthVector())
6486 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6487 Subtarget);
6488 VL = Op.getOperand(2);
6489 }
6490
6491 // We choose an FP type that can represent the value exactly if possible.
6492 // Otherwise, we use a round-towards-zero conversion so the result's exponent is correct.
6493 // TODO: Use f16 for i8 when possible?
6494 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
6495 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
6496 FloatEltVT = MVT::f32;
6497 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
6498
6499 // Legal types should have been checked in the RISCVTargetLowering
6500 // constructor.
6501 // TODO: Splitting may make sense in some cases.
6502 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
6503 "Expected legal float type!");
6504
6505 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
6506 // The trailing zero count is equal to log2 of this single bit value.
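// For example, x = 0b101000 gives -x = ...11011000, so x & -x = 0b001000 = 2^3;
// reading the exponent of 8.0 after the conversion recovers the 3 trailing
// zeros.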
6507 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
6508 SDValue Neg = DAG.getNegative(Src, DL, VT);
6509 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
6510 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
6511 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
6512 Src, Mask, VL);
6513 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
6514 }
6515
6516 // We have a legal FP type, convert to it.
6517 SDValue FloatVal;
6518 if (FloatVT.bitsGT(VT)) {
6519 if (Op->isVPOpcode())
6520 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
6521 else
6522 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
6523 } else {
6524 // Use RTZ to avoid rounding influencing exponent of FloatVal.
6525 if (VT.isFixedLengthVector()) {
6526 ContainerVT = getContainerForFixedLengthVector(VT);
6527 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6528 }
6529 if (!Op->isVPOpcode())
6530 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6531 SDValue RTZRM =
6532 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
6533 MVT ContainerFloatVT =
6534 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
6535 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
6536 Src, Mask, RTZRM, VL);
6537 if (VT.isFixedLengthVector())
6538 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
6539 }
6540 // Bitcast to integer and shift the exponent to the LSB.
6541 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
6542 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
6543 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
6544
6545 SDValue Exp;
6546 // Restore to the original type. The truncate after the SRL is there so we generate vnsrl.
6547 if (Op->isVPOpcode()) {
6548 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
6549 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
6550 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
6551 } else {
6552 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
6553 DAG.getConstant(ShiftAmt, DL, IntVT));
6554 if (IntVT.bitsLT(VT))
6555 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
6556 else if (IntVT.bitsGT(VT))
6557 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
6558 }
6559
6560 // The exponent contains log2 of the value in biased form.
6561 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
6562 // For trailing zeros, we just need to subtract the bias.
6563 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
6564 return DAG.getNode(ISD::SUB, DL, VT, Exp,
6565 DAG.getConstant(ExponentBias, DL, VT));
6566 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
6567 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
6568 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
6569
6570 // For leading zeros, we need to remove the bias and convert from log2 to
6571 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
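// Worked example for a 32-bit element: x = 0x00010000 converts to 65536.0f
// (encoding 0x47800000) with biased exponent 143, and (127 + 31) - 143 = 15,
// matching the 15 leading zeros of x.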
6572 unsigned Adjust = ExponentBias + (EltSize - 1);
6573 SDValue Res;
6574 if (Op->isVPOpcode())
6575 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
6576 Mask, VL);
6577 else
6578 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
6579
6580 // For a zero input, the result above equals Adjust, which is greater than
6581 // EltSize. Hence, we can clamp the result with min(Res, EltSize) for CTLZ.
6582 if (Op.getOpcode() == ISD::CTLZ)
6583 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
6584 else if (Op.getOpcode() == ISD::VP_CTLZ)
6585 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
6586 DAG.getConstant(EltSize, DL, VT), Mask, VL);
6587 return Res;
6588}
6589
6590SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6591 SelectionDAG &DAG) const {
6592 SDLoc DL(Op);
6593 MVT XLenVT = Subtarget.getXLenVT();
6594 SDValue Source = Op->getOperand(0);
6595 MVT SrcVT = Source.getSimpleValueType();
6596 SDValue Mask = Op->getOperand(1);
6597 SDValue EVL = Op->getOperand(2);
6598
6599 if (SrcVT.isFixedLengthVector()) {
6600 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
6601 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
6602 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6603 Subtarget);
6604 SrcVT = ContainerVT;
6605 }
6606
6607 // Convert to boolean vector.
6608 if (SrcVT.getScalarType() != MVT::i1) {
6609 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
6610 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
6611 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
6612 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
6613 DAG.getUNDEF(SrcVT), Mask, EVL});
6614 }
6615
6616 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
6617 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6618 // In this case, we can interpret poison as -1, so there is nothing further to do.
6619 return Res;
6620
6621 // Convert -1 to VL.
6622 SDValue SetCC =
6623 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
6624 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
6625 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
6626}
6627
6628// While RVV has alignment restrictions, we should always be able to load as a
6629// legal equivalently-sized byte-typed vector instead. This method is
6630 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
6631// the load is already correctly-aligned, it returns SDValue().
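// Illustrative case: an under-aligned load of <vscale x 2 x i32> is
// re-expressed as a <vscale x 8 x i8> load of the same size in bytes, followed
// by a bitcast back to the original type.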
6632SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6633 SelectionDAG &DAG) const {
6634 auto *Load = cast<LoadSDNode>(Op);
6635 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6636
6637 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6638 Load->getMemoryVT(),
6639 *Load->getMemOperand()))
6640 return SDValue();
6641
6642 SDLoc DL(Op);
6643 MVT VT = Op.getSimpleValueType();
6644 unsigned EltSizeBits = VT.getScalarSizeInBits();
6645 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6646 "Unexpected unaligned RVV load type");
6647 MVT NewVT =
6648 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6649 assert(NewVT.isValid() &&
6650 "Expecting equally-sized RVV vector types to be legal");
6651 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6652 Load->getPointerInfo(), Load->getBaseAlign(),
6653 Load->getMemOperand()->getFlags());
6654 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6655}
6656
6657// While RVV has alignment restrictions, we should always be able to store as a
6658// legal equivalently-sized byte-typed vector instead. This method is
6659 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
6660// returns SDValue() if the store is already correctly aligned.
6661SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6662 SelectionDAG &DAG) const {
6663 auto *Store = cast<StoreSDNode>(Op);
6664 assert(Store && Store->getValue().getValueType().isVector() &&
6665 "Expected vector store");
6666
6667 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6668 Store->getMemoryVT(),
6669 *Store->getMemOperand()))
6670 return SDValue();
6671
6672 SDLoc DL(Op);
6673 SDValue StoredVal = Store->getValue();
6674 MVT VT = StoredVal.getSimpleValueType();
6675 unsigned EltSizeBits = VT.getScalarSizeInBits();
6676 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6677 "Unexpected unaligned RVV store type");
6678 MVT NewVT =
6679 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6680 assert(NewVT.isValid() &&
6681 "Expecting equally-sized RVV vector types to be legal");
6682 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6683 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
6684 Store->getPointerInfo(), Store->getBaseAlign(),
6685 Store->getMemOperand()->getFlags());
6686}
6687
6688 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
6689 const RISCVSubtarget &Subtarget) {
6690 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
6691
6692 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
6693
6694 // All simm32 constants should be handled by isel.
6695 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2, making
6696 // this check redundant, but small immediates are common, so checking them
6697 // here first is better for compile time.
6698 if (isInt<32>(Imm))
6699 return Op;
6700
6701 // We only need to cost the immediate, if constant pool lowering is enabled.
6702 if (!Subtarget.useConstantPoolForLargeInts())
6703 return Op;
6704
6705 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
6706 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6707 return Op;
6708
6709 // Optimizations below are disabled for opt size. If we're optimizing for
6710 // size, use a constant pool.
6711 if (DAG.shouldOptForSize())
6712 return SDValue();
6713
6714 // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
6715 // do that if it will avoid a constant pool.
6716 // It will require an extra temporary register though.
6717 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6718 // low and high 32 bits are the same and bit 31 and 63 are set.
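// For example, 0x8000000080000000 has identical low/high words and bits 31 and
// 63 set: X = 0xFFFFFFFF80000000 is a single lui, and (ADD_UW X, (SLLI X, 32))
// adds the zero-extended low word of X to 0x8000000000000000, rebuilding the
// constant in three instructions.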
6719 unsigned ShiftAmt, AddOpc;
6720 RISCVMatInt::InstSeq SeqLo =
6721 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6722 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6723 return Op;
6724
6725 return SDValue();
6726}
6727
6728SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6729 SelectionDAG &DAG) const {
6730 MVT VT = Op.getSimpleValueType();
6731 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6732
6733 // Can this constant be selected by a Zfa FLI instruction?
6734 bool Negate = false;
6735 int Index = getLegalZfaFPImm(Imm, VT);
6736
6737 // If the constant is negative, try negating.
6738 if (Index < 0 && Imm.isNegative()) {
6739 Index = getLegalZfaFPImm(-Imm, VT);
6740 Negate = true;
6741 }
6742
6743 // If we couldn't find a FLI lowering, fall back to generic code.
6744 if (Index < 0)
6745 return SDValue();
6746
6747 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6748 SDLoc DL(Op);
6749 SDValue Const =
6750 DAG.getNode(RISCVISD::FLI, DL, VT,
6751 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6752 if (!Negate)
6753 return Const;
6754
6755 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6756}
6757
6758 static SDValue LowerPREFETCH(SDValue Op, const RISCVSubtarget &Subtarget,
6759 SelectionDAG &DAG) {
6760
6761 unsigned IsData = Op.getConstantOperandVal(4);
6762
6763 // On mips-p8700 (XMIPSCBOP) we only support data prefetch for now; drop instruction prefetches.
6764 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
6765 return Op.getOperand(0);
6766 return Op;
6767}
6768
6769 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6770 const RISCVSubtarget &Subtarget) {
6771 SDLoc dl(Op);
6772 AtomicOrdering FenceOrdering =
6773 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6774 SyncScope::ID FenceSSID =
6775 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6776
6777 if (Subtarget.hasStdExtZtso()) {
6778 // The only fence that needs an instruction is a sequentially-consistent
6779 // cross-thread fence.
6780 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6781 FenceSSID == SyncScope::System)
6782 return Op;
6783
6784 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6785 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6786 }
6787
6788 // singlethread fences only synchronize with signal handlers on the same
6789 // thread and thus only need to preserve instruction order, not actually
6790 // enforce memory ordering.
6791 if (FenceSSID == SyncScope::SingleThread)
6792 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6793 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6794
6795 return Op;
6796}
6797
6798SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6799 SelectionDAG &DAG) const {
6800 SDLoc DL(Op);
6801 MVT VT = Op.getSimpleValueType();
6802 MVT XLenVT = Subtarget.getXLenVT();
6803 unsigned Check = Op.getConstantOperandVal(1);
6804 unsigned TDCMask = 0;
6805 if (Check & fcSNan)
6806 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6807 if (Check & fcQNan)
6808 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6809 if (Check & fcPosInf)
6810 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6811 if (Check & fcNegInf)
6812 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6813 if (Check & fcPosNormal)
6814 TDCMask |= RISCV::FPMASK_Positive_Normal;
6815 if (Check & fcNegNormal)
6816 TDCMask |= RISCV::FPMASK_Negative_Normal;
6817 if (Check & fcPosSubnormal)
6818 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6819 if (Check & fcNegSubnormal)
6820 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6821 if (Check & fcPosZero)
6822 TDCMask |= RISCV::FPMASK_Positive_Zero;
6823 if (Check & fcNegZero)
6824 TDCMask |= RISCV::FPMASK_Negative_Zero;
6825
6826 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6827
6828 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6829
6830 if (VT.isVector()) {
6831 SDValue Op0 = Op.getOperand(0);
6832 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6833
6834 if (VT.isScalableVector()) {
6835 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6836 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6837 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6838 Mask = Op.getOperand(2);
6839 VL = Op.getOperand(3);
6840 }
6841 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6842 VL, Op->getFlags());
6843 if (IsOneBitMask)
6844 return DAG.getSetCC(DL, VT, FPCLASS,
6845 DAG.getConstant(TDCMask, DL, DstVT),
6846 ISD::SETEQ);
6847 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6848 DAG.getConstant(TDCMask, DL, DstVT));
6849 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6850 ISD::SETNE);
6851 }
6852
6853 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6854 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6855 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6856 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6857 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6858 Mask = Op.getOperand(2);
6859 MVT MaskContainerVT =
6860 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6861 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6862 VL = Op.getOperand(3);
6863 }
6864 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6865
6866 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6867 Mask, VL, Op->getFlags());
6868
6869 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6870 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6871 if (IsOneBitMask) {
6872 SDValue VMSEQ =
6873 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6874 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6875 DAG.getUNDEF(ContainerVT), Mask, VL});
6876 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6877 }
6878 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6879 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6880
6881 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6882 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6883 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6884
6885 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6886 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6887 DAG.getUNDEF(ContainerVT), Mask, VL});
6888 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6889 }
6890
6891 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6892 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6893 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6894 ISD::SETNE);
6895 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6896}
6897
6898// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6899// operations propagate nans.
6900 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6901 const RISCVSubtarget &Subtarget) {
6902 SDLoc DL(Op);
6903 MVT VT = Op.getSimpleValueType();
6904
6905 SDValue X = Op.getOperand(0);
6906 SDValue Y = Op.getOperand(1);
6907
6908 if (!VT.isVector()) {
6909 MVT XLenVT = Subtarget.getXLenVT();
6910
6911 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6912 // ensures that when one input is a nan, the other will also be a nan,
6913 // allowing the nan to propagate. If both inputs are nan, this will swap the
6914 // inputs, which is harmless.
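// Concrete trace: if X is NaN and Y = 1.0, XIsNonNan is false, so NewY becomes
// X (NaN) while NewX stays NaN; the following fmax/fmin of two NaNs returns the
// canonical NaN, which is what fmaximum/fminimum require.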
6915
6916 SDValue NewY = Y;
6917 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6918 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6919 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6920 }
6921
6922 SDValue NewX = X;
6923 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6924 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6925 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6926 }
6927
6928 unsigned Opc =
6929 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6930 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6931 }
6932
6933 // Check for no NaNs before converting the fixed-length vectors to scalable ones.
6934 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6935 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6936
6937 MVT ContainerVT = VT;
6938 if (VT.isFixedLengthVector()) {
6939 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6940 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6941 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6942 }
6943
6944 SDValue Mask, VL;
6945 if (Op->isVPOpcode()) {
6946 Mask = Op.getOperand(2);
6947 if (VT.isFixedLengthVector())
6948 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6949 Subtarget);
6950 VL = Op.getOperand(3);
6951 } else {
6952 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6953 }
6954
6955 SDValue NewY = Y;
6956 if (!XIsNeverNan) {
6957 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6958 {X, X, DAG.getCondCode(ISD::SETOEQ),
6959 DAG.getUNDEF(ContainerVT), Mask, VL});
6960 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6961 DAG.getUNDEF(ContainerVT), VL);
6962 }
6963
6964 SDValue NewX = X;
6965 if (!YIsNeverNan) {
6966 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6967 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6968 DAG.getUNDEF(ContainerVT), Mask, VL});
6969 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6970 DAG.getUNDEF(ContainerVT), VL);
6971 }
6972
6973 unsigned Opc =
6974 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6975 ? RISCVISD::VFMAX_VL
6976 : RISCVISD::VFMIN_VL;
6977 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6978 DAG.getUNDEF(ContainerVT), Mask, VL);
6979 if (VT.isFixedLengthVector())
6980 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6981 return Res;
6982}
6983
6984 static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6985 const RISCVSubtarget &Subtarget) {
6986 bool IsFABS = Op.getOpcode() == ISD::FABS;
6987 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6988 "Wrong opcode for lowering FABS or FNEG.");
6989
6990 MVT XLenVT = Subtarget.getXLenVT();
6991 MVT VT = Op.getSimpleValueType();
6992 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6993
6994 SDLoc DL(Op);
6995 SDValue Fmv =
6996 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6997
6998 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6999 Mask = Mask.sext(Subtarget.getXLen());
7000
7001 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
7002 SDValue Logic =
7003 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
7004 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
7005}
7006
7007 static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
7008 const RISCVSubtarget &Subtarget) {
7009 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
7010
7011 MVT XLenVT = Subtarget.getXLenVT();
7012 MVT VT = Op.getSimpleValueType();
7013 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7014
7015 SDValue Mag = Op.getOperand(0);
7016 SDValue Sign = Op.getOperand(1);
7017
7018 SDLoc DL(Op);
7019
7020 // Get sign bit into an integer value.
7021 unsigned SignSize = Sign.getValueSizeInBits();
7022 SDValue SignAsInt = [&]() {
7023 if (SignSize == Subtarget.getXLen())
7024 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
7025 switch (SignSize) {
7026 case 16:
7027 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
7028 case 32:
7029 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
7030 case 64: {
7031 assert(XLenVT == MVT::i32 && "Unexpected type");
7032 // Copy the upper word to integer.
7033 SignSize = 32;
7034 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
7035 .getValue(1);
7036 }
7037 default:
7038 llvm_unreachable("Unexpected sign size");
7039 }
7040 }();
7041
7042 // Get the signbit at the right position for MagAsInt.
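// For instance, on RV32 copying an f64 sign into an f16: the sign sits in bit
// 31 of the extracted upper word (SignSize == 32) while the f16 sign bit is
// bit 15, so we shift right by 32 - 16 = 16.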
7043 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
7044 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
7045 SignAsInt,
7046 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
7047
7048 // Mask the sign bit and any bits above it. The extra bits will be dropped
7049 // when we convert back to FP.
7050 SDValue SignMask = DAG.getConstant(
7051 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
7052 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
7053
7054 // Transform Mag value to integer, and clear the sign bit.
7055 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
7056 SDValue ClearSignMask = DAG.getConstant(
7057 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
7058 SDValue ClearedSign =
7059 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
7060
7061 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
7062 SDNodeFlags::Disjoint);
7063
7064 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
7065}
7066
7067/// Get a RISC-V target specified VL op for a given SDNode.
7068static unsigned getRISCVVLOp(SDValue Op) {
7069#define OP_CASE(NODE) \
7070 case ISD::NODE: \
7071 return RISCVISD::NODE##_VL;
7072#define VP_CASE(NODE) \
7073 case ISD::VP_##NODE: \
7074 return RISCVISD::NODE##_VL;
7075 // clang-format off
7076 switch (Op.getOpcode()) {
7077 default:
7078 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7079 OP_CASE(ADD)
7080 OP_CASE(SUB)
7081 OP_CASE(MUL)
7082 OP_CASE(MULHS)
7083 OP_CASE(MULHU)
7084 OP_CASE(SDIV)
7085 OP_CASE(SREM)
7086 OP_CASE(UDIV)
7087 OP_CASE(UREM)
7088 OP_CASE(SHL)
7089 OP_CASE(SRA)
7090 OP_CASE(SRL)
7091 OP_CASE(ROTL)
7092 OP_CASE(ROTR)
7093 OP_CASE(BSWAP)
7094 OP_CASE(CTTZ)
7095 OP_CASE(CTLZ)
7096 OP_CASE(CTPOP)
7097 OP_CASE(BITREVERSE)
7098 OP_CASE(SADDSAT)
7099 OP_CASE(UADDSAT)
7100 OP_CASE(SSUBSAT)
7101 OP_CASE(USUBSAT)
7102 OP_CASE(AVGFLOORS)
7103 OP_CASE(AVGFLOORU)
7104 OP_CASE(AVGCEILS)
7105 OP_CASE(AVGCEILU)
7106 OP_CASE(FADD)
7107 OP_CASE(FSUB)
7108 OP_CASE(FMUL)
7109 OP_CASE(FDIV)
7110 OP_CASE(FNEG)
7111 OP_CASE(FABS)
7112 OP_CASE(FCOPYSIGN)
7113 OP_CASE(FSQRT)
7114 OP_CASE(SMIN)
7115 OP_CASE(SMAX)
7116 OP_CASE(UMIN)
7117 OP_CASE(UMAX)
7118 OP_CASE(STRICT_FADD)
7119 OP_CASE(STRICT_FSUB)
7120 OP_CASE(STRICT_FMUL)
7121 OP_CASE(STRICT_FDIV)
7122 OP_CASE(STRICT_FSQRT)
7123 VP_CASE(ADD) // VP_ADD
7124 VP_CASE(SUB) // VP_SUB
7125 VP_CASE(MUL) // VP_MUL
7126 VP_CASE(SDIV) // VP_SDIV
7127 VP_CASE(SREM) // VP_SREM
7128 VP_CASE(UDIV) // VP_UDIV
7129 VP_CASE(UREM) // VP_UREM
7130 VP_CASE(SHL) // VP_SHL
7131 VP_CASE(FADD) // VP_FADD
7132 VP_CASE(FSUB) // VP_FSUB
7133 VP_CASE(FMUL) // VP_FMUL
7134 VP_CASE(FDIV) // VP_FDIV
7135 VP_CASE(FNEG) // VP_FNEG
7136 VP_CASE(FABS) // VP_FABS
7137 VP_CASE(SMIN) // VP_SMIN
7138 VP_CASE(SMAX) // VP_SMAX
7139 VP_CASE(UMIN) // VP_UMIN
7140 VP_CASE(UMAX) // VP_UMAX
7141 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7142 VP_CASE(SETCC) // VP_SETCC
7143 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7144 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7145 VP_CASE(BITREVERSE) // VP_BITREVERSE
7146 VP_CASE(SADDSAT) // VP_SADDSAT
7147 VP_CASE(UADDSAT) // VP_UADDSAT
7148 VP_CASE(SSUBSAT) // VP_SSUBSAT
7149 VP_CASE(USUBSAT) // VP_USUBSAT
7150 VP_CASE(BSWAP) // VP_BSWAP
7151 VP_CASE(CTLZ) // VP_CTLZ
7152 VP_CASE(CTTZ) // VP_CTTZ
7153 VP_CASE(CTPOP) // VP_CTPOP
7154 case ISD::CTLZ_ZERO_UNDEF:
7155 case ISD::VP_CTLZ_ZERO_UNDEF:
7156 return RISCVISD::CTLZ_VL;
7157 case ISD::CTTZ_ZERO_UNDEF:
7158 case ISD::VP_CTTZ_ZERO_UNDEF:
7159 return RISCVISD::CTTZ_VL;
7160 case ISD::FMA:
7161 case ISD::VP_FMA:
7162 return RISCVISD::VFMADD_VL;
7163 case ISD::STRICT_FMA:
7164 return RISCVISD::STRICT_VFMADD_VL;
7165 case ISD::AND:
7166 case ISD::VP_AND:
7167 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7168 return RISCVISD::VMAND_VL;
7169 return RISCVISD::AND_VL;
7170 case ISD::OR:
7171 case ISD::VP_OR:
7172 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7173 return RISCVISD::VMOR_VL;
7174 return RISCVISD::OR_VL;
7175 case ISD::XOR:
7176 case ISD::VP_XOR:
7177 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7178 return RISCVISD::VMXOR_VL;
7179 return RISCVISD::XOR_VL;
7180 case ISD::ANY_EXTEND:
7181 case ISD::ZERO_EXTEND:
7182 return RISCVISD::VZEXT_VL;
7183 case ISD::SIGN_EXTEND:
7184 return RISCVISD::VSEXT_VL;
7185 case ISD::SETCC:
7186 return RISCVISD::SETCC_VL;
7187 case ISD::VSELECT:
7188 return RISCVISD::VMERGE_VL;
7189 case ISD::VP_SELECT:
7190 case ISD::VP_MERGE:
7191 return RISCVISD::VMERGE_VL;
7192 case ISD::VP_SRA:
7193 return RISCVISD::SRA_VL;
7194 case ISD::VP_SRL:
7195 return RISCVISD::SRL_VL;
7196 case ISD::VP_SQRT:
7197 return RISCVISD::FSQRT_VL;
7198 case ISD::VP_SIGN_EXTEND:
7199 return RISCVISD::VSEXT_VL;
7200 case ISD::VP_ZERO_EXTEND:
7201 return RISCVISD::VZEXT_VL;
7202 case ISD::VP_FP_TO_SINT:
7203 return RISCVISD::VFCVT_RTZ_X_F_VL;
7204 case ISD::VP_FP_TO_UINT:
7205 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7206 case ISD::FMINNUM:
7207 case ISD::FMINIMUMNUM:
7208 case ISD::VP_FMINNUM:
7209 return RISCVISD::VFMIN_VL;
7210 case ISD::FMAXNUM:
7211 case ISD::FMAXIMUMNUM:
7212 case ISD::VP_FMAXNUM:
7213 return RISCVISD::VFMAX_VL;
7214 case ISD::LRINT:
7215 case ISD::VP_LRINT:
7216 case ISD::LLRINT:
7217 case ISD::VP_LLRINT:
7218 return RISCVISD::VFCVT_RM_X_F_VL;
7219 }
7220 // clang-format on
7221#undef OP_CASE
7222#undef VP_CASE
7223}
7224
7225 static bool isPromotedOpNeedingSplit(SDValue Op,
7226 const RISCVSubtarget &Subtarget) {
7227 return (Op.getValueType() == MVT::nxv32f16 &&
7228 (Subtarget.hasVInstructionsF16Minimal() &&
7229 !Subtarget.hasVInstructionsF16())) ||
7230 Op.getValueType() == MVT::nxv32bf16;
7231}
7232
7233 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
7234 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7235 SDLoc DL(Op);
7236
7237 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7238 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7239
7240 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7241 if (!Op.getOperand(j).getValueType().isVector()) {
7242 LoOperands[j] = Op.getOperand(j);
7243 HiOperands[j] = Op.getOperand(j);
7244 continue;
7245 }
7246 std::tie(LoOperands[j], HiOperands[j]) =
7247 DAG.SplitVector(Op.getOperand(j), DL);
7248 }
7249
7250 SDValue LoRes =
7251 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7252 SDValue HiRes =
7253 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7254
7255 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7256}
7257
7258 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
7259 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7260 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7261 SDLoc DL(Op);
7262
7263 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7264 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7265
7266 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7267 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7268 std::tie(LoOperands[j], HiOperands[j]) =
7269 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7270 continue;
7271 }
7272 if (!Op.getOperand(j).getValueType().isVector()) {
7273 LoOperands[j] = Op.getOperand(j);
7274 HiOperands[j] = Op.getOperand(j);
7275 continue;
7276 }
7277 std::tie(LoOperands[j], HiOperands[j]) =
7278 DAG.SplitVector(Op.getOperand(j), DL);
7279 }
7280
7281 SDValue LoRes =
7282 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7283 SDValue HiRes =
7284 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7285
7286 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7287}
7288
7289 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
7290 SDLoc DL(Op);
7291
7292 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7293 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7294 auto [EVLLo, EVLHi] =
7295 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7296
7297 SDValue ResLo =
7298 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7299 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
7300 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7301 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7302}
7303
7304 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
7305
7306 assert(Op->isStrictFPOpcode());
7307
7308 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7309
7310 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7311 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7312
7313 SDLoc DL(Op);
7314
7315 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7316 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7317
7318 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7319 if (!Op.getOperand(j).getValueType().isVector()) {
7320 LoOperands[j] = Op.getOperand(j);
7321 HiOperands[j] = Op.getOperand(j);
7322 continue;
7323 }
7324 std::tie(LoOperands[j], HiOperands[j]) =
7325 DAG.SplitVector(Op.getOperand(j), DL);
7326 }
7327
7328 SDValue LoRes =
7329 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
7330 HiOperands[0] = LoRes.getValue(1);
7331 SDValue HiRes =
7332 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7333
7334 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7335 LoRes.getValue(0), HiRes.getValue(0));
7336 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7337}
7338
7339SDValue
7340RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7341 SelectionDAG &DAG) const {
7342 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7343 "Unexpected bfloat16 load lowering");
7344
7345 SDLoc DL(Op);
7346 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7347 EVT MemVT = LD->getMemoryVT();
7348 SDValue Load = DAG.getExtLoad(
7349 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7350 LD->getBasePtr(),
7351 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
7352 LD->getMemOperand());
7353 // OR in a mask to NaN-box the bf16 value when we don't have the flh
7354 // instruction. -65536 is treated as a small (simm32) value, so it can be
7355 // materialized directly with a single lui.
7356 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7357 SDValue OrSixteenOne =
7358 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7359 SDValue ConvertedResult =
7360 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7361 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7362}
7363
7364SDValue
7365RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7366 SelectionDAG &DAG) const {
7367 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7368 "Unexpected bfloat16 store lowering");
7369
7370 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7371 SDLoc DL(Op);
7372 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7373 Subtarget.getXLenVT(), ST->getValue());
7374 return DAG.getTruncStore(
7375 ST->getChain(), DL, FMV, ST->getBasePtr(),
7376 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7377 ST->getMemOperand());
7378}
7379
7380 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
7381 SelectionDAG &DAG) const {
7382 switch (Op.getOpcode()) {
7383 default:
7385 "Unimplemented RISCVTargetLowering::LowerOperation Case");
7386 case ISD::PREFETCH:
7387 return LowerPREFETCH(Op, Subtarget, DAG);
7388 case ISD::ATOMIC_FENCE:
7389 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7390 case ISD::GlobalAddress:
7391 return lowerGlobalAddress(Op, DAG);
7392 case ISD::BlockAddress:
7393 return lowerBlockAddress(Op, DAG);
7394 case ISD::ConstantPool:
7395 return lowerConstantPool(Op, DAG);
7396 case ISD::JumpTable:
7397 return lowerJumpTable(Op, DAG);
7398 case ISD::GlobalTLSAddress:
7399 return lowerGlobalTLSAddress(Op, DAG);
7400 case ISD::Constant:
7401 return lowerConstant(Op, DAG, Subtarget);
7402 case ISD::ConstantFP:
7403 return lowerConstantFP(Op, DAG);
7404 case ISD::SELECT:
7405 return lowerSELECT(Op, DAG);
7406 case ISD::BRCOND:
7407 return lowerBRCOND(Op, DAG);
7408 case ISD::VASTART:
7409 return lowerVASTART(Op, DAG);
7410 case ISD::FRAMEADDR:
7411 return lowerFRAMEADDR(Op, DAG);
7412 case ISD::RETURNADDR:
7413 return lowerRETURNADDR(Op, DAG);
7414 case ISD::SHL_PARTS:
7415 return lowerShiftLeftParts(Op, DAG);
7416 case ISD::SRA_PARTS:
7417 return lowerShiftRightParts(Op, DAG, true);
7418 case ISD::SRL_PARTS:
7419 return lowerShiftRightParts(Op, DAG, false);
7420 case ISD::ROTL:
7421 case ISD::ROTR:
7422 if (Op.getValueType().isFixedLengthVector()) {
7423 assert(Subtarget.hasStdExtZvkb());
7424 return lowerToScalableOp(Op, DAG);
7425 }
7426 assert(Subtarget.hasVendorXTHeadBb() &&
7427 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7428 "Unexpected custom legalization");
7429 // XTHeadBb only supports rotate by constant.
7430 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7431 return SDValue();
7432 return Op;
7433 case ISD::BITCAST: {
7434 SDLoc DL(Op);
7435 EVT VT = Op.getValueType();
7436 SDValue Op0 = Op.getOperand(0);
7437 EVT Op0VT = Op0.getValueType();
7438 MVT XLenVT = Subtarget.getXLenVT();
7439 if (Op0VT == MVT::i16 &&
7440 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7441 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7442 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7443 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7444 }
7445 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7446 Subtarget.hasStdExtFOrZfinx()) {
7447 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7448 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7449 }
7450 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7451 Subtarget.hasStdExtDOrZdinx()) {
7452 SDValue Lo, Hi;
7453 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7454 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7455 }
7456
7457 // Consider other scalar<->scalar casts as legal if the types are legal.
7458 // Otherwise expand them.
7459 if (!VT.isVector() && !Op0VT.isVector()) {
7460 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7461 return Op;
7462 return SDValue();
7463 }
7464
7465 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7466 "Unexpected types");
7467
7468 if (VT.isFixedLengthVector()) {
7469 // We can handle fixed length vector bitcasts with a simple replacement
7470 // in isel.
7471 if (Op0VT.isFixedLengthVector())
7472 return Op;
7473 // When bitcasting from scalar to fixed-length vector, insert the scalar
7474 // into a one-element vector of the result type, and perform a vector
7475 // bitcast.
7476 if (!Op0VT.isVector()) {
7477 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7478 if (!isTypeLegal(BVT))
7479 return SDValue();
7480 return DAG.getBitcast(
7481 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7482 }
7483 return SDValue();
7484 }
7485 // Custom-legalize bitcasts from fixed-length vector types to scalar types
7486 // thus: bitcast the vector to a one-element vector type whose element type
7487 // is the same as the result type, and extract the first element.
7488 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
7489 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
7490 if (!isTypeLegal(BVT))
7491 return SDValue();
7492 SDValue BVec = DAG.getBitcast(BVT, Op0);
7493 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
7494 }
7495 return SDValue();
7496 }
7497 case ISD::INTRINSIC_WO_CHAIN:
7498 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7499 case ISD::INTRINSIC_W_CHAIN:
7500 return LowerINTRINSIC_W_CHAIN(Op, DAG);
7501 case ISD::INTRINSIC_VOID:
7502 return LowerINTRINSIC_VOID(Op, DAG);
7503 case ISD::IS_FPCLASS:
7504 return LowerIS_FPCLASS(Op, DAG);
7505 case ISD::BITREVERSE: {
7506 MVT VT = Op.getSimpleValueType();
7507 if (VT.isFixedLengthVector()) {
7508 assert(Subtarget.hasStdExtZvbb());
7509 return lowerToScalableOp(Op, DAG);
7510 }
7511 SDLoc DL(Op);
7512 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
7513 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
7514 // Expand bitreverse to a bswap(rev8) followed by brev8.
7515 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
7516 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
7517 }
7518 case ISD::TRUNCATE:
7519 case ISD::TRUNCATE_SSAT_S:
7520 case ISD::TRUNCATE_USAT_U:
7521 // Only custom-lower vector truncates
7522 if (!Op.getSimpleValueType().isVector())
7523 return Op;
7524 return lowerVectorTruncLike(Op, DAG);
7525 case ISD::ANY_EXTEND:
7526 case ISD::ZERO_EXTEND:
7527 if (Op.getOperand(0).getValueType().isVector() &&
7528 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7529 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
7530 if (Op.getValueType().isScalableVector())
7531 return Op;
7532 return lowerToScalableOp(Op, DAG);
7533 case ISD::SIGN_EXTEND:
7534 if (Op.getOperand(0).getValueType().isVector() &&
7535 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7536 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
7537 if (Op.getValueType().isScalableVector())
7538 return Op;
7539 return lowerToScalableOp(Op, DAG);
7540 case ISD::SPLAT_VECTOR_PARTS:
7541 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
7542 case ISD::INSERT_VECTOR_ELT:
7543 return lowerINSERT_VECTOR_ELT(Op, DAG);
7544 case ISD::EXTRACT_VECTOR_ELT:
7545 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7546 case ISD::SCALAR_TO_VECTOR: {
7547 MVT VT = Op.getSimpleValueType();
7548 SDLoc DL(Op);
7549 SDValue Scalar = Op.getOperand(0);
7550 if (VT.getVectorElementType() == MVT::i1) {
7551 MVT WideVT = VT.changeVectorElementType(MVT::i8);
7552 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
7553 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
7554 }
7555 MVT ContainerVT = VT;
7556 if (VT.isFixedLengthVector())
7557 ContainerVT = getContainerForFixedLengthVector(VT);
7558 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7559
7560 SDValue V;
7561 if (VT.isFloatingPoint()) {
7562 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
7563 DAG.getUNDEF(ContainerVT), Scalar, VL);
7564 } else {
7565 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
7566 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
7567 DAG.getUNDEF(ContainerVT), Scalar, VL);
7568 }
7569 if (VT.isFixedLengthVector())
7570 V = convertFromScalableVector(VT, V, DAG, Subtarget);
7571 return V;
7572 }
7573 case ISD::VSCALE: {
7574 MVT XLenVT = Subtarget.getXLenVT();
7575 MVT VT = Op.getSimpleValueType();
7576 SDLoc DL(Op);
7577 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7578 // We define our scalable vector types for lmul=1 to use a 64 bit known
7579 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
7580 // vscale as VLENB / 8.
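// For example, with VLEN = 256 we have VLENB = 32 and vscale = 32 / 8 = 4; a
// request for vscale * 16 is lowered as VLENB << 1 (Log2(16) - 3 = 1), i.e. 64.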
7581 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
7582 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
7583 reportFatalInternalError("Support for VLEN==32 is incomplete.");
7584 // We assume VLENB is a multiple of 8. We manually choose the best shift
7585 // here because SimplifyDemandedBits isn't always able to simplify it.
7586 uint64_t Val = Op.getConstantOperandVal(0);
7587 if (isPowerOf2_64(Val)) {
7588 uint64_t Log2 = Log2_64(Val);
7589 if (Log2 < 3) {
7590 SDNodeFlags Flags;
7591 Flags.setExact(true);
7592 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7593 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
7594 } else if (Log2 > 3) {
7595 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
7596 DAG.getConstant(Log2 - 3, DL, XLenVT));
7597 }
7598 } else if ((Val % 8) == 0) {
7599 // If the multiplier is a multiple of 8, scale it down to avoid needing
7600 // to shift the VLENB value.
7601 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
7602 DAG.getConstant(Val / 8, DL, XLenVT));
7603 } else {
7604 SDNodeFlags Flags;
7605 Flags.setExact(true);
7606 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7607 DAG.getConstant(3, DL, XLenVT), Flags);
7608 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
7609 DAG.getConstant(Val, DL, XLenVT));
7610 }
7611 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7612 }
7613 case ISD::FPOWI: {
7614 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
7615 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
7616 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
7617 Op.getOperand(1).getValueType() == MVT::i32) {
7618 SDLoc DL(Op);
7619 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7620 SDValue Powi =
7621 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
7622 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
7623 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7624 }
7625 return SDValue();
7626 }
7627 case ISD::FMAXIMUM:
7628 case ISD::FMINIMUM:
7629 if (isPromotedOpNeedingSplit(Op, Subtarget))
7630 return SplitVectorOp(Op, DAG);
7631 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7632 case ISD::FP_EXTEND:
7633 case ISD::FP_ROUND:
7634 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7635 case ISD::STRICT_FP_ROUND:
7636 case ISD::STRICT_FP_EXTEND:
7637 return lowerStrictFPExtendOrRoundLike(Op, DAG);
7638 case ISD::SINT_TO_FP:
7639 case ISD::UINT_TO_FP:
7640 if (Op.getValueType().isVector() &&
7641 ((Op.getValueType().getScalarType() == MVT::f16 &&
7642 (Subtarget.hasVInstructionsF16Minimal() &&
7643 !Subtarget.hasVInstructionsF16())) ||
7644 Op.getValueType().getScalarType() == MVT::bf16)) {
7645 if (isPromotedOpNeedingSplit(Op, Subtarget))
7646 return SplitVectorOp(Op, DAG);
7647 // int -> f32
7648 SDLoc DL(Op);
7649 MVT NVT =
7650 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7651 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7652 // f32 -> [b]f16
7653 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7654 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7655 }
7656 [[fallthrough]];
7657 case ISD::FP_TO_SINT:
7658 case ISD::FP_TO_UINT:
7659 if (SDValue Op1 = Op.getOperand(0);
7660 Op1.getValueType().isVector() &&
7661 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7662 (Subtarget.hasVInstructionsF16Minimal() &&
7663 !Subtarget.hasVInstructionsF16())) ||
7664 Op1.getValueType().getScalarType() == MVT::bf16)) {
7665 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7666 return SplitVectorOp(Op, DAG);
7667 // [b]f16 -> f32
7668 SDLoc DL(Op);
7669 MVT NVT = MVT::getVectorVT(MVT::f32,
7670 Op1.getValueType().getVectorElementCount());
7671 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7672 // f32 -> int
7673 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
7674 }
7675 [[fallthrough]];
7676 case ISD::STRICT_FP_TO_SINT:
7677 case ISD::STRICT_FP_TO_UINT:
7678 case ISD::STRICT_SINT_TO_FP:
7679 case ISD::STRICT_UINT_TO_FP: {
7680 // RVV can only do fp<->int conversions to types half/double the size of
7681 // the source. We custom-lower any conversions that need two hops into
7682 // sequences.
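// For example, a v4i8 -> v4f32 conversion (a 4x widening) becomes a sign/zero
// extension to v4i16 followed by a single 2x widening i16 -> f32 convert; the
// narrowing direction is handled with an analogous two-step sequence.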
7683 MVT VT = Op.getSimpleValueType();
7684 if (VT.isScalarInteger())
7685 return lowerFP_TO_INT(Op, DAG, Subtarget);
7686 bool IsStrict = Op->isStrictFPOpcode();
7687 SDValue Src = Op.getOperand(0 + IsStrict);
7688 MVT SrcVT = Src.getSimpleValueType();
7689 if (SrcVT.isScalarInteger())
7690 return lowerINT_TO_FP(Op, DAG, Subtarget);
7691 if (!VT.isVector())
7692 return Op;
7693 SDLoc DL(Op);
7694 MVT EltVT = VT.getVectorElementType();
7695 MVT SrcEltVT = SrcVT.getVectorElementType();
7696 unsigned EltSize = EltVT.getSizeInBits();
7697 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
7698 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
7699 "Unexpected vector element types");
7700
7701 bool IsInt2FP = SrcEltVT.isInteger();
7702 // Widening conversions
7703 if (EltSize > (2 * SrcEltSize)) {
7704 if (IsInt2FP) {
7705 // Do a regular integer sign/zero extension then convert to float.
7706 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
7707 VT.getVectorElementCount());
7708 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
7709 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
7710 ? ISD::ZERO_EXTEND
7711 : ISD::SIGN_EXTEND;
7712 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
7713 if (IsStrict)
7714 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
7715 Op.getOperand(0), Ext);
7716 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
7717 }
7718 // FP2Int
7719 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
7720 // Do one doubling fp_extend then complete the operation by converting
7721 // to int.
7722 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7723 if (IsStrict) {
7724 auto [FExt, Chain] =
7725 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
7726 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
7727 }
7728 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7729 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7730 }
7731
7732 // Narrowing conversions
7733 if (SrcEltSize > (2 * EltSize)) {
7734 if (IsInt2FP) {
7735 // One narrowing int_to_fp, then an fp_round.
7736 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7737 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7738 if (IsStrict) {
7739 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7740 DAG.getVTList(InterimFVT, MVT::Other),
7741 Op.getOperand(0), Src);
7742 SDValue Chain = Int2FP.getValue(1);
7743 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7744 }
7745 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7746 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7747 }
7748 // FP2Int
7749 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7750 // representable by the integer, the result is poison.
7751 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7752 VT.getVectorElementCount());
7753 if (IsStrict) {
7754 SDValue FP2Int =
7755 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7756 Op.getOperand(0), Src);
7757 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7758 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7759 }
7760 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7761 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7762 }
7763
7764 // Scalable vectors can exit here. Patterns will handle equally-sized
7765 // conversions halving/doubling ones.
7766 if (!VT.isFixedLengthVector())
7767 return Op;
7768
7769 // For fixed-length vectors we lower to a custom "VL" node.
7770 unsigned RVVOpc = 0;
7771 switch (Op.getOpcode()) {
7772 default:
7773 llvm_unreachable("Impossible opcode");
7774 case ISD::FP_TO_SINT:
7775 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7776 break;
7777 case ISD::FP_TO_UINT:
7778 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7779 break;
7780 case ISD::SINT_TO_FP:
7781 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7782 break;
7783 case ISD::UINT_TO_FP:
7784 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7785 break;
7786 case ISD::STRICT_FP_TO_SINT:
7787 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7788 break;
7789 case ISD::STRICT_FP_TO_UINT:
7790 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7791 break;
7792 case ISD::STRICT_SINT_TO_FP:
7793 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7794 break;
7795 case ISD::STRICT_UINT_TO_FP:
7796 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7797 break;
7798 }
7799
7800 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7801 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7802 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7803 "Expected same element count");
7804
7805 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7806
7807 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7808 if (IsStrict) {
7809 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7810 Op.getOperand(0), Src, Mask, VL);
7811 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7812 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7813 }
7814 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7815 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7816 }
7817 case ISD::FP_TO_SINT_SAT:
7818 case ISD::FP_TO_UINT_SAT:
7819 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7820 case ISD::FP_TO_BF16: {
7821 // Custom lower to ensure the libcall return is passed in an FPR on hard
7822 // float ABIs.
7823 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7824 SDLoc DL(Op);
7825 MakeLibCallOptions CallOptions;
7826 RTLIB::Libcall LC =
7827 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7828 SDValue Res =
7829 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7830 if (Subtarget.is64Bit())
7831 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7832 return DAG.getBitcast(MVT::i32, Res);
7833 }
7834 case ISD::BF16_TO_FP: {
7835 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7836 MVT VT = Op.getSimpleValueType();
7837 SDLoc DL(Op);
7838 Op = DAG.getNode(
7839 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7840 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7841 SDValue Res = Subtarget.is64Bit()
7842 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7843 : DAG.getBitcast(MVT::f32, Op);
7844 // fp_extend if the target VT is bigger than f32.
7845 if (VT != MVT::f32)
7846 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7847 return Res;
7848 }
7849 case ISD::STRICT_FP_TO_FP16:
7850 case ISD::FP_TO_FP16: {
7851 // Custom lower to ensure the libcall return is passed in an FPR on hard
7852 // float ABIs.
7853 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7854 SDLoc DL(Op);
7855 MakeLibCallOptions CallOptions;
7856 bool IsStrict = Op->isStrictFPOpcode();
7857 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7858 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7859 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7860 SDValue Res;
7861 std::tie(Res, Chain) =
7862 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7863 if (Subtarget.is64Bit())
7864 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7865 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7866 if (IsStrict)
7867 return DAG.getMergeValues({Result, Chain}, DL);
7868 return Result;
7869 }
7870 case ISD::STRICT_FP16_TO_FP:
7871 case ISD::FP16_TO_FP: {
7872 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7873 // float ABIs.
7874 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7875 SDLoc DL(Op);
7876 MakeLibCallOptions CallOptions;
7877 bool IsStrict = Op->isStrictFPOpcode();
7878 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7879 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7880 SDValue Arg = Subtarget.is64Bit()
7881 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7882 : DAG.getBitcast(MVT::f32, Op0);
7883 SDValue Res;
7884 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7885 CallOptions, DL, Chain);
7886 if (IsStrict)
7887 return DAG.getMergeValues({Res, Chain}, DL);
7888 return Res;
7889 }
7890 case ISD::FTRUNC:
7891 case ISD::FCEIL:
7892 case ISD::FFLOOR:
7893 case ISD::FNEARBYINT:
7894 case ISD::FRINT:
7895 case ISD::FROUND:
7896 case ISD::FROUNDEVEN:
7897 if (isPromotedOpNeedingSplit(Op, Subtarget))
7898 return SplitVectorOp(Op, DAG);
7899 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7900 case ISD::LRINT:
7901 case ISD::LLRINT:
7902 case ISD::LROUND:
7903 case ISD::LLROUND: {
7904 if (Op.getValueType().isVector())
7905 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
7906 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7907 "Unexpected custom legalisation");
7908 SDLoc DL(Op);
7909 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7910 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7911 }
7912 case ISD::STRICT_LRINT:
7913 case ISD::STRICT_LLRINT:
7914 case ISD::STRICT_LROUND:
7915 case ISD::STRICT_LLROUND: {
7916 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7917 "Unexpected custom legalisation");
7918 SDLoc DL(Op);
7919 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7920 {Op.getOperand(0), Op.getOperand(1)});
7921 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7922 {Ext.getValue(1), Ext.getValue(0)});
7923 }
7924 case ISD::VECREDUCE_ADD:
7925 case ISD::VECREDUCE_UMAX:
7926 case ISD::VECREDUCE_SMAX:
7927 case ISD::VECREDUCE_UMIN:
7928 case ISD::VECREDUCE_SMIN:
7929 return lowerVECREDUCE(Op, DAG);
7930 case ISD::VECREDUCE_AND:
7931 case ISD::VECREDUCE_OR:
7932 case ISD::VECREDUCE_XOR:
7933 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7934 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7935 return lowerVECREDUCE(Op, DAG);
7936 case ISD::VECREDUCE_FADD:
7937 case ISD::VECREDUCE_SEQ_FADD:
7938 case ISD::VECREDUCE_FMIN:
7939 case ISD::VECREDUCE_FMAX:
7940 case ISD::VECREDUCE_FMAXIMUM:
7941 case ISD::VECREDUCE_FMINIMUM:
7942 return lowerFPVECREDUCE(Op, DAG);
7943 case ISD::VP_REDUCE_ADD:
7944 case ISD::VP_REDUCE_UMAX:
7945 case ISD::VP_REDUCE_SMAX:
7946 case ISD::VP_REDUCE_UMIN:
7947 case ISD::VP_REDUCE_SMIN:
7948 case ISD::VP_REDUCE_FADD:
7949 case ISD::VP_REDUCE_SEQ_FADD:
7950 case ISD::VP_REDUCE_FMIN:
7951 case ISD::VP_REDUCE_FMAX:
7952 case ISD::VP_REDUCE_FMINIMUM:
7953 case ISD::VP_REDUCE_FMAXIMUM:
7954 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7955 return SplitVectorReductionOp(Op, DAG);
7956 return lowerVPREDUCE(Op, DAG);
7957 case ISD::VP_REDUCE_AND:
7958 case ISD::VP_REDUCE_OR:
7959 case ISD::VP_REDUCE_XOR:
7960 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7961 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7962 return lowerVPREDUCE(Op, DAG);
7963 case ISD::VP_CTTZ_ELTS:
7964 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7965 return lowerVPCttzElements(Op, DAG);
7966 case ISD::UNDEF: {
7967 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7968 return convertFromScalableVector(Op.getSimpleValueType(),
7969 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7970 }
7971 case ISD::INSERT_SUBVECTOR:
7972 return lowerINSERT_SUBVECTOR(Op, DAG);
7973 case ISD::EXTRACT_SUBVECTOR:
7974 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7975 case ISD::VECTOR_DEINTERLEAVE:
7976 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7977 case ISD::VECTOR_INTERLEAVE:
7978 return lowerVECTOR_INTERLEAVE(Op, DAG);
7979 case ISD::STEP_VECTOR:
7980 return lowerSTEP_VECTOR(Op, DAG);
7981 case ISD::VECTOR_REVERSE:
7982 return lowerVECTOR_REVERSE(Op, DAG);
7983 case ISD::VECTOR_SPLICE:
7984 return lowerVECTOR_SPLICE(Op, DAG);
7985 case ISD::BUILD_VECTOR: {
7986 MVT VT = Op.getSimpleValueType();
7987 MVT EltVT = VT.getVectorElementType();
7988 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
7989 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
7990 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7991 }
7992 case ISD::SPLAT_VECTOR: {
7993 MVT VT = Op.getSimpleValueType();
7994 MVT EltVT = VT.getVectorElementType();
7995 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7996 EltVT == MVT::bf16) {
7997 SDLoc DL(Op);
7998 SDValue Elt;
7999 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
8000 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
8001 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
8002 Op.getOperand(0));
8003 else
8004 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
8005 MVT IVT = VT.changeVectorElementType(MVT::i16);
8006 return DAG.getNode(ISD::BITCAST, DL, VT,
8007 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
8008 }
8009
8010 if (EltVT == MVT::i1)
8011 return lowerVectorMaskSplat(Op, DAG);
8012 return SDValue();
8013 }
8014 case ISD::VECTOR_SHUFFLE:
8015 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8016 case ISD::CONCAT_VECTORS: {
8017 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
8018 // better than going through the stack, as the default expansion does.
8019 SDLoc DL(Op);
8020 MVT VT = Op.getSimpleValueType();
8021 MVT ContainerVT = VT;
8022 if (VT.isFixedLengthVector())
8023 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
8024
8025 // Recursively split concat_vectors with more than 2 operands:
8026 //
8027 // concat_vector op1, op2, op3, op4
8028 // ->
8029 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
8030 //
8031 // This reduces the length of the chain of vslideups and allows us to
8032 // perform the vslideups at a smaller LMUL, limited to MF2.
8033 if (Op.getNumOperands() > 2 &&
8034 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
8035 MVT HalfVT = VT.getHalfNumVectorElementsVT();
8036 assert(isPowerOf2_32(Op.getNumOperands()));
8037 size_t HalfNumOps = Op.getNumOperands() / 2;
8038 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8039 Op->ops().take_front(HalfNumOps));
8040 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8041 Op->ops().drop_front(HalfNumOps));
8042 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8043 }
8044
8045 unsigned NumOpElts =
8046 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
8047 SDValue Vec = DAG.getUNDEF(VT);
8048 for (const auto &OpIdx : enumerate(Op->ops())) {
8049 SDValue SubVec = OpIdx.value();
8050 // Don't insert undef subvectors.
8051 if (SubVec.isUndef())
8052 continue;
8053 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
8054 }
8055 return Vec;
8056 }
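// Worked example (illustrative): concat_vectors a, b, c, d is first rewritten
// as concat_vectors (concat_vectors a, b), (concat_vectors c, d); each inner
// concat then becomes a short chain of insert_subvector nodes, so the
// vslideups run on narrower types before the final full-width combine.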
8057 case ISD::LOAD: {
8058 auto *Load = cast<LoadSDNode>(Op);
8059 EVT VT = Load->getValueType(0);
8060 if (VT == MVT::f64) {
8061 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8062 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8063
8064 // Replace a double precision load with two i32 loads and a BuildPairF64.
8065 SDLoc DL(Op);
8066 SDValue BasePtr = Load->getBasePtr();
8067 SDValue Chain = Load->getChain();
8068
8069 SDValue Lo =
8070 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
8071 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8072 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8073 SDValue Hi = DAG.getLoad(
8074 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
8075 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8076 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8077 Hi.getValue(1));
8078
8079 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8080 return DAG.getMergeValues({Pair, Chain}, DL);
8081 }
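// For example (illustrative, RV32 + Zdinx without Zilsd): a load of a double
// at 0(a1) becomes an lw of the low word at 0(a1) and an lw of the high word
// at 4(a1), recombined with BuildPairF64 (little-endian layout assumed).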
8082
8083 if (VT == MVT::bf16)
8084 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8085
8086 // Handle normal vector tuple load.
8087 if (VT.isRISCVVectorTuple()) {
8088 SDLoc DL(Op);
8089 MVT XLenVT = Subtarget.getXLenVT();
8090 unsigned NF = VT.getRISCVVectorTupleNumFields();
8091 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8092 unsigned NumElts = Sz / (NF * 8);
8093 int Log2LMUL = Log2_64(NumElts) - 3;
8094
8095 auto Flag = SDNodeFlags();
8096 Flag.setNoUnsignedWrap(true);
8097 SDValue Ret = DAG.getUNDEF(VT);
8098 SDValue BasePtr = Load->getBasePtr();
8099 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8100 VROffset =
8101 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8102 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8103 SmallVector<SDValue, 8> OutChains;
8104
8105 // Load NF vector registers and combine them to a vector tuple.
8106 for (unsigned i = 0; i < NF; ++i) {
8107 SDValue LoadVal = DAG.getLoad(
8108 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8109 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8110 OutChains.push_back(LoadVal.getValue(1));
8111 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8112 DAG.getTargetConstant(i, DL, MVT::i32));
8113 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8114 }
8115 return DAG.getMergeValues(
8116 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8117 }
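// Illustrative example: an NF=4 tuple of LMUL-1 vectors is loaded as four
// whole-register loads whose base pointer is advanced by VLENB bytes each
// iteration (scaled up for larger LMUL); the pieces are reassembled with
// TUPLE_INSERT and the chains joined with a TokenFactor.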
8118
8119 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8120 return V;
8121 if (Op.getValueType().isFixedLengthVector())
8122 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8123 return Op;
8124 }
8125 case ISD::STORE: {
8126 auto *Store = cast<StoreSDNode>(Op);
8127 SDValue StoredVal = Store->getValue();
8128 EVT VT = StoredVal.getValueType();
8129 if (VT == MVT::f64) {
8130 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8131 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8132
8133 // Replace a double precision store with a SplitF64 and i32 stores.
8134 SDLoc DL(Op);
8135 SDValue BasePtr = Store->getBasePtr();
8136 SDValue Chain = Store->getChain();
8137 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8138 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8139
8140 SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
8141 Store->getPointerInfo(), Store->getBaseAlign(),
8142 Store->getMemOperand()->getFlags());
8143 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8144 SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
8145 Store->getPointerInfo().getWithOffset(4),
8146 Store->getBaseAlign(),
8147 Store->getMemOperand()->getFlags());
8148 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8149 }
8150 if (VT == MVT::i64) {
8151 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8152 "Unexpected custom legalisation");
8153 if (Store->isTruncatingStore())
8154 return SDValue();
8155
8156 if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
8157 return SDValue();
8158
8159 SDLoc DL(Op);
8160 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8161 DAG.getTargetConstant(0, DL, MVT::i32));
8162 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8163 DAG.getTargetConstant(1, DL, MVT::i32));
8164
8165 return DAG.getMemIntrinsicNode(
8166 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8167 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8168 Store->getMemOperand());
8169 }
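// With Zilsd on RV32, the i64 store above becomes a single paired store
// (roughly "sd rs2, 0(rs1)" with the low/high words in a GPR pair), which is
// why it is only done for naturally aligned stores or unaligned-tolerant
// targets.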
8170
8171 if (VT == MVT::bf16)
8172 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8173
8174 // Handle normal vector tuple store.
8175 if (VT.isRISCVVectorTuple()) {
8176 SDLoc DL(Op);
8177 MVT XLenVT = Subtarget.getXLenVT();
8178 unsigned NF = VT.getRISCVVectorTupleNumFields();
8179 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8180 unsigned NumElts = Sz / (NF * 8);
8181 int Log2LMUL = Log2_64(NumElts) - 3;
8182
8183 auto Flag = SDNodeFlags();
8184 Flag.setNoUnsignedWrap(true);
8185 SDValue Ret;
8186 SDValue Chain = Store->getChain();
8187 SDValue BasePtr = Store->getBasePtr();
8188 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8189 VROffset =
8190 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8191 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8192
8193 // Extract subregisters in a vector tuple and store them individually.
8194 for (unsigned i = 0; i < NF; ++i) {
8195 auto Extract =
8196 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8197 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8198 DAG.getTargetConstant(i, DL, MVT::i32));
8199 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8200 MachinePointerInfo(Store->getAddressSpace()),
8201 Store->getBaseAlign(),
8202 Store->getMemOperand()->getFlags());
8203 Chain = Ret.getValue(0);
8204 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8205 }
8206 return Ret;
8207 }
8208
8209 if (auto V = expandUnalignedRVVStore(Op, DAG))
8210 return V;
8211 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8212 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8213 return Op;
8214 }
8215 case ISD::MLOAD:
8216 case ISD::VP_LOAD:
8217 return lowerMaskedLoad(Op, DAG);
8218 case ISD::VP_LOAD_FF:
8219 return lowerLoadFF(Op, DAG);
8220 case ISD::MSTORE:
8221 case ISD::VP_STORE:
8222 return lowerMaskedStore(Op, DAG);
8223 case ISD::VECTOR_COMPRESS:
8224 return lowerVectorCompress(Op, DAG);
8225 case ISD::SELECT_CC: {
8226 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8227 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8228 // into separate SETCC+SELECT just like LegalizeDAG.
8229 SDValue Tmp1 = Op.getOperand(0);
8230 SDValue Tmp2 = Op.getOperand(1);
8231 SDValue True = Op.getOperand(2);
8232 SDValue False = Op.getOperand(3);
8233 EVT VT = Op.getValueType();
8234 SDValue CC = Op.getOperand(4);
8235 EVT CmpVT = Tmp1.getValueType();
8236 EVT CCVT =
8237 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8238 SDLoc DL(Op);
8239 SDValue Cond =
8240 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8241 return DAG.getSelect(DL, VT, Cond, True, False);
8242 }
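// Example (illustrative): (select_cc a, b, setugt, t, f) becomes
//   %c = setcc a, b, setugt ; %r = select %c, t, f
// and the setcc is then handled by the ISD::SETCC case below.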
8243 case ISD::SETCC: {
8244 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8245 if (OpVT.isScalarInteger()) {
8246 MVT VT = Op.getSimpleValueType();
8247 SDValue LHS = Op.getOperand(0);
8248 SDValue RHS = Op.getOperand(1);
8249 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8250 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8251 "Unexpected CondCode");
8252
8253 SDLoc DL(Op);
8254
8255 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8256 // convert this to the equivalent of (set(u)ge X, C+1) by using
8257 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8258 // in a register.
8259 if (isa<ConstantSDNode>(RHS)) {
8260 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8261 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8262 // If this is an unsigned compare and the constant is -1, incrementing
8263 // the constant would change behavior. The result should be false.
8264 if (CCVal == ISD::SETUGT && Imm == -1)
8265 return DAG.getConstant(0, DL, VT);
8266 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8267 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8268 SDValue SetCC = DAG.getSetCC(
8269 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8270 return DAG.getLogicalNOT(DL, SetCC, VT);
8271 }
8272 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
8273 if (CCVal == ISD::SETUGT && Imm == 2047) {
8274 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8275 DAG.getShiftAmountConstant(11, OpVT, DL));
8276 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8277 ISD::SETNE);
8278 }
8279 }
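// Worked example (illustrative): (setgt X, 5) becomes the logical NOT of
// (setlt X, 6), i.e. roughly "slti a0, a0, 6; xori a0, a0, 1", so the
// constant 5 never has to be materialized in a register.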
8280
8281 // Not a constant we could handle, swap the operands and condition code to
8282 // SETLT/SETULT.
8283 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8284 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8285 }
8286
8287 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8288 return SplitVectorOp(Op, DAG);
8289
8290 return lowerToScalableOp(Op, DAG);
8291 }
8292 case ISD::ADD:
8293 case ISD::SUB:
8294 case ISD::MUL:
8295 case ISD::MULHS:
8296 case ISD::MULHU:
8297 case ISD::AND:
8298 case ISD::OR:
8299 case ISD::XOR:
8300 case ISD::SDIV:
8301 case ISD::SREM:
8302 case ISD::UDIV:
8303 case ISD::UREM:
8304 case ISD::BSWAP:
8305 case ISD::CTPOP:
8306 case ISD::VSELECT:
8307 return lowerToScalableOp(Op, DAG);
8308 case ISD::SHL:
8309 case ISD::SRA:
8310 case ISD::SRL:
8311 if (Op.getSimpleValueType().isFixedLengthVector())
8312 return lowerToScalableOp(Op, DAG);
8313 // This can be called for an i32 shift amount that needs to be promoted.
8314 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8315 "Unexpected custom legalisation");
8316 return SDValue();
8317 case ISD::FABS:
8318 case ISD::FNEG:
8319 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8320 return lowerFABSorFNEG(Op, DAG, Subtarget);
8321 [[fallthrough]];
8322 case ISD::FADD:
8323 case ISD::FSUB:
8324 case ISD::FMUL:
8325 case ISD::FDIV:
8326 case ISD::FSQRT:
8327 case ISD::FMA:
8328 case ISD::FMINNUM:
8329 case ISD::FMAXNUM:
8330 case ISD::FMINIMUMNUM:
8331 case ISD::FMAXIMUMNUM:
8332 if (isPromotedOpNeedingSplit(Op, Subtarget))
8333 return SplitVectorOp(Op, DAG);
8334 [[fallthrough]];
8335 case ISD::AVGFLOORS:
8336 case ISD::AVGFLOORU:
8337 case ISD::AVGCEILS:
8338 case ISD::AVGCEILU:
8339 case ISD::SMIN:
8340 case ISD::SMAX:
8341 case ISD::UMIN:
8342 case ISD::UMAX:
8343 case ISD::UADDSAT:
8344 case ISD::USUBSAT:
8345 case ISD::SADDSAT:
8346 case ISD::SSUBSAT:
8347 return lowerToScalableOp(Op, DAG);
8348 case ISD::ABDS:
8349 case ISD::ABDU: {
8350 SDLoc dl(Op);
8351 EVT VT = Op->getValueType(0);
8352 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8353 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8354 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8355
8356 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8357 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
8358 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8359 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8360 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8361 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8362 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8363 }
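// Worked example: abds(2, 7) = smax(2, 7) - smin(2, 7) = 7 - 2 = 5. Freezing
// both operands first keeps the repeated uses of lhs/rhs consistent if either
// value is poison.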
8364 case ISD::ABS:
8365 case ISD::VP_ABS:
8366 return lowerABS(Op, DAG);
8367 case ISD::CTLZ:
8368 case ISD::CTLZ_ZERO_UNDEF:
8369 case ISD::CTTZ:
8370 case ISD::CTTZ_ZERO_UNDEF:
8371 if (Subtarget.hasStdExtZvbb())
8372 return lowerToScalableOp(Op, DAG);
8373 assert(Op.getOpcode() != ISD::CTTZ);
8374 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8375 case ISD::FCOPYSIGN:
8376 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8377 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8378 if (isPromotedOpNeedingSplit(Op, Subtarget))
8379 return SplitVectorOp(Op, DAG);
8380 return lowerToScalableOp(Op, DAG);
8381 case ISD::STRICT_FADD:
8382 case ISD::STRICT_FSUB:
8383 case ISD::STRICT_FMUL:
8384 case ISD::STRICT_FDIV:
8385 case ISD::STRICT_FSQRT:
8386 case ISD::STRICT_FMA:
8387 if (isPromotedOpNeedingSplit(Op, Subtarget))
8388 return SplitStrictFPVectorOp(Op, DAG);
8389 return lowerToScalableOp(Op, DAG);
8390 case ISD::STRICT_FSETCC:
8391 case ISD::STRICT_FSETCCS:
8392 return lowerVectorStrictFSetcc(Op, DAG);
8393 case ISD::STRICT_FCEIL:
8394 case ISD::STRICT_FRINT:
8395 case ISD::STRICT_FFLOOR:
8396 case ISD::STRICT_FTRUNC:
8397 case ISD::STRICT_FNEARBYINT:
8398 case ISD::STRICT_FROUND:
8399 case ISD::STRICT_FROUNDEVEN:
8400 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8401 case ISD::MGATHER:
8402 case ISD::VP_GATHER:
8403 return lowerMaskedGather(Op, DAG);
8404 case ISD::MSCATTER:
8405 case ISD::VP_SCATTER:
8406 return lowerMaskedScatter(Op, DAG);
8407 case ISD::GET_ROUNDING:
8408 return lowerGET_ROUNDING(Op, DAG);
8409 case ISD::SET_ROUNDING:
8410 return lowerSET_ROUNDING(Op, DAG);
8411 case ISD::GET_FPENV:
8412 return lowerGET_FPENV(Op, DAG);
8413 case ISD::SET_FPENV:
8414 return lowerSET_FPENV(Op, DAG);
8415 case ISD::RESET_FPENV:
8416 return lowerRESET_FPENV(Op, DAG);
8417 case ISD::GET_FPMODE:
8418 return lowerGET_FPMODE(Op, DAG);
8419 case ISD::SET_FPMODE:
8420 return lowerSET_FPMODE(Op, DAG);
8421 case ISD::RESET_FPMODE:
8422 return lowerRESET_FPMODE(Op, DAG);
8423 case ISD::EH_DWARF_CFA:
8424 return lowerEH_DWARF_CFA(Op, DAG);
8425 case ISD::VP_MERGE:
8426 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
8427 return lowerVPMergeMask(Op, DAG);
8428 [[fallthrough]];
8429 case ISD::VP_SELECT:
8430 case ISD::VP_ADD:
8431 case ISD::VP_SUB:
8432 case ISD::VP_MUL:
8433 case ISD::VP_SDIV:
8434 case ISD::VP_UDIV:
8435 case ISD::VP_SREM:
8436 case ISD::VP_UREM:
8437 case ISD::VP_UADDSAT:
8438 case ISD::VP_USUBSAT:
8439 case ISD::VP_SADDSAT:
8440 case ISD::VP_SSUBSAT:
8441 case ISD::VP_LRINT:
8442 case ISD::VP_LLRINT:
8443 return lowerVPOp(Op, DAG);
8444 case ISD::VP_AND:
8445 case ISD::VP_OR:
8446 case ISD::VP_XOR:
8447 return lowerLogicVPOp(Op, DAG);
8448 case ISD::VP_FADD:
8449 case ISD::VP_FSUB:
8450 case ISD::VP_FMUL:
8451 case ISD::VP_FDIV:
8452 case ISD::VP_FNEG:
8453 case ISD::VP_FABS:
8454 case ISD::VP_SQRT:
8455 case ISD::VP_FMA:
8456 case ISD::VP_FMINNUM:
8457 case ISD::VP_FMAXNUM:
8458 case ISD::VP_FCOPYSIGN:
8459 if (isPromotedOpNeedingSplit(Op, Subtarget))
8460 return SplitVPOp(Op, DAG);
8461 [[fallthrough]];
8462 case ISD::VP_SRA:
8463 case ISD::VP_SRL:
8464 case ISD::VP_SHL:
8465 return lowerVPOp(Op, DAG);
8466 case ISD::VP_IS_FPCLASS:
8467 return LowerIS_FPCLASS(Op, DAG);
8468 case ISD::VP_SIGN_EXTEND:
8469 case ISD::VP_ZERO_EXTEND:
8470 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8471 return lowerVPExtMaskOp(Op, DAG);
8472 return lowerVPOp(Op, DAG);
8473 case ISD::VP_TRUNCATE:
8474 return lowerVectorTruncLike(Op, DAG);
8475 case ISD::VP_FP_EXTEND:
8476 case ISD::VP_FP_ROUND:
8477 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8478 case ISD::VP_SINT_TO_FP:
8479 case ISD::VP_UINT_TO_FP:
8480 if (Op.getValueType().isVector() &&
8481 ((Op.getValueType().getScalarType() == MVT::f16 &&
8482 (Subtarget.hasVInstructionsF16Minimal() &&
8483 !Subtarget.hasVInstructionsF16())) ||
8484 Op.getValueType().getScalarType() == MVT::bf16)) {
8485 if (isPromotedOpNeedingSplit(Op, Subtarget))
8486 return SplitVectorOp(Op, DAG);
8487 // int -> f32
8488 SDLoc DL(Op);
8489 MVT NVT =
8490 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8491 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8492 // f32 -> [b]f16
8493 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8494 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8495 }
8496 [[fallthrough]];
8497 case ISD::VP_FP_TO_SINT:
8498 case ISD::VP_FP_TO_UINT:
8499 if (SDValue Op1 = Op.getOperand(0);
8500 Op1.getValueType().isVector() &&
8501 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8502 (Subtarget.hasVInstructionsF16Minimal() &&
8503 !Subtarget.hasVInstructionsF16())) ||
8504 Op1.getValueType().getScalarType() == MVT::bf16)) {
8505 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8506 return SplitVectorOp(Op, DAG);
8507 // [b]f16 -> f32
8508 SDLoc DL(Op);
8509 MVT NVT = MVT::getVectorVT(MVT::f32,
8510 Op1.getValueType().getVectorElementCount());
8511 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8512 // f32 -> int
8513 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
8514 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
8515 }
8516 return lowerVPFPIntConvOp(Op, DAG);
8517 case ISD::VP_SETCC:
8518 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8519 return SplitVPOp(Op, DAG);
8520 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8521 return lowerVPSetCCMaskOp(Op, DAG);
8522 [[fallthrough]];
8523 case ISD::VP_SMIN:
8524 case ISD::VP_SMAX:
8525 case ISD::VP_UMIN:
8526 case ISD::VP_UMAX:
8527 case ISD::VP_BITREVERSE:
8528 case ISD::VP_BSWAP:
8529 return lowerVPOp(Op, DAG);
8530 case ISD::VP_CTLZ:
8531 case ISD::VP_CTLZ_ZERO_UNDEF:
8532 if (Subtarget.hasStdExtZvbb())
8533 return lowerVPOp(Op, DAG);
8534 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8535 case ISD::VP_CTTZ:
8536 case ISD::VP_CTTZ_ZERO_UNDEF:
8537 if (Subtarget.hasStdExtZvbb())
8538 return lowerVPOp(Op, DAG);
8539 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8540 case ISD::VP_CTPOP:
8541 return lowerVPOp(Op, DAG);
8542 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8543 return lowerVPStridedLoad(Op, DAG);
8544 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8545 return lowerVPStridedStore(Op, DAG);
8546 case ISD::VP_FCEIL:
8547 case ISD::VP_FFLOOR:
8548 case ISD::VP_FRINT:
8549 case ISD::VP_FNEARBYINT:
8550 case ISD::VP_FROUND:
8551 case ISD::VP_FROUNDEVEN:
8552 case ISD::VP_FROUNDTOZERO:
8553 if (isPromotedOpNeedingSplit(Op, Subtarget))
8554 return SplitVPOp(Op, DAG);
8555 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8556 case ISD::VP_FMAXIMUM:
8557 case ISD::VP_FMINIMUM:
8558 if (isPromotedOpNeedingSplit(Op, Subtarget))
8559 return SplitVPOp(Op, DAG);
8560 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8561 case ISD::EXPERIMENTAL_VP_SPLICE:
8562 return lowerVPSpliceExperimental(Op, DAG);
8563 case ISD::EXPERIMENTAL_VP_REVERSE:
8564 return lowerVPReverseExperimental(Op, DAG);
8565 case ISD::EXPERIMENTAL_VP_SPLAT:
8566 return lowerVPSplatExperimental(Op, DAG);
8567 case ISD::CLEAR_CACHE: {
8568 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
8569 "llvm.clear_cache only needs custom lower on Linux targets");
8570 SDLoc DL(Op);
8571 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8572 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
8573 Op.getOperand(2), Flags, DL);
8574 }
8575 case ISD::DYNAMIC_STACKALLOC:
8576 return lowerDYNAMIC_STACKALLOC(Op, DAG);
8577 case ISD::INIT_TRAMPOLINE:
8578 return lowerINIT_TRAMPOLINE(Op, DAG);
8579 case ISD::ADJUST_TRAMPOLINE:
8580 return lowerADJUST_TRAMPOLINE(Op, DAG);
8581 case ISD::PARTIAL_REDUCE_UMLA:
8582 case ISD::PARTIAL_REDUCE_SMLA:
8583 case ISD::PARTIAL_REDUCE_SUMLA:
8584 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
8585 }
8586}
8587
8588SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
8589 SDValue Start, SDValue End,
8590 SDValue Flags, SDLoc DL) const {
8591 MakeLibCallOptions CallOptions;
8592 std::pair<SDValue, SDValue> CallResult =
8593 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
8594 {Start, End, Flags}, CallOptions, DL, InChain);
8595
8596 // This function returns void so only the out chain matters.
8597 return CallResult.second;
8598}
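// Rough user-level equivalent of the libcall emitted above, assuming the Linux
// __riscv_flush_icache interface (the lowering passes flags == 0):
//   extern "C" int __riscv_flush_icache(void *start, void *end,
//                                       unsigned long flags);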
8599
8600SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
8601 SelectionDAG &DAG) const {
8602 if (!Subtarget.is64Bit())
8603 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8604
8605 // Create an MCCodeEmitter to encode instructions.
8606 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
8607 assert(TLO);
8608 MCContext &MCCtx = TLO->getContext();
8609
8610 std::unique_ptr<MCCodeEmitter> CodeEmitter(
8611 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
8612
8613 SDValue Root = Op.getOperand(0);
8614 SDValue Trmp = Op.getOperand(1); // trampoline
8615 SDLoc dl(Op);
8616
8617 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
8618
8619 // We store in the trampoline buffer the following instructions and data.
8620 // Offset:
8621 // 0: auipc t2, 0
8622 // 4: ld t0, 24(t2)
8623 // 8: ld t2, 16(t2)
8624 // 12: jalr t0
8625 // 16: <StaticChainOffset>
8626 // 24: <FunctionAddressOffset>
8627 // 32:
8628 // Offset with branch control flow protection enabled:
8629 // 0: lpad <imm20>
8630 // 4: auipc t3, 0
8631 // 8: ld t2, 28(t3)
8632 // 12: ld t3, 20(t3)
8633 // 16: jalr t2
8634 // 20: <StaticChainOffset>
8635 // 28: <FunctionAddressOffset>
8636 // 36:
8637
8638 const bool HasCFBranch =
8639 Subtarget.hasStdExtZicfilp() &&
8641 "cf-protection-branch");
8642 const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
8643 const unsigned StaticChainOffset = StaticChainIdx * 4;
8644 const unsigned FunctionAddressOffset = StaticChainOffset + 8;
8645
8646 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
8647 assert(STI);
8648 auto GetEncoding = [&](const MCInst &MC) {
8649 SmallVector<char, 32> CB;
8650 SmallVector<MCFixup> Fixups;
8651 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
8652 uint32_t Encoding = support::endian::read32le(CB.data());
8653 return Encoding;
8654 };
8655
8656 SmallVector<SDValue> OutChains;
8657
8658 SmallVector<uint32_t> Encodings;
8659 if (!HasCFBranch) {
8660 Encodings.append(
8661 {// auipc t2, 0
8662 // Loads the current PC into t2.
8663 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
8664 // ld t0, 24(t2)
8665 // Loads the function address into t0. Note that we are using offsets
8666 // pc-relative to the first instruction of the trampoline.
8667 GetEncoding(MCInstBuilder(RISCV::LD)
8668 .addReg(RISCV::X5)
8669 .addReg(RISCV::X7)
8670 .addImm(FunctionAddressOffset)),
8671 // ld t2, 16(t2)
8672 // Load the value of the static chain.
8673 GetEncoding(MCInstBuilder(RISCV::LD)
8674 .addReg(RISCV::X7)
8675 .addReg(RISCV::X7)
8676 .addImm(StaticChainOffset)),
8677 // jalr t0
8678 // Jump to the function.
8679 GetEncoding(MCInstBuilder(RISCV::JALR)
8680 .addReg(RISCV::X0)
8681 .addReg(RISCV::X5)
8682 .addImm(0))});
8683 } else {
8684 Encodings.append(
8685 {// auipc x0, <imm20> (lpad <imm20>)
8686 // Landing pad.
8687 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
8688 // auipc t3, 0
8689 // Loads the current PC into t3.
8690 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
8691 // ld t2, (FunctionAddressOffset - 4)(t3)
8692 // Loads the function address into t2. Note that we are using offsets
8693 // pc-relative to the SECOND instruction of the trampoline.
8694 GetEncoding(MCInstBuilder(RISCV::LD)
8695 .addReg(RISCV::X7)
8696 .addReg(RISCV::X28)
8697 .addImm(FunctionAddressOffset - 4)),
8698 // ld t3, (StaticChainOffset - 4)(t3)
8699 // Load the value of the static chain.
8700 GetEncoding(MCInstBuilder(RISCV::LD)
8701 .addReg(RISCV::X28)
8702 .addReg(RISCV::X28)
8703 .addImm(StaticChainOffset - 4)),
8704 // jalr t2
8705 // Software-guarded jump to the function.
8706 GetEncoding(MCInstBuilder(RISCV::JALR)
8707 .addReg(RISCV::X0)
8708 .addReg(RISCV::X7)
8709 .addImm(0))});
8710 }
8711
8712 // Store encoded instructions.
8713 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
8714 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8715 DAG.getConstant(Idx * 4, dl, MVT::i64))
8716 : Trmp;
8717 OutChains.push_back(DAG.getTruncStore(
8718 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
8719 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
8720 }
8721
8722 // Now store the variable part of the trampoline.
8723 SDValue FunctionAddress = Op.getOperand(2);
8724 SDValue StaticChain = Op.getOperand(3);
8725
8726 // Store the given static chain and function pointer in the trampoline buffer.
8727 struct OffsetValuePair {
8728 const unsigned Offset;
8729 const SDValue Value;
8730 SDValue Addr = SDValue(); // Used to cache the address.
8731 } OffsetValues[] = {
8732 {StaticChainOffset, StaticChain},
8733 {FunctionAddressOffset, FunctionAddress},
8734 };
8735 for (auto &OffsetValue : OffsetValues) {
8736 SDValue Addr =
8737 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8738 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
8739 OffsetValue.Addr = Addr;
8740 OutChains.push_back(
8741 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
8742 MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
8743 }
8744
8745 assert(OutChains.size() == StaticChainIdx + 2 &&
8746 "Size of OutChains mismatch");
8747 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
8748
8749 // The end of the trampoline's instructions is the same as the static chain
8750 // address that we computed earlier.
8751 SDValue EndOfTrmp = OffsetValues[0].Addr;
8752
8753 // Call clear cache on the trampoline instructions.
8754 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
8755 Trmp, EndOfTrmp);
8756
8757 return Chain;
8758}
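// Typical IR that reaches this lowering (illustrative; the buffer size depends
// on whether the lpad variant is used):
//   %tramp = alloca [32 x i8], align 8
//   call void @llvm.init.trampoline(ptr %tramp, ptr @nest_fn, ptr %chain)
//   %fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
// init.trampoline fills the buffer with the layout documented above;
// adjust.trampoline (lowered below) simply returns the buffer address.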
8759
8760SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
8761 SelectionDAG &DAG) const {
8762 if (!Subtarget.is64Bit())
8763 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8764
8765 return Op.getOperand(0);
8766}
8767
8768SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
8769 SelectionDAG &DAG) const {
8770 // Currently, only the vqdot, vqdotu and vqdotsu cases (from zvqdotq) should be legal.
8771 // TODO: There are many other sub-cases we could potentially lower, are
8772 // any of them worthwhile? Ex: via vredsum, vwredsum, vwwmaccu, etc..
8773 SDLoc DL(Op);
8774 MVT VT = Op.getSimpleValueType();
8775 SDValue Accum = Op.getOperand(0);
8776 assert(Accum.getSimpleValueType() == VT &&
8777 VT.getVectorElementType() == MVT::i32);
8778 SDValue A = Op.getOperand(1);
8779 SDValue B = Op.getOperand(2);
8780 MVT ArgVT = A.getSimpleValueType();
8781 assert(ArgVT == B.getSimpleValueType() &&
8782 ArgVT.getVectorElementType() == MVT::i8);
8783 (void)ArgVT;
8784
8785 // The zvqdotq pseudos are defined with sources and destination both
8786 // being i32. This cast is needed for correctness to avoid incorrect
8787 // .vx matching of i8 splats.
8788 A = DAG.getBitcast(VT, A);
8789 B = DAG.getBitcast(VT, B);
8790
8791 MVT ContainerVT = VT;
8792 if (VT.isFixedLengthVector()) {
8793 ContainerVT = getContainerForFixedLengthVector(VT);
8794 Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
8795 A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
8796 B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
8797 }
8798
8799 unsigned Opc;
8800 switch (Op.getOpcode()) {
8801 case ISD::PARTIAL_REDUCE_SMLA:
8802 Opc = RISCVISD::VQDOT_VL;
8803 break;
8804 case ISD::PARTIAL_REDUCE_UMLA:
8805 Opc = RISCVISD::VQDOTU_VL;
8806 break;
8807 case ISD::PARTIAL_REDUCE_SUMLA:
8808 Opc = RISCVISD::VQDOTSU_VL;
8809 break;
8810 default:
8811 llvm_unreachable("Unexpected opcode");
8812 }
8813 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8814 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
8815 if (VT.isFixedLengthVector())
8816 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
8817 return Res;
8818}
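// Illustrative mapping: a PARTIAL_REDUCE_SMLA with a v4i32 accumulator and two
// v16i8 inputs becomes one VQDOT_VL (vqdot.vv) on the i32 container type, with
// the accumulator passed through as the third operand.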
8819
8820 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
8821 SelectionDAG &DAG, unsigned Flags) {
8822 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
8823}
8824
8825 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
8826 SelectionDAG &DAG, unsigned Flags) {
8827 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
8828 Flags);
8829}
8830
8831 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
8832 SelectionDAG &DAG, unsigned Flags) {
8833 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
8834 N->getOffset(), Flags);
8835}
8836
8837 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
8838 SelectionDAG &DAG, unsigned Flags) {
8839 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
8840}
8841
8842 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
8843 EVT Ty, SelectionDAG &DAG) {
8844 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
8845 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8846 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8847 return DAG.getLoad(
8848 Ty, DL, DAG.getEntryNode(), LC,
8849 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8850}
8851
8852 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
8853 EVT Ty, SelectionDAG &DAG) {
8854 RISCVConstantPoolValue *CPV =
8855 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
8856 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8857 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8858 return DAG.getLoad(
8859 Ty, DL, DAG.getEntryNode(), LC,
8860 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8861}
8862
8863template <class NodeTy>
8864SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
8865 bool IsLocal, bool IsExternWeak) const {
8866 SDLoc DL(N);
8867 EVT Ty = getPointerTy(DAG.getDataLayout());
8868
8869 // When HWASAN is used and tagging of global variables is enabled
8870 // they should be accessed via the GOT, since the tagged address of a global
8871 // is incompatible with existing code models. This also applies to non-pic
8872 // mode.
8873 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
8874 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8875 if (IsLocal && !Subtarget.allowTaggedGlobals())
8876 // Use PC-relative addressing to access the symbol. This generates the
8877 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
8878 // %pcrel_lo(auipc)).
8879 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8880
8881 // Use PC-relative addressing to access the GOT for this symbol, then load
8882 // the address from the GOT. This generates the pattern (PseudoLGA sym),
8883 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8884 SDValue Load =
8885 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8886 MachineFunction &MF = DAG.getMachineFunction();
8887 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8888 MachinePointerInfo::getGOT(MF),
8889 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8890 MachineMemOperand::MOInvariant,
8891 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8892 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8893 return Load;
8894 }
8895
8896 switch (getTargetMachine().getCodeModel()) {
8897 default:
8898 reportFatalUsageError("Unsupported code model for lowering");
8899 case CodeModel::Small: {
8900 // Generate a sequence for accessing addresses within the first 2 GiB of
8901 // address space.
8902 if (Subtarget.hasVendorXqcili()) {
8903 // Use QC.E.LI to generate the address, as this is easier to relax than
8904 // LUI/ADDI.
8905 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8906 return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
8907 }
8908
8909 // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
8910 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
8911 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
8912 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8913 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
8914 }
8915 case CodeModel::Medium: {
8916 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8917 if (IsExternWeak) {
8918 // An extern weak symbol may be undefined, i.e. have value 0, which may
8919 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
8920 // symbol. This generates the pattern (PseudoLGA sym), which expands to
8921 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8922 SDValue Load =
8923 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8924 MachineFunction &MF = DAG.getMachineFunction();
8925 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8926 MachinePointerInfo::getGOT(MF),
8927 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8928 MachineMemOperand::MOInvariant,
8929 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8930 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8931 return Load;
8932 }
8933
8934 // Generate a sequence for accessing addresses within any 2GiB range within
8935 // the address space. This generates the pattern (PseudoLLA sym), which
8936 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
8937 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8938 }
8939 case CodeModel::Large: {
8940 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8941 return getLargeGlobalAddress(G, DL, Ty, DAG);
8942
8943 // Use PC-relative addressing for other node types.
8944 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8945 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8946 }
8947 }
8948}
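// Assembly flavour of the sequences above (illustrative):
//   small:   lui a0, %hi(sym)
//            addi a0, a0, %lo(sym)
//   medium:  1: auipc a0, %pcrel_hi(sym)
//            addi a0, a0, %pcrel_lo(1b)
//   GOT:     1: auipc a0, %got_pcrel_hi(sym)
//            ld a0, %pcrel_lo(1b)(a0)      // lw on RV32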
8949
8950SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8951 SelectionDAG &DAG) const {
8952 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8953 assert(N->getOffset() == 0 && "unexpected offset in global node");
8954 const GlobalValue *GV = N->getGlobal();
8955 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8956}
8957
8958SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8959 SelectionDAG &DAG) const {
8960 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8961
8962 return getAddr(N, DAG);
8963}
8964
8965SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8966 SelectionDAG &DAG) const {
8967 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8968
8969 return getAddr(N, DAG);
8970}
8971
8972SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8973 SelectionDAG &DAG) const {
8974 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8975
8976 return getAddr(N, DAG);
8977}
8978
8979SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8980 SelectionDAG &DAG,
8981 bool UseGOT) const {
8982 SDLoc DL(N);
8983 EVT Ty = getPointerTy(DAG.getDataLayout());
8984 const GlobalValue *GV = N->getGlobal();
8985 MVT XLenVT = Subtarget.getXLenVT();
8986
8987 if (UseGOT) {
8988 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8989 // load the address from the GOT and add the thread pointer. This generates
8990 // the pattern (PseudoLA_TLS_IE sym), which expands to
8991 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
8992 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8993 SDValue Load =
8994 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
8995 MachineFunction &MF = DAG.getMachineFunction();
8996 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8997 MachinePointerInfo::getGOT(MF),
8998 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8999 MachineMemOperand::MOInvariant,
9000 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9001 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9002
9003 // Add the thread pointer.
9004 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9005 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
9006 }
9007
9008 // Generate a sequence for accessing the address relative to the thread
9009 // pointer, with the appropriate adjustment for the thread pointer offset.
9010 // This generates the pattern
9011 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
9012 SDValue AddrHi =
9013 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
9014 SDValue AddrAdd =
9015 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
9016 SDValue AddrLo =
9017 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
9018
9019 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9020 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9021 SDValue MNAdd =
9022 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
9023 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
9024}
9025
9026SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
9027 SelectionDAG &DAG) const {
9028 SDLoc DL(N);
9029 EVT Ty = getPointerTy(DAG.getDataLayout());
9030 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
9031 const GlobalValue *GV = N->getGlobal();
9032
9033 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9034 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
9035 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
9036 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9037 SDValue Load =
9038 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
9039
9040 // Prepare argument list to generate call.
9041 ArgListTy Args;
9042 Args.emplace_back(Load, CallTy);
9043
9044 // Setup call to __tls_get_addr.
9045 TargetLowering::CallLoweringInfo CLI(DAG);
9046 CLI.setDebugLoc(DL)
9047 .setChain(DAG.getEntryNode())
9048 .setLibCallee(CallingConv::C, CallTy,
9049 DAG.getExternalSymbol("__tls_get_addr", Ty),
9050 std::move(Args));
9051
9052 return LowerCallTo(CLI).first;
9053}
9054
9055SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
9056 SelectionDAG &DAG) const {
9057 SDLoc DL(N);
9058 EVT Ty = getPointerTy(DAG.getDataLayout());
9059 const GlobalValue *GV = N->getGlobal();
9060
9061 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9062 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
9063 //
9064 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
9065 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
9066 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
9067 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
9068 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9069 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
9070}
9071
9072SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
9073 SelectionDAG &DAG) const {
9074 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9075 assert(N->getOffset() == 0 && "unexpected offset in global node");
9076
9077 if (DAG.getTarget().useEmulatedTLS())
9078 return LowerToTLSEmulatedModel(N, DAG);
9079
9080 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
9081
9082 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
9083 CallingConv::GHC)
9084 reportFatalUsageError("In GHC calling convention TLS is not supported");
9085
9086 SDValue Addr;
9087 switch (Model) {
9088 case TLSModel::LocalExec:
9089 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9090 break;
9091 case TLSModel::InitialExec:
9092 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9093 break;
9094 case TLSModel::LocalDynamic:
9095 case TLSModel::GeneralDynamic:
9096 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9097 : getDynamicTLSAddr(N, DAG);
9098 break;
9099 }
9100
9101 return Addr;
9102}
9103
9104// Return true if Val is equal to (setcc LHS, RHS, CC).
9105// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9106// Otherwise, return std::nullopt.
9107static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9108 ISD::CondCode CC, SDValue Val) {
9109 assert(Val->getOpcode() == ISD::SETCC);
9110 SDValue LHS2 = Val.getOperand(0);
9111 SDValue RHS2 = Val.getOperand(1);
9112 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9113
9114 if (LHS == LHS2 && RHS == RHS2) {
9115 if (CC == CC2)
9116 return true;
9117 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9118 return false;
9119 } else if (LHS == RHS2 && RHS == LHS2) {
9120 CC2 = ISD::getSetCCSwappedOperands(CC2);
9121 if (CC == CC2)
9122 return true;
9123 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9124 return false;
9125 }
9126
9127 return std::nullopt;
9128}
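// Example: with Val = (setcc a, b, setlt), matchSetCC(a, b, setge, Val)
// returns false (inverse condition), matchSetCC(b, a, setgt, Val) returns
// true (same condition after swapping the operands), and unrelated operands
// give std::nullopt.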
9129
9130 static bool isSimm12Constant(SDValue V) {
9131 return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
9132}
9133
9134 static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
9135 const RISCVSubtarget &Subtarget) {
9136 SDValue CondV = N->getOperand(0);
9137 SDValue TrueV = N->getOperand(1);
9138 SDValue FalseV = N->getOperand(2);
9139 MVT VT = N->getSimpleValueType(0);
9140 SDLoc DL(N);
9141
9142 if (!Subtarget.hasConditionalMoveFusion()) {
9143 // (select c, -1, y) -> -c | y
9144 if (isAllOnesConstant(TrueV)) {
9145 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9146 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9147 }
9148 // (select c, y, -1) -> (c-1) | y
9149 if (isAllOnesConstant(FalseV)) {
9150 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9151 DAG.getAllOnesConstant(DL, VT));
9152 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9153 }
9154
9155 const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
9156
9157 // (select c, 0, y) -> (c-1) & y
9158 if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
9159 SDValue Neg =
9160 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
9161 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9162 }
9163 if (isNullConstant(FalseV)) {
9164 // (select c, (1 << ShAmount) + 1, 0) -> (c << ShAmount) + c
9165 if (auto *TrueC = dyn_cast<ConstantSDNode>(TrueV)) {
9166 uint64_t TrueM1 = TrueC->getZExtValue() - 1;
9167 if (isPowerOf2_64(TrueM1)) {
9168 unsigned ShAmount = Log2_64(TrueM1);
9169 if (Subtarget.hasShlAdd(ShAmount))
9170 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, CondV,
9171 DAG.getConstant(ShAmount, DL, VT), CondV);
9172 }
9173 }
9174 // (select c, y, 0) -> -c & y
9175 if (!HasCZero || isSimm12Constant(TrueV)) {
9176 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9177 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9178 }
9179 }
9180 }
9181
9182 // select c, ~x, x --> xor -c, x
9183 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9184 const APInt &TrueVal = TrueV->getAsAPIntVal();
9185 const APInt &FalseVal = FalseV->getAsAPIntVal();
9186 if (~TrueVal == FalseVal) {
9187 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9188 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9189 }
9190 }
9191
9192 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9193 // when both truev and falsev are also setcc.
9194 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9195 FalseV.getOpcode() == ISD::SETCC) {
9196 SDValue LHS = CondV.getOperand(0);
9197 SDValue RHS = CondV.getOperand(1);
9198 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9199
9200 // (select x, x, y) -> x | y
9201 // (select !x, x, y) -> x & y
9202 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9203 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9204 DAG.getFreeze(FalseV));
9205 }
9206 // (select x, y, x) -> x & y
9207 // (select !x, y, x) -> x | y
9208 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9209 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9210 DAG.getFreeze(TrueV), FalseV);
9211 }
9212 }
9213
9214 return SDValue();
9215}
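// Worked example: for (select c, -1, y) with c known to be 0 or 1, -c is
// either 0 or all-ones, so "neg t0, c; or res, t0, y" implements the select
// without a branch (illustrative register names).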
9216
9217// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9218// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9219// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
9220// being `0` or `-1`. In such cases we can replace `select` with `and`.
9221// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9222// than `c0`?
9223 static SDValue
9224 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
9225 const RISCVSubtarget &Subtarget) {
9226 if (Subtarget.hasShortForwardBranchOpt())
9227 return SDValue();
9228
9229 unsigned SelOpNo = 0;
9230 SDValue Sel = BO->getOperand(0);
9231 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9232 SelOpNo = 1;
9233 Sel = BO->getOperand(1);
9234 }
9235
9236 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9237 return SDValue();
9238
9239 unsigned ConstSelOpNo = 1;
9240 unsigned OtherSelOpNo = 2;
9241 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9242 ConstSelOpNo = 2;
9243 OtherSelOpNo = 1;
9244 }
9245 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9246 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9247 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9248 return SDValue();
9249
9250 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9251 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9252 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9253 return SDValue();
9254
9255 SDLoc DL(Sel);
9256 EVT VT = BO->getValueType(0);
9257
9258 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9259 if (SelOpNo == 1)
9260 std::swap(NewConstOps[0], NewConstOps[1]);
9261
9262 SDValue NewConstOp =
9263 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9264 if (!NewConstOp)
9265 return SDValue();
9266
9267 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9268 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9269 return SDValue();
9270
9271 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9272 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9273 if (SelOpNo == 1)
9274 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9275 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9276
9277 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9278 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9279 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9280}
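// Illustrative example: (and (select cond, x, 0), 255) is rewritten to
// (select cond, (and x, 255), 0); the new false arm is zero, so the select
// itself can then be lowered cheaply (e.g. to czero.eqz or a masked negate).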
9281
9282SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9283 SDValue CondV = Op.getOperand(0);
9284 SDValue TrueV = Op.getOperand(1);
9285 SDValue FalseV = Op.getOperand(2);
9286 SDLoc DL(Op);
9287 MVT VT = Op.getSimpleValueType();
9288 MVT XLenVT = Subtarget.getXLenVT();
9289
9290 // Lower vector SELECTs to VSELECTs by splatting the condition.
9291 if (VT.isVector()) {
9292 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9293 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9294 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9295 }
9296
9297 // Try some other optimizations before falling back to generic lowering.
9298 if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
9299 return V;
9300
9301 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9302 // nodes to implement the SELECT. Performing the lowering here allows for
9303 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9304 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9305 if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
9306
9307 // (select c, t, 0) -> (czero_eqz t, c)
9308 if (isNullConstant(FalseV))
9309 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9310 // (select c, 0, f) -> (czero_nez f, c)
9311 if (isNullConstant(TrueV))
9312 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9313
9314 // Check to see if a given operation is a 'NOT', if so return the negated
9315 // operand
9316 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9317 using namespace llvm::SDPatternMatch;
9318 SDValue Xor;
9319 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9320 return Xor;
9321 }
9322 return std::nullopt;
9323 };
9324 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9325 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9326 if (TrueV.getOpcode() == ISD::AND &&
9327 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9328 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9329 ? getNotOperand(TrueV.getOperand(1))
9330 : getNotOperand(TrueV.getOperand(0));
9331 if (NotOperand) {
9332 SDValue CMOV =
9333 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9334 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9335 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
9336 }
9337 return DAG.getNode(
9338 ISD::OR, DL, VT, TrueV,
9339 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
9340 }
9341
9342 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
9343 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
9344 if (FalseV.getOpcode() == ISD::AND &&
9345 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
9346 auto NotOperand = (FalseV.getOperand(0) == TrueV)
9347 ? getNotOperand(FalseV.getOperand(1))
9348 : getNotOperand(FalseV.getOperand(0));
9349 if (NotOperand) {
9350 SDValue CMOV =
9351 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
9352 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9353 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
9354 }
9355 return DAG.getNode(
9356 ISD::OR, DL, VT, FalseV,
9357 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
9358 }
9359
9360 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
9361 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
9362 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9363 const APInt &TrueVal = TrueV->getAsAPIntVal();
9364 const APInt &FalseVal = FalseV->getAsAPIntVal();
9365
9366 // Prefer these over Zicond to avoid materializing an immediate:
9367 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
9368 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
9369 if (CondV.getOpcode() == ISD::SETCC &&
9370 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
9371 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9372 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
9373 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
9374 int64_t TrueImm = TrueVal.getSExtValue();
9375 int64_t FalseImm = FalseVal.getSExtValue();
9376 if (CCVal == ISD::SETGT)
9377 std::swap(TrueImm, FalseImm);
9378 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
9379 isInt<12>(TrueImm - FalseImm)) {
9380 SDValue SRA =
9381 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
9382 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
9383 SDValue AND =
9384 DAG.getNode(ISD::AND, DL, VT, SRA,
9385 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
9386 return DAG.getNode(ISD::ADD, DL, VT, AND,
9387 DAG.getSignedConstant(FalseImm, DL, VT));
9388 }
9389 }
9390 }
9391
9392 // Use SHL/ADDI (and possible XORI) to avoid having to materialize
9393 // a constant in register
9394 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
9395 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
9396 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9397 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
9398 }
9399 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
9400 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
9401 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
9402 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9403 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
9404 }
9405
9406 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
9407 const int DeltaCost = RISCVMatInt::getIntMatCost(
9408 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9409 // Does the addend fold into an ADDI
9410 if (Addend.isSignedIntN(12))
9411 return DeltaCost;
9412 const int AddendCost = RISCVMatInt::getIntMatCost(
9413 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9414 return AddendCost + DeltaCost;
9415 };
9416 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
9417 getCost(TrueVal - FalseVal, FalseVal);
9418 SDValue LHSVal = DAG.getConstant(
9419 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
9420 SDValue CMOV =
9421 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9422 DL, VT, LHSVal, CondV);
9423 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
9424 }
9425
9426 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
9427 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
9428 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
9429 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
9430 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
9431 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
9432 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
9433 // Fall back to XORI if Const == -0x800
9434 if (RawConstVal == -0x800) {
9435 SDValue XorOp = DAG.getNode(ISD::XOR, DL, VT, RegV, ConstVal);
9436 SDValue CMOV =
9437 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9438 DL, VT, XorOp, CondV);
9439 return DAG.getNode(ISD::XOR, DL, VT, CMOV, ConstVal);
9440 }
9441 // Efficient only if the constant and its negation fit into an `ADDI`.
9442 // Prefer Add/Sub over Xor since they can be compressed for small immediates.
9443 if (isInt<12>(RawConstVal)) {
9444 SDValue SubOp = DAG.getNode(ISD::SUB, DL, VT, RegV, ConstVal);
9445 SDValue CMOV =
9446 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9447 DL, VT, SubOp, CondV);
9448 return DAG.getNode(ISD::ADD, DL, VT, CMOV, ConstVal);
9449 }
9450 }
9451
9452 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
9453 // Unless we have the short forward branch optimization.
9454 if (!Subtarget.hasConditionalMoveFusion())
9455 return DAG.getNode(
9456 ISD::OR, DL, VT,
9457 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
9458 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
9459 SDNodeFlags::Disjoint);
9460 }
9461
9462 if (Op.hasOneUse()) {
9463 unsigned UseOpc = Op->user_begin()->getOpcode();
9464 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
9465 SDNode *BinOp = *Op->user_begin();
9466 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
9467 DAG, Subtarget)) {
9468 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
9469 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
9470 // may return a constant node and cause a crash in lowerSELECT.
9471 if (NewSel.getOpcode() == ISD::SELECT)
9472 return lowerSELECT(NewSel, DAG);
9473 return NewSel;
9474 }
9475 }
9476 }
9477
9478 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
9479 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
9480 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
9481 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
9482 if (FPTV && FPFV) {
9483 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
9484 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
9485 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
9486 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
9487 DAG.getConstant(1, DL, XLenVT));
9488 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
9489 }
9490 }
9491
9492 // If the condition is not an integer SETCC which operates on XLenVT, we need
9493 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
9494 // (select condv, truev, falsev)
9495 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
9496 if (CondV.getOpcode() != ISD::SETCC ||
9497 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
9498 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9499 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
9500
9501 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
9502
9503 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9504 }
9505
9506 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
9507 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
9508 // advantage of the integer compare+branch instructions. i.e.:
9509 // (select (setcc lhs, rhs, cc), truev, falsev)
9510 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
9511 SDValue LHS = CondV.getOperand(0);
9512 SDValue RHS = CondV.getOperand(1);
9513 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9514
9515 // Special case for a select of 2 constants that have a difference of 1.
9516 // Normally this is done by DAGCombine, but if the select is introduced by
9517 // type legalization or op legalization, we miss it. Restricting to SETLT
9518 // case for now because that is what signed saturating add/sub need.
9519 // FIXME: We don't need the condition to be SETLT or even a SETCC,
9520 // but we would probably want to swap the true/false values if the condition
9521 // is SETGE/SETLE to avoid an XORI.
9522 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
9523 CCVal == ISD::SETLT) {
9524 const APInt &TrueVal = TrueV->getAsAPIntVal();
9525 const APInt &FalseVal = FalseV->getAsAPIntVal();
9526 if (TrueVal - 1 == FalseVal)
9527 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
9528 if (TrueVal + 1 == FalseVal)
9529 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
9530 }
9531
9532 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9533 // 1 < x ? x : 1 -> 0 < x ? x : 1
9534 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
9535 RHS == TrueV && LHS == FalseV) {
9536 LHS = DAG.getConstant(0, DL, VT);
9537 // 0 <u x is the same as x != 0.
9538 if (CCVal == ISD::SETULT) {
9539 std::swap(LHS, RHS);
9540 CCVal = ISD::SETNE;
9541 }
9542 }
9543
9544 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
9545 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
9546 RHS == FalseV) {
9547 RHS = DAG.getConstant(0, DL, VT);
9548 }
9549
9550 SDValue TargetCC = DAG.getCondCode(CCVal);
9551
9552 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
9553 // (select (setcc lhs, rhs, CC), constant, falsev)
9554 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
9555 std::swap(TrueV, FalseV);
9556 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
9557 }
9558
9559 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
9560 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9561}
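// Editor's note: the two helpers below are an illustrative sketch added to
// this annotated listing for exposition; they are not part of
// RISCVISelLowering.cpp and their names are hypothetical. They restate, as
// plain scalar C++ for a 64-bit XLEN (ignoring signed-overflow pedantry), the
// branchless select-of-constants identities used in lowerSELECT above.
static int64_t selectOnSignViaSraMask(int64_t X, int64_t Y, int64_t Z) {
  // (select (x < 0), y, z) -> (x >> (XLEN - 1)) & (y - z) + z
  int64_t Mask = X >> 63;      // all-ones when X < 0, all-zeros otherwise
  return (Mask & (Y - Z)) + Z; // Y when X < 0, Z otherwise
}
static int64_t selectConstsViaCzero(bool Cond, int64_t TrueImm,
                                    int64_t FalseImm) {
  // (select c, c1, c2) -> (add (czero.nez (c2 - c1), c), c1): zero out the
  // delta when the condition is non-zero, then add the true constant back.
  int64_t Delta = Cond ? 0 : (FalseImm - TrueImm); // czero.nez (c2 - c1), c
  return Delta + TrueImm;      // TrueImm when Cond, FalseImm otherwise
}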
9562
9563SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
9564 SDValue CondV = Op.getOperand(1);
9565 SDLoc DL(Op);
9566 MVT XLenVT = Subtarget.getXLenVT();
9567
9568 if (CondV.getOpcode() == ISD::SETCC &&
9569 CondV.getOperand(0).getValueType() == XLenVT) {
9570 SDValue LHS = CondV.getOperand(0);
9571 SDValue RHS = CondV.getOperand(1);
9572 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9573
9574 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9575
9576 SDValue TargetCC = DAG.getCondCode(CCVal);
9577 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9578 LHS, RHS, TargetCC, Op.getOperand(2));
9579 }
9580
9581 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9582 CondV, DAG.getConstant(0, DL, XLenVT),
9583 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
9584}
9585
9586SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
9587 MachineFunction &MF = DAG.getMachineFunction();
9588 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
9589
9590 SDLoc DL(Op);
9591 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
9592 getPointerTy(MF.getDataLayout()));
9593
9594 // vastart just stores the address of the VarArgsFrameIndex slot into the
9595 // memory location argument.
9596 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
9597 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
9598 MachinePointerInfo(SV));
9599}
9600
9601SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
9602 SelectionDAG &DAG) const {
9603 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9604 MachineFunction &MF = DAG.getMachineFunction();
9605 MachineFrameInfo &MFI = MF.getFrameInfo();
9606 MFI.setFrameAddressIsTaken(true);
9607 Register FrameReg = RI.getFrameRegister(MF);
9608 int XLenInBytes = Subtarget.getXLen() / 8;
9609
9610 EVT VT = Op.getValueType();
9611 SDLoc DL(Op);
9612 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
9613 unsigned Depth = Op.getConstantOperandVal(0);
9614 while (Depth--) {
9615 int Offset = -(XLenInBytes * 2);
9616 SDValue Ptr = DAG.getNode(
9617 ISD::ADD, DL, VT, FrameAddr,
9618 DAG.getSignedConstant(Offset, DL, getPointerTy(DAG.getDataLayout())));
9619 FrameAddr =
9620 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
9621 }
9622 return FrameAddr;
9623}
9624
9625SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
9626 SelectionDAG &DAG) const {
9627 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9628 MachineFunction &MF = DAG.getMachineFunction();
9629 MachineFrameInfo &MFI = MF.getFrameInfo();
9630 MFI.setReturnAddressIsTaken(true);
9631 MVT XLenVT = Subtarget.getXLenVT();
9632 int XLenInBytes = Subtarget.getXLen() / 8;
9633
9634 EVT VT = Op.getValueType();
9635 SDLoc DL(Op);
9636 unsigned Depth = Op.getConstantOperandVal(0);
9637 if (Depth) {
9638 int Off = -XLenInBytes;
9639 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
9640 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
9641 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
9642 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
9643 MachinePointerInfo());
9644 }
9645
9646 // Return the value of the return address register, marking it an implicit
9647 // live-in.
9648 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
9649 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
9650}
9651
9652SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
9653 SelectionDAG &DAG) const {
9654 SDLoc DL(Op);
9655 SDValue Lo = Op.getOperand(0);
9656 SDValue Hi = Op.getOperand(1);
9657 SDValue Shamt = Op.getOperand(2);
9658 EVT VT = Lo.getValueType();
9659
9660 // if Shamt-XLEN < 0: // Shamt < XLEN
9661 // Lo = Lo << Shamt
9662 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
9663 // else:
9664 // Lo = 0
9665 // Hi = Lo << (Shamt-XLEN)
9666
9667 SDValue Zero = DAG.getConstant(0, DL, VT);
9668 SDValue One = DAG.getConstant(1, DL, VT);
9669 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9670 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9671 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9672 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9673
9674 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
9675 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
9676 SDValue ShiftRightLo =
9677 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
9678 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
9679 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
9680 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
9681
9682 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9683
9684 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
9685 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9686
9687 SDValue Parts[2] = {Lo, Hi};
9688 return DAG.getMergeValues(Parts, DL);
9689}
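// Editor's note: illustrative sketch, not part of this file. The expansion
// above written out for 32-bit parts (an RV32-style XLEN), assuming
// 0 <= Shamt < 2 * XLEN; the helper name is hypothetical.
static void shiftLeftParts32(uint32_t Lo, uint32_t Hi, unsigned Shamt,
                             uint32_t &OutLo, uint32_t &OutHi) {
  const unsigned XLen = 32;
  if (Shamt < XLen) {
    OutLo = Lo << Shamt;
    // (Lo >> 1) >> (XLen - 1 - Shamt) keeps every shift amount below XLen,
    // mirroring the ShiftRight1Lo / XLenMinus1Shamt nodes built above.
    OutHi = (Hi << Shamt) | ((Lo >> 1) >> (XLen - 1 - Shamt));
  } else {
    OutLo = 0;
    OutHi = Lo << (Shamt - XLen);
  }
}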
9690
9691SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
9692 bool IsSRA) const {
9693 SDLoc DL(Op);
9694 SDValue Lo = Op.getOperand(0);
9695 SDValue Hi = Op.getOperand(1);
9696 SDValue Shamt = Op.getOperand(2);
9697 EVT VT = Lo.getValueType();
9698
9699 // SRA expansion:
9700 // if Shamt-XLEN < 0: // Shamt < XLEN
9701 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9702 // Hi = Hi >>s Shamt
9703 // else:
9704 // Lo = Hi >>s (Shamt-XLEN);
9705 // Hi = Hi >>s (XLEN-1)
9706 //
9707 // SRL expansion:
9708 // if Shamt-XLEN < 0: // Shamt < XLEN
9709 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9710 // Hi = Hi >>u Shamt
9711 // else:
9712 // Lo = Hi >>u (Shamt-XLEN);
9713 // Hi = 0;
9714
9715 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
9716
9717 SDValue Zero = DAG.getConstant(0, DL, VT);
9718 SDValue One = DAG.getConstant(1, DL, VT);
9719 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9720 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9721 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9722 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9723
9724 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
9725 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
9726 SDValue ShiftLeftHi =
9727 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
9728 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
9729 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
9730 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
9731 SDValue HiFalse =
9732 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
9733
9734 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9735
9736 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
9737 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9738
9739 SDValue Parts[2] = {Lo, Hi};
9740 return DAG.getMergeValues(Parts, DL);
9741}
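// Editor's note: illustrative sketch, not part of this file. The SRA side of
// the expansion above for 32-bit parts, assuming 0 <= Shamt < 2 * XLEN and an
// arithmetic >> on the signed high word; the helper name is hypothetical.
static void shiftRightArithParts32(uint32_t Lo, int32_t Hi, unsigned Shamt,
                                   uint32_t &OutLo, int32_t &OutHi) {
  const unsigned XLen = 32;
  if (Shamt < XLen) {
    OutLo = (Lo >> Shamt) | (((uint32_t)Hi << 1) << (XLen - 1 - Shamt));
    OutHi = Hi >> Shamt;      // sign bits fill in from Hi
  } else {
    OutLo = (uint32_t)(Hi >> (Shamt - XLen));
    OutHi = Hi >> (XLen - 1); // all copies of the sign bit
  }
}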
9742
9743// Lower splats of i1 types to SETCC. For each mask vector type, we have a
9744// legal equivalently-sized i8 type, so we can use that as a go-between.
9745SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
9746 SelectionDAG &DAG) const {
9747 SDLoc DL(Op);
9748 MVT VT = Op.getSimpleValueType();
9749 SDValue SplatVal = Op.getOperand(0);
9750 // All-zeros or all-ones splats are handled specially.
9751 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
9752 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9753 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
9754 }
9755 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
9756 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9757 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
9758 }
9759 MVT InterVT = VT.changeVectorElementType(MVT::i8);
9760 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
9761 DAG.getConstant(1, DL, SplatVal.getValueType()));
9762 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
9763 SDValue Zero = DAG.getConstant(0, DL, InterVT);
9764 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
9765}
9766
9767// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
9768// illegal (currently only vXi64 RV32).
9769// FIXME: We could also catch non-constant sign-extended i32 values and lower
9770// them to VMV_V_X_VL.
9771SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
9772 SelectionDAG &DAG) const {
9773 SDLoc DL(Op);
9774 MVT VecVT = Op.getSimpleValueType();
9775 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
9776 "Unexpected SPLAT_VECTOR_PARTS lowering");
9777
9778 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
9779 SDValue Lo = Op.getOperand(0);
9780 SDValue Hi = Op.getOperand(1);
9781
9782 MVT ContainerVT = VecVT;
9783 if (VecVT.isFixedLengthVector())
9784 ContainerVT = getContainerForFixedLengthVector(VecVT);
9785
9786 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9787
9788 SDValue Res =
9789 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
9790
9791 if (VecVT.isFixedLengthVector())
9792 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
9793
9794 return Res;
9795}
9796
9797// Custom-lower extensions from mask vectors by using a vselect either with 1
9798// for zero/any-extension or -1 for sign-extension:
9799// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
9800// Note that any-extension is lowered identically to zero-extension.
9801SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
9802 int64_t ExtTrueVal) const {
9803 SDLoc DL(Op);
9804 MVT VecVT = Op.getSimpleValueType();
9805 SDValue Src = Op.getOperand(0);
9806 // Only custom-lower extensions from mask types
9807 assert(Src.getValueType().isVector() &&
9808 Src.getValueType().getVectorElementType() == MVT::i1);
9809
9810 if (VecVT.isScalableVector()) {
9811 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
9812 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
9813 if (Src.getOpcode() == ISD::XOR &&
9814 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
9815 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
9816 SplatTrueVal);
9817 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
9818 }
9819
9820 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
9821 MVT I1ContainerVT =
9822 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
9823
9824 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
9825
9826 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9827
9828 MVT XLenVT = Subtarget.getXLenVT();
9829 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
9830 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
9831
9832 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9833 SDValue Xor = Src.getOperand(0);
9834 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
9835 SDValue ScalableOnes = Xor.getOperand(1);
9836 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
9837 ScalableOnes.getOperand(0).isUndef() &&
9838 ISD::isConstantSplatVectorAllOnes(
9839 ScalableOnes.getOperand(1).getNode())) {
9840 CC = Xor.getOperand(0);
9841 std::swap(SplatZero, SplatTrueVal);
9842 }
9843 }
9844 }
9845
9846 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9847 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9848 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9849 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
9850 SDValue Select =
9851 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
9852 SplatZero, DAG.getUNDEF(ContainerVT), VL);
9853
9854 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
9855}
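// Editor's note: illustrative sketch, not part of this file (hypothetical
// helper). Per-element meaning of the vselect/vmerge built above: a set mask
// bit extends to ExtTrueVal (-1 for sign-extension, +1 for zero/any-extension)
// and a clear bit extends to 0; when the source was (xor mask, all-ones), the
// inversion is folded by swapping the two splats.
static int64_t extendMaskElt(bool MaskBit, bool SrcWasInverted,
                             int64_t ExtTrueVal) {
  bool EffectiveBit = MaskBit != SrcWasInverted; // xor with all-ones inverts
  return EffectiveBit ? ExtTrueVal : 0;
}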
9856
9857// Custom-lower truncations from vectors to mask vectors by using a mask and a
9858// setcc operation:
9859// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
9860SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
9861 SelectionDAG &DAG) const {
9862 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
9863 SDLoc DL(Op);
9864 EVT MaskVT = Op.getValueType();
9865 // Only expect to custom-lower truncations to mask types
9866 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
9867 "Unexpected type for vector mask lowering");
9868 SDValue Src = Op.getOperand(0);
9869 MVT VecVT = Src.getSimpleValueType();
9870 SDValue Mask, VL;
9871 if (IsVPTrunc) {
9872 Mask = Op.getOperand(1);
9873 VL = Op.getOperand(2);
9874 }
9875 // If this is a fixed vector, we need to convert it to a scalable vector.
9876 MVT ContainerVT = VecVT;
9877
9878 if (VecVT.isFixedLengthVector()) {
9879 ContainerVT = getContainerForFixedLengthVector(VecVT);
9880 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9881 if (IsVPTrunc) {
9882 MVT MaskContainerVT =
9883 getContainerForFixedLengthVector(Mask.getSimpleValueType());
9884 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
9885 }
9886 }
9887
9888 if (!IsVPTrunc) {
9889 std::tie(Mask, VL) =
9890 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9891 }
9892
9893 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
9894 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
9895
9896 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9897 DAG.getUNDEF(ContainerVT), SplatOne, VL);
9898 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9899 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9900
9901 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
9902 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
9903 DAG.getUNDEF(ContainerVT), Mask, VL);
9904 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
9905 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
9906 DAG.getUNDEF(MaskContainerVT), Mask, VL});
9907 if (MaskVT.isFixedLengthVector())
9908 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
9909 return Trunc;
9910}
9911
9912SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
9913 SelectionDAG &DAG) const {
9914 unsigned Opc = Op.getOpcode();
9915 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
9916 SDLoc DL(Op);
9917
9918 MVT VT = Op.getSimpleValueType();
9919 // Only custom-lower vector truncates
9920 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9921
9922 // Truncates to mask types are handled differently
9923 if (VT.getVectorElementType() == MVT::i1)
9924 return lowerVectorMaskTruncLike(Op, DAG);
9925
9926 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
9927 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
9928 // truncate by one power of two at a time.
9929 MVT DstEltVT = VT.getVectorElementType();
9930
9931 SDValue Src = Op.getOperand(0);
9932 MVT SrcVT = Src.getSimpleValueType();
9933 MVT SrcEltVT = SrcVT.getVectorElementType();
9934
9935 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
9936 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
9937 "Unexpected vector truncate lowering");
9938
9939 MVT ContainerVT = SrcVT;
9940 SDValue Mask, VL;
9941 if (IsVPTrunc) {
9942 Mask = Op.getOperand(1);
9943 VL = Op.getOperand(2);
9944 }
9945 if (SrcVT.isFixedLengthVector()) {
9946 ContainerVT = getContainerForFixedLengthVector(SrcVT);
9947 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9948 if (IsVPTrunc) {
9949 MVT MaskVT = getMaskTypeFor(ContainerVT);
9950 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9951 }
9952 }
9953
9954 SDValue Result = Src;
9955 if (!IsVPTrunc) {
9956 std::tie(Mask, VL) =
9957 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9958 }
9959
9960 unsigned NewOpc;
9961 if (Opc == ISD::TRUNCATE_SSAT_S)
9962 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
9963 else if (Opc == ISD::TRUNCATE_USAT_U)
9964 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
9965 else
9966 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
9967
9968 do {
9969 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
9970 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
9971 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
9972 } while (SrcEltVT != DstEltVT);
9973
9974 if (SrcVT.isFixedLengthVector())
9975 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9976
9977 return Result;
9978}
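// Editor's note: illustrative sketch, not part of this file (hypothetical
// helper). The do/while loop above narrows SEW one power of two at a time, so
// an i64 -> i8 truncate becomes three TRUNCATE_VECTOR_VL nodes
// (64 -> 32 -> 16 -> 8). Both widths are assumed to be powers of two with
// SrcBits > DstBits, matching the assertion in lowerVectorTruncLike.
static unsigned countTruncSteps(unsigned SrcBits, unsigned DstBits) {
  unsigned Steps = 0;
  do {
    SrcBits /= 2;
    ++Steps;
  } while (SrcBits != DstBits);
  return Steps; // e.g. countTruncSteps(64, 8) == 3
}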
9979
9980SDValue
9981RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
9982 SelectionDAG &DAG) const {
9983 SDLoc DL(Op);
9984 SDValue Chain = Op.getOperand(0);
9985 SDValue Src = Op.getOperand(1);
9986 MVT VT = Op.getSimpleValueType();
9987 MVT SrcVT = Src.getSimpleValueType();
9988 MVT ContainerVT = VT;
9989 if (VT.isFixedLengthVector()) {
9990 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9991 ContainerVT =
9992 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9993 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9994 }
9995
9996 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9997
9998 // RVV can only widen/truncate fp to types double/half the size of the source.
9999 if ((VT.getVectorElementType() == MVT::f64 &&
10000 (SrcVT.getVectorElementType() == MVT::f16 ||
10001 SrcVT.getVectorElementType() == MVT::bf16)) ||
10002 ((VT.getVectorElementType() == MVT::f16 ||
10003 VT.getVectorElementType() == MVT::bf16) &&
10004 SrcVT.getVectorElementType() == MVT::f64)) {
10005 // For double rounding, the intermediate rounding should be round-to-odd.
10006 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
10007 ? RISCVISD::STRICT_FP_EXTEND_VL
10008 : RISCVISD::STRICT_VFNCVT_ROD_VL;
10009 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10010 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
10011 Chain, Src, Mask, VL);
10012 Chain = Src.getValue(1);
10013 }
10014
10015 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
10016 ? RISCVISD::STRICT_FP_EXTEND_VL
10017 : RISCVISD::STRICT_FP_ROUND_VL;
10018 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
10019 Chain, Src, Mask, VL);
10020 if (VT.isFixedLengthVector()) {
10021 // StrictFP operations have two result values. Their lowered result should
10022 // have the same result count.
10023 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10024 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10025 }
10026 return Res;
10027}
10028
10029SDValue
10030RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
10031 SelectionDAG &DAG) const {
10032 bool IsVP =
10033 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
10034 bool IsExtend =
10035 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
10036 // RVV can only truncate fp to types half the size of the source. We
10037 // custom-lower f64->f16 rounds via RVV's round-to-odd float
10038 // conversion instruction.
10039 SDLoc DL(Op);
10040 MVT VT = Op.getSimpleValueType();
10041
10042 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10043
10044 SDValue Src = Op.getOperand(0);
10045 MVT SrcVT = Src.getSimpleValueType();
10046
10047 bool IsDirectExtend =
10048 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
10049 (SrcVT.getVectorElementType() != MVT::f16 &&
10050 SrcVT.getVectorElementType() != MVT::bf16));
10051 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
10052 VT.getVectorElementType() != MVT::bf16) ||
10053 SrcVT.getVectorElementType() != MVT::f64);
10054
10055 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
10056
10057 // We have regular SD node patterns for direct non-VL extends.
10058 if (VT.isScalableVector() && IsDirectConv && !IsVP)
10059 return Op;
10060
10061 // Prepare any fixed-length vector operands.
10062 MVT ContainerVT = VT;
10063 SDValue Mask, VL;
10064 if (IsVP) {
10065 Mask = Op.getOperand(1);
10066 VL = Op.getOperand(2);
10067 }
10068 if (VT.isFixedLengthVector()) {
10069 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
10070 ContainerVT =
10071 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10072 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10073 if (IsVP) {
10074 MVT MaskVT = getMaskTypeFor(ContainerVT);
10075 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10076 }
10077 }
10078
10079 if (!IsVP)
10080 std::tie(Mask, VL) =
10081 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10082
10083 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
10084
10085 if (IsDirectConv) {
10086 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
10087 if (VT.isFixedLengthVector())
10088 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
10089 return Src;
10090 }
10091
10092 unsigned InterConvOpc =
10093 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
10094
10095 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10096 SDValue IntermediateConv =
10097 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
10098 SDValue Result =
10099 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
10100 if (VT.isFixedLengthVector())
10101 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10102 return Result;
10103}
10104
10105// Given a scalable vector type and an index into it, returns the type for the
10106// smallest subvector that the index fits in. This can be used to reduce LMUL
10107// for operations like vslidedown.
10108//
10109// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10110static std::optional<MVT>
10111getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10112 const RISCVSubtarget &Subtarget) {
10113 assert(VecVT.isScalableVector());
10114 const unsigned EltSize = VecVT.getScalarSizeInBits();
10115 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10116 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10117 MVT SmallerVT;
10118 if (MaxIdx < MinVLMAX)
10119 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10120 else if (MaxIdx < MinVLMAX * 2)
10121 SmallerVT =
10122 RISCVTargetLowering::getM1VT(VecVT).getDoubleNumVectorElementsVT();
10123 else if (MaxIdx < MinVLMAX * 4)
10124 SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10125 .getDoubleNumVectorElementsVT()
10126 .getDoubleNumVectorElementsVT();
10127 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10128 return std::nullopt;
10129 return SmallerVT;
10130}
10131
10132 static bool isValidVisniInsertExtractIndex(SDValue Idx) {
10133 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10134 if (!IdxC || isNullConstant(Idx))
10135 return false;
10136 return isUInt<5>(IdxC->getZExtValue());
10137}
10138
10139// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10140// first position of a vector, and that vector is slid up to the insert index.
10141// By limiting the active vector length to index+1 and merging with the
10142// original vector (with an undisturbed tail policy for elements >= VL), we
10143// achieve the desired result of leaving all elements untouched except the one
10144// at VL-1, which is replaced with the desired value.
10145SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10146 SelectionDAG &DAG) const {
10147 SDLoc DL(Op);
10148 MVT VecVT = Op.getSimpleValueType();
10149 MVT XLenVT = Subtarget.getXLenVT();
10150 SDValue Vec = Op.getOperand(0);
10151 SDValue Val = Op.getOperand(1);
10152 MVT ValVT = Val.getSimpleValueType();
10153 SDValue Idx = Op.getOperand(2);
10154
10155 if (VecVT.getVectorElementType() == MVT::i1) {
10156 // FIXME: For now we just promote to an i8 vector and insert into that,
10157 // but this is probably not optimal.
10158 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10159 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10160 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10161 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10162 }
10163
10164 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10165 ValVT == MVT::bf16) {
10166 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10167 MVT IntVT = VecVT.changeTypeToInteger();
10168 SDValue IntInsert = DAG.getNode(
10169 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10170 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10171 return DAG.getBitcast(VecVT, IntInsert);
10172 }
10173
10174 MVT ContainerVT = VecVT;
10175 // If the operand is a fixed-length vector, convert to a scalable one.
10176 if (VecVT.isFixedLengthVector()) {
10177 ContainerVT = getContainerForFixedLengthVector(VecVT);
10178 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10179 }
10180
10181 // If we know the index we're going to insert at, we can shrink Vec so that
10182 // we're performing the scalar inserts and slideup on a smaller LMUL.
10183 SDValue OrigVec = Vec;
10184 std::optional<unsigned> AlignedIdx;
10185 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10186 const unsigned OrigIdx = IdxC->getZExtValue();
10187 // Do we know an upper bound on LMUL?
10188 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10189 DL, DAG, Subtarget)) {
10190 ContainerVT = *ShrunkVT;
10191 AlignedIdx = 0;
10192 }
10193
10194 // If we're compiling for an exact VLEN value, we can always perform
10195 // the insert in m1 as we can determine the register corresponding to
10196 // the index in the register group.
10197 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10198 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10199 EVT ElemVT = VecVT.getVectorElementType();
10200 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10201 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10202 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10203 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10204 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10205 ContainerVT = M1VT;
10206 }
10207
10208 if (AlignedIdx)
10209 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
10210 }
10211
10212 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
10213 // Even i64-element vectors on RV32 can be lowered without scalar
10214 // legalization if the most-significant 32 bits of the value are not affected
10215 // by the sign-extension of the lower 32 bits.
10216 // TODO: We could also catch sign extensions of a 32-bit value.
10217 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
10218 const auto *CVal = cast<ConstantSDNode>(Val);
10219 if (isInt<32>(CVal->getSExtValue())) {
10220 IsLegalInsert = true;
10221 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
10222 }
10223 }
10224
10225 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10226
10227 SDValue ValInVec;
10228
10229 if (IsLegalInsert) {
10230 unsigned Opc =
10231 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
10232 if (isNullConstant(Idx)) {
10233 if (!VecVT.isFloatingPoint())
10234 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10235 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
10236
10237 if (AlignedIdx)
10238 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10239 if (!VecVT.isFixedLengthVector())
10240 return Vec;
10241 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10242 }
10243
10244 // Use ri.vinsert.v.x if available.
10245 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
10246 isValidVisniInsertExtractIndex(Idx)) {
10247 // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
10248 SDValue PolicyOp =
10249 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
10250 Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
10251 VL, PolicyOp);
10252 if (AlignedIdx)
10253 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10254 if (!VecVT.isFixedLengthVector())
10255 return Vec;
10256 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10257 }
10258
10259 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
10260 } else {
10261 // On RV32, i64-element vectors must be specially handled to place the
10262 // value at element 0, by using two vslide1down instructions in sequence on
10263 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
10264 // this.
10265 SDValue ValLo, ValHi;
10266 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
10267 MVT I32ContainerVT =
10268 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
10269 SDValue I32Mask =
10270 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
10271 // Limit the active VL to two.
10272 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
10273 // If the Idx is 0 we can insert directly into the vector.
10274 if (isNullConstant(Idx)) {
10275 // First slide in the lo value, then the hi in above it. We use slide1down
10276 // to avoid the register group overlap constraint of vslide1up.
10277 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10278 Vec, Vec, ValLo, I32Mask, InsertI64VL);
10279 // If the source vector is undef don't pass along the tail elements from
10280 // the previous slide1down.
10281 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
10282 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10283 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
10284 // Bitcast back to the right container type.
10285 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10286
10287 if (AlignedIdx)
10288 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
10289 if (!VecVT.isFixedLengthVector())
10290 return ValInVec;
10291 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
10292 }
10293
10294 // First slide in the lo value, then the hi in above it. We use slide1down
10295 // to avoid the register group overlap constraint of vslide1up.
10296 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10297 DAG.getUNDEF(I32ContainerVT),
10298 DAG.getUNDEF(I32ContainerVT), ValLo,
10299 I32Mask, InsertI64VL);
10300 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10301 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
10302 I32Mask, InsertI64VL);
10303 // Bitcast back to the right container type.
10304 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10305 }
10306
10307 // Now that the value is in a vector, slide it into position.
10308 SDValue InsertVL =
10309 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
10310
10311 // Use tail agnostic policy if Idx is the last index of Vec.
10312 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10313 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
10314 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
10315 Policy = RISCVVType::TAIL_AGNOSTIC;
10316 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
10317 Idx, Mask, InsertVL, Policy);
10318
10319 if (AlignedIdx)
10320 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
10321 if (!VecVT.isFixedLengthVector())
10322 return Slideup;
10323 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
10324}
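// Editor's note: illustrative sketch, not part of this file (hypothetical
// helper). Scalar model of the insert strategy described before
// lowerINSERT_VECTOR_ELT: the value is placed at element 0 of a temporary,
// which is then slid up by Idx with VL = Idx + 1 and an undisturbed tail, so
// only element Idx of the destination changes.
static void insertEltViaSlideup(uint32_t *Vec, unsigned NumElts, uint32_t Val,
                                unsigned Idx) {
  uint32_t ValInVec0 = Val; // the value sits at element 0 of the temporary
  unsigned VL = Idx + 1;    // limit the active vector length to Idx + 1
  // vslideup with offset Idx writes Vec[i] = Temp[i - Idx] for i in [Idx, VL);
  // with VL == Idx + 1 that is exactly one element, and the tail (i >= VL)
  // stays untouched.
  for (unsigned I = Idx; I < VL && I < NumElts; ++I)
    Vec[I] = ValInVec0;
}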
10325
10326// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
10327// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
10328// types this is done using VMV_X_S to allow us to glean information about the
10329// sign bits of the result.
10330SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
10331 SelectionDAG &DAG) const {
10332 SDLoc DL(Op);
10333 SDValue Idx = Op.getOperand(1);
10334 SDValue Vec = Op.getOperand(0);
10335 EVT EltVT = Op.getValueType();
10336 MVT VecVT = Vec.getSimpleValueType();
10337 MVT XLenVT = Subtarget.getXLenVT();
10338
10339 if (VecVT.getVectorElementType() == MVT::i1) {
10340 // Use vfirst.m to extract the first bit.
10341 if (isNullConstant(Idx)) {
10342 MVT ContainerVT = VecVT;
10343 if (VecVT.isFixedLengthVector()) {
10344 ContainerVT = getContainerForFixedLengthVector(VecVT);
10345 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10346 }
10347 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10348 SDValue Vfirst =
10349 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
10350 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
10351 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10352 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10353 }
10354 if (VecVT.isFixedLengthVector()) {
10355 unsigned NumElts = VecVT.getVectorNumElements();
10356 if (NumElts >= 8) {
10357 MVT WideEltVT;
10358 unsigned WidenVecLen;
10359 SDValue ExtractElementIdx;
10360 SDValue ExtractBitIdx;
10361 unsigned MaxEEW = Subtarget.getELen();
10362 MVT LargestEltVT = MVT::getIntegerVT(
10363 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
10364 if (NumElts <= LargestEltVT.getSizeInBits()) {
10365 assert(isPowerOf2_32(NumElts) &&
10366 "the number of elements should be power of 2");
10367 WideEltVT = MVT::getIntegerVT(NumElts);
10368 WidenVecLen = 1;
10369 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
10370 ExtractBitIdx = Idx;
10371 } else {
10372 WideEltVT = LargestEltVT;
10373 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
10374 // extract element index = index / element width
10375 ExtractElementIdx = DAG.getNode(
10376 ISD::SRL, DL, XLenVT, Idx,
10377 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
10378 // mask bit index = index % element width
10379 ExtractBitIdx = DAG.getNode(
10380 ISD::AND, DL, XLenVT, Idx,
10381 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
10382 }
10383 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
10384 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
10385 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
10386 Vec, ExtractElementIdx);
10387 // Extract the bit from GPR.
10388 SDValue ShiftRight =
10389 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
10390 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
10391 DAG.getConstant(1, DL, XLenVT));
10392 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10393 }
10394 }
10395 // Otherwise, promote to an i8 vector and extract from that.
10396 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10397 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10398 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
10399 }
10400
10401 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10402 EltVT == MVT::bf16) {
10403 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
10404 MVT IntVT = VecVT.changeTypeToInteger();
10405 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
10406 SDValue IntExtract =
10407 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
10408 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
10409 }
10410
10411 // If this is a fixed vector, we need to convert it to a scalable vector.
10412 MVT ContainerVT = VecVT;
10413 if (VecVT.isFixedLengthVector()) {
10414 ContainerVT = getContainerForFixedLengthVector(VecVT);
10415 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10416 }
10417
10418 // If we're compiling for an exact VLEN value and we have a known
10419 // constant index, we can always perform the extract in m1 (or
10420 // smaller) as we can determine the register corresponding to
10421 // the index in the register group.
10422 const auto VLen = Subtarget.getRealVLen();
10423 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10424 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
10425 MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10426 unsigned OrigIdx = IdxC->getZExtValue();
10427 EVT ElemVT = VecVT.getVectorElementType();
10428 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
10429 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10430 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10431 unsigned ExtractIdx =
10432 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10433 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
10434 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10435 ContainerVT = M1VT;
10436 }
10437
10438 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
10439 // contains our index.
10440 std::optional<uint64_t> MaxIdx;
10441 if (VecVT.isFixedLengthVector())
10442 MaxIdx = VecVT.getVectorNumElements() - 1;
10443 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
10444 MaxIdx = IdxC->getZExtValue();
10445 if (MaxIdx) {
10446 if (auto SmallerVT =
10447 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
10448 ContainerVT = *SmallerVT;
10449 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
10450 }
10451 }
10452
10453 // Use ri.vextract.x.v if available.
10454 // TODO: Avoid index 0 and just use the vmv.x.s
10455 if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
10456 isValidVisniInsertExtractIndex(Idx)) {
10457 SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
10458 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
10459 }
10460
10461 // If after narrowing, the required slide is still greater than LMUL2,
10462 // fallback to generic expansion and go through the stack. This is done
10463 // for a subtle reason: extracting *all* elements out of a vector is
10464 // widely expected to be linear in vector size, but because vslidedown
10465 // is linear in LMUL, performing N extracts using vslidedown becomes
10466 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
10467 // seems to have the same problem (the store is linear in LMUL), but the
10468 // generic expansion *memoizes* the store, and thus for many extracts of
10469 // the same vector we end up with one store and a bunch of loads.
10470 // TODO: We don't have the same code for insert_vector_elt because we
10471 // have BUILD_VECTOR and handle the degenerate case there. Should we
10472 // consider adding an inverse BUILD_VECTOR node?
10473 MVT LMUL2VT =
10474 RISCVTargetLowering::getM1VT(ContainerVT).getDoubleNumVectorElementsVT();
10475 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
10476 return SDValue();
10477
10478 // If the index is 0, the vector is already in the right position.
10479 if (!isNullConstant(Idx)) {
10480 // Use a VL of 1 to avoid processing more elements than we need.
10481 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10482 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10483 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10484 }
10485
10486 if (!EltVT.isInteger()) {
10487 // Floating-point extracts are handled in TableGen.
10488 return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
10489 }
10490
10491 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10492 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
10493}
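// Editor's note: rough cost model added for exposition, not part of this
// file (hypothetical helper). It restates the argument in the comment above
// the LMUL2 fallback: N vslidedown-based extracts each do work proportional
// to the vector length (quadratic in total), while the generic expansion
// pays for one linear store plus N cheap scalar loads.
static bool preferStackForAllExtracts(unsigned NumElts) {
  unsigned SlideCost = NumElts * NumElts; // N slides, each O(N)
  unsigned StackCost = NumElts + NumElts; // one O(N) store + N loads
  return StackCost < SlideCost;           // true once NumElts > 2
}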
10494
10495// Some RVV intrinsics may claim that they want an integer operand to be
10496// promoted or expanded.
10497 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
10498 const RISCVSubtarget &Subtarget) {
10499 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
10500 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
10501 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
10502 "Unexpected opcode");
10503
10504 if (!Subtarget.hasVInstructions())
10505 return SDValue();
10506
10507 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10508 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10509 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10510
10511 SDLoc DL(Op);
10512
10513 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10514 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10515 if (!II || !II->hasScalarOperand())
10516 return SDValue();
10517
10518 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
10519 assert(SplatOp < Op.getNumOperands());
10520
10522 SDValue &ScalarOp = Operands[SplatOp];
10523 MVT OpVT = ScalarOp.getSimpleValueType();
10524 MVT XLenVT = Subtarget.getXLenVT();
10525
10526 // If this isn't a scalar, or its type is XLenVT we're done.
10527 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10528 return SDValue();
10529
10530 // Simplest case is that the operand needs to be promoted to XLenVT.
10531 if (OpVT.bitsLT(XLenVT)) {
10532 // If the operand is a constant, sign extend to increase our chances
10533 // of being able to use a .vi instruction. ANY_EXTEND would become
10534 // a zero extend and the simm5 check in isel would fail.
10535 // FIXME: Should we ignore the upper bits in isel instead?
10536 unsigned ExtOpc =
10537 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10538 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10539 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10540 }
10541
10542 // Use the previous operand to get the vXi64 VT. The result might be a mask
10543 // VT for compares. Using the previous operand assumes that the previous
10544 // operand will never have a smaller element size than a scalar operand and
10545 // that a widening operation never uses SEW=64.
10546 // NOTE: If this fails the below assert, we can probably just find the
10547 // element count from any operand or result and use it to construct the VT.
10548 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
10549 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
10550
10551 // The more complex case is when the scalar is larger than XLenVT.
10552 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
10553 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
10554
10555 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
10556 // instruction to sign-extend since SEW>XLEN.
10557 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
10558 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
10559 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10560 }
10561
10562 switch (IntNo) {
10563 case Intrinsic::riscv_vslide1up:
10564 case Intrinsic::riscv_vslide1down:
10565 case Intrinsic::riscv_vslide1up_mask:
10566 case Intrinsic::riscv_vslide1down_mask: {
10567 // We need to special case these when the scalar is larger than XLen.
10568 unsigned NumOps = Op.getNumOperands();
10569 bool IsMasked = NumOps == 7;
10570
10571 // Convert the vector source to the equivalent nxvXi32 vector.
10572 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
10573 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
10574 SDValue ScalarLo, ScalarHi;
10575 std::tie(ScalarLo, ScalarHi) =
10576 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
10577
10578 // Double the VL since we halved SEW.
10579 SDValue AVL = getVLOperand(Op);
10580 SDValue I32VL;
10581
10582 // Optimize for constant AVL
10583 if (isa<ConstantSDNode>(AVL)) {
10584 const auto [MinVLMAX, MaxVLMAX] =
10585 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
10586
10587 uint64_t AVLInt = AVL->getAsZExtVal();
10588 if (AVLInt <= MinVLMAX) {
10589 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
10590 } else if (AVLInt >= 2 * MaxVLMAX) {
10591 // Just set vl to VLMAX in this situation
10592 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
10593 } else {
10594 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
10595 // is related to the hardware implementation.
10596 // So let the following code handle it.
10597 }
10598 }
10599 if (!I32VL) {
10600 RISCVVType::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
10601 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
10602 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
10603 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
10604 SDValue SETVL =
10605 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
10606 // Use the vsetvli instruction to get the actually-used length, which is
10607 // related to the hardware implementation.
10608 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
10609 SEW, LMUL);
10610 I32VL =
10611 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
10612 }
10613
10614 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
10615
10616 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
10617 // instructions.
10618 SDValue Passthru;
10619 if (IsMasked)
10620 Passthru = DAG.getUNDEF(I32VT);
10621 else
10622 Passthru = DAG.getBitcast(I32VT, Operands[1]);
10623
10624 if (IntNo == Intrinsic::riscv_vslide1up ||
10625 IntNo == Intrinsic::riscv_vslide1up_mask) {
10626 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10627 ScalarHi, I32Mask, I32VL);
10628 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10629 ScalarLo, I32Mask, I32VL);
10630 } else {
10631 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10632 ScalarLo, I32Mask, I32VL);
10633 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10634 ScalarHi, I32Mask, I32VL);
10635 }
10636
10637 // Convert back to nxvXi64.
10638 Vec = DAG.getBitcast(VT, Vec);
10639
10640 if (!IsMasked)
10641 return Vec;
10642 // Apply mask after the operation.
10643 SDValue Mask = Operands[NumOps - 3];
10644 SDValue MaskedOff = Operands[1];
10645 // Assume Policy operand is the last operand.
10646 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
10647 // We don't need to select maskedoff if it's undef.
10648 if (MaskedOff.isUndef())
10649 return Vec;
10650 // TAMU
10651 if (Policy == RISCVVType::TAIL_AGNOSTIC)
10652 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10653 DAG.getUNDEF(VT), AVL);
10654 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
10655 // It's fine because vmerge does not care about mask policy.
10656 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10657 MaskedOff, AVL);
10658 }
10659 }
10660
10661 // We need to convert the scalar to a splat vector.
10662 SDValue VL = getVLOperand(Op);
10663 assert(VL.getValueType() == XLenVT);
10664 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
10665 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10666}
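// Editor's note: illustrative sketch, not part of this file (hypothetical
// helper). Scalar model of the vslide1down splitting above for i64 elements
// on RV32: one SEW=64 vslide1down of a 64-bit scalar is emulated by two
// SEW=32 vslide1down steps (low half first, then high half) over the vector
// reinterpreted as twice as many i32 elements, which is why the VL is doubled.
static void emulateSlide1Down64On32(uint32_t *VecAsI32, unsigned I32VL,
                                    uint64_t Scalar) {
  uint32_t Lo = (uint32_t)Scalar;
  uint32_t Hi = (uint32_t)(Scalar >> 32);
  // Each slide1down moves every active element one slot toward index 0 and
  // writes the scalar into the last active element.
  for (unsigned Step = 0; Step < 2; ++Step) {
    uint32_t In = (Step == 0) ? Lo : Hi;
    for (unsigned I = 0; I + 1 < I32VL; ++I)
      VecAsI32[I] = VecAsI32[I + 1];
    if (I32VL > 0)
      VecAsI32[I32VL - 1] = In;
  }
  // After both steps the last i64 element (little-endian pair of i32) holds
  // exactly Scalar, matching the ScalarLo-then-ScalarHi order used above.
}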
10667
10668// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
10669// scalable vector llvm.get.vector.length for now.
10670//
10671// We need to convert from a scalable VF to a vsetvli with VLMax equal to
10672// (vscale * VF). The vscale and VF are independent of element width. We use
10673// SEW=8 for the vsetvli because it is the only element width that supports all
10674// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
10675// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
10676// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
10677// SEW and LMUL are better for the surrounding vector instructions.
10678 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
10679 const RISCVSubtarget &Subtarget) {
10680 MVT XLenVT = Subtarget.getXLenVT();
10681
10682 // The smallest LMUL is only valid for the smallest element width.
10683 const unsigned ElementWidth = 8;
10684
10685 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
10686 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
10687 // We don't support VF==1 with ELEN==32.
10688 [[maybe_unused]] unsigned MinVF =
10689 RISCV::RVVBitsPerBlock / Subtarget.getELen();
10690
10691 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
10692 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
10693 "Unexpected VF");
10694
10695 bool Fractional = VF < LMul1VF;
10696 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
10697 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
10698 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
10699
10700 SDLoc DL(N);
10701
10702 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
10703 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
10704
10705 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
10706
10707 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
10708 SDValue Res =
10709 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
10710 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
10711}
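// Editor's note: illustrative sketch, not part of this file (hypothetical
// helper). The vsetvli above uses SEW=8, so the LMUL-1 VF is
// RVVBitsPerBlock / 8 == 8, and the LMUL is picked so that VLMax equals
// vscale * VF.
static unsigned lmulMagnitudeForVF(unsigned VF, bool &Fractional) {
  const unsigned LMul1VF = 64 / 8; // RVVBitsPerBlock / ElementWidth(8)
  Fractional = VF < LMul1VF;
  // e.g. VF=2 -> {Fractional=true, 4} i.e. mf4; VF=32 -> {false, 4} i.e. m4.
  return Fractional ? LMul1VF / VF : VF / LMul1VF;
}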
10712
10713 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
10714 const RISCVSubtarget &Subtarget) {
10715 SDValue Op0 = N->getOperand(1);
10716 MVT OpVT = Op0.getSimpleValueType();
10717 MVT ContainerVT = OpVT;
10718 if (OpVT.isFixedLengthVector()) {
10719 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
10720 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
10721 }
10722 MVT XLenVT = Subtarget.getXLenVT();
10723 SDLoc DL(N);
10724 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
10725 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
10726 if (isOneConstant(N->getOperand(2)))
10727 return Res;
10728
10729 // Convert -1 to VL.
10730 SDValue Setcc =
10731 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
10732 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
10733 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
10734}
10735
10736static inline void promoteVCIXScalar(SDValue Op,
10737 SmallVectorImpl<SDValue> &Operands,
10738 SelectionDAG &DAG) {
10739 const RISCVSubtarget &Subtarget =
10740 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10741
10742 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10743 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10744 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10745 SDLoc DL(Op);
10746
10748 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10749 if (!II || !II->hasScalarOperand())
10750 return;
10751
10752 unsigned SplatOp = II->ScalarOperand + 1;
10753 assert(SplatOp < Op.getNumOperands());
10754
10755 SDValue &ScalarOp = Operands[SplatOp];
10756 MVT OpVT = ScalarOp.getSimpleValueType();
10757 MVT XLenVT = Subtarget.getXLenVT();
10758
10759 // The code below is partially copied from lowerVectorIntrinsicScalars.
10760 // If this isn't a scalar, or its type is XLenVT we're done.
10761 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10762 return;
10763
10764 // Manually emit promote operation for scalar operation.
10765 if (OpVT.bitsLT(XLenVT)) {
10766 unsigned ExtOpc =
10767 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10768 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10769 }
10770}
10771
10772static void processVCIXOperands(SDValue OrigOp,
10773 SmallVectorImpl<SDValue> &Operands,
10774 SelectionDAG &DAG) {
10775 promoteVCIXScalar(OrigOp, Operands, DAG);
10776 const RISCVSubtarget &Subtarget =
10777 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10778 for (SDValue &V : Operands) {
10779 EVT ValType = V.getValueType();
10780 if (ValType.isVector() && ValType.isFloatingPoint()) {
10781 MVT InterimIVT =
10782 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
10783 ValType.getVectorElementCount());
10784 V = DAG.getBitcast(InterimIVT, V);
10785 }
10786 if (ValType.isFixedLengthVector()) {
10787 MVT OpContainerVT = getContainerForFixedLengthVector(
10788 DAG, V.getSimpleValueType(), Subtarget);
10789 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
10790 }
10791 }
10792}
10793
10794// LMUL * VLEN should be greater than or equal to EGS * SEW
10795static inline bool isValidEGW(int EGS, EVT VT,
10796 const RISCVSubtarget &Subtarget) {
10797 return (Subtarget.getRealMinVLen() *
10798 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
10799 EGS * VT.getScalarSizeInBits();
10800}
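// Editor's note: worked instance of the check above, added for exposition and
// not part of this file. With Zvl128b (RealMinVLen = 128) and VT = nxv4i32
// (known-min size 128 bits, SEW 32), a four-element group needs
// EGS * SEW = 128 bits, and LMUL * VLEN = 128 * 128 / 64 = 256 covers it.
static constexpr bool isValidEGWExample() {
  const unsigned MinVLen = 128, VTMinBits = 128, RVVBitsPerBlock = 64;
  const unsigned EGS = 4, SEW = 32;
  return (MinVLen * VTMinBits) / RVVBitsPerBlock >= EGS * SEW; // 256 >= 128
}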
10801
10802SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10803 SelectionDAG &DAG) const {
10804 unsigned IntNo = Op.getConstantOperandVal(0);
10805 SDLoc DL(Op);
10806 MVT XLenVT = Subtarget.getXLenVT();
10807
10808 switch (IntNo) {
10809 default:
10810 break; // Don't custom lower most intrinsics.
10811 case Intrinsic::riscv_tuple_insert: {
10812 SDValue Vec = Op.getOperand(1);
10813 SDValue SubVec = Op.getOperand(2);
10814 SDValue Index = Op.getOperand(3);
10815
10816 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
10817 SubVec, Index);
10818 }
10819 case Intrinsic::riscv_tuple_extract: {
10820 SDValue Vec = Op.getOperand(1);
10821 SDValue Index = Op.getOperand(2);
10822
10823 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
10824 Index);
10825 }
10826 case Intrinsic::thread_pointer: {
10827 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10828 return DAG.getRegister(RISCV::X4, PtrVT);
10829 }
10830 case Intrinsic::riscv_orc_b:
10831 case Intrinsic::riscv_brev8:
10832 case Intrinsic::riscv_sha256sig0:
10833 case Intrinsic::riscv_sha256sig1:
10834 case Intrinsic::riscv_sha256sum0:
10835 case Intrinsic::riscv_sha256sum1:
10836 case Intrinsic::riscv_sm3p0:
10837 case Intrinsic::riscv_sm3p1: {
10838 unsigned Opc;
10839 switch (IntNo) {
10840 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
10841 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
10842 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
10843 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
10844 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
10845 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
10846 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
10847 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
10848 }
10849
10850 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10851 }
10852 case Intrinsic::riscv_sm4ks:
10853 case Intrinsic::riscv_sm4ed: {
10854 unsigned Opc =
10855 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
10856
10857 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
10858 Op.getOperand(3));
10859 }
10860 case Intrinsic::riscv_zip:
10861 case Intrinsic::riscv_unzip: {
10862 unsigned Opc =
10863 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
10864 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10865 }
10866 case Intrinsic::riscv_mopr:
10867 return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
10868 Op.getOperand(2));
10869
10870 case Intrinsic::riscv_moprr: {
10871 return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
10872 Op.getOperand(2), Op.getOperand(3));
10873 }
10874 case Intrinsic::riscv_clmul:
10875 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
10876 Op.getOperand(2));
10877 case Intrinsic::riscv_clmulh:
10878 case Intrinsic::riscv_clmulr: {
10879 unsigned Opc =
10880 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
10881 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
10882 }
10883 case Intrinsic::experimental_get_vector_length:
10884 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
10885 case Intrinsic::experimental_cttz_elts:
10886 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
10887 case Intrinsic::riscv_vmv_x_s: {
10888 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
10889 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
10890 }
10891 case Intrinsic::riscv_vfmv_f_s:
10892 return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
10893 case Intrinsic::riscv_vmv_v_x:
10894 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
10895 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
10896 Subtarget);
10897 case Intrinsic::riscv_vfmv_v_f:
10898 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
10899 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10900 case Intrinsic::riscv_vmv_s_x: {
10901 SDValue Scalar = Op.getOperand(2);
10902
10903 if (Scalar.getValueType().bitsLE(XLenVT)) {
10904 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
10905 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
10906 Op.getOperand(1), Scalar, Op.getOperand(3));
10907 }
10908
10909 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
10910
10911 // This is an i64 value that lives in two scalar registers. We have to
10912   // insert this in a convoluted way. First we build a vXi64 splat containing
10913   // the two halves assembled with some bit math. Next we'll use
10914 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
10915 // to merge element 0 from our splat into the source vector.
10916 // FIXME: This is probably not the best way to do this, but it is
10917 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
10918 // point.
10919 // sw lo, (a0)
10920 // sw hi, 4(a0)
10921 // vlse vX, (a0)
10922 //
10923 // vid.v vVid
10924 // vmseq.vx mMask, vVid, 0
10925 // vmerge.vvm vDest, vSrc, vVal, mMask
10926 MVT VT = Op.getSimpleValueType();
10927 SDValue Vec = Op.getOperand(1);
10928 SDValue VL = getVLOperand(Op);
10929
10930 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
10931 if (Op.getOperand(1).isUndef())
10932 return SplattedVal;
10933 SDValue SplattedIdx =
10934 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10935 DAG.getConstant(0, DL, MVT::i32), VL);
10936
10937 MVT MaskVT = getMaskTypeFor(VT);
10938 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
10939 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10940 SDValue SelectCond =
10941 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10942 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
10943 DAG.getUNDEF(MaskVT), Mask, VL});
10944 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
10945 Vec, DAG.getUNDEF(VT), VL);
10946 }
10947 case Intrinsic::riscv_vfmv_s_f:
10948 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
10949 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10950 // EGS * EEW >= 128 bits
10951 case Intrinsic::riscv_vaesdf_vv:
10952 case Intrinsic::riscv_vaesdf_vs:
10953 case Intrinsic::riscv_vaesdm_vv:
10954 case Intrinsic::riscv_vaesdm_vs:
10955 case Intrinsic::riscv_vaesef_vv:
10956 case Intrinsic::riscv_vaesef_vs:
10957 case Intrinsic::riscv_vaesem_vv:
10958 case Intrinsic::riscv_vaesem_vs:
10959 case Intrinsic::riscv_vaeskf1:
10960 case Intrinsic::riscv_vaeskf2:
10961 case Intrinsic::riscv_vaesz_vs:
10962 case Intrinsic::riscv_vsm4k:
10963 case Intrinsic::riscv_vsm4r_vv:
10964 case Intrinsic::riscv_vsm4r_vs: {
10965 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10966 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10967 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10968 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10969 return Op;
10970 }
10971 // EGS * EEW >= 256 bits
10972 case Intrinsic::riscv_vsm3c:
10973 case Intrinsic::riscv_vsm3me: {
10974 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
10975 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
10976 reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
10977 return Op;
10978 }
10979 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
10980 case Intrinsic::riscv_vsha2ch:
10981 case Intrinsic::riscv_vsha2cl:
10982 case Intrinsic::riscv_vsha2ms: {
10983 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
10984 !Subtarget.hasStdExtZvknhb())
10985 reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
10986 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10987 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10988 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10989 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10990 return Op;
10991 }
10992 case Intrinsic::riscv_sf_vc_v_x:
10993 case Intrinsic::riscv_sf_vc_v_i:
10994 case Intrinsic::riscv_sf_vc_v_xv:
10995 case Intrinsic::riscv_sf_vc_v_iv:
10996 case Intrinsic::riscv_sf_vc_v_vv:
10997 case Intrinsic::riscv_sf_vc_v_fv:
10998 case Intrinsic::riscv_sf_vc_v_xvv:
10999 case Intrinsic::riscv_sf_vc_v_ivv:
11000 case Intrinsic::riscv_sf_vc_v_vvv:
11001 case Intrinsic::riscv_sf_vc_v_fvv:
11002 case Intrinsic::riscv_sf_vc_v_xvw:
11003 case Intrinsic::riscv_sf_vc_v_ivw:
11004 case Intrinsic::riscv_sf_vc_v_vvw:
11005 case Intrinsic::riscv_sf_vc_v_fvw: {
11006 MVT VT = Op.getSimpleValueType();
11007
11008 SmallVector<SDValue> Operands{Op->op_values()};
11009     processVCIXOperands(Op, Operands, DAG);
11010
11011     MVT RetVT = VT;
11012     if (VT.isFixedLengthVector())
11013       RetVT = getContainerForFixedLengthVector(VT);
11014     else if (VT.isFloatingPoint())
11015       RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11016                                VT.getVectorElementCount());
11017
11018 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
11019
11020 if (VT.isFixedLengthVector())
11021 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
11022 else if (VT.isFloatingPoint())
11023 NewNode = DAG.getBitcast(VT, NewNode);
11024
11025 if (Op == NewNode)
11026 break;
11027
11028 return NewNode;
11029 }
11030 }
11031
11032 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11033}
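// For illustration, a scalar bitmanip/crypto call such as
//   %r = call i64 @llvm.riscv.orc.b.i64(i64 %a)
// simply becomes a single RISCVISD::ORC_B node in the switch above and later
// selects to the Zbb orc.b instruction; the other scalar intrinsics handled
// there follow the same one-to-one mapping onto their RISCVISD opcodes.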
11034
11035 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
11036                                     unsigned Type) {
11037 SDLoc DL(Op);
11038 SmallVector<SDValue> Operands{Op->op_values()};
11039 Operands.erase(Operands.begin() + 1);
11040
11041   const RISCVSubtarget &Subtarget =
11042       DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
11043 MVT VT = Op.getSimpleValueType();
11044 MVT RetVT = VT;
11045 MVT FloatVT = VT;
11046
11047 if (VT.isFloatingPoint()) {
11048 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11049 VT.getVectorElementCount());
11050 FloatVT = RetVT;
11051 }
11052   if (VT.isFixedLengthVector())
11053     RetVT = getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), RetVT,
11054                                              Subtarget);
11055
11056   processVCIXOperands(Op, Operands, DAG);
11057
11058 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
11059 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
11060 SDValue Chain = NewNode.getValue(1);
11061
11062 if (VT.isFixedLengthVector())
11063 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
11064 if (VT.isFloatingPoint())
11065 NewNode = DAG.getBitcast(VT, NewNode);
11066
11067 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
11068
11069 return NewNode;
11070}
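// Example of the W_CHAIN path above, assuming VLEN >= 128: for an sf.vc.v.vv
// intrinsic returning v4f32, the result type is rewritten v4f32 -> v4i32 ->
// container nxv2i32, the SF_VC_V_VV_SE node is built with {nxv2i32, ch}, and
// the value is converted back (scalable -> fixed v4i32, then bitcast to
// v4f32) before being re-merged with the chain.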
11071
11072 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
11073                                   unsigned Type) {
11074 SmallVector<SDValue> Operands{Op->op_values()};
11075 Operands.erase(Operands.begin() + 1);
11076   processVCIXOperands(Op, Operands, DAG);
11077
11078 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
11079}
11080
11081static SDValue
11082 lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op,
11083                                   const RISCVSubtarget &Subtarget,
11084 SelectionDAG &DAG) {
11085 bool IsStrided;
11086 switch (IntNo) {
11087 case Intrinsic::riscv_seg2_load_mask:
11088 case Intrinsic::riscv_seg3_load_mask:
11089 case Intrinsic::riscv_seg4_load_mask:
11090 case Intrinsic::riscv_seg5_load_mask:
11091 case Intrinsic::riscv_seg6_load_mask:
11092 case Intrinsic::riscv_seg7_load_mask:
11093 case Intrinsic::riscv_seg8_load_mask:
11094 IsStrided = false;
11095 break;
11096 case Intrinsic::riscv_sseg2_load_mask:
11097 case Intrinsic::riscv_sseg3_load_mask:
11098 case Intrinsic::riscv_sseg4_load_mask:
11099 case Intrinsic::riscv_sseg5_load_mask:
11100 case Intrinsic::riscv_sseg6_load_mask:
11101 case Intrinsic::riscv_sseg7_load_mask:
11102 case Intrinsic::riscv_sseg8_load_mask:
11103 IsStrided = true;
11104 break;
11105 default:
11106 llvm_unreachable("unexpected intrinsic ID");
11107 };
11108
11109 static const Intrinsic::ID VlsegInts[7] = {
11110 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11111 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11112 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11113 Intrinsic::riscv_vlseg8_mask};
11114 static const Intrinsic::ID VlssegInts[7] = {
11115 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11116 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11117 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11118 Intrinsic::riscv_vlsseg8_mask};
11119
11120 SDLoc DL(Op);
11121 unsigned NF = Op->getNumValues() - 1;
11122 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11123 MVT XLenVT = Subtarget.getXLenVT();
11124 MVT VT = Op->getSimpleValueType(0);
11125 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11126 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11127 ContainerVT.getScalarSizeInBits();
11128 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11129
11130 // Operands: (chain, int_id, pointer, mask, vl) or
11131 // (chain, int_id, pointer, offset, mask, vl)
11132 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11133 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11134 MVT MaskVT = Mask.getSimpleValueType();
11135 MVT MaskContainerVT =
11136 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11137 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11138
11139 SDValue IntID = DAG.getTargetConstant(
11140 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11141 auto *Load = cast<MemIntrinsicSDNode>(Op);
11142
11143   SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11144   SmallVector<SDValue, 9> Ops = {
11145       Load->getChain(),
11146       IntID,
11147       DAG.getUNDEF(VecTupTy),
11148       Op.getOperand(2),
11149       Mask,
11150       VL,
11151       DAG.getTargetConstant(
11152           RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
11153       DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11154 // Insert the stride operand.
11155 if (IsStrided)
11156 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11157
11158   SDValue Result =
11159       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11160                               Load->getMemoryVT(), Load->getMemOperand());
11161   SmallVector<SDValue, 9> Results;
11162   for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11163 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11164 Result.getValue(0),
11165 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11166 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11167 }
11168 Results.push_back(Result.getValue(1));
11169 return DAG.getMergeValues(Results, DL);
11170}
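// For illustration, a masked fixed-length seg2 load of two v4i32 results is
// rewritten here into one riscv.vlseg2.mask operating on a 2-field vector
// tuple, followed by two TUPLE_EXTRACTs that are shrunk back to v4i32; the
// strided (sseg*) flavours only add the stride as an extra operand.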
11171
11172SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11173 SelectionDAG &DAG) const {
11174 unsigned IntNo = Op.getConstantOperandVal(1);
11175 switch (IntNo) {
11176 default:
11177 break;
11178 case Intrinsic::riscv_seg2_load_mask:
11179 case Intrinsic::riscv_seg3_load_mask:
11180 case Intrinsic::riscv_seg4_load_mask:
11181 case Intrinsic::riscv_seg5_load_mask:
11182 case Intrinsic::riscv_seg6_load_mask:
11183 case Intrinsic::riscv_seg7_load_mask:
11184 case Intrinsic::riscv_seg8_load_mask:
11185 case Intrinsic::riscv_sseg2_load_mask:
11186 case Intrinsic::riscv_sseg3_load_mask:
11187 case Intrinsic::riscv_sseg4_load_mask:
11188 case Intrinsic::riscv_sseg5_load_mask:
11189 case Intrinsic::riscv_sseg6_load_mask:
11190 case Intrinsic::riscv_sseg7_load_mask:
11191 case Intrinsic::riscv_sseg8_load_mask:
11192 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
11193
11194 case Intrinsic::riscv_sf_vc_v_x_se:
11195 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
11196 case Intrinsic::riscv_sf_vc_v_i_se:
11197 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
11198 case Intrinsic::riscv_sf_vc_v_xv_se:
11199 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
11200 case Intrinsic::riscv_sf_vc_v_iv_se:
11201 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
11202 case Intrinsic::riscv_sf_vc_v_vv_se:
11203 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
11204 case Intrinsic::riscv_sf_vc_v_fv_se:
11205 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
11206 case Intrinsic::riscv_sf_vc_v_xvv_se:
11207 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
11208 case Intrinsic::riscv_sf_vc_v_ivv_se:
11209 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
11210 case Intrinsic::riscv_sf_vc_v_vvv_se:
11211 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
11212 case Intrinsic::riscv_sf_vc_v_fvv_se:
11213 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
11214 case Intrinsic::riscv_sf_vc_v_xvw_se:
11215 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
11216 case Intrinsic::riscv_sf_vc_v_ivw_se:
11217 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
11218 case Intrinsic::riscv_sf_vc_v_vvw_se:
11219 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
11220 case Intrinsic::riscv_sf_vc_v_fvw_se:
11221 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
11222 }
11223
11224 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11225}
11226
11227static SDValue
11228 lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
11229                                    const RISCVSubtarget &Subtarget,
11230 SelectionDAG &DAG) {
11231 bool IsStrided;
11232 switch (IntNo) {
11233 case Intrinsic::riscv_seg2_store_mask:
11234 case Intrinsic::riscv_seg3_store_mask:
11235 case Intrinsic::riscv_seg4_store_mask:
11236 case Intrinsic::riscv_seg5_store_mask:
11237 case Intrinsic::riscv_seg6_store_mask:
11238 case Intrinsic::riscv_seg7_store_mask:
11239 case Intrinsic::riscv_seg8_store_mask:
11240 IsStrided = false;
11241 break;
11242 case Intrinsic::riscv_sseg2_store_mask:
11243 case Intrinsic::riscv_sseg3_store_mask:
11244 case Intrinsic::riscv_sseg4_store_mask:
11245 case Intrinsic::riscv_sseg5_store_mask:
11246 case Intrinsic::riscv_sseg6_store_mask:
11247 case Intrinsic::riscv_sseg7_store_mask:
11248 case Intrinsic::riscv_sseg8_store_mask:
11249 IsStrided = true;
11250 break;
11251 default:
11252 llvm_unreachable("unexpected intrinsic ID");
11253 }
11254
11255 SDLoc DL(Op);
11256 static const Intrinsic::ID VssegInts[] = {
11257 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
11258 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
11259 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
11260 Intrinsic::riscv_vsseg8_mask};
11261 static const Intrinsic::ID VsssegInts[] = {
11262 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
11263 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
11264 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
11265 Intrinsic::riscv_vssseg8_mask};
11266
11267 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
11268 // (chain, int_id, vec*, ptr, stride, mask, vl)
11269 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
11270 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11271 MVT XLenVT = Subtarget.getXLenVT();
11272 MVT VT = Op->getOperand(2).getSimpleValueType();
11273 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11274 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11275 ContainerVT.getScalarSizeInBits();
11276 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11277
11278 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11279 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11280 MVT MaskVT = Mask.getSimpleValueType();
11281 MVT MaskContainerVT =
11282 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11283 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11284
11285 SDValue IntID = DAG.getTargetConstant(
11286 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
11287 SDValue Ptr = Op->getOperand(NF + 2);
11288
11289 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
11290
11291 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
11292 for (unsigned i = 0; i < NF; i++)
11293 StoredVal = DAG.getNode(
11294 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
11295 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
11296 DAG, Subtarget),
11297 DAG.getTargetConstant(i, DL, MVT::i32));
11298
11300 FixedIntrinsic->getChain(),
11301 IntID,
11302 StoredVal,
11303 Ptr,
11304 Mask,
11305 VL,
11306 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11307 // Insert the stride operand.
11308 if (IsStrided)
11309 Ops.insert(std::next(Ops.begin(), 4),
11310 Op.getOperand(Op.getNumOperands() - 3));
11311
11312 return DAG.getMemIntrinsicNode(
11313 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
11314 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
11315}
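// The store path mirrors the segment-load lowering above: for a seg2 store of
// two v4i32 sources, both values are packed into a 2-field vector tuple with
// TUPLE_INSERT and a single riscv.vsseg2.mask memory intrinsic is emitted;
// the strided (sseg*) variants additionally carry the stride operand.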
11316
11317SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11318 SelectionDAG &DAG) const {
11319 unsigned IntNo = Op.getConstantOperandVal(1);
11320 switch (IntNo) {
11321 default:
11322 break;
11323 case Intrinsic::riscv_seg2_store_mask:
11324 case Intrinsic::riscv_seg3_store_mask:
11325 case Intrinsic::riscv_seg4_store_mask:
11326 case Intrinsic::riscv_seg5_store_mask:
11327 case Intrinsic::riscv_seg6_store_mask:
11328 case Intrinsic::riscv_seg7_store_mask:
11329 case Intrinsic::riscv_seg8_store_mask:
11330 case Intrinsic::riscv_sseg2_store_mask:
11331 case Intrinsic::riscv_sseg3_store_mask:
11332 case Intrinsic::riscv_sseg4_store_mask:
11333 case Intrinsic::riscv_sseg5_store_mask:
11334 case Intrinsic::riscv_sseg6_store_mask:
11335 case Intrinsic::riscv_sseg7_store_mask:
11336 case Intrinsic::riscv_sseg8_store_mask:
11337 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
11338
11339 case Intrinsic::riscv_sf_vc_xv_se:
11340 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
11341 case Intrinsic::riscv_sf_vc_iv_se:
11342 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
11343 case Intrinsic::riscv_sf_vc_vv_se:
11344 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
11345 case Intrinsic::riscv_sf_vc_fv_se:
11346 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
11347 case Intrinsic::riscv_sf_vc_xvv_se:
11348 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
11349 case Intrinsic::riscv_sf_vc_ivv_se:
11350 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
11351 case Intrinsic::riscv_sf_vc_vvv_se:
11352 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
11353 case Intrinsic::riscv_sf_vc_fvv_se:
11354 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
11355 case Intrinsic::riscv_sf_vc_xvw_se:
11356 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
11357 case Intrinsic::riscv_sf_vc_ivw_se:
11358 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
11359 case Intrinsic::riscv_sf_vc_vvw_se:
11360 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
11361 case Intrinsic::riscv_sf_vc_fvw_se:
11362 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
11363 }
11364
11365 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11366}
11367
11368static unsigned getRVVReductionOp(unsigned ISDOpcode) {
11369 switch (ISDOpcode) {
11370 default:
11371 llvm_unreachable("Unhandled reduction");
11372 case ISD::VP_REDUCE_ADD:
11373 case ISD::VECREDUCE_ADD:
11374 return RISCVISD::VECREDUCE_ADD_VL;
11375 case ISD::VP_REDUCE_UMAX:
11376 case ISD::VECREDUCE_UMAX:
11377 return RISCVISD::VECREDUCE_UMAX_VL;
11378 case ISD::VP_REDUCE_SMAX:
11379 case ISD::VECREDUCE_SMAX:
11380 return RISCVISD::VECREDUCE_SMAX_VL;
11381 case ISD::VP_REDUCE_UMIN:
11382 case ISD::VECREDUCE_UMIN:
11383 return RISCVISD::VECREDUCE_UMIN_VL;
11384 case ISD::VP_REDUCE_SMIN:
11385 case ISD::VECREDUCE_SMIN:
11386 return RISCVISD::VECREDUCE_SMIN_VL;
11387 case ISD::VP_REDUCE_AND:
11388 case ISD::VECREDUCE_AND:
11389 return RISCVISD::VECREDUCE_AND_VL;
11390 case ISD::VP_REDUCE_OR:
11391 case ISD::VECREDUCE_OR:
11392 return RISCVISD::VECREDUCE_OR_VL;
11393 case ISD::VP_REDUCE_XOR:
11394 case ISD::VECREDUCE_XOR:
11395 return RISCVISD::VECREDUCE_XOR_VL;
11396 case ISD::VP_REDUCE_FADD:
11397 return RISCVISD::VECREDUCE_FADD_VL;
11398 case ISD::VP_REDUCE_SEQ_FADD:
11399 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
11400 case ISD::VP_REDUCE_FMAX:
11401 case ISD::VP_REDUCE_FMAXIMUM:
11402 return RISCVISD::VECREDUCE_FMAX_VL;
11403 case ISD::VP_REDUCE_FMIN:
11404 case ISD::VP_REDUCE_FMINIMUM:
11405 return RISCVISD::VECREDUCE_FMIN_VL;
11406 }
11407
11408}
11409
11410SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
11411 SelectionDAG &DAG,
11412 bool IsVP) const {
11413 SDLoc DL(Op);
11414 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
11415 MVT VecVT = Vec.getSimpleValueType();
11416 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
11417 Op.getOpcode() == ISD::VECREDUCE_OR ||
11418 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11419 Op.getOpcode() == ISD::VP_REDUCE_AND ||
11420 Op.getOpcode() == ISD::VP_REDUCE_OR ||
11421 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
11422 "Unexpected reduction lowering");
11423
11424 MVT XLenVT = Subtarget.getXLenVT();
11425
11426 MVT ContainerVT = VecVT;
11427 if (VecVT.isFixedLengthVector()) {
11428 ContainerVT = getContainerForFixedLengthVector(VecVT);
11429 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11430 }
11431
11432 SDValue Mask, VL;
11433 if (IsVP) {
11434 Mask = Op.getOperand(2);
11435 VL = Op.getOperand(3);
11436 } else {
11437 std::tie(Mask, VL) =
11438 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11439 }
11440
11441 ISD::CondCode CC;
11442 switch (Op.getOpcode()) {
11443 default:
11444 llvm_unreachable("Unhandled reduction");
11445 case ISD::VECREDUCE_AND:
11446 case ISD::VP_REDUCE_AND: {
11447 // vcpop ~x == 0
11448 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11449 if (IsVP || VecVT.isFixedLengthVector())
11450 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
11451 else
11452 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
11453 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11454 CC = ISD::SETEQ;
11455 break;
11456 }
11457 case ISD::VECREDUCE_OR:
11458 case ISD::VP_REDUCE_OR:
11459 // vcpop x != 0
11460 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11461 CC = ISD::SETNE;
11462 break;
11463 case ISD::VECREDUCE_XOR:
11464 case ISD::VP_REDUCE_XOR: {
11465 // ((vcpop x) & 1) != 0
11466 SDValue One = DAG.getConstant(1, DL, XLenVT);
11467 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11468 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
11469 CC = ISD::SETNE;
11470 break;
11471 }
11472 }
11473
11474 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11475 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
11476 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
11477
11478 if (!IsVP)
11479 return SetCC;
11480
11481 // Now include the start value in the operation.
11482 // Note that we must return the start value when no elements are operated
11483 // upon. The vcpop instructions we've emitted in each case above will return
11484 // 0 for an inactive vector, and so we've already received the neutral value:
11485 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
11486 // can simply include the start value.
11487 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11488 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
11489}
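// For illustration, with the mapping above a vector-predicated AND reduction
//   i1 %r = vp.reduce.and(i1 %start, <vscale x 4 x i1> %v, %mask, %evl)
// becomes a vcpop of (%v xor allones) under %mask/%evl, compared SETEQ
// against zero, and finally ANDed with %start -- matching the "vcpop ~x == 0"
// note in the AND case.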
11490
11491static bool isNonZeroAVL(SDValue AVL) {
11492 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
11493 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
11494 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
11495 (ImmAVL && ImmAVL->getZExtValue() >= 1);
11496}
11497
11498/// Helper to lower a reduction sequence of the form:
11499/// scalar = reduce_op vec, scalar_start
11500static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
11501 SDValue StartValue, SDValue Vec, SDValue Mask,
11502 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
11503 const RISCVSubtarget &Subtarget) {
11504 const MVT VecVT = Vec.getSimpleValueType();
11505 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
11506 const MVT XLenVT = Subtarget.getXLenVT();
11507 const bool NonZeroAVL = isNonZeroAVL(VL);
11508
11509 // The reduction needs an LMUL1 input; do the splat at either LMUL1
11510 // or the original VT if fractional.
11511 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
11512 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
11513 // prove it is non-zero. For the AVL=0 case, we need the scalar to
11514 // be the result of the reduction operation.
11515 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
11516 SDValue InitialValue =
11517 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
11518 if (M1VT != InnerVT)
11519 InitialValue =
11520 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
11521 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
11522   SDValue Policy = DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
11523   SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
11524 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
11525 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
11526}
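// A minimal sketch of what this helper builds for an integer add reduction
// with a known non-zero AVL:
//   vmv.s.x    v8, a0          ; start value into element 0 of an M1 register
//   vredsum.vs v8, v12, v8     ; element 0 of v8 = reduce(vec) + start
//   vmv.x.s    a0, v8          ; extract element 0 as the scalar result
// The passthru/AVL handling above only matters when AVL may be zero, in which
// case the start value itself must come out of the reduction register.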
11527
11528SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
11529 SelectionDAG &DAG) const {
11530 SDLoc DL(Op);
11531 SDValue Vec = Op.getOperand(0);
11532 EVT VecEVT = Vec.getValueType();
11533
11534 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11535
11536 // Due to ordering in legalize types we may have a vector type that needs to
11537 // be split. Do that manually so we can get down to a legal type.
11538   while (getTypeAction(*DAG.getContext(), VecEVT) ==
11539          TargetLowering::TypeSplitVector) {
11540 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11541 VecEVT = Lo.getValueType();
11542 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
11543 }
11544
11545 // TODO: The type may need to be widened rather than split. Or widened before
11546 // it can be split.
11547 if (!isTypeLegal(VecEVT))
11548 return SDValue();
11549
11550 MVT VecVT = VecEVT.getSimpleVT();
11551 MVT VecEltVT = VecVT.getVectorElementType();
11552 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
11553
11554 MVT ContainerVT = VecVT;
11555 if (VecVT.isFixedLengthVector()) {
11556 ContainerVT = getContainerForFixedLengthVector(VecVT);
11557 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11558 }
11559
11560 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11561
11562 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
11563 switch (BaseOpc) {
11564 case ISD::AND:
11565 case ISD::OR:
11566 case ISD::UMAX:
11567 case ISD::UMIN:
11568 case ISD::SMAX:
11569 case ISD::SMIN:
11570 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
11571 }
11572 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
11573 Mask, VL, DL, DAG, Subtarget);
11574}
11575
11576// Given a reduction op, this function returns the matching reduction opcode,
11577// the vector SDValue and the scalar SDValue required to lower this to a
11578// RISCVISD node.
11579static std::tuple<unsigned, SDValue, SDValue>
11580 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
11581                                const RISCVSubtarget &Subtarget) {
11582 SDLoc DL(Op);
11583 auto Flags = Op->getFlags();
11584 unsigned Opcode = Op.getOpcode();
11585 switch (Opcode) {
11586 default:
11587 llvm_unreachable("Unhandled reduction");
11588 case ISD::VECREDUCE_FADD: {
11589 // Use positive zero if we can. It is cheaper to materialize.
11590 SDValue Zero =
11591 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
11592 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
11593 }
11594 case ISD::VECREDUCE_SEQ_FADD:
11595 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
11596 Op.getOperand(0));
11597 case ISD::VECREDUCE_FMINIMUM:
11598 case ISD::VECREDUCE_FMAXIMUM:
11599 case ISD::VECREDUCE_FMIN:
11600 case ISD::VECREDUCE_FMAX: {
11601 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
11602 unsigned RVVOpc =
11603 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
11604 ? RISCVISD::VECREDUCE_FMIN_VL
11605 : RISCVISD::VECREDUCE_FMAX_VL;
11606 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
11607 }
11608 }
11609}
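// Example of the FADD start-value choice above: +0.0 is cheaper to
// materialize (an integer zero moved into the vector), but without the
// no-signed-zeros flag the start must be -0.0 so that a reduction whose
// elements are all -0.0 still produces -0.0 rather than +0.0.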
11610
11611SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
11612 SelectionDAG &DAG) const {
11613 SDLoc DL(Op);
11614 MVT VecEltVT = Op.getSimpleValueType();
11615
11616 unsigned RVVOpcode;
11617 SDValue VectorVal, ScalarVal;
11618 std::tie(RVVOpcode, VectorVal, ScalarVal) =
11619 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
11620 MVT VecVT = VectorVal.getSimpleValueType();
11621
11622 MVT ContainerVT = VecVT;
11623 if (VecVT.isFixedLengthVector()) {
11624 ContainerVT = getContainerForFixedLengthVector(VecVT);
11625 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
11626 }
11627
11628 MVT ResVT = Op.getSimpleValueType();
11629 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11630 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
11631 VL, DL, DAG, Subtarget);
11632 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
11633 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
11634 return Res;
11635
11636 if (Op->getFlags().hasNoNaNs())
11637 return Res;
11638
11639   // Force the output to NaN if any element is NaN.
11640 SDValue IsNan =
11641 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
11642 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
11643 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
11644 MVT XLenVT = Subtarget.getXLenVT();
11645 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
11646 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
11647 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11648 return DAG.getSelect(
11649 DL, ResVT, NoNaNs, Res,
11650 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11651}
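// Sketch of the NaN handling above for vecreduce.fmaximum without nnan:
//   vmfne.vv  v0, v8, v8       ; set mask bits on NaN lanes (NaN != NaN)
//   vcpop.m   a0, v0           ; count the NaN lanes under the active mask
//   result = (a0 == 0) ? reduced value : canonical quiet NaN
// The vfredmax/vfredmin reductions themselves prefer the non-NaN operand, so
// the explicit vcpop check restores fmaximum/fminimum NaN propagation.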
11652
11653SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
11654 SelectionDAG &DAG) const {
11655 SDLoc DL(Op);
11656 unsigned Opc = Op.getOpcode();
11657 SDValue Start = Op.getOperand(0);
11658 SDValue Vec = Op.getOperand(1);
11659 EVT VecEVT = Vec.getValueType();
11660 MVT XLenVT = Subtarget.getXLenVT();
11661
11662 // TODO: The type may need to be widened rather than split. Or widened before
11663 // it can be split.
11664 if (!isTypeLegal(VecEVT))
11665 return SDValue();
11666
11667 MVT VecVT = VecEVT.getSimpleVT();
11668 unsigned RVVOpcode = getRVVReductionOp(Opc);
11669
11670 if (VecVT.isFixedLengthVector()) {
11671 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
11672 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11673 }
11674
11675 SDValue VL = Op.getOperand(3);
11676 SDValue Mask = Op.getOperand(2);
11677 SDValue Res =
11678 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
11679 Vec, Mask, VL, DL, DAG, Subtarget);
11680 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
11681 Op->getFlags().hasNoNaNs())
11682 return Res;
11683
11684 // Propagate NaNs.
11685 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
11686 // Check if any of the elements in Vec is NaN.
11687 SDValue IsNaN = DAG.getNode(
11688 RISCVISD::SETCC_VL, DL, PredVT,
11689 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
11690 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
11691 // Check if the start value is NaN.
11692 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
11693 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
11694 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
11695 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11696 MVT ResVT = Res.getSimpleValueType();
11697 return DAG.getSelect(
11698 DL, ResVT, NoNaNs, Res,
11699 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11700}
11701
11702SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
11703 SelectionDAG &DAG) const {
11704 SDValue Vec = Op.getOperand(0);
11705 SDValue SubVec = Op.getOperand(1);
11706 MVT VecVT = Vec.getSimpleValueType();
11707 MVT SubVecVT = SubVec.getSimpleValueType();
11708
11709 SDLoc DL(Op);
11710 MVT XLenVT = Subtarget.getXLenVT();
11711 unsigned OrigIdx = Op.getConstantOperandVal(2);
11712 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11713
11714 if (OrigIdx == 0 && Vec.isUndef())
11715 return Op;
11716
11717 // We don't have the ability to slide mask vectors up indexed by their i1
11718 // elements; the smallest we can do is i8. Often we are able to bitcast to
11719 // equivalent i8 vectors. Note that when inserting a fixed-length vector
11720 // into a scalable one, we might not necessarily have enough scalable
11721 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
11722 if (SubVecVT.getVectorElementType() == MVT::i1) {
11723 if (VecVT.getVectorMinNumElements() >= 8 &&
11724 SubVecVT.getVectorMinNumElements() >= 8) {
11725 assert(OrigIdx % 8 == 0 && "Invalid index");
11726 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11727 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11728 "Unexpected mask vector lowering");
11729 OrigIdx /= 8;
11730 SubVecVT =
11731 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11732 SubVecVT.isScalableVector());
11733 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11734 VecVT.isScalableVector());
11735 Vec = DAG.getBitcast(VecVT, Vec);
11736 SubVec = DAG.getBitcast(SubVecVT, SubVec);
11737 } else {
11738 // We can't slide this mask vector up indexed by its i1 elements.
11739 // This poses a problem when we wish to insert a scalable vector which
11740 // can't be re-expressed as a larger type. Just choose the slow path and
11741 // extend to a larger type, then truncate back down.
11742 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11743 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11744 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11745 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
11746 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
11747 Op.getOperand(2));
11748 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
11749 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
11750 }
11751 }
11752
11753   // If the subvector is a fixed-length type and we don't know VLEN
11754 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11755 // don't know which register of a LMUL group contains the specific subvector
11756 // as we only know the minimum register size. Therefore we must slide the
11757 // vector group up the full amount.
11758 const auto VLen = Subtarget.getRealVLen();
11759 if (SubVecVT.isFixedLengthVector() && !VLen) {
11760 MVT ContainerVT = VecVT;
11761 if (VecVT.isFixedLengthVector()) {
11762 ContainerVT = getContainerForFixedLengthVector(VecVT);
11763 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11764 }
11765
11766 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
11767
11768 SDValue Mask =
11769 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11770 // Set the vector length to only the number of elements we care about. Note
11771 // that for slideup this includes the offset.
11772 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
11773 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
11774
11775     // Use tail agnostic policy if we're inserting over Vec's tail.
11776     unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11777     if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
11778       Policy = RISCVVType::TAIL_AGNOSTIC;
11779
11780 // If we're inserting into the lowest elements, use a tail undisturbed
11781 // vmv.v.v.
11782 if (OrigIdx == 0) {
11783 SubVec =
11784 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
11785 } else {
11786 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11787 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
11788 SlideupAmt, Mask, VL, Policy);
11789 }
11790
11791 if (VecVT.isFixedLengthVector())
11792 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11793 return DAG.getBitcast(Op.getValueType(), SubVec);
11794 }
11795
11796 MVT ContainerVecVT = VecVT;
11797 if (VecVT.isFixedLengthVector()) {
11798 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
11799 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
11800 }
11801
11802 MVT ContainerSubVecVT = SubVecVT;
11803 if (SubVecVT.isFixedLengthVector()) {
11804 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
11805 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
11806 }
11807
11808 unsigned SubRegIdx;
11809 ElementCount RemIdx;
11810 // insert_subvector scales the index by vscale if the subvector is scalable,
11811 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11812 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11813 if (SubVecVT.isFixedLengthVector()) {
11814 assert(VLen);
11815 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11816     auto Decompose =
11817         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11818             ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
11819 SubRegIdx = Decompose.first;
11820 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
11821 (OrigIdx % Vscale));
11822 } else {
11823     auto Decompose =
11824         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11825             ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
11826 SubRegIdx = Decompose.first;
11827 RemIdx = ElementCount::getScalable(Decompose.second);
11828 }
11829
11830 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
11831   assert(isPowerOf2_64(
11832       Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
11833 bool ExactlyVecRegSized =
11834 Subtarget.expandVScale(SubVecVT.getSizeInBits())
11835 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
11836
11837 // 1. If the Idx has been completely eliminated and this subvector's size is
11838 // a vector register or a multiple thereof, or the surrounding elements are
11839 // undef, then this is a subvector insert which naturally aligns to a vector
11840 // register. These can easily be handled using subregister manipulation.
11841 // 2. If the subvector isn't an exact multiple of a valid register group size,
11842 // then the insertion must preserve the undisturbed elements of the register.
11843 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
11844 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
11845 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
11846 // of that LMUL=1 type back into the larger vector (resolving to another
11847 // subregister operation). See below for how our VSLIDEUP works. We go via a
11848 // LMUL=1 type to avoid allocating a large register group to hold our
11849 // subvector.
11850 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
11851 if (SubVecVT.isFixedLengthVector()) {
11852 // We may get NoSubRegister if inserting at index 0 and the subvec
11853 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
11854 if (SubRegIdx == RISCV::NoSubRegister) {
11855 assert(OrigIdx == 0);
11856 return Op;
11857 }
11858
11859 // Use a insert_subvector that will resolve to an insert subreg.
11860 assert(VLen);
11861 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11862 SDValue Insert =
11863 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
11864 if (VecVT.isFixedLengthVector())
11865 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
11866 return Insert;
11867 }
11868 return Op;
11869 }
11870
11871 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
11872 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
11873 // (in our case undisturbed). This means we can set up a subvector insertion
11874 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
11875 // size of the subvector.
11876 MVT InterSubVT = ContainerVecVT;
11877 SDValue AlignedExtract = Vec;
11878 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
11879 if (SubVecVT.isFixedLengthVector()) {
11880 assert(VLen);
11881 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
11882 }
11883 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
11884 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
11885 // Extract a subvector equal to the nearest full vector register type. This
11886 // should resolve to a EXTRACT_SUBREG instruction.
11887 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
11888 }
11889
11890 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
11891
11892 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
11893
11894 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
11895 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
11896
11897 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
11898   unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11899   if (Subtarget.expandVScale(EndIndex) ==
11900       Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
11901     Policy = RISCVVType::TAIL_AGNOSTIC;
11902
11903 // If we're inserting into the lowest elements, use a tail undisturbed
11904 // vmv.v.v.
11905 if (RemIdx.isZero()) {
11906 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
11907 SubVec, VL);
11908 } else {
11909 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
11910
11911 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
11912 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
11913
11914 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
11915 SlideupAmt, Mask, VL, Policy);
11916 }
11917
11918 // If required, insert this subvector back into the correct vector register.
11919 // This should resolve to an INSERT_SUBREG instruction.
11920 if (ContainerVecVT.bitsGT(InterSubVT))
11921 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
11922
11923 if (VecVT.isFixedLengthVector())
11924 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11925
11926 // We might have bitcast from a mask type: cast back to the original type if
11927 // required.
11928 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
11929}
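// For illustration, inserting a subvector at an offset that is not a whole
// vector-register boundary takes the slide path above: the containing LMUL=1
// register is extracted, the subvector is placed with a vslideup whose VL is
// RemIdx plus the subvector length (tail-undisturbed unless it reaches the
// register's end), and the M1 result is inserted back into the register
// group, so only one M1 register is slid rather than the whole LMUL group.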
11930
11931SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
11932 SelectionDAG &DAG) const {
11933 SDValue Vec = Op.getOperand(0);
11934 MVT SubVecVT = Op.getSimpleValueType();
11935 MVT VecVT = Vec.getSimpleValueType();
11936
11937 SDLoc DL(Op);
11938 MVT XLenVT = Subtarget.getXLenVT();
11939 unsigned OrigIdx = Op.getConstantOperandVal(1);
11940 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11941
11942 // With an index of 0 this is a cast-like subvector, which can be performed
11943 // with subregister operations.
11944 if (OrigIdx == 0)
11945 return Op;
11946
11947 // We don't have the ability to slide mask vectors down indexed by their i1
11948 // elements; the smallest we can do is i8. Often we are able to bitcast to
11949 // equivalent i8 vectors. Note that when extracting a fixed-length vector
11950 // from a scalable one, we might not necessarily have enough scalable
11951 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
11952 if (SubVecVT.getVectorElementType() == MVT::i1) {
11953 if (VecVT.getVectorMinNumElements() >= 8 &&
11954 SubVecVT.getVectorMinNumElements() >= 8) {
11955 assert(OrigIdx % 8 == 0 && "Invalid index");
11956 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11957 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11958 "Unexpected mask vector lowering");
11959 OrigIdx /= 8;
11960 SubVecVT =
11961 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11962 SubVecVT.isScalableVector());
11963 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11964 VecVT.isScalableVector());
11965 Vec = DAG.getBitcast(VecVT, Vec);
11966 } else {
11967 // We can't slide this mask vector down, indexed by its i1 elements.
11968 // This poses a problem when we wish to extract a scalable vector which
11969 // can't be re-expressed as a larger type. Just choose the slow path and
11970 // extend to a larger type, then truncate back down.
11971 // TODO: We could probably improve this when extracting certain fixed
11972 // from fixed, where we can extract as i8 and shift the correct element
11973 // right to reach the desired subvector?
11974 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11975 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11976 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11977 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
11978 Op.getOperand(1));
11979 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
11980 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
11981 }
11982 }
11983
11984 const auto VLen = Subtarget.getRealVLen();
11985
11986   // If the subvector is a fixed-length type and we don't know VLEN
11987 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11988 // don't know which register of a LMUL group contains the specific subvector
11989 // as we only know the minimum register size. Therefore we must slide the
11990 // vector group down the full amount.
11991 if (SubVecVT.isFixedLengthVector() && !VLen) {
11992 MVT ContainerVT = VecVT;
11993 if (VecVT.isFixedLengthVector()) {
11994 ContainerVT = getContainerForFixedLengthVector(VecVT);
11995 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11996 }
11997
11998 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
11999 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
12000 if (auto ShrunkVT =
12001 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
12002 ContainerVT = *ShrunkVT;
12003 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
12004 }
12005
12006 SDValue Mask =
12007 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
12008 // Set the vector length to only the number of elements we care about. This
12009 // avoids sliding down elements we're going to discard straight away.
12010 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12011 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
12012 SDValue Slidedown =
12013 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12014 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
12015 // Now we can use a cast-like subvector extract to get the result.
12016 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12017 return DAG.getBitcast(Op.getValueType(), Slidedown);
12018 }
12019
12020 if (VecVT.isFixedLengthVector()) {
12021 VecVT = getContainerForFixedLengthVector(VecVT);
12022 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
12023 }
12024
12025 MVT ContainerSubVecVT = SubVecVT;
12026 if (SubVecVT.isFixedLengthVector())
12027 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12028
12029 unsigned SubRegIdx;
12030 ElementCount RemIdx;
12031 // extract_subvector scales the index by vscale if the subvector is scalable,
12032 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12033 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12034 if (SubVecVT.isFixedLengthVector()) {
12035 assert(VLen);
12036 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12037     auto Decompose =
12038         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12039             VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12040 SubRegIdx = Decompose.first;
12041 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12042 (OrigIdx % Vscale));
12043 } else {
12044     auto Decompose =
12045         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12046             VecVT, ContainerSubVecVT, OrigIdx, TRI);
12047 SubRegIdx = Decompose.first;
12048 RemIdx = ElementCount::getScalable(Decompose.second);
12049 }
12050
12051 // If the Idx has been completely eliminated then this is a subvector extract
12052 // which naturally aligns to a vector register. These can easily be handled
12053 // using subregister manipulation. We use an extract_subvector that will
12054 // resolve to an extract subreg.
12055 if (RemIdx.isZero()) {
12056 if (SubVecVT.isFixedLengthVector()) {
12057 assert(VLen);
12058 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12059 Vec =
12060 DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
12061 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
12062 }
12063 return Op;
12064 }
12065
12066 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
12067 // was > M1 then the index would need to be a multiple of VLMAX, and so would
12068 // divide exactly.
12069 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
12070 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
12071
12072 // If the vector type is an LMUL-group type, extract a subvector equal to the
12073 // nearest full vector register type.
12074 MVT InterSubVT = VecVT;
12075 if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
12076 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
12077 // we should have successfully decomposed the extract into a subregister.
12078 // We use an extract_subvector that will resolve to a subreg extract.
12079 assert(SubRegIdx != RISCV::NoSubRegister);
12080 (void)SubRegIdx;
12081 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
12082 if (SubVecVT.isFixedLengthVector()) {
12083 assert(VLen);
12084 Idx /= *VLen / RISCV::RVVBitsPerBlock;
12085 }
12086 InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
12087 Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
12088 }
12089
12090 // Slide this vector register down by the desired number of elements in order
12091 // to place the desired subvector starting at element 0.
12092 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12093 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
12094 if (SubVecVT.isFixedLengthVector())
12095 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12096 SDValue Slidedown =
12097 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
12098 Vec, SlidedownAmt, Mask, VL);
12099
12100 // Now the vector is in the right position, extract our final subvector. This
12101 // should resolve to a COPY.
12102 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12103
12104 // We might have bitcast from a mask type: cast back to the original type if
12105 // required.
12106 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12107}
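// The extract path is the dual of the insert path above: when RemIdx is
// non-zero, the nearest LMUL=1 register is extracted and slid down, e.g.
//   vslidedown.vx  vDst, vSrc, <RemIdx>
// after which the element-0-aligned subvector is taken, which resolves to a
// plain subregister copy.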
12108
12109 // Widen a vector operation's operands to i8, then truncate its results back
12110 // to the original type, typically i1. All operand and result types must match.
12111 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
12112                                   SelectionDAG &DAG) {
12113 MVT VT = N.getSimpleValueType();
12114 MVT WideVT = VT.changeVectorElementType(MVT::i8);
12115   SmallVector<SDValue, 4> WideOps;
12116   for (SDValue Op : N->ops()) {
12117 assert(Op.getSimpleValueType() == VT &&
12118 "Operands and result must be same type");
12119 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12120 }
12121
12122 unsigned NumVals = N->getNumValues();
12123
12124   SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
12125       NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
12126 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12127 SmallVector<SDValue, 4> TruncVals;
12128 for (unsigned I = 0; I < NumVals; I++) {
12129 TruncVals.push_back(
12130 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12131 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12132 }
12133
12134 if (TruncVals.size() > 1)
12135 return DAG.getMergeValues(TruncVals, DL);
12136 return TruncVals.front();
12137}
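// Sketch of the i1 widening above for a two-result deinterleave of nxv8i1
// inputs: both operands are zero-extended to nxv8i8, the operation is built
// on the i8 vectors, and each i8 result is compared SETNE against zero to
// recover an nxv8i1 value.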
12138
12139SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12140 SelectionDAG &DAG) const {
12141 SDLoc DL(Op);
12142 MVT VecVT = Op.getSimpleValueType();
12143
12144 const unsigned Factor = Op->getNumValues();
12145 assert(Factor <= 8);
12146
12147 // 1 bit element vectors need to be widened to e8
12148 if (VecVT.getVectorElementType() == MVT::i1)
12149 return widenVectorOpsToi8(Op, DL, DAG);
12150
12151 // Convert to scalable vectors first.
12152 if (VecVT.isFixedLengthVector()) {
12153 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12154     SmallVector<SDValue, 8> Ops(Factor);
12155     for (unsigned i = 0U; i < Factor; ++i)
12156 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12157 Subtarget);
12158
12159 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12160     SDValue NewDeinterleave =
12161         DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs), Ops);
12162
12163 SmallVector<SDValue, 8> Res(Factor);
12164 for (unsigned i = 0U; i < Factor; ++i)
12165 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12166 DAG, Subtarget);
12167 return DAG.getMergeValues(Res, DL);
12168 }
12169
12170 // If concatenating would exceed LMUL=8, we need to split.
12171 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12172 (8 * RISCV::RVVBitsPerBlock)) {
12173 SmallVector<SDValue, 8> Ops(Factor * 2);
12174 for (unsigned i = 0; i != Factor; ++i) {
12175 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12176 Ops[i * 2] = OpLo;
12177 Ops[i * 2 + 1] = OpHi;
12178 }
12179
12180 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12181
12182     SDValue Lo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs),
12183                              ArrayRef(Ops).slice(0, Factor));
12184     SDValue Hi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs),
12185                              ArrayRef(Ops).slice(Factor, Factor));
12186
12187 SmallVector<SDValue, 8> Res(Factor);
12188 for (unsigned i = 0; i != Factor; ++i)
12189 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
12190 Hi.getValue(i));
12191
12192 return DAG.getMergeValues(Res, DL);
12193 }
12194
12195 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
12196 MVT VT = Op->getSimpleValueType(0);
12197 SDValue V1 = Op->getOperand(0);
12198 SDValue V2 = Op->getOperand(1);
12199
12200 // For fractional LMUL, check if we can use a higher LMUL
12201 // instruction to avoid a vslidedown.
12202 if (SDValue Src = foldConcatVector(V1, V2);
12203 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
12204 EVT NewVT = VT.getDoubleNumVectorElementsVT();
12205 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
12206 // Freeze the source so we can increase its use count.
12207 Src = DAG.getFreeze(Src);
12208 SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
12209 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12210 SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
12211 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12212 Even = DAG.getExtractSubvector(DL, VT, Even, 0);
12213 Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
12214 return DAG.getMergeValues({Even, Odd}, DL);
12215 }
12216
12217 // Freeze the sources so we can increase their use count.
12218 V1 = DAG.getFreeze(V1);
12219 V2 = DAG.getFreeze(V2);
12220 SDValue Even =
12221 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12222 SDValue Odd =
12223 lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12224 return DAG.getMergeValues({Even, Odd}, DL);
12225 }
12226
12227 SmallVector<SDValue, 8> Ops(Op->op_values());
12228
12229 // Concatenate the vectors as one vector to deinterleave
12230 MVT ConcatVT =
12231 MVT::getVectorVT(VecVT.getVectorElementType(),
12232 VecVT.getVectorElementCount().multiplyCoefficientBy(
12233 PowerOf2Ceil(Factor)));
12234 if (Ops.size() < PowerOf2Ceil(Factor))
12235 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
12236 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
12237
12238 if (Factor == 2) {
12239 // We can deinterleave through vnsrl.wi if the element type is smaller than
12240 // ELEN
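// Illustrative sketch for e16 (actual codegen may differ): view the
// concatenated vector as pairs of e32 elements; a narrowing shift of 0
// (vnsrl.wi vd, vs, 0) keeps the even e16 elements, and a shift of 16 keeps
// the odd ones.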
12241 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12242 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
12243 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
12244 return DAG.getMergeValues({Even, Odd}, DL);
12245 }
12246
12247 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
12248 // possible mask vector, then extract the required subvector. Doing this
12249 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
12250 // creation to be rematerialized during register allocation to reduce
12251 // register pressure if needed.
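// Rough sketch of the intent (register choices illustrative only):
//   li a0, 0x55 (0b01010101) ; vsetvli x0, ..., e8, m1 ; vmv.v.x v8, a0
// then reinterpret v8 as a mask register and extract the subvector matching
// MaskVT; the odd mask uses 0xAA (0b10101010) instead.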
12252
12253 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
12254
12255 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
12256 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
12257 SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
12258
12259 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
12260 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
12261 SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
12262
12263 // vcompress the even and odd elements into two separate vectors
12264 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12265 EvenMask, DAG.getUNDEF(ConcatVT));
12266 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12267 OddMask, DAG.getUNDEF(ConcatVT));
12268
12269 // Extract the result half of the compress for even and odd
12270 SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
12271 SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
12272
12273 return DAG.getMergeValues({Even, Odd}, DL);
12274 }
12275
12276 // Store with a unit-stride store and load it back with a segmented load.
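// Rough shape of the expected sequence for, e.g., Factor == 3 (illustrative):
//   vse<sew>.v of the concatenated value to a stack slot, then
//   vlseg3e<sew>.v to read the three deinterleaved results back.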
12277 MVT XLenVT = Subtarget.getXLenVT();
12278 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12279 SDValue Passthru = DAG.getUNDEF(ConcatVT);
12280
12281 // Allocate a stack slot.
12282 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12283 SDValue StackPtr =
12284 DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
12285 auto &MF = DAG.getMachineFunction();
12286 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12287 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12288
12289 SDValue StoreOps[] = {DAG.getEntryNode(),
12290 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
12291 Concat, StackPtr, VL};
12292
12293 SDValue Chain = DAG.getMemIntrinsicNode(
12294 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
12295 ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12296 MachineMemOperand::MOStore);
12297
12298 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
12299 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
12300 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
12301 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
12302 Intrinsic::riscv_vlseg8_mask};
12303
12304 SDValue LoadOps[] = {
12305 Chain,
12306 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
12307 Passthru,
12308 StackPtr,
12309 Mask,
12310 VL,
12311 DAG.getTargetConstant(
12312 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
12313 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
12314
12315 unsigned Sz =
12316 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12317 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12318
12319 SDValue Load = DAG.getMemIntrinsicNode(
12320 ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
12321 LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12322 MachineMemOperand::MOLoad);
12323
12324 SmallVector<SDValue, 8> Res(Factor);
12325
12326 for (unsigned i = 0U; i < Factor; ++i)
12327 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
12328 DAG.getTargetConstant(i, DL, MVT::i32));
12329
12330 return DAG.getMergeValues(Res, DL);
12331}
12332
12333SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
12334 SelectionDAG &DAG) const {
12335 SDLoc DL(Op);
12336 MVT VecVT = Op.getSimpleValueType();
12337
12338 const unsigned Factor = Op.getNumOperands();
12339 assert(Factor <= 8);
12340
12341 // i1 vectors need to be widened to i8
12342 if (VecVT.getVectorElementType() == MVT::i1)
12343 return widenVectorOpsToi8(Op, DL, DAG);
12344
12345 // Convert to scalable vectors first.
12346 if (VecVT.isFixedLengthVector()) {
12347 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12348 SmallVector<SDValue, 8> Ops(Factor);
12349 for (unsigned i = 0U; i < Factor; ++i)
12350 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12351 Subtarget);
12352
12353 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12354 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
12355
12356 SmallVector<SDValue, 8> Res(Factor);
12357 for (unsigned i = 0U; i < Factor; ++i)
12358 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
12359 Subtarget);
12360 return DAG.getMergeValues(Res, DL);
12361 }
12362
12363 MVT XLenVT = Subtarget.getXLenVT();
12364 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12365
12366 // If the VT is larger than LMUL=8, we need to split and reassemble.
12367 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12368 (8 * RISCV::RVVBitsPerBlock)) {
12369 SmallVector<SDValue, 8> Ops(Factor * 2);
12370 for (unsigned i = 0; i != Factor; ++i) {
12371 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12372 Ops[i] = OpLo;
12373 Ops[i + Factor] = OpHi;
12374 }
12375
12376 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12377
12378 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12379 ArrayRef(Ops).take_front(Factor)),
12380 DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12381 ArrayRef(Ops).drop_front(Factor))};
12382
12383 SmallVector<SDValue, 8> Concats(Factor);
12384 for (unsigned i = 0; i != Factor; ++i) {
12385 unsigned IdxLo = 2 * i;
12386 unsigned IdxHi = 2 * i + 1;
12387 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
12388 Res[IdxLo / Factor].getValue(IdxLo % Factor),
12389 Res[IdxHi / Factor].getValue(IdxHi % Factor));
12390 }
12391
12392 return DAG.getMergeValues(Concats, DL);
12393 }
12394
12395 SDValue Interleaved;
12396
12397 // Spill to the stack using a segment store for simplicity.
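// Rough shape for, e.g., Factor == 3 (illustrative): vsseg3e<sew>.v the three
// operands to a stack slot, then read the interleaved data back with three
// plain unit-stride loads at vscale-scaled offsets.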
12398 if (Factor != 2) {
12399 EVT MemVT =
12400 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(),
12401 VecVT.getVectorElementCount() * Factor);
12402
12403 // Allocate a stack slot.
12404 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12405 SDValue StackPtr =
12406 DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12407 EVT PtrVT = StackPtr.getValueType();
12408 auto &MF = DAG.getMachineFunction();
12409 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12410 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12411
12412 static const Intrinsic::ID IntrIds[] = {
12413 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12414 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12415 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12416 Intrinsic::riscv_vsseg8_mask,
12417 };
12418
12419 unsigned Sz =
12420 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12421 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12422
12423 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12424 for (unsigned i = 0; i < Factor; i++)
12425 StoredVal =
12426 DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12427 Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
12428
12429 SDValue Ops[] = {DAG.getEntryNode(),
12430 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
12431 StoredVal,
12432 StackPtr,
12433 Mask,
12434 VL,
12435 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()),
12436 DL, XLenVT)};
12437
12438 SDValue Chain = DAG.getMemIntrinsicNode(
12439 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12440 VecVT.getVectorElementType(), PtrInfo, Alignment,
12441 MachineMemOperand::MOStore);
12442
12443 SmallVector<SDValue, 8> Loads(Factor);
12444
12445 SDValue Increment =
12446 DAG.getVScale(DL, PtrVT,
12447 APInt(PtrVT.getFixedSizeInBits(),
12448 VecVT.getStoreSize().getKnownMinValue()));
12449 for (unsigned i = 0; i != Factor; ++i) {
12450 if (i != 0)
12451 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
12452
12453 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12454 }
12455
12456 return DAG.getMergeValues(Loads, DL);
12457 }
12458
12459 // Use ri.vzip2{a,b} if available
12460 // TODO: Figure out the best lowering for the spread variants
12461 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
12462 !Op.getOperand(1).isUndef()) {
12463 // Freeze the sources so we can increase their use count.
12464 SDValue V1 = DAG.getFreeze(Op->getOperand(0));
12465 SDValue V2 = DAG.getFreeze(Op->getOperand(1));
12466 SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12467 SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12468 return DAG.getMergeValues({Lo, Hi}, DL);
12469 }
12470
12471 // If the element type is smaller than ELEN, then we can interleave with
12472 // vwaddu.vv and vwmaccu.vx
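// Sketch of the trick (illustrative; see getWideningInterleave for the real
// lowering): with 2*SEW-wide arithmetic,
//   vwaddu.vv t, a, b             ; t  = zext(a) + zext(b)
//   vwmaccu.vx t, (2^SEW - 1), b  ; t += zext(b) * (2^SEW - 1) == zext(b) << SEW
// so each 2*SEW element holds a[i] in its low half and b[i] in its high half,
// which is exactly the interleaved layout.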
12473 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12474 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
12475 DAG, Subtarget);
12476 } else {
12477 // Otherwise, fall back to using vrgatherei16.vv
12478 MVT ConcatVT =
12479 MVT::getVectorVT(VecVT.getVectorElementType(),
12480 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
12481 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
12482 Op.getOperand(0), Op.getOperand(1));
12483
12484 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
12485
12486 // 0 1 2 3 4 5 6 7 ...
12487 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
12488
12489 // 1 1 1 1 1 1 1 1 ...
12490 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
12491
12492 // 0 1 0 1 0 1 0 1 ...
12493 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
12494 OddMask = DAG.getSetCC(
12495 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
12496 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
12497 ISD::CondCode::SETNE);
12498
12499 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
12500
12501 // Build up the index vector for interleaving the concatenated vector
12502 // 0 0 1 1 2 2 3 3 ...
12503 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
12504 // 0 n 1 n+1 2 n+2 3 n+3 ...
12505 Idx =
12506 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
12507
12508 // Then perform the interleave
12509 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
12510 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
12511 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
12512 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
12513 }
12514
12515 // Extract the two halves from the interleaved result
12516 SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
12517 SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
12518 VecVT.getVectorMinNumElements());
12519
12520 return DAG.getMergeValues({Lo, Hi}, DL);
12521}
12522
12523// Lower step_vector to the vid instruction. Any non-identity step value must
12524// be accounted for by manual expansion.
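// Illustrative examples (exact instruction selection may differ):
//   step_vector, step 1 -> vid.v v8
//   step_vector, step 4 -> vid.v v8 ; vsll.vi v8, v8, 2
//   step_vector, step 6 -> vid.v v8 ; vmul.vx v8, v8, a0 (a0 = 6)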
12525SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
12526 SelectionDAG &DAG) const {
12527 SDLoc DL(Op);
12528 MVT VT = Op.getSimpleValueType();
12529 assert(VT.isScalableVector() && "Expected scalable vector");
12530 MVT XLenVT = Subtarget.getXLenVT();
12531 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
12532 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
12533 uint64_t StepValImm = Op.getConstantOperandVal(0);
12534 if (StepValImm != 1) {
12535 if (isPowerOf2_64(StepValImm)) {
12536 SDValue StepVal =
12537 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
12538 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
12539 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
12540 } else {
12541 SDValue StepVal = lowerScalarSplat(
12542 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
12543 VL, VT, DL, DAG, Subtarget);
12544 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
12545 }
12546 }
12547 return StepVec;
12548}
12549
12550// Implement vector_reverse using vrgather.vv with indices determined by
12551// subtracting the id of each element from (VLMAX-1). This will convert
12552// the indices like so:
12553// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
12554// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12555SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
12556 SelectionDAG &DAG) const {
12557 SDLoc DL(Op);
12558 MVT VecVT = Op.getSimpleValueType();
12559 if (VecVT.getVectorElementType() == MVT::i1) {
12560 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
12561 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
12562 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
12563 return DAG.getSetCC(DL, VecVT, Op2,
12564 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
12565 }
12566
12567 MVT ContainerVT = VecVT;
12568 SDValue Vec = Op.getOperand(0);
12569 if (VecVT.isFixedLengthVector()) {
12570 ContainerVT = getContainerForFixedLengthVector(VecVT);
12571 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12572 }
12573
12574 MVT XLenVT = Subtarget.getXLenVT();
12575 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12576
12577 // On some uarchs vrgather.vv will read from every input register for each
12578 // output register, regardless of the indices. However, to reverse a vector,
12579 // each output register only needs to read from one register. So decompose it
12580 // into LMUL * M1 vrgather.vvs, giving O(LMUL) performance instead of
12581 // O(LMUL^2).
12582 //
12583 // vsetvli a1, zero, e64, m4, ta, ma
12584 // vrgatherei16.vv v12, v8, v16
12585 // ->
12586 // vsetvli a1, zero, e64, m1, ta, ma
12587 // vrgather.vv v15, v8, v16
12588 // vrgather.vv v14, v9, v16
12589 // vrgather.vv v13, v10, v16
12590 // vrgather.vv v12, v11, v16
12591 if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
12592 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
12593 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
12594 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
12595 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
12596 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
12597
12598 // Fixed length vectors might not fit exactly into their container, and so
12599 // leave a gap in the front of the vector after being reversed. Slide this
12600 // away.
12601 //
12602 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
12603 // 0 1 2 3 x x x x <- reverse
12604 // x x x x 0 1 2 3 <- vslidedown.vx
12605 if (VecVT.isFixedLengthVector()) {
12606 SDValue Offset = DAG.getNode(
12607 ISD::SUB, DL, XLenVT,
12608 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
12609 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
12610 Concat =
12611 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12612 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
12613 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
12614 }
12615 return Concat;
12616 }
12617
12618 unsigned EltSize = ContainerVT.getScalarSizeInBits();
12619 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
12620 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12621 unsigned MaxVLMAX =
12622 VecVT.isFixedLengthVector()
12623 ? VecVT.getVectorNumElements()
12624 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12625
12626 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12627 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
12628
12629 // If this is SEW=8 and VLMAX is potentially more than 256, we need
12630 // to use vrgatherei16.vv.
12631 if (MaxVLMAX > 256 && EltSize == 8) {
12632 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
12633 // Reverse each half, then reassemble them in reverse order.
12634 // NOTE: It's also possible that after splitting, VLMAX no longer
12635 // requires vrgatherei16.vv.
12636 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12637 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
12638 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
12639 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12640 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12641 // Reassemble the low and high pieces reversed.
12642 // FIXME: This is a CONCAT_VECTORS.
12643 SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0);
12644 return DAG.getInsertSubvector(DL, Res, Lo,
12645 LoVT.getVectorMinNumElements());
12646 }
12647
12648 // Just promote the int type to i16 which will double the LMUL.
12649 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
12650 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12651 }
12652
12653 // At LMUL > 1, do the index computation in 16 bits to reduce register
12654 // pressure.
12655 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
12656 IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
12657 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
12658 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12659 IntVT = IntVT.changeVectorElementType(MVT::i16);
12660 }
12661
12662 // Calculate VLMAX-1 for the desired SEW.
12663 SDValue VLMinus1 = DAG.getNode(
12664 ISD::SUB, DL, XLenVT,
12665 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
12666 DAG.getConstant(1, DL, XLenVT));
12667
12668 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
12669 bool IsRV32E64 =
12670 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
12671 SDValue SplatVL;
12672 if (!IsRV32E64)
12673 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
12674 else
12675 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
12676 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
12677
12678 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
12679 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
12680 DAG.getUNDEF(IntVT), Mask, VL);
12681
12682 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
12683 DAG.getUNDEF(ContainerVT), Mask, VL);
12684 if (VecVT.isFixedLengthVector())
12685 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
12686 return Gather;
12687}
12688
12689SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
12690 SelectionDAG &DAG) const {
12691 SDLoc DL(Op);
12692 SDValue V1 = Op.getOperand(0);
12693 SDValue V2 = Op.getOperand(1);
12694 MVT XLenVT = Subtarget.getXLenVT();
12695 MVT VecVT = Op.getSimpleValueType();
12696
12697 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
12698
12699 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
12700 SDValue DownOffset, UpOffset;
12701 if (ImmValue >= 0) {
12702 // The operand is a TargetConstant; we need to rebuild it as a regular
12703 // constant.
12704 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12705 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
12706 } else {
12707 // The operand is a TargetConstant; we need to rebuild it as a regular
12708 // constant rather than negating the original operand.
12709 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12710 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
12711 }
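// Illustrative example: vector_splice(v1, v2, 2) becomes a slide-down of v1 by
// 2 followed by a slide-up of v2 at offset VLMAX-2, so the result is
// v1[2..VLMAX-1] followed by v2[0..1].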
12712
12713 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
12714
12715 SDValue SlideDown = getVSlidedown(
12716 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
12717 Subtarget.hasVLDependentLatency() ? UpOffset
12718 : DAG.getRegister(RISCV::X0, XLenVT));
12719 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
12720 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
12721 RISCVVType::TAIL_AGNOSTIC);
12722}
12723
12724SDValue
12725RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
12726 SelectionDAG &DAG) const {
12727 SDLoc DL(Op);
12728 auto *Load = cast<LoadSDNode>(Op);
12729
12730 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12731 Load->getMemoryVT(),
12732 *Load->getMemOperand()) &&
12733 "Expecting a correctly-aligned load");
12734
12735 MVT VT = Op.getSimpleValueType();
12736 MVT XLenVT = Subtarget.getXLenVT();
12737 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12738
12739 // If we know the exact VLEN and our fixed length vector completely fills
12740 // the container, use a whole register load instead.
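// For example (illustrative): with a known VLEN of 256, a v8i32 load exactly
// fills one LMUL=1 register, so an ordinary whole-register load can be used
// instead of a vle32.v with an explicit VL.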
12741 const auto [MinVLMAX, MaxVLMAX] =
12742 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12743 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12744 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12745 MachineMemOperand *MMO = Load->getMemOperand();
12746 SDValue NewLoad =
12747 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
12748 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
12749 MMO->getAAInfo(), MMO->getRanges());
12750 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12751 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12752 }
12753
12754 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12755
12756 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12757 SDValue IntID = DAG.getTargetConstant(
12758 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
12759 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
12760 if (!IsMaskOp)
12761 Ops.push_back(DAG.getUNDEF(ContainerVT));
12762 Ops.push_back(Load->getBasePtr());
12763 Ops.push_back(VL);
12764 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12765 SDValue NewLoad =
12766 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12767 Load->getMemoryVT(), Load->getMemOperand());
12768
12769 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12770 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12771}
12772
12773SDValue
12774RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
12775 SelectionDAG &DAG) const {
12776 SDLoc DL(Op);
12777 auto *Store = cast<StoreSDNode>(Op);
12778
12779 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12780 Store->getMemoryVT(),
12781 *Store->getMemOperand()) &&
12782 "Expecting a correctly-aligned store");
12783
12784 SDValue StoreVal = Store->getValue();
12785 MVT VT = StoreVal.getSimpleValueType();
12786 MVT XLenVT = Subtarget.getXLenVT();
12787
12788 // If the size is less than a byte, we need to pad with zeros to make a byte.
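// For example (illustrative): a v4i1 store is widened by inserting the value
// into a zeroed v8i1, so the vsm.v emitted below always writes a whole byte.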
12789 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
12790 VT = MVT::v8i1;
12791 StoreVal =
12792 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
12793 }
12794
12795 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12796
12797 SDValue NewValue =
12798 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12799
12800 // If we know the exact VLEN and our fixed length vector completely fills
12801 // the container, use a whole register store instead.
12802 const auto [MinVLMAX, MaxVLMAX] =
12803 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12804 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12805 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12806 MachineMemOperand *MMO = Store->getMemOperand();
12807 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
12808 MMO->getPointerInfo(), MMO->getBaseAlign(),
12809 MMO->getFlags(), MMO->getAAInfo());
12810 }
12811
12812 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12813
12814 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12815 SDValue IntID = DAG.getTargetConstant(
12816 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
12817 return DAG.getMemIntrinsicNode(
12818 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
12819 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
12820 Store->getMemoryVT(), Store->getMemOperand());
12821}
12822
12823SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
12824 SelectionDAG &DAG) const {
12825 SDLoc DL(Op);
12826 MVT VT = Op.getSimpleValueType();
12827
12828 const auto *MemSD = cast<MemSDNode>(Op);
12829 EVT MemVT = MemSD->getMemoryVT();
12830 MachineMemOperand *MMO = MemSD->getMemOperand();
12831 SDValue Chain = MemSD->getChain();
12832 SDValue BasePtr = MemSD->getBasePtr();
12833
12834 SDValue Mask, PassThru, VL;
12835 bool IsExpandingLoad = false;
12836 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
12837 Mask = VPLoad->getMask();
12838 PassThru = DAG.getUNDEF(VT);
12839 VL = VPLoad->getVectorLength();
12840 } else {
12841 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
12842 Mask = MLoad->getMask();
12843 PassThru = MLoad->getPassThru();
12844 IsExpandingLoad = MLoad->isExpandingLoad();
12845 }
12846
12847 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12848
12849 MVT XLenVT = Subtarget.getXLenVT();
12850
12851 MVT ContainerVT = VT;
12852 if (VT.isFixedLengthVector()) {
12853 ContainerVT = getContainerForFixedLengthVector(VT);
12854 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12855 if (!IsUnmasked) {
12856 MVT MaskVT = getMaskTypeFor(ContainerVT);
12857 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12858 }
12859 }
12860
12861 if (!VL)
12862 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12863
12864 SDValue ExpandingVL;
12865 if (!IsUnmasked && IsExpandingLoad) {
12866 ExpandingVL = VL;
12867 VL =
12868 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12869 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12870 }
12871
12872 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
12873 : Intrinsic::riscv_vle_mask;
12874 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12875 if (IntID == Intrinsic::riscv_vle)
12876 Ops.push_back(DAG.getUNDEF(ContainerVT));
12877 else
12878 Ops.push_back(PassThru);
12879 Ops.push_back(BasePtr);
12880 if (IntID == Intrinsic::riscv_vle_mask)
12881 Ops.push_back(Mask);
12882 Ops.push_back(VL);
12883 if (IntID == Intrinsic::riscv_vle_mask)
12884 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
12885
12886 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12887
12888 SDValue Result =
12889 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12890 Chain = Result.getValue(1);
12891 if (ExpandingVL) {
12892 MVT IndexVT = ContainerVT;
12893 if (ContainerVT.isFloatingPoint())
12894 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
12895
12896 MVT IndexEltVT = IndexVT.getVectorElementType();
12897 bool UseVRGATHEREI16 = false;
12898 // If the index vector is an i8 vector and the element count exceeds 256,
12899 // we should change the element type of the index vector to i16 to avoid
12900 // overflow.
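// For example (illustrative): with 512 elements the viota.m results can reach
// 511, which does not fit in an i8 index, so the index vector and the gather
// are done at i16 (vrgatherei16) instead.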
12901 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
12902 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
12903 assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
12904 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
12905 UseVRGATHEREI16 = true;
12906 }
12907
12908 SDValue Iota =
12909 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
12910 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
12911 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
12912 Result =
12913 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
12914 : RISCVISD::VRGATHER_VV_VL,
12915 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
12916 }
12917
12918 if (VT.isFixedLengthVector())
12919 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12920
12921 return DAG.getMergeValues({Result, Chain}, DL);
12922}
12923
12924SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
12925 SDLoc DL(Op);
12926 MVT VT = Op->getSimpleValueType(0);
12927
12928 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
12929 EVT MemVT = VPLoadFF->getMemoryVT();
12930 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
12931 SDValue Chain = VPLoadFF->getChain();
12932 SDValue BasePtr = VPLoadFF->getBasePtr();
12933
12934 SDValue Mask = VPLoadFF->getMask();
12935 SDValue VL = VPLoadFF->getVectorLength();
12936
12937 MVT XLenVT = Subtarget.getXLenVT();
12938
12939 MVT ContainerVT = VT;
12940 if (VT.isFixedLengthVector()) {
12941 ContainerVT = getContainerForFixedLengthVector(VT);
12942 MVT MaskVT = getMaskTypeFor(ContainerVT);
12943 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12944 }
12945
12946 unsigned IntID = Intrinsic::riscv_vleff_mask;
12947 SDValue Ops[] = {
12948 Chain,
12949 DAG.getTargetConstant(IntID, DL, XLenVT),
12950 DAG.getUNDEF(ContainerVT),
12951 BasePtr,
12952 Mask,
12953 VL,
12954 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)};
12955
12956 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
12957
12958 SDValue Result =
12959 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12960 SDValue OutVL = Result.getValue(1);
12961 Chain = Result.getValue(2);
12962
12963 if (VT.isFixedLengthVector())
12964 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12965
12966 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
12967}
12968
12969SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
12970 SelectionDAG &DAG) const {
12971 SDLoc DL(Op);
12972
12973 const auto *MemSD = cast<MemSDNode>(Op);
12974 EVT MemVT = MemSD->getMemoryVT();
12975 MachineMemOperand *MMO = MemSD->getMemOperand();
12976 SDValue Chain = MemSD->getChain();
12977 SDValue BasePtr = MemSD->getBasePtr();
12978 SDValue Val, Mask, VL;
12979
12980 bool IsCompressingStore = false;
12981 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
12982 Val = VPStore->getValue();
12983 Mask = VPStore->getMask();
12984 VL = VPStore->getVectorLength();
12985 } else {
12986 const auto *MStore = cast<MaskedStoreSDNode>(Op);
12987 Val = MStore->getValue();
12988 Mask = MStore->getMask();
12989 IsCompressingStore = MStore->isCompressingStore();
12990 }
12991
12992 bool IsUnmasked =
12993 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
12994
12995 MVT VT = Val.getSimpleValueType();
12996 MVT XLenVT = Subtarget.getXLenVT();
12997
12998 MVT ContainerVT = VT;
12999 if (VT.isFixedLengthVector()) {
13000 ContainerVT = getContainerForFixedLengthVector(VT);
13001
13002 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13003 if (!IsUnmasked || IsCompressingStore) {
13004 MVT MaskVT = getMaskTypeFor(ContainerVT);
13005 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13006 }
13007 }
13008
13009 if (!VL)
13010 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13011
13012 if (IsCompressingStore) {
13013 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13014 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13015 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
13016 VL =
13017 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
13018 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
13019 }
13020
13021 unsigned IntID =
13022 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
13023 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13024 Ops.push_back(Val);
13025 Ops.push_back(BasePtr);
13026 if (!IsUnmasked)
13027 Ops.push_back(Mask);
13028 Ops.push_back(VL);
13029
13030 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
13031 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
13032}
13033
13034SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
13035 SelectionDAG &DAG) const {
13036 SDLoc DL(Op);
13037 SDValue Val = Op.getOperand(0);
13038 SDValue Mask = Op.getOperand(1);
13039 SDValue Passthru = Op.getOperand(2);
13040
13041 MVT VT = Val.getSimpleValueType();
13042 MVT XLenVT = Subtarget.getXLenVT();
13043 MVT ContainerVT = VT;
13044 if (VT.isFixedLengthVector()) {
13045 ContainerVT = getContainerForFixedLengthVector(VT);
13046 MVT MaskVT = getMaskTypeFor(ContainerVT);
13047 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13048 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13049 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
13050 }
13051
13052 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13053 SDValue Res =
13054 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13055 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13056 Passthru, Val, Mask, VL);
13057
13058 if (VT.isFixedLengthVector())
13059 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
13060
13061 return Res;
13062}
13063
13064SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
13065 SelectionDAG &DAG) const {
13066 unsigned Opc = Op.getOpcode();
13067 SDLoc DL(Op);
13068 SDValue Chain = Op.getOperand(0);
13069 SDValue Op1 = Op.getOperand(1);
13070 SDValue Op2 = Op.getOperand(2);
13071 SDValue CC = Op.getOperand(3);
13072 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
13073 MVT VT = Op.getSimpleValueType();
13074 MVT InVT = Op1.getSimpleValueType();
13075
13076 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
13077 // condition codes.
13078 if (Opc == ISD::STRICT_FSETCCS) {
13079 // Expand strict_fsetccs(x, y, oeq) to
13080 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
13081 SDVTList VTList = Op->getVTList();
13082 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
13083 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
13084 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13085 Op2, OLECCVal);
13086 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
13087 Op1, OLECCVal);
13088 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13089 Tmp1.getValue(1), Tmp2.getValue(1));
13090 // Tmp1 and Tmp2 might be the same node.
13091 if (Tmp1 != Tmp2)
13092 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
13093 return DAG.getMergeValues({Tmp1, OutChain}, DL);
13094 }
13095
13096 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
13097 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
13098 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
13099 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13100 Op2, OEQCCVal);
13101 SDValue Res = DAG.getNOT(DL, OEQ, VT);
13102 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
13103 }
13104 }
13105
13106 MVT ContainerInVT = InVT;
13107 if (InVT.isFixedLengthVector()) {
13108 ContainerInVT = getContainerForFixedLengthVector(InVT);
13109 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
13110 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
13111 }
13112 MVT MaskVT = getMaskTypeFor(ContainerInVT);
13113
13114 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
13115
13116 SDValue Res;
13117 if (Opc == ISD::STRICT_FSETCC &&
13118 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
13119 CCVal == ISD::SETOLE)) {
13120 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
13121 // is only active when both input elements are ordered.
13122 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
13123 SDValue OrderMask1 = DAG.getNode(
13124 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13125 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13126 True, VL});
13127 SDValue OrderMask2 = DAG.getNode(
13128 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13129 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13130 True, VL});
13131 Mask =
13132 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
13133 // Use Mask as the passthru operand to let the result be 0 if either of the
13134 // inputs is unordered.
13135 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
13136 DAG.getVTList(MaskVT, MVT::Other),
13137 {Chain, Op1, Op2, CC, Mask, Mask, VL});
13138 } else {
13139 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
13140 : RISCVISD::STRICT_FSETCCS_VL;
13141 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
13142 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
13143 }
13144
13145 if (VT.isFixedLengthVector()) {
13146 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
13147 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
13148 }
13149 return Res;
13150}
13151
13152// Lower vector ABS to smax(X, sub(0, X)).
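// Illustrative sketch of the expected sequence (exact selection may differ):
//   vrsub.vi v9, v8, 0   ; v9 = 0 - v8
//   vmax.vv  v8, v8, v9  ; v8 = smax(v8, -v8)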
13153SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13154 SDLoc DL(Op);
13155 MVT VT = Op.getSimpleValueType();
13156 SDValue X = Op.getOperand(0);
13157
13158 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13159 "Unexpected type for ISD::ABS");
13160
13161 MVT ContainerVT = VT;
13162 if (VT.isFixedLengthVector()) {
13163 ContainerVT = getContainerForFixedLengthVector(VT);
13164 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13165 }
13166
13167 SDValue Mask, VL;
13168 if (Op->getOpcode() == ISD::VP_ABS) {
13169 Mask = Op->getOperand(1);
13170 if (VT.isFixedLengthVector())
13171 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13172 Subtarget);
13173 VL = Op->getOperand(2);
13174 } else
13175 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13176
13177 SDValue SplatZero = DAG.getNode(
13178 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13179 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13180 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13181 DAG.getUNDEF(ContainerVT), Mask, VL);
13182 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13183 DAG.getUNDEF(ContainerVT), Mask, VL);
13184
13185 if (VT.isFixedLengthVector())
13186 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
13187 return Max;
13188}
13189
13190SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
13191 SelectionDAG &DAG) const {
13192 const auto &TSInfo =
13193 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13194
13195 unsigned NewOpc = getRISCVVLOp(Op);
13196 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
13197 bool HasMask = TSInfo.hasMaskOp(NewOpc);
13198
13199 MVT VT = Op.getSimpleValueType();
13200 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13201
13202 // Create list of operands by converting existing ones to scalable types.
13203 SmallVector<SDValue, 6> Ops;
13204 for (const SDValue &V : Op->op_values()) {
13205 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13206
13207 // Pass through non-vector operands.
13208 if (!V.getValueType().isVector()) {
13209 Ops.push_back(V);
13210 continue;
13211 }
13212
13213 // "cast" fixed length vector to a scalable vector.
13214 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
13215 "Only fixed length vectors are supported!");
13216 MVT VContainerVT = ContainerVT.changeVectorElementType(
13217 V.getSimpleValueType().getVectorElementType());
13218 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
13219 }
13220
13221 SDLoc DL(Op);
13222 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13223 if (HasPassthruOp)
13224 Ops.push_back(DAG.getUNDEF(ContainerVT));
13225 if (HasMask)
13226 Ops.push_back(Mask);
13227 Ops.push_back(VL);
13228
13229 // StrictFP operations have two result values. Their lowered result should
13230 // have the same result count.
13231 if (Op->isStrictFPOpcode()) {
13232 SDValue ScalableRes =
13233 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
13234 Op->getFlags());
13235 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13236 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
13237 }
13238
13239 SDValue ScalableRes =
13240 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
13241 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13242}
13243
13244// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
13245// * Operands of each node are assumed to be in the same order.
13246// * The EVL operand is promoted from i32 to i64 on RV64.
13247// * Fixed-length vectors are converted to their scalable-vector container
13248// types.
13249SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
13250 const auto &TSInfo =
13251 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13252
13253 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13254 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
13255
13256 SDLoc DL(Op);
13257 MVT VT = Op.getSimpleValueType();
13258 SmallVector<SDValue, 16> Ops;
13259
13260 MVT ContainerVT = VT;
13261 if (VT.isFixedLengthVector())
13262 ContainerVT = getContainerForFixedLengthVector(VT);
13263
13264 for (const auto &OpIdx : enumerate(Op->ops())) {
13265 SDValue V = OpIdx.value();
13266 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13267 // Add dummy passthru value before the mask. Or if there isn't a mask,
13268 // before EVL.
13269 if (HasPassthruOp) {
13270 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
13271 if (MaskIdx) {
13272 if (*MaskIdx == OpIdx.index())
13273 Ops.push_back(DAG.getUNDEF(ContainerVT));
13274 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
13275 OpIdx.index()) {
13276 if (Op.getOpcode() == ISD::VP_MERGE) {
13277 // For VP_MERGE, copy the false operand instead of an undef value.
13278 Ops.push_back(Ops.back());
13279 } else {
13280 assert(Op.getOpcode() == ISD::VP_SELECT);
13281 // For VP_SELECT, add an undef value.
13282 Ops.push_back(DAG.getUNDEF(ContainerVT));
13283 }
13284 }
13285 }
13286 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
13287 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
13288 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
13289 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
13290 Subtarget.getXLenVT()));
13291 // Pass through operands which aren't fixed-length vectors.
13292 if (!V.getValueType().isFixedLengthVector()) {
13293 Ops.push_back(V);
13294 continue;
13295 }
13296 // "cast" fixed length vector to a scalable vector.
13297 MVT OpVT = V.getSimpleValueType();
13298 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
13299 assert(useRVVForFixedLengthVectorVT(OpVT) &&
13300 "Only fixed length vectors are supported!");
13301 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
13302 }
13303
13304 if (!VT.isFixedLengthVector())
13305 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
13306
13307 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
13308
13309 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
13310}
13311
13312SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
13313 SelectionDAG &DAG) const {
13314 SDLoc DL(Op);
13315 MVT VT = Op.getSimpleValueType();
13316
13317 SDValue Src = Op.getOperand(0);
13318 // NOTE: Mask is dropped.
13319 SDValue VL = Op.getOperand(2);
13320
13321 MVT ContainerVT = VT;
13322 if (VT.isFixedLengthVector()) {
13323 ContainerVT = getContainerForFixedLengthVector(VT);
13324 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
13325 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13326 }
13327
13328 MVT XLenVT = Subtarget.getXLenVT();
13329 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13330 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13331 DAG.getUNDEF(ContainerVT), Zero, VL);
13332
13333 SDValue SplatValue = DAG.getSignedConstant(
13334 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
13335 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13336 DAG.getUNDEF(ContainerVT), SplatValue, VL);
13337
13338 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
13339 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
13340 if (!VT.isFixedLengthVector())
13341 return Result;
13342 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13343}
13344
13345SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
13346 SelectionDAG &DAG) const {
13347 SDLoc DL(Op);
13348 MVT VT = Op.getSimpleValueType();
13349
13350 SDValue Op1 = Op.getOperand(0);
13351 SDValue Op2 = Op.getOperand(1);
13352 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13353 // NOTE: Mask is dropped.
13354 SDValue VL = Op.getOperand(4);
13355
13356 MVT ContainerVT = VT;
13357 if (VT.isFixedLengthVector()) {
13358 ContainerVT = getContainerForFixedLengthVector(VT);
13359 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13360 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13361 }
13362
13363 SDValue Result;
13364 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13365
13366 switch (Condition) {
13367 default:
13368 break;
13369 // X != Y --> (X^Y)
13370 case ISD::SETNE:
13371 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13372 break;
13373 // X == Y --> ~(X^Y)
13374 case ISD::SETEQ: {
13375 SDValue Temp =
13376 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13377 Result =
13378 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
13379 break;
13380 }
13381 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13382 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13383 case ISD::SETGT:
13384 case ISD::SETULT: {
13385 SDValue Temp =
13386 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13387 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
13388 break;
13389 }
13390 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13391 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13392 case ISD::SETLT:
13393 case ISD::SETUGT: {
13394 SDValue Temp =
13395 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13396 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
13397 break;
13398 }
13399 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13400 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13401 case ISD::SETGE:
13402 case ISD::SETULE: {
13403 SDValue Temp =
13404 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13405 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
13406 break;
13407 }
13408 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13409 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13410 case ISD::SETLE:
13411 case ISD::SETUGE: {
13412 SDValue Temp =
13413 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13414 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
13415 break;
13416 }
13417 }
13418
13419 if (!VT.isFixedLengthVector())
13420 return Result;
13421 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13422}
13423
13424// Lower Floating-Point/Integer Type-Convert VP SDNodes
13425SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
13426 SelectionDAG &DAG) const {
13427 SDLoc DL(Op);
13428
13429 SDValue Src = Op.getOperand(0);
13430 SDValue Mask = Op.getOperand(1);
13431 SDValue VL = Op.getOperand(2);
13432 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13433
13434 MVT DstVT = Op.getSimpleValueType();
13435 MVT SrcVT = Src.getSimpleValueType();
13436 if (DstVT.isFixedLengthVector()) {
13437 DstVT = getContainerForFixedLengthVector(DstVT);
13438 SrcVT = getContainerForFixedLengthVector(SrcVT);
13439 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13440 MVT MaskVT = getMaskTypeFor(DstVT);
13441 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13442 }
13443
13444 unsigned DstEltSize = DstVT.getScalarSizeInBits();
13445 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13446
13447 SDValue Result;
13448 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
13449 if (SrcVT.isInteger()) {
13450 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13451
13452 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
13453 ? RISCVISD::VSEXT_VL
13454 : RISCVISD::VZEXT_VL;
13455
13456 // Do we need to do any pre-widening before converting?
13457 if (SrcEltSize == 1) {
13458 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
13459 MVT XLenVT = Subtarget.getXLenVT();
13460 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13461 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13462 DAG.getUNDEF(IntVT), Zero, VL);
13463 SDValue One = DAG.getSignedConstant(
13464 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
13465 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13466 DAG.getUNDEF(IntVT), One, VL);
13467 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
13468 ZeroSplat, DAG.getUNDEF(IntVT), VL);
13469 } else if (DstEltSize > (2 * SrcEltSize)) {
13470 // Widen before converting.
13471 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
13472 DstVT.getVectorElementCount());
13473 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
13474 }
13475
13476 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13477 } else {
13478 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13479 "Wrong input/output vector types");
13480
13481 // Convert f16 to f32 then convert f32 to i64.
13482 if (DstEltSize > (2 * SrcEltSize)) {
13483 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13484 MVT InterimFVT =
13485 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13486 Src =
13487 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
13488 }
13489
13490 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13491 }
13492 } else { // Narrowing + Conversion
13493 if (SrcVT.isInteger()) {
13494 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13495 // First do a narrowing conversion to an FP type half the size, then round
13496 // the result to a smaller FP type if needed.
13497
13498 MVT InterimFVT = DstVT;
13499 if (SrcEltSize > (2 * DstEltSize)) {
13500 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
13501 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13502 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13503 }
13504
13505 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
13506
13507 if (InterimFVT != DstVT) {
13508 Src = Result;
13509 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
13510 }
13511 } else {
13512 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13513 "Wrong input/output vector types");
13514 // First do a narrowing conversion to an integer half the size, then
13515 // truncate if needed.
13516
13517 if (DstEltSize == 1) {
13518 // First convert to the same size integer, then convert to mask using
13519 // setcc.
13520 assert(SrcEltSize >= 16 && "Unexpected FP type!");
13521 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
13522 DstVT.getVectorElementCount());
13523 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13524
13525 // Compare the integer result to 0. The integer should be 0 or 1/-1,
13526 // otherwise the conversion was undefined.
13527 MVT XLenVT = Subtarget.getXLenVT();
13528 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
13529 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
13530 DAG.getUNDEF(InterimIVT), SplatZero, VL);
13531 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
13532 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
13533 DAG.getUNDEF(DstVT), Mask, VL});
13534 } else {
13535 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13536 DstVT.getVectorElementCount());
13537
13538 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13539
13540 while (InterimIVT != DstVT) {
13541 SrcEltSize /= 2;
13542 Src = Result;
13543 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13544 DstVT.getVectorElementCount());
13545 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
13546 Src, Mask, VL);
13547 }
13548 }
13549 }
13550 }
13551
13552 MVT VT = Op.getSimpleValueType();
13553 if (!VT.isFixedLengthVector())
13554 return Result;
13555 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13556}
13557
13558SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
13559 SelectionDAG &DAG) const {
13560 SDLoc DL(Op);
13561 MVT VT = Op.getSimpleValueType();
13562 MVT XLenVT = Subtarget.getXLenVT();
13563
13564 SDValue Mask = Op.getOperand(0);
13565 SDValue TrueVal = Op.getOperand(1);
13566 SDValue FalseVal = Op.getOperand(2);
13567 SDValue VL = Op.getOperand(3);
13568
13569 // Use default legalization if a vector of EVL type would be legal.
13570 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
13571 VT.getVectorElementCount());
13572 if (isTypeLegal(EVLVecVT))
13573 return SDValue();
13574
13575 MVT ContainerVT = VT;
13576 if (VT.isFixedLengthVector()) {
13577 ContainerVT = getContainerForFixedLengthVector(VT);
13578 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
13579 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
13580 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
13581 }
13582
13583 // Promote to a vector of i8.
13584 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
13585
13586 // Promote TrueVal and FalseVal using VLMax.
13587 // FIXME: Is there a better way to do this?
13588 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
13589 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13590 DAG.getUNDEF(PromotedVT),
13591 DAG.getConstant(1, DL, XLenVT), VLMax);
13592 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13593 DAG.getUNDEF(PromotedVT),
13594 DAG.getConstant(0, DL, XLenVT), VLMax);
13595 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
13596 SplatZero, DAG.getUNDEF(PromotedVT), VL);
13597 // Any element past VL uses FalseVal, so use VLMax
13598 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
13599 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
13600
13601 // VP_MERGE the two promoted values.
13602 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
13603 TrueVal, FalseVal, FalseVal, VL);
13604
13605 // Convert back to mask.
13606 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13607 SDValue Result = DAG.getNode(
13608 RISCVISD::SETCC_VL, DL, ContainerVT,
13609 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
13610 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
13611
13612 if (VT.isFixedLengthVector())
13613 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13614 return Result;
13615}
13616
13617SDValue
13618RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
13619 SelectionDAG &DAG) const {
13620 using namespace SDPatternMatch;
13621
13622 SDLoc DL(Op);
13623
13624 SDValue Op1 = Op.getOperand(0);
13625 SDValue Op2 = Op.getOperand(1);
13626 SDValue Offset = Op.getOperand(2);
13627 SDValue Mask = Op.getOperand(3);
13628 SDValue EVL1 = Op.getOperand(4);
13629 SDValue EVL2 = Op.getOperand(5);
13630
13631 const MVT XLenVT = Subtarget.getXLenVT();
13632 MVT VT = Op.getSimpleValueType();
13633 MVT ContainerVT = VT;
13634 if (VT.isFixedLengthVector()) {
13635 ContainerVT = getContainerForFixedLengthVector(VT);
13636 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13637 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13638 MVT MaskVT = getMaskTypeFor(ContainerVT);
13639 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13640 }
13641
13642 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
13643 if (IsMaskVector) {
13644 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
13645
13646 // Expand input operands
13647 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13648 DAG.getUNDEF(ContainerVT),
13649 DAG.getConstant(1, DL, XLenVT), EVL1);
13650 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13651 DAG.getUNDEF(ContainerVT),
13652 DAG.getConstant(0, DL, XLenVT), EVL1);
13653 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
13654 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
13655
13656 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13657 DAG.getUNDEF(ContainerVT),
13658 DAG.getConstant(1, DL, XLenVT), EVL2);
13659 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13660 DAG.getUNDEF(ContainerVT),
13661 DAG.getConstant(0, DL, XLenVT), EVL2);
13662 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
13663 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
13664 }
13665
13666 auto getVectorFirstEle = [](SDValue Vec) {
13667 SDValue FirstEle;
13668 if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
13669 return FirstEle;
13670
13671 if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
13672        Vec.getOpcode() == ISD::BUILD_VECTOR)
13673      return Vec.getOperand(0);
13674
13675 return SDValue();
13676 };
13677
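  // If the splice keeps only the first element of Op1 (Offset == 0 and
  // EVL1 == 1) and that element's scalar value is visible, the whole splice
  // reduces to sliding that scalar up in front of Op2, i.e. a single
  // vslide1up.vx / vfslide1up.vf.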
13678 if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
13679 if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
13680 MVT EltVT = ContainerVT.getVectorElementType();
13681      SDValue Result;
13682      if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
13683 EltVT == MVT::bf16) {
13684 EltVT = EltVT.changeTypeToInteger();
13685 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
13686 Op2 = DAG.getBitcast(ContainerVT, Op2);
13687 FirstEle =
13688 DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
13689 }
13690 Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13691 : RISCVISD::VSLIDE1UP_VL,
13692 DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
13693 FirstEle, Mask, EVL2);
13694 Result = DAG.getBitcast(
13695          ContainerVT.changeVectorElementType(VT.getVectorElementType()),
13696          Result);
13697 return VT.isFixedLengthVector()
13698 ? convertFromScalableVector(VT, Result, DAG, Subtarget)
13699 : Result;
13700 }
13701
13702 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
13703 SDValue DownOffset, UpOffset;
13704 if (ImmValue >= 0) {
13705    // The operand is a TargetConstant; we need to rebuild it as a regular
13706 // constant.
13707 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
13708 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
13709 } else {
13710    // The operand is a TargetConstant; we need to rebuild it as a regular
13711 // constant rather than negating the original operand.
13712 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
13713 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
13714 }
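  // E.g. for Offset=2 and EVL1=8, Op1 is slid down by 2 so its elements 2..7
  // land at positions 0..5, and Op2 is slid up by UpOffset = 8 - 2 = 6 on top.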
13715
13716 if (ImmValue != 0)
13717 Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13718 DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
13719 Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
13720 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
13721 UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
13722
13723 if (IsMaskVector) {
13724 // Truncate Result back to a mask vector (Result has same EVL as Op2)
13725 Result = DAG.getNode(
13726 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
13727 {Result, DAG.getConstant(0, DL, ContainerVT),
13728 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
13729 Mask, EVL2});
13730 }
13731
13732 if (!VT.isFixedLengthVector())
13733 return Result;
13734 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13735}
13736
13737SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
13738 SelectionDAG &DAG) const {
13739 SDLoc DL(Op);
13740 SDValue Val = Op.getOperand(0);
13741 SDValue Mask = Op.getOperand(1);
13742 SDValue VL = Op.getOperand(2);
13743 MVT VT = Op.getSimpleValueType();
13744
13745 MVT ContainerVT = VT;
13746 if (VT.isFixedLengthVector()) {
13747 ContainerVT = getContainerForFixedLengthVector(VT);
13748 MVT MaskVT = getMaskTypeFor(ContainerVT);
13749 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13750 }
13751
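  // A scalar cannot be broadcast directly into an i1 mask register: constant
  // splats become VMSET/VMCLR, while a variable scalar is splatted into an i8
  // vector and compared against zero to form the mask.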
13752  SDValue Result;
13753  if (VT.getScalarType() == MVT::i1) {
13754 if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
13755 Result =
13756 DAG.getNode(C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
13757 ContainerVT, VL);
13758 } else {
13759 MVT WidenVT = ContainerVT.changeVectorElementType(MVT::i8);
13760 SDValue LHS =
13761 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, WidenVT, DAG.getUNDEF(WidenVT),
13762 DAG.getZExtOrTrunc(Val, DL, Subtarget.getXLenVT()), VL);
13763 SDValue RHS = DAG.getConstant(0, DL, WidenVT);
13764 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13765 {LHS, RHS, DAG.getCondCode(ISD::SETNE),
13766 DAG.getUNDEF(ContainerVT), Mask, VL});
13767 }
13768 } else {
13769 Result =
13770 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
13771 }
13772
13773 if (!VT.isFixedLengthVector())
13774 return Result;
13775 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13776}
13777
13778SDValue
13779RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
13780 SelectionDAG &DAG) const {
13781 SDLoc DL(Op);
13782 MVT VT = Op.getSimpleValueType();
13783 MVT XLenVT = Subtarget.getXLenVT();
13784
13785 SDValue Op1 = Op.getOperand(0);
13786 SDValue Mask = Op.getOperand(1);
13787 SDValue EVL = Op.getOperand(2);
13788
13789 MVT ContainerVT = VT;
13790 if (VT.isFixedLengthVector()) {
13791 ContainerVT = getContainerForFixedLengthVector(VT);
13792 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13793 MVT MaskVT = getMaskTypeFor(ContainerVT);
13794 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13795 }
13796
13797 MVT GatherVT = ContainerVT;
13798 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
13799 // Check if we are working with mask vectors
13800 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
13801 if (IsMaskVector) {
13802 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
13803
13804 // Expand input operand
13805 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13806 DAG.getUNDEF(IndicesVT),
13807 DAG.getConstant(1, DL, XLenVT), EVL);
13808 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13809 DAG.getUNDEF(IndicesVT),
13810 DAG.getConstant(0, DL, XLenVT), EVL);
13811 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
13812 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
13813 }
13814
13815 unsigned EltSize = GatherVT.getScalarSizeInBits();
13816 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
13817 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
13818 unsigned MaxVLMAX =
13819 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
13820
13821 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
13822 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
13823 // to use vrgatherei16.vv.
13824 // TODO: It's also possible to use vrgatherei16.vv for other types to
13825 // decrease register width for the index calculation.
13826 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
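  // E.g. with VLEN=2048 and LMUL=8, an SEW=8 vector can hold 2048 elements,
  // more than an i8 index (max 255) can address, hence the ei16 index form.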
13827 if (MaxVLMAX > 256 && EltSize == 8) {
13828 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
13829 // Split the vector in half and reverse each half using a full register
13830 // reverse.
13831 // Swap the halves and concatenate them.
13832 // Slide the concatenated result by (VLMax - VL).
13833 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
13834 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
13835 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
13836
13837 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
13838 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
13839
13840 // Reassemble the low and high pieces reversed.
13841 // NOTE: this Result is unmasked (because we do not need masks for
13842 // shuffles). If in the future this has to change, we can use a SELECT_VL
13843 // between Result and UNDEF using the mask originally passed to VP_REVERSE
13844 SDValue Result =
13845 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
13846
13847 // Slide off any elements from past EVL that were reversed into the low
13848 // elements.
13849 unsigned MinElts = GatherVT.getVectorMinNumElements();
13850 SDValue VLMax =
13851 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
13852 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
13853
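      // E.g. with VLMAX=32 and EVL=5, the full-register reverse leaves the 5
      // live elements at positions 27..31, so sliding down by 32 - 5 = 27
      // returns them to positions 0..4.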
13854 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
13855 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
13856
13857 if (IsMaskVector) {
13858 // Truncate Result back to a mask vector
13859 Result =
13860 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13861 {Result, DAG.getConstant(0, DL, GatherVT),
13862                       DAG.getCondCode(ISD::SETNE),
13863                       DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13864 }
13865
13866 if (!VT.isFixedLengthVector())
13867 return Result;
13868 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13869 }
13870
13871 // Just promote the int type to i16 which will double the LMUL.
13872 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
13873 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13874 }
13875
13876 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
13877 SDValue VecLen =
13878 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
13879 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13880 DAG.getUNDEF(IndicesVT), VecLen, EVL);
13881 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
13882 DAG.getUNDEF(IndicesVT), Mask, EVL);
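  // E.g. with EVL=4 the gather indices are (4-1) - [0,1,2,3] = [3,2,1,0],
  // reversing exactly the first EVL elements.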
13883 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
13884 DAG.getUNDEF(GatherVT), Mask, EVL);
13885
13886 if (IsMaskVector) {
13887 // Truncate Result back to a mask vector
13888 Result = DAG.getNode(
13889 RISCVISD::SETCC_VL, DL, ContainerVT,
13890 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
13891 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13892 }
13893
13894 if (!VT.isFixedLengthVector())
13895 return Result;
13896 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13897}
13898
13899SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
13900 SelectionDAG &DAG) const {
13901 MVT VT = Op.getSimpleValueType();
13902 if (VT.getVectorElementType() != MVT::i1)
13903 return lowerVPOp(Op, DAG);
13904
13905 // It is safe to drop mask parameter as masked-off elements are undef.
13906 SDValue Op1 = Op->getOperand(0);
13907 SDValue Op2 = Op->getOperand(1);
13908 SDValue VL = Op->getOperand(3);
13909
13910 MVT ContainerVT = VT;
13911 const bool IsFixed = VT.isFixedLengthVector();
13912 if (IsFixed) {
13913 ContainerVT = getContainerForFixedLengthVector(VT);
13914 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13915 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13916 }
13917
13918 SDLoc DL(Op);
13919 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
13920 if (!IsFixed)
13921 return Val;
13922 return convertFromScalableVector(VT, Val, DAG, Subtarget);
13923}
13924
13925SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
13926 SelectionDAG &DAG) const {
13927 SDLoc DL(Op);
13928 MVT XLenVT = Subtarget.getXLenVT();
13929 MVT VT = Op.getSimpleValueType();
13930 MVT ContainerVT = VT;
13931 if (VT.isFixedLengthVector())
13932 ContainerVT = getContainerForFixedLengthVector(VT);
13933
13934 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13935
13936 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
13937 // Check if the mask is known to be all ones
13938 SDValue Mask = VPNode->getMask();
13939 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13940
13941 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
13942 : Intrinsic::riscv_vlse_mask,
13943 DL, XLenVT);
13944 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
13945 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
13946 VPNode->getStride()};
13947 if (!IsUnmasked) {
13948 if (VT.isFixedLengthVector()) {
13949 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13950 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13951 }
13952 Ops.push_back(Mask);
13953 }
13954 Ops.push_back(VPNode->getVectorLength());
13955 if (!IsUnmasked) {
13956 SDValue Policy =
13957        DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
13958    Ops.push_back(Policy);
13959 }
13960
13961 SDValue Result =
13962      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
13963                              VPNode->getMemoryVT(), VPNode->getMemOperand());
13964 SDValue Chain = Result.getValue(1);
13965
13966 if (VT.isFixedLengthVector())
13967 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13968
13969 return DAG.getMergeValues({Result, Chain}, DL);
13970}
13971
13972SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
13973 SelectionDAG &DAG) const {
13974 SDLoc DL(Op);
13975 MVT XLenVT = Subtarget.getXLenVT();
13976
13977 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
13978 SDValue StoreVal = VPNode->getValue();
13979 MVT VT = StoreVal.getSimpleValueType();
13980 MVT ContainerVT = VT;
13981 if (VT.isFixedLengthVector()) {
13982 ContainerVT = getContainerForFixedLengthVector(VT);
13983 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
13984 }
13985
13986 // Check if the mask is known to be all ones
13987 SDValue Mask = VPNode->getMask();
13988 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13989
13990 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
13991 : Intrinsic::riscv_vsse_mask,
13992 DL, XLenVT);
13993 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
13994 VPNode->getBasePtr(), VPNode->getStride()};
13995 if (!IsUnmasked) {
13996 if (VT.isFixedLengthVector()) {
13997 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13998 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13999 }
14000 Ops.push_back(Mask);
14001 }
14002 Ops.push_back(VPNode->getVectorLength());
14003
14004 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
14005 Ops, VPNode->getMemoryVT(),
14006 VPNode->getMemOperand());
14007}
14008
14009// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
14010// matched to an RVV indexed load. The RVV indexed load instructions only
14011// support the "unsigned unscaled" addressing mode; indices are implicitly
14012// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14013// signed or scaled indexing is extended to the XLEN value type and scaled
14014// accordingly.
14015SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
14016 SelectionDAG &DAG) const {
14017 SDLoc DL(Op);
14018 MVT VT = Op.getSimpleValueType();
14019
14020 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14021 EVT MemVT = MemSD->getMemoryVT();
14022 MachineMemOperand *MMO = MemSD->getMemOperand();
14023 SDValue Chain = MemSD->getChain();
14024 SDValue BasePtr = MemSD->getBasePtr();
14025
14026 [[maybe_unused]] ISD::LoadExtType LoadExtType;
14027 SDValue Index, Mask, PassThru, VL;
14028
14029 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
14030 Index = VPGN->getIndex();
14031 Mask = VPGN->getMask();
14032 PassThru = DAG.getUNDEF(VT);
14033 VL = VPGN->getVectorLength();
14034 // VP doesn't support extending loads.
14035    LoadExtType = ISD::NON_EXTLOAD;
14036  } else {
14037 // Else it must be a MGATHER.
14038 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
14039 Index = MGN->getIndex();
14040 Mask = MGN->getMask();
14041 PassThru = MGN->getPassThru();
14042 LoadExtType = MGN->getExtensionType();
14043 }
14044
14045 MVT IndexVT = Index.getSimpleValueType();
14046 MVT XLenVT = Subtarget.getXLenVT();
14047
14049 "Unexpected VTs!");
14050 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14051 // Targets have to explicitly opt-in for extending vector loads.
14052 assert(LoadExtType == ISD::NON_EXTLOAD &&
14053 "Unexpected extending MGATHER/VP_GATHER");
14054
14055 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14056 // the selection of the masked intrinsics doesn't do this for us.
14057 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14058
14059 MVT ContainerVT = VT;
14060 if (VT.isFixedLengthVector()) {
14061 ContainerVT = getContainerForFixedLengthVector(VT);
14062 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14063 ContainerVT.getVectorElementCount());
14064
14065 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14066
14067 if (!IsUnmasked) {
14068 MVT MaskVT = getMaskTypeFor(ContainerVT);
14069 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14070 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
14071 }
14072 }
14073
14074 if (!VL)
14075 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14076
14077 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14078 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14079 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14080 }
14081
14082 unsigned IntID =
14083 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
14084 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14085 if (IsUnmasked)
14086 Ops.push_back(DAG.getUNDEF(ContainerVT));
14087 else
14088 Ops.push_back(PassThru);
14089 Ops.push_back(BasePtr);
14090 Ops.push_back(Index);
14091 if (!IsUnmasked)
14092 Ops.push_back(Mask);
14093 Ops.push_back(VL);
14094 if (!IsUnmasked)
14095 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
14096
14097 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14098 SDValue Result =
14099 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14100 Chain = Result.getValue(1);
14101
14102 if (VT.isFixedLengthVector())
14103 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14104
14105 return DAG.getMergeValues({Result, Chain}, DL);
14106}
14107
14108// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14109// matched to an RVV indexed store. The RVV indexed store instructions only
14110// support the "unsigned unscaled" addressing mode; indices are implicitly
14111// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14112// signed or scaled indexing is extended to the XLEN value type and scaled
14113// accordingly.
14114SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14115 SelectionDAG &DAG) const {
14116 SDLoc DL(Op);
14117 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14118 EVT MemVT = MemSD->getMemoryVT();
14119 MachineMemOperand *MMO = MemSD->getMemOperand();
14120 SDValue Chain = MemSD->getChain();
14121 SDValue BasePtr = MemSD->getBasePtr();
14122
14123 [[maybe_unused]] bool IsTruncatingStore = false;
14124 SDValue Index, Mask, Val, VL;
14125
14126 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14127 Index = VPSN->getIndex();
14128 Mask = VPSN->getMask();
14129 Val = VPSN->getValue();
14130 VL = VPSN->getVectorLength();
14131 // VP doesn't support truncating stores.
14132 IsTruncatingStore = false;
14133 } else {
14134 // Else it must be a MSCATTER.
14135 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14136 Index = MSN->getIndex();
14137 Mask = MSN->getMask();
14138 Val = MSN->getValue();
14139 IsTruncatingStore = MSN->isTruncatingStore();
14140 }
14141
14142 MVT VT = Val.getSimpleValueType();
14143 MVT IndexVT = Index.getSimpleValueType();
14144 MVT XLenVT = Subtarget.getXLenVT();
14145
14146  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
14147         "Unexpected VTs!");
14148 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14149 // Targets have to explicitly opt-in for extending vector loads and
14150 // truncating vector stores.
14151 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14152
14153 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14154 // the selection of the masked intrinsics doesn't do this for us.
14155 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14156
14157 MVT ContainerVT = VT;
14158 if (VT.isFixedLengthVector()) {
14159 ContainerVT = getContainerForFixedLengthVector(VT);
14160 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14161 ContainerVT.getVectorElementCount());
14162
14163 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14164 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14165
14166 if (!IsUnmasked) {
14167 MVT MaskVT = getMaskTypeFor(ContainerVT);
14168 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14169 }
14170 }
14171
14172 if (!VL)
14173 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14174
14175 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14176 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14177 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14178 }
14179
14180 unsigned IntID =
14181 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14182 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14183 Ops.push_back(Val);
14184 Ops.push_back(BasePtr);
14185 Ops.push_back(Index);
14186 if (!IsUnmasked)
14187 Ops.push_back(Mask);
14188 Ops.push_back(VL);
14189
14190  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
14191                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14192}
14193
14194SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14195 SelectionDAG &DAG) const {
14196 const MVT XLenVT = Subtarget.getXLenVT();
14197 SDLoc DL(Op);
14198 SDValue Chain = Op->getOperand(0);
14199 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14200 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14201 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14202
14203 // Encoding used for rounding mode in RISC-V differs from that used in
14204  // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index into a
14205  // table, which consists of a sequence of 4-bit fields, each representing the
14206  // corresponding FLT_ROUNDS mode.
14207  static const int Table =
14208      (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
14209      (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
14210      (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
14211      (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
14212      (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
14213
14214 SDValue Shift =
14215 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14216 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14217 DAG.getConstant(Table, DL, XLenVT), Shift);
14218 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14219 DAG.getConstant(7, DL, XLenVT));
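  // E.g. frm=1 (RTZ) gives Shift=4, so the 4-bit field at bits [6:4] of Table
  // is selected, which holds the FLT_ROUNDS value for round-toward-zero.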
14220
14221 return DAG.getMergeValues({Masked, Chain}, DL);
14222}
14223
14224SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14225 SelectionDAG &DAG) const {
14226 const MVT XLenVT = Subtarget.getXLenVT();
14227 SDLoc DL(Op);
14228 SDValue Chain = Op->getOperand(0);
14229 SDValue RMValue = Op->getOperand(1);
14230 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14231
14232 // Encoding used for rounding mode in RISC-V differs from that used in
14233  // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
14234  // a table, which consists of a sequence of 4-bit fields, each representing the
14235  // corresponding RISC-V mode.
14236  static const unsigned Table =
14237      (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
14238      (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
14239      (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
14240      (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
14241      (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
14242
14243 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
14244
14245 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
14246 DAG.getConstant(2, DL, XLenVT));
14247 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14248 DAG.getConstant(Table, DL, XLenVT), Shift);
14249 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14250 DAG.getConstant(0x7, DL, XLenVT));
14251 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14252 RMValue);
14253}
14254
14255SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
14256 SelectionDAG &DAG) const {
14257 const MVT XLenVT = Subtarget.getXLenVT();
14258 SDLoc DL(Op);
14259 SDValue Chain = Op->getOperand(0);
14260 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14261 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14262 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14263}
14264
14265SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
14266 SelectionDAG &DAG) const {
14267 const MVT XLenVT = Subtarget.getXLenVT();
14268 SDLoc DL(Op);
14269 SDValue Chain = Op->getOperand(0);
14270 SDValue EnvValue = Op->getOperand(1);
14271 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14272
14273 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14274 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14275 EnvValue);
14276}
14277
14278SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
14279 SelectionDAG &DAG) const {
14280 const MVT XLenVT = Subtarget.getXLenVT();
14281 SDLoc DL(Op);
14282 SDValue Chain = Op->getOperand(0);
14283 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
14284 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14285
14286 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14287 EnvValue);
14288}
14289
14292
14293SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
14294 SelectionDAG &DAG) const {
14295 const MVT XLenVT = Subtarget.getXLenVT();
14296 SDLoc DL(Op);
14297 SDValue Chain = Op->getOperand(0);
14298 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14299 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14300 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14301 Chain = Result.getValue(1);
14302 return DAG.getMergeValues({Result, Chain}, DL);
14303}
14304
14305SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
14306 SelectionDAG &DAG) const {
14307 const MVT XLenVT = Subtarget.getXLenVT();
14308 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14309 SDLoc DL(Op);
14310 SDValue Chain = Op->getOperand(0);
14311 SDValue EnvValue = Op->getOperand(1);
14312 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14313 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14314
14315 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14316 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
14317 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14318 ModeMask);
14319 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
14320 EnvValue);
14321}
14322
14323SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
14324 SelectionDAG &DAG) const {
14325 const MVT XLenVT = Subtarget.getXLenVT();
14326 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14327 SDLoc DL(Op);
14328 SDValue Chain = Op->getOperand(0);
14329 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14330 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14331
14332 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14333 ModeMask);
14334}
14335
14336SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
14337 SelectionDAG &DAG) const {
14338 MachineFunction &MF = DAG.getMachineFunction();
14339
14340 bool isRISCV64 = Subtarget.is64Bit();
14341 EVT PtrVT = getPointerTy(DAG.getDataLayout());
14342
14343 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
14344 return DAG.getFrameIndex(FI, PtrVT);
14345}
14346
14347// Returns the opcode of the target-specific SDNode that implements the 32-bit
14348// form of the given Opcode.
14349static unsigned getRISCVWOpcode(unsigned Opcode) {
14350 switch (Opcode) {
14351 default:
14352 llvm_unreachable("Unexpected opcode");
14353 case ISD::SHL:
14354 return RISCVISD::SLLW;
14355 case ISD::SRA:
14356 return RISCVISD::SRAW;
14357 case ISD::SRL:
14358 return RISCVISD::SRLW;
14359 case ISD::SDIV:
14360 return RISCVISD::DIVW;
14361 case ISD::UDIV:
14362 return RISCVISD::DIVUW;
14363 case ISD::UREM:
14364 return RISCVISD::REMUW;
14365 case ISD::ROTL:
14366 return RISCVISD::ROLW;
14367 case ISD::ROTR:
14368 return RISCVISD::RORW;
14369 }
14370}
14371
14372// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
14373// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
14374// otherwise be promoted to i64, making it difficult to select the
14375// SLLW/DIVUW/.../*W node later, because the fact that the operation was originally of
14376// type i8/i16/i32 is lost.
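// E.g. on RV64, (i32 (srl X, Y)) becomes (trunc (SRLW (anyext X), (anyext Y))).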
14377static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
14378                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
14379 SDLoc DL(N);
14380 unsigned WOpcode = getRISCVWOpcode(N->getOpcode());
14381 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
14382 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
14383 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
14384 // ReplaceNodeResults requires we maintain the same type for the return value.
14385 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
14386}
14387
14388// Converts the given 32-bit operation to an i64 operation with sign extension
14389// semantics, in order to reduce the number of sign extension instructions.
14390static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
14391  SDLoc DL(N);
14392 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14393 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14394 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
14395 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14396 DAG.getValueType(MVT::i32));
14397 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
14398}
14399
14400void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
14401                                             SmallVectorImpl<SDValue> &Results,
14402                                             SelectionDAG &DAG) const {
14403 SDLoc DL(N);
14404 switch (N->getOpcode()) {
14405 default:
14406 llvm_unreachable("Don't know how to custom type legalize this operation!");
14407  case ISD::STRICT_FP_TO_SINT:
14408  case ISD::STRICT_FP_TO_UINT:
14409  case ISD::FP_TO_SINT:
14410 case ISD::FP_TO_UINT: {
14411 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14412 "Unexpected custom legalisation");
14413 bool IsStrict = N->isStrictFPOpcode();
14414 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
14415 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
14416 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
14417 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14418        TargetLowering::TypeSoftenFloat) {
14419      if (!isTypeLegal(Op0.getValueType()))
14420 return;
14421 if (IsStrict) {
14422 SDValue Chain = N->getOperand(0);
14423 // In absence of Zfh, promote f16 to f32, then convert.
14424 if (Op0.getValueType() == MVT::f16 &&
14425 !Subtarget.hasStdExtZfhOrZhinx()) {
14426 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
14427 {Chain, Op0});
14428 Chain = Op0.getValue(1);
14429 }
14430 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
14431 : RISCVISD::STRICT_FCVT_WU_RV64;
14432 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14433 SDValue Res = DAG.getNode(
14434 Opc, DL, VTs, Chain, Op0,
14435 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14436 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14437 Results.push_back(Res.getValue(1));
14438 return;
14439 }
14440 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
14441 // convert.
14442 if ((Op0.getValueType() == MVT::f16 &&
14443 !Subtarget.hasStdExtZfhOrZhinx()) ||
14444 Op0.getValueType() == MVT::bf16)
14445 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14446
14447 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14448 SDValue Res =
14449 DAG.getNode(Opc, DL, MVT::i64, Op0,
14450 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14451 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14452 return;
14453 }
14454 // If the FP type needs to be softened, emit a library call using the 'si'
14455 // version. If we left it to default legalization we'd end up with 'di'. If
14456 // the FP type doesn't need to be softened just let generic type
14457 // legalization promote the result type.
14458 RTLIB::Libcall LC;
14459 if (IsSigned)
14460 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
14461 else
14462 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
14463 MakeLibCallOptions CallOptions;
14464 EVT OpVT = Op0.getValueType();
14465 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
14466 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
14467 SDValue Result;
14468 std::tie(Result, Chain) =
14469 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
14470 Results.push_back(Result);
14471 if (IsStrict)
14472 Results.push_back(Chain);
14473 break;
14474 }
14475 case ISD::LROUND: {
14476 SDValue Op0 = N->getOperand(0);
14477 EVT Op0VT = Op0.getValueType();
14478 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14479        TargetLowering::TypeSoftenFloat) {
14480      if (!isTypeLegal(Op0VT))
14481 return;
14482
14483 // In absence of Zfh, promote f16 to f32, then convert.
14484 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
14485 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14486
14487 SDValue Res =
14488 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
14489 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
14490 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14491 return;
14492 }
14493 // If the FP type needs to be softened, emit a library call to lround. We'll
14494 // need to truncate the result. We assume any value that doesn't fit in i32
14495 // is allowed to return an unspecified value.
14496 RTLIB::Libcall LC =
14497 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
14498 MakeLibCallOptions CallOptions;
14499 EVT OpVT = Op0.getValueType();
14500 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
14501 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
14502 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
14503 Results.push_back(Result);
14504 break;
14505 }
14506 case ISD::READCYCLECOUNTER:
14507 case ISD::READSTEADYCOUNTER: {
14508 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
14509 "has custom type legalization on riscv32");
14510
14511 SDValue LoCounter, HiCounter;
14512 MVT XLenVT = Subtarget.getXLenVT();
14513 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
14514 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
14515 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
14516 } else {
14517 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
14518 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
14519 }
14520 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
14521 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
14522 N->getOperand(0), LoCounter, HiCounter);
14523
14524 Results.push_back(
14525 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
14526 Results.push_back(RCW.getValue(2));
14527 break;
14528 }
14529 case ISD::LOAD: {
14530 if (!ISD::isNON_EXTLoad(N))
14531 return;
14532
14533 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
14534 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
14535    LoadSDNode *Ld = cast<LoadSDNode>(N);
14536
14537 if (N->getValueType(0) == MVT::i64) {
14538 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
14539 "Unexpected custom legalisation");
14540
14541 if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
14542 return;
14543
14544 SDLoc DL(N);
14545 SDValue Result = DAG.getMemIntrinsicNode(
14546 RISCVISD::LD_RV32, DL,
14547 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
14548 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
14549 SDValue Lo = Result.getValue(0);
14550 SDValue Hi = Result.getValue(1);
14551 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
14552 Results.append({Pair, Result.getValue(2)});
14553 return;
14554 }
14555
14556 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14557 "Unexpected custom legalisation");
14558
14559 SDLoc dl(N);
14560 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
14561 Ld->getBasePtr(), Ld->getMemoryVT(),
14562 Ld->getMemOperand());
14563 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
14564 Results.push_back(Res.getValue(1));
14565 return;
14566 }
14567 case ISD::MUL: {
14568 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
14569 unsigned XLen = Subtarget.getXLen();
14570 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
14571 if (Size > XLen) {
14572 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
14573 SDValue LHS = N->getOperand(0);
14574 SDValue RHS = N->getOperand(1);
14575 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
14576
14577 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
14578 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
14579 // We need exactly one side to be unsigned.
14580 if (LHSIsU == RHSIsU)
14581 return;
14582
14583 auto MakeMULPair = [&](SDValue S, SDValue U) {
14584 MVT XLenVT = Subtarget.getXLenVT();
14585 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
14586 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
14587 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
14588 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
14589 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
14590 };
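      // E.g. for an i128 multiply on RV64 where one operand is zero-extended
      // from 64 bits and the other is sign-extended, the full product is just
      // {mulhsu(S, U), mul(S, U)} instead of the generic multi-part expansion.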
14591
14592 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
14593 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
14594
14595 // The other operand should be signed, but still prefer MULH when
14596 // possible.
14597 if (RHSIsU && LHSIsS && !RHSIsS)
14598 Results.push_back(MakeMULPair(LHS, RHS));
14599 else if (LHSIsU && RHSIsS && !LHSIsS)
14600 Results.push_back(MakeMULPair(RHS, LHS));
14601
14602 return;
14603 }
14604 [[fallthrough]];
14605 }
14606 case ISD::ADD:
14607 case ISD::SUB:
14608 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14609 "Unexpected custom legalisation");
14610 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
14611 break;
14612 case ISD::SHL:
14613 case ISD::SRA:
14614 case ISD::SRL:
14615 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14616 "Unexpected custom legalisation");
14617 if (N->getOperand(1).getOpcode() != ISD::Constant) {
14618 // If we can use a BSET instruction, allow default promotion to apply.
14619 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
14620 isOneConstant(N->getOperand(0)))
14621 break;
14622 Results.push_back(customLegalizeToWOp(N, DAG));
14623 break;
14624 }
14625
14626 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
14627 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
14628 // shift amount.
14629 if (N->getOpcode() == ISD::SHL) {
14630 SDLoc DL(N);
14631 SDValue NewOp0 =
14632 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14633 SDValue NewOp1 =
14634 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
14635 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
14636 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14637 DAG.getValueType(MVT::i32));
14638 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14639 }
14640
14641 break;
14642 case ISD::ROTL:
14643 case ISD::ROTR:
14644 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14645 "Unexpected custom legalisation");
14646 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
14647 Subtarget.hasVendorXTHeadBb()) &&
14648 "Unexpected custom legalization");
14649 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
14650 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
14651 return;
14652 Results.push_back(customLegalizeToWOp(N, DAG));
14653 break;
14654 case ISD::CTTZ:
14655  case ISD::CTTZ_ZERO_UNDEF:
14656  case ISD::CTLZ:
14657 case ISD::CTLZ_ZERO_UNDEF: {
14658 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14659 "Unexpected custom legalisation");
14660
14661 SDValue NewOp0 =
14662 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14663 bool IsCTZ =
14664 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
14665 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
14666 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
14667 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14668 return;
14669 }
14670 case ISD::SDIV:
14671 case ISD::UDIV:
14672 case ISD::UREM: {
14673 MVT VT = N->getSimpleValueType(0);
14674 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
14675 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
14676 "Unexpected custom legalisation");
14677 // Don't promote division/remainder by constant since we should expand those
14678 // to multiply by magic constant.
14679 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
14680 if (N->getOperand(1).getOpcode() == ISD::Constant &&
14681 !isIntDivCheap(N->getValueType(0), Attr))
14682 return;
14683
14684 // If the input is i32, use ANY_EXTEND since the W instructions don't read
14685 // the upper 32 bits. For other types we need to sign or zero extend
14686 // based on the opcode.
14687 unsigned ExtOpc = ISD::ANY_EXTEND;
14688 if (VT != MVT::i32)
14689 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
14690                                           : ISD::ZERO_EXTEND;
14691
14692 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
14693 break;
14694 }
14695 case ISD::SADDO: {
14696 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14697 "Unexpected custom legalisation");
14698
14699 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
14700 // use the default legalization.
14701 if (!isa<ConstantSDNode>(N->getOperand(1)))
14702 return;
14703
14704 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14705 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
14706 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
14707 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14708 DAG.getValueType(MVT::i32));
14709
14710 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14711
14712 // For an addition, the result should be less than one of the operands (LHS)
14713 // if and only if the other operand (RHS) is negative, otherwise there will
14714 // be overflow.
14715 // For a subtraction, the result should be less than one of the operands
14716 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
14717 // otherwise there will be overflow.
14718 EVT OType = N->getValueType(1);
14719 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
14720 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
14721
14722 SDValue Overflow =
14723 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
14724 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14725 Results.push_back(Overflow);
14726 return;
14727 }
14728 case ISD::UADDO:
14729 case ISD::USUBO: {
14730 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14731 "Unexpected custom legalisation");
14732 bool IsAdd = N->getOpcode() == ISD::UADDO;
14733 // Create an ADDW or SUBW.
14734 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14735 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14736 SDValue Res =
14737 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
14738 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14739 DAG.getValueType(MVT::i32));
14740
14741 SDValue Overflow;
14742 if (IsAdd && isOneConstant(RHS)) {
14743 // Special case uaddo X, 1 overflowed if the addition result is 0.
14744 // The general case (X + C) < C is not necessarily beneficial. Although we
14745 // reduce the live range of X, we may introduce the materialization of
14746 // constant C, especially when the setcc result is used by branch. We have
14747 // no compare with constant and branch instructions.
14748 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
14749 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
14750 } else if (IsAdd && isAllOnesConstant(RHS)) {
14751 // Special case uaddo X, -1 overflowed if X != 0.
14752 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
14753 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
14754 } else {
14755 // Sign extend the LHS and perform an unsigned compare with the ADDW
14756 // result. Since the inputs are sign extended from i32, this is equivalent
14757 // to comparing the lower 32 bits.
14758 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14759 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
14760 IsAdd ? ISD::SETULT : ISD::SETUGT);
14761 }
14762
14763 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14764 Results.push_back(Overflow);
14765 return;
14766 }
14767 case ISD::UADDSAT:
14768 case ISD::USUBSAT: {
14769 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14770 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
14771 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
14772 // promotion for UADDO/USUBO.
14773 Results.push_back(expandAddSubSat(N, DAG));
14774 return;
14775 }
14776 case ISD::SADDSAT:
14777 case ISD::SSUBSAT: {
14778 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14779 "Unexpected custom legalisation");
14780 Results.push_back(expandAddSubSat(N, DAG));
14781 return;
14782 }
14783 case ISD::ABS: {
14784 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14785 "Unexpected custom legalisation");
14786
14787 if (Subtarget.hasStdExtZbb()) {
14788 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
14789 // This allows us to remember that the result is sign extended. Expanding
14790 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
14791 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
14792 N->getOperand(0));
14793 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
14794 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
14795 return;
14796 }
14797
14798 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
14799 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14800
14801    // Freeze the source so we can increase its use count.
14802 Src = DAG.getFreeze(Src);
14803
14804 // Copy sign bit to all bits using the sraiw pattern.
14805 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
14806 DAG.getValueType(MVT::i32));
14807 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
14808 DAG.getConstant(31, DL, MVT::i64));
14809
14810 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
14811 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
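    // E.g. for X = -5: SignFill = -1, X ^ SignFill = 4, and 4 - (-1) = 5.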
14812
14813 // NOTE: The result is only required to be anyextended, but sext is
14814 // consistent with type legalization of sub.
14815 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
14816 DAG.getValueType(MVT::i32));
14817 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14818 return;
14819 }
14820 case ISD::BITCAST: {
14821 EVT VT = N->getValueType(0);
14822 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
14823 SDValue Op0 = N->getOperand(0);
14824 EVT Op0VT = Op0.getValueType();
14825 MVT XLenVT = Subtarget.getXLenVT();
14826 if (VT == MVT::i16 &&
14827 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
14828 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
14829 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
14830 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
14831 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
14832 Subtarget.hasStdExtFOrZfinx()) {
14833 SDValue FPConv =
14834 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
14835 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
14836 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
14837 Subtarget.hasStdExtDOrZdinx()) {
14838 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
14839 DAG.getVTList(MVT::i32, MVT::i32), Op0);
14840 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
14841 NewReg.getValue(0), NewReg.getValue(1));
14842 Results.push_back(RetReg);
14843 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
14844 isTypeLegal(Op0VT)) {
14845 // Custom-legalize bitcasts from fixed-length vector types to illegal
14846 // scalar types in order to improve codegen. Bitcast the vector to a
14847 // one-element vector type whose element type is the same as the result
14848 // type, and extract the first element.
14849 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
14850 if (isTypeLegal(BVT)) {
14851 SDValue BVec = DAG.getBitcast(BVT, Op0);
14852 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
14853 }
14854 }
14855 break;
14856 }
14857 case ISD::BITREVERSE: {
14858 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
14859 "Unexpected custom legalisation");
14860 MVT XLenVT = Subtarget.getXLenVT();
14861 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14862 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
14863 // ReplaceNodeResults requires we maintain the same type for the return
14864 // value.
14865 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
14866 break;
14867 }
14868 case RISCVISD::BREV8:
14869 case RISCVISD::ORC_B: {
14870 MVT VT = N->getSimpleValueType(0);
14871 MVT XLenVT = Subtarget.getXLenVT();
14872 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
14873 "Unexpected custom legalisation");
14874 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
14875 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
14876 "Unexpected extension");
14877 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14878 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
14879 // ReplaceNodeResults requires we maintain the same type for the return
14880 // value.
14881 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
14882 break;
14883 }
14884  case ISD::EXTRACT_VECTOR_ELT: {
14885    // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
14886 // type is illegal (currently only vXi64 RV32).
14887 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
14888 // transferred to the destination register. We issue two of these from the
14889 // upper- and lower- halves of the SEW-bit vector element, slid down to the
14890 // first element.
14891 SDValue Vec = N->getOperand(0);
14892 SDValue Idx = N->getOperand(1);
14893
14894 // The vector type hasn't been legalized yet so we can't issue target
14895 // specific nodes if it needs legalization.
14896 // FIXME: We would manually legalize if it's important.
14897 if (!isTypeLegal(Vec.getValueType()))
14898 return;
14899
14900 MVT VecVT = Vec.getSimpleValueType();
14901
14902 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
14903 VecVT.getVectorElementType() == MVT::i64 &&
14904 "Unexpected EXTRACT_VECTOR_ELT legalization");
14905
14906 // If this is a fixed vector, we need to convert it to a scalable vector.
14907 MVT ContainerVT = VecVT;
14908 if (VecVT.isFixedLengthVector()) {
14909 ContainerVT = getContainerForFixedLengthVector(VecVT);
14910 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
14911 }
14912
14913 MVT XLenVT = Subtarget.getXLenVT();
14914
14915 // Use a VL of 1 to avoid processing more elements than we need.
14916 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
14917
14918 // Unless the index is known to be 0, we must slide the vector down to get
14919 // the desired element into index 0.
14920 if (!isNullConstant(Idx)) {
14921 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
14922 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
14923 }
14924
14925 // Extract the lower XLEN bits of the correct vector element.
14926 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
14927
14928 // To extract the upper XLEN bits of the vector element, shift the first
14929 // element right by 32 bits and re-extract the lower XLEN bits.
14930 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14931 DAG.getUNDEF(ContainerVT),
14932 DAG.getConstant(32, DL, XLenVT), VL);
14933 SDValue LShr32 =
14934 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
14935 DAG.getUNDEF(ContainerVT), Mask, VL);
14936
14937 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
14938
14939 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
14940 break;
14941 }
14942  case ISD::INTRINSIC_WO_CHAIN: {
14943    unsigned IntNo = N->getConstantOperandVal(0);
14944 switch (IntNo) {
14945 default:
14946      llvm_unreachable(
14947          "Don't know how to custom type legalize this intrinsic!");
14948 case Intrinsic::experimental_get_vector_length: {
14949 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
14950 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14951 return;
14952 }
14953 case Intrinsic::experimental_cttz_elts: {
14954 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
14955 Results.push_back(
14956 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
14957 return;
14958 }
14959 case Intrinsic::riscv_orc_b:
14960 case Intrinsic::riscv_brev8:
14961 case Intrinsic::riscv_sha256sig0:
14962 case Intrinsic::riscv_sha256sig1:
14963 case Intrinsic::riscv_sha256sum0:
14964 case Intrinsic::riscv_sha256sum1:
14965 case Intrinsic::riscv_sm3p0:
14966 case Intrinsic::riscv_sm3p1: {
14967 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14968 return;
14969 unsigned Opc;
14970 switch (IntNo) {
14971 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
14972 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
14973 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
14974 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
14975 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
14976 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
14977 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
14978 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
14979 }
14980
14981 SDValue NewOp =
14982 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14983 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
14984 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14985 return;
14986 }
14987 case Intrinsic::riscv_sm4ks:
14988 case Intrinsic::riscv_sm4ed: {
14989 unsigned Opc =
14990 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
14991 SDValue NewOp0 =
14992 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14993 SDValue NewOp1 =
14994 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
14995 SDValue Res =
14996 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
14997 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14998 return;
14999 }
15000 case Intrinsic::riscv_mopr: {
15001 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15002 return;
15003 SDValue NewOp =
15004 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15005 SDValue Res = DAG.getNode(
15006 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
15007 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
15008 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15009 return;
15010 }
15011 case Intrinsic::riscv_moprr: {
15012 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15013 return;
15014 SDValue NewOp0 =
15015 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15016 SDValue NewOp1 =
15017 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15018 SDValue Res = DAG.getNode(
15019 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
15020 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
15021 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15022 return;
15023 }
15024 case Intrinsic::riscv_clmul: {
15025 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15026 return;
15027
15028 SDValue NewOp0 =
15029 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15030 SDValue NewOp1 =
15031 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15032 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
15033 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15034 return;
15035 }
15036 case Intrinsic::riscv_clmulh:
15037 case Intrinsic::riscv_clmulr: {
15038 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15039 return;
15040
15041 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
15042 // to the full 128-bit clmul result of multiplying two xlen values.
15043 // Perform clmulr or clmulh on the shifted values. Finally, extract the
15044 // upper 32 bits.
15045 //
15046 // The alternative is to mask the inputs to 32 bits and use clmul, but
15047 // that requires two shifts to mask each input without zext.w.
15048 // FIXME: If the inputs are known zero extended or could be freely
15049 // zero extended, the mask form would be better.
15050 SDValue NewOp0 =
15051 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15052 SDValue NewOp1 =
15053 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15054 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
15055 DAG.getConstant(32, DL, MVT::i64));
15056 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
15057 DAG.getConstant(32, DL, MVT::i64));
15058 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
15059 : RISCVISD::CLMULR;
15060 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
15061 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
15062 DAG.getConstant(32, DL, MVT::i64));
15063 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15064 return;
15065 }
15066 case Intrinsic::riscv_vmv_x_s: {
15067 EVT VT = N->getValueType(0);
15068 MVT XLenVT = Subtarget.getXLenVT();
15069 if (VT.bitsLT(XLenVT)) {
15070 // Simple case: just extract using vmv.x.s and truncate.
15071 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
15072 Subtarget.getXLenVT(), N->getOperand(1));
15073 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
15074 return;
15075 }
15076
15077 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
15078 "Unexpected custom legalization");
15079
15080 // We need to do the move in two steps.
15081 SDValue Vec = N->getOperand(1);
15082 MVT VecVT = Vec.getSimpleValueType();
15083
15084 // First extract the lower XLEN bits of the element.
15085 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15086
15087 // To extract the upper XLEN bits of the vector element, shift the first
15088 // element right by 32 bits and re-extract the lower XLEN bits.
15089 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
15090
15091 SDValue ThirtyTwoV =
15092 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
15093 DAG.getConstant(32, DL, XLenVT), VL);
15094 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
15095 DAG.getUNDEF(VecVT), Mask, VL);
15096 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15097
15098 Results.push_back(
15099 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15100 break;
15101 }
15102 }
15103 break;
15104 }
15105 case ISD::VECREDUCE_ADD:
15106 case ISD::VECREDUCE_AND:
15107 case ISD::VECREDUCE_OR:
15108 case ISD::VECREDUCE_XOR:
15109 case ISD::VECREDUCE_SMAX:
15110 case ISD::VECREDUCE_UMAX:
15111 case ISD::VECREDUCE_SMIN:
15112 case ISD::VECREDUCE_UMIN:
15113 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
15114 Results.push_back(V);
15115 break;
15116 case ISD::VP_REDUCE_ADD:
15117 case ISD::VP_REDUCE_AND:
15118 case ISD::VP_REDUCE_OR:
15119 case ISD::VP_REDUCE_XOR:
15120 case ISD::VP_REDUCE_SMAX:
15121 case ISD::VP_REDUCE_UMAX:
15122 case ISD::VP_REDUCE_SMIN:
15123 case ISD::VP_REDUCE_UMIN:
15124 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
15125 Results.push_back(V);
15126 break;
15127 case ISD::GET_ROUNDING: {
15128 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
15129 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
15130 Results.push_back(Res.getValue(0));
15131 Results.push_back(Res.getValue(1));
15132 break;
15133 }
15134 }
15135}
15136
15137/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
15138/// which corresponds to it.
15139static unsigned getVecReduceOpcode(unsigned Opc) {
15140 switch (Opc) {
15141 default:
15142 llvm_unreachable("Unhandled binary to transform reduction");
15143 case ISD::ADD:
15144 return ISD::VECREDUCE_ADD;
15145 case ISD::UMAX:
15146 return ISD::VECREDUCE_UMAX;
15147 case ISD::SMAX:
15148 return ISD::VECREDUCE_SMAX;
15149 case ISD::UMIN:
15150 return ISD::VECREDUCE_UMIN;
15151 case ISD::SMIN:
15152 return ISD::VECREDUCE_SMIN;
15153 case ISD::AND:
15154 return ISD::VECREDUCE_AND;
15155 case ISD::OR:
15156 return ISD::VECREDUCE_OR;
15157 case ISD::XOR:
15158 return ISD::VECREDUCE_XOR;
15159 case ISD::FADD:
15160 // Note: This is the associative form of the generic reduction opcode.
15161 return ISD::VECREDUCE_FADD;
15162 case ISD::FMAXNUM:
15163 return ISD::VECREDUCE_FMAX;
15164 case ISD::FMINNUM:
15165 return ISD::VECREDUCE_FMIN;
15166 }
15167}
15168
15169/// Perform two related transforms whose purpose is to incrementally recognize
15170/// an explode_vector followed by scalar reduction as a vector reduction node.
15171/// This exists to recover from a deficiency in SLP which can't handle
15172/// forests with multiple roots sharing common nodes. In some cases, one
15173/// of the trees will be vectorized, and the other will remain (unprofitably)
15174/// scalarized.
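/// For example (illustration added here, not upstream text): add
/// (extract_elt V, 0), (extract_elt V, 1) first becomes
/// vecreduce_add (extract_subvector V, elements 0..1); adding
/// (extract_elt V, 2) to that result then becomes
/// vecreduce_add (extract_subvector V, elements 0..2), and so on.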
15175static SDValue
15176combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
15177 const RISCVSubtarget &Subtarget) {
15178
15179 // This transform needs to run before all integer types have been legalized
15180 // to i64 (so that the vector element type matches the add type), and while
15181 // it's safe to introduce odd sized vector types.
15183 return SDValue();
15184
15185 // Without V, this transform isn't useful. We could form the (illegal)
15186 // operations and let them be scalarized again, but there's really no point.
15187 if (!Subtarget.hasVInstructions())
15188 return SDValue();
15189
15190 const SDLoc DL(N);
15191 const EVT VT = N->getValueType(0);
15192 const unsigned Opc = N->getOpcode();
15193
15194 if (!VT.isInteger()) {
15195 switch (Opc) {
15196 default:
15197 return SDValue();
15198 case ISD::FADD:
15199 // For FADD, we only handle the case with reassociation allowed. We
15200 // could handle strict reduction order, but at the moment, there's no
15201 // known reason to, and the complexity isn't worth it.
15202 if (!N->getFlags().hasAllowReassociation())
15203 return SDValue();
15204 break;
15205 case ISD::FMAXNUM:
15206 case ISD::FMINNUM:
15207 break;
15208 }
15209 }
15210
15211 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
15212 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
15213 "Inconsistent mappings");
15214 SDValue LHS = N->getOperand(0);
15215 SDValue RHS = N->getOperand(1);
15216
15217 if (!LHS.hasOneUse() || !RHS.hasOneUse())
15218 return SDValue();
15219
15220 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15221 std::swap(LHS, RHS);
15222
15223 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15224 !isa<ConstantSDNode>(RHS.getOperand(1)))
15225 return SDValue();
15226
15227 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
15228 SDValue SrcVec = RHS.getOperand(0);
15229 EVT SrcVecVT = SrcVec.getValueType();
15230 assert(SrcVecVT.getVectorElementType() == VT);
15231 if (SrcVecVT.isScalableVector())
15232 return SDValue();
15233
15234 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
15235 return SDValue();
15236
15237 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
15238 // reduce_op (extract_subvector [2 x VT] from V). This will form the
15239 // root of our reduction tree. TODO: We could extend this to any two
15240 // adjacent aligned constant indices if desired.
15241 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15242 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
15243 uint64_t LHSIdx =
15244 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
15245 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
15246 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
15247 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15248 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
15249 }
15250 }
15251
15252 // Match (binop (reduce (extract_subvector V, 0),
15253 // (extract_vector_elt V, sizeof(SubVec))))
15254 // into a reduction of one more element from the original vector V.
15255 if (LHS.getOpcode() != ReduceOpc)
15256 return SDValue();
15257
15258 SDValue ReduceVec = LHS.getOperand(0);
15259 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15260 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
15261 isNullConstant(ReduceVec.getOperand(1)) &&
15262 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
15263 // For illegal types (e.g. 3xi32), most will be combined again into a
15264 // wider (hopefully legal) type. If this is a terminal state, we are
15265 // relying on type legalization here to produce something reasonable
15266 // and this lowering quality could probably be improved. (TODO)
15267 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
15268 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15269 return DAG.getNode(ReduceOpc, DL, VT, Vec,
15270 ReduceVec->getFlags() & N->getFlags());
15271 }
15272
15273 return SDValue();
15274}
15275
15276
15277// Try to fold (<bop> x, (reduction.<bop> vec, start))
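// If the reduction's scalar start operand is the neutral element (e.g. 0 for
// add), the other operand of the outer binop can be folded in as the new
// start value instead of being applied to the extracted result.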
15278static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
15279 const RISCVSubtarget &Subtarget) {
15280 auto BinOpToRVVReduce = [](unsigned Opc) {
15281 switch (Opc) {
15282 default:
15283 llvm_unreachable("Unhandled binary to transform reduction");
15284 case ISD::ADD:
15285 return RISCVISD::VECREDUCE_ADD_VL;
15286 case ISD::UMAX:
15287 return RISCVISD::VECREDUCE_UMAX_VL;
15288 case ISD::SMAX:
15289 return RISCVISD::VECREDUCE_SMAX_VL;
15290 case ISD::UMIN:
15291 return RISCVISD::VECREDUCE_UMIN_VL;
15292 case ISD::SMIN:
15293 return RISCVISD::VECREDUCE_SMIN_VL;
15294 case ISD::AND:
15295 return RISCVISD::VECREDUCE_AND_VL;
15296 case ISD::OR:
15297 return RISCVISD::VECREDUCE_OR_VL;
15298 case ISD::XOR:
15299 return RISCVISD::VECREDUCE_XOR_VL;
15300 case ISD::FADD:
15301 return RISCVISD::VECREDUCE_FADD_VL;
15302 case ISD::FMAXNUM:
15303 return RISCVISD::VECREDUCE_FMAX_VL;
15304 case ISD::FMINNUM:
15305 return RISCVISD::VECREDUCE_FMIN_VL;
15306 }
15307 };
15308
15309 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
15310 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15311 isNullConstant(V.getOperand(1)) &&
15312 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
15313 };
15314
15315 unsigned Opc = N->getOpcode();
15316 unsigned ReduceIdx;
15317 if (IsReduction(N->getOperand(0), Opc))
15318 ReduceIdx = 0;
15319 else if (IsReduction(N->getOperand(1), Opc))
15320 ReduceIdx = 1;
15321 else
15322 return SDValue();
15323
15324 // Skip if FADD disallows reassociation but the combiner needs it.
15325 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
15326 return SDValue();
15327
15328 SDValue Extract = N->getOperand(ReduceIdx);
15329 SDValue Reduce = Extract.getOperand(0);
15330 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
15331 return SDValue();
15332
15333 SDValue ScalarV = Reduce.getOperand(2);
15334 EVT ScalarVT = ScalarV.getValueType();
15335 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
15336 ScalarV.getOperand(0)->isUndef() &&
15337 isNullConstant(ScalarV.getOperand(2)))
15338 ScalarV = ScalarV.getOperand(1);
15339
15340 // Make sure that ScalarV is a splat with VL=1.
15341 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
15342 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
15343 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
15344 return SDValue();
15345
15346 if (!isNonZeroAVL(ScalarV.getOperand(2)))
15347 return SDValue();
15348
15349 // Check that the scalar of ScalarV is the neutral element.
15350 // TODO: Deal with value other than neutral element.
15351 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
15352 0))
15353 return SDValue();
15354
15355 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
15356 // FIXME: We might be able to improve this if operand 0 is undef.
15357 if (!isNonZeroAVL(Reduce.getOperand(5)))
15358 return SDValue();
15359
15360 SDValue NewStart = N->getOperand(1 - ReduceIdx);
15361
15362 SDLoc DL(N);
15363 SDValue NewScalarV =
15364 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
15365 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
15366
15367 // If we looked through an INSERT_SUBVECTOR we need to restore it.
15368 if (ScalarVT != ScalarV.getValueType())
15369 NewScalarV =
15370 DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);
15371
15372 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
15373 NewScalarV, Reduce.getOperand(3),
15374 Reduce.getOperand(4), Reduce.getOperand(5)};
15375 SDValue NewReduce =
15376 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
15377 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
15378 Extract.getOperand(1));
15379}
15380
15381// Optimize (add (shl x, c0), (shl y, c1)) ->
15382 // (SLLI (SH*ADD x, y), c0), if c1-c0 is 1, 2, or 3,
15383 // or
15384 // (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <= 31.
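// e.g. with c0 = 1 and c1 = 3: (add (shl x, 1), (shl y, 3))
//      -> (SLLI (SH2ADD y, x), 1), i.e. ((y << 2) + x) << 1.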
15385static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
15386 const RISCVSubtarget &Subtarget) {
15387 // Perform this optimization only in the zba/xandesperf/xqciac/xtheadba
15388 // extension.
15389 if (!Subtarget.hasShlAdd(3))
15390 return SDValue();
15391
15392 // Skip for vector types and larger types.
15393 EVT VT = N->getValueType(0);
15394 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15395 return SDValue();
15396
15397 // The two operand nodes must be SHL and have no other use.
15398 SDValue N0 = N->getOperand(0);
15399 SDValue N1 = N->getOperand(1);
15400 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
15401 !N0->hasOneUse() || !N1->hasOneUse())
15402 return SDValue();
15403
15404 // Check c0 and c1.
15405 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15406 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
15407 if (!N0C || !N1C)
15408 return SDValue();
15409 int64_t C0 = N0C->getSExtValue();
15410 int64_t C1 = N1C->getSExtValue();
15411 if (C0 <= 0 || C1 <= 0)
15412 return SDValue();
15413
15414 int64_t Diff = std::abs(C0 - C1);
15415 if (!Subtarget.hasShlAdd(Diff))
15416 return SDValue();
15417
15418 // Build nodes.
15419 SDLoc DL(N);
15420 int64_t Bits = std::min(C0, C1);
15421 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
15422 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
15423 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
15424 DAG.getConstant(Diff, DL, VT), NS);
15425 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
15426}
15427
15428// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
15429// or 3.
15430static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
15431 SelectionDAG &DAG) {
15432 using namespace llvm::SDPatternMatch;
15433
15434 // Looking for a reg-reg add and not an addi.
15435 if (isa<ConstantSDNode>(N->getOperand(1)))
15436 return SDValue();
15437
15438 // Based on testing it seems that performance degrades if the ADDI has
15439 // more than 2 uses.
15440 if (AddI->use_size() > 2)
15441 return SDValue();
15442
15443 APInt AddVal;
15444 SDValue SHLVal;
15445 if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
15446 return SDValue();
15447
15448 APInt VShift;
15449 if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
15450 return SDValue();
15451
15452 if (VShift.slt(1) || VShift.sgt(3))
15453 return SDValue();
15454
15455 SDLoc DL(N);
15456 EVT VT = N->getValueType(0);
15457 // The shift must be positive but the add can be signed.
15458 uint64_t ShlConst = VShift.getZExtValue();
15459 int64_t AddConst = AddVal.getSExtValue();
15460
15461 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
15462 DAG.getConstant(ShlConst, DL, VT), Other);
15463 return DAG.getNode(ISD::ADD, DL, VT, SHADD,
15464 DAG.getSignedConstant(AddConst, DL, VT));
15465}
15466
15467// Optimize (add (add (shl x, c0), c1), y) ->
15468 // (ADDI (SH*ADD y, x), c1), if c0 is 1, 2, or 3.
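// e.g. (add (add (shl x, 2), 37), y) -> (ADDI (SH2ADD x, y), 37),
//      where SH2ADD x, y computes (x << 2) + y.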
15469static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
15470 const RISCVSubtarget &Subtarget) {
15471 // Perform this optimization only in the zba extension.
15472 if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3))
15473 return SDValue();
15474
15475 // Skip for vector types and larger types.
15476 EVT VT = N->getValueType(0);
15477 if (VT != Subtarget.getXLenVT())
15478 return SDValue();
15479
15480 SDValue AddI = N->getOperand(0);
15481 SDValue Other = N->getOperand(1);
15482 if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
15483 return V;
15484 if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
15485 return V;
15486 return SDValue();
15487}
15488
15489// Combine a constant select operand into its use:
15490//
15491// (and (select cond, -1, c), x)
15492// -> (select cond, x, (and x, c)) [AllOnes=1]
15493// (or (select cond, 0, c), x)
15494// -> (select cond, x, (or x, c)) [AllOnes=0]
15495// (xor (select cond, 0, c), x)
15496// -> (select cond, x, (xor x, c)) [AllOnes=0]
15497// (add (select cond, 0, c), x)
15498// -> (select cond, x, (add x, c)) [AllOnes=0]
15499// (sub x, (select cond, 0, c))
15500// -> (select cond, x, (sub x, c)) [AllOnes=0]
15501static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
15502 SelectionDAG &DAG, bool AllOnes,
15503 const RISCVSubtarget &Subtarget) {
15504 EVT VT = N->getValueType(0);
15505
15506 // Skip vectors.
15507 if (VT.isVector())
15508 return SDValue();
15509
15510 if (!Subtarget.hasConditionalMoveFusion()) {
15511 // (select cond, x, (and x, c)) has custom lowering with Zicond.
15512 if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
15513 return SDValue();
15514
15515 // Maybe harmful when condition code has multiple use.
15516 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
15517 return SDValue();
15518
15519 // Maybe harmful when VT is wider than XLen.
15520 if (VT.getSizeInBits() > Subtarget.getXLen())
15521 return SDValue();
15522 }
15523
15524 if ((Slct.getOpcode() != ISD::SELECT &&
15525 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
15526 !Slct.hasOneUse())
15527 return SDValue();
15528
15529 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
15530 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
15531 };
15532
15533 bool SwapSelectOps;
15534 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
15535 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
15536 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
15537 SDValue NonConstantVal;
15538 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
15539 SwapSelectOps = false;
15540 NonConstantVal = FalseVal;
15541 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
15542 SwapSelectOps = true;
15543 NonConstantVal = TrueVal;
15544 } else
15545 return SDValue();
15546
15547 // Slct is now known to be the desired identity constant when CC is true.
15548 TrueVal = OtherOp;
15549 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
15550 // Unless SwapSelectOps says the condition should be false.
15551 if (SwapSelectOps)
15552 std::swap(TrueVal, FalseVal);
15553
15554 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
15555 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
15556 {Slct.getOperand(0), Slct.getOperand(1),
15557 Slct.getOperand(2), TrueVal, FalseVal});
15558
15559 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
15560 {Slct.getOperand(0), TrueVal, FalseVal});
15561}
15562
15563// Attempt combineSelectAndUse on each operand of a commutative operator N.
15564static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
15565 bool AllOnes,
15566 const RISCVSubtarget &Subtarget) {
15567 SDValue N0 = N->getOperand(0);
15568 SDValue N1 = N->getOperand(1);
15569 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
15570 return Result;
15571 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
15572 return Result;
15573 return SDValue();
15574}
15575
15576// Transform (add (mul x, c0), c1) ->
15577// (add (mul (add x, c1/c0), c0), c1%c0).
15578// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
15579// that should be excluded is when c0*(c1/c0) is simm12, which will lead
15580// to an infinite loop in DAGCombine if transformed.
15581// Or transform (add (mul x, c0), c1) ->
15582// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
15583// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
15584// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
15585// lead to an infinite loop in DAGCombine if transformed.
15586// Or transform (add (mul x, c0), c1) ->
15587// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
15588// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
15589// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
15590// lead to an infinite loop in DAGCombine if transformed.
15591// Or transform (add (mul x, c0), c1) ->
15592// (mul (add x, c1/c0), c0).
15593// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
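// Worked example: c0 = 100, c1 = 4123 (not a simm12). c1/c0 = 41 and
// c1%c0 = 23 are both simm12, and c0*(c1/c0) = 4100 is not, so
// (add (mul x, 100), 4123) -> (add (mul (add x, 41), 100), 23).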
15594static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
15595 const RISCVSubtarget &Subtarget) {
15596 // Skip for vector types and larger types.
15597 EVT VT = N->getValueType(0);
15598 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15599 return SDValue();
15600 // The first operand node must be a MUL and has no other use.
15601 SDValue N0 = N->getOperand(0);
15602 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
15603 return SDValue();
15604 // Check if c0 and c1 match above conditions.
15605 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15606 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15607 if (!N0C || !N1C)
15608 return SDValue();
15609 // If N0C has multiple uses it's possible one of the cases in
15610 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
15611 // in an infinite loop.
15612 if (!N0C->hasOneUse())
15613 return SDValue();
15614 int64_t C0 = N0C->getSExtValue();
15615 int64_t C1 = N1C->getSExtValue();
15616 int64_t CA, CB;
15617 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
15618 return SDValue();
15619 // Search for proper CA (non-zero) and CB that both are simm12.
15620 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
15621 !isInt<12>(C0 * (C1 / C0))) {
15622 CA = C1 / C0;
15623 CB = C1 % C0;
15624 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
15625 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
15626 CA = C1 / C0 + 1;
15627 CB = C1 % C0 - C0;
15628 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
15629 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
15630 CA = C1 / C0 - 1;
15631 CB = C1 % C0 + C0;
15632 } else
15633 return SDValue();
15634 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
15635 SDLoc DL(N);
15636 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
15637 DAG.getSignedConstant(CA, DL, VT));
15638 SDValue New1 =
15639 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
15640 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
15641}
15642
15643// add (zext, zext) -> zext (add (zext, zext))
15644// sub (zext, zext) -> sext (sub (zext, zext))
15645// mul (zext, zext) -> zext (mul (zext, zext))
15646// sdiv (zext, zext) -> zext (sdiv (zext, zext))
15647// udiv (zext, zext) -> zext (udiv (zext, zext))
15648// srem (zext, zext) -> zext (srem (zext, zext))
15649// urem (zext, zext) -> zext (urem (zext, zext))
15650//
15651 // where the sum of the extend widths match, and the range of the bin op
15652// fits inside the width of the narrower bin op. (For profitability on rvv, we
15653// use a power of two for both inner and outer extend.)
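// e.g. add (zext v4i8 a to v4i32), (zext v4i8 b to v4i32) can instead be
// done as an add at a narrower element width followed by a single outer zext.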
15654static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
15655
15656 EVT VT = N->getValueType(0);
15657 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
15658 return SDValue();
15659
15660 SDValue N0 = N->getOperand(0);
15661 SDValue N1 = N->getOperand(1);
15662 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
15663 return SDValue();
15664 if (!N0.hasOneUse() || !N1.hasOneUse())
15665 return SDValue();
15666
15667 SDValue Src0 = N0.getOperand(0);
15668 SDValue Src1 = N1.getOperand(0);
15669 EVT SrcVT = Src0.getValueType();
15670 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
15671 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
15672 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
15673 return SDValue();
15674
15675 LLVMContext &C = *DAG.getContext();
15677 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
15678
15679 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
15680 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
15681
15682 // Src0 and Src1 are zero extended, so they're always positive if signed.
15683 //
15684 // sub can produce a negative from two positive operands, so it needs sign
15685 // extended. Other nodes produce a positive from two positive operands, so
15686 // zero extend instead.
15687 unsigned OuterExtend =
15688 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
15689
15690 return DAG.getNode(
15691 OuterExtend, SDLoc(N), VT,
15692 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
15693}
15694
15695// Try to turn (add (xor bool, 1) -1) into (neg bool).
15696static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
15697 SDValue N0 = N->getOperand(0);
15698 SDValue N1 = N->getOperand(1);
15699 EVT VT = N->getValueType(0);
15700 SDLoc DL(N);
15701
15702 // RHS should be -1.
15703 if (!isAllOnesConstant(N1))
15704 return SDValue();
15705
15706 // Look for (xor X, 1).
15707 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
15708 return SDValue();
15709
15710 // First xor input should be 0 or 1.
15711 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15712 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
15713 return SDValue();
15714
15715 // Emit a negate of the setcc.
15716 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
15717 N0.getOperand(0));
15718}
15719
15720static SDValue performADDCombine(SDNode *N,
15721 TargetLowering::DAGCombinerInfo &DCI,
15722 const RISCVSubtarget &Subtarget) {
15723 SelectionDAG &DAG = DCI.DAG;
15724 if (SDValue V = combineAddOfBooleanXor(N, DAG))
15725 return V;
15726 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
15727 return V;
15728 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
15729 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
15730 return V;
15731 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
15732 return V;
15733 }
15734 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15735 return V;
15736 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15737 return V;
15738 if (SDValue V = combineBinOpOfZExt(N, DAG))
15739 return V;
15740
15741 // fold (add (select lhs, rhs, cc, 0, y), x) ->
15742 // (select lhs, rhs, cc, x, (add x, y))
15743 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
15744}
15745
15746// Try to turn a sub boolean RHS and constant LHS into an addi.
15747static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
15748 SDValue N0 = N->getOperand(0);
15749 SDValue N1 = N->getOperand(1);
15750 EVT VT = N->getValueType(0);
15751 SDLoc DL(N);
15752
15753 // Require a constant LHS.
15754 auto *N0C = dyn_cast<ConstantSDNode>(N0);
15755 if (!N0C)
15756 return SDValue();
15757
15758 // All our optimizations involve subtracting 1 from the immediate and forming
15759 // an ADDI. Make sure the new immediate is valid for an ADDI.
15760 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
15761 if (!ImmValMinus1.isSignedIntN(12))
15762 return SDValue();
15763
15764 SDValue NewLHS;
15765 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
15766 // (sub constant, (setcc x, y, eq/neq)) ->
15767 // (add (setcc x, y, neq/eq), constant - 1)
15768 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15769 EVT SetCCOpVT = N1.getOperand(0).getValueType();
15770 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
15771 return SDValue();
15772 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15773 NewLHS =
15774 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
15775 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
15776 N1.getOperand(0).getOpcode() == ISD::SETCC) {
15777 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
15778 // Since setcc returns a bool the xor is equivalent to 1-setcc.
15779 NewLHS = N1.getOperand(0);
15780 } else
15781 return SDValue();
15782
15783 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
15784 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
15785}
15786
15787// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
15788// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
15789// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
15790// valid with Y=3, while 0b0000_1000_0000_0100 is not.
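// e.g. with Y = 3: (sub (shl X, 5), (srl X, 3)) -> (orc.b X), provided only
// bit 3 of each byte of X can be set.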
15791static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
15792 const RISCVSubtarget &Subtarget) {
15793 if (!Subtarget.hasStdExtZbb())
15794 return SDValue();
15795
15796 EVT VT = N->getValueType(0);
15797
15798 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
15799 return SDValue();
15800
15801 SDValue N0 = N->getOperand(0);
15802 SDValue N1 = N->getOperand(1);
15803
15804 if (N0->getOpcode() != ISD::SHL)
15805 return SDValue();
15806
15807 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
15808 if (!ShAmtCLeft)
15809 return SDValue();
15810 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
15811
15812 if (ShiftedAmount >= 8)
15813 return SDValue();
15814
15815 SDValue LeftShiftOperand = N0->getOperand(0);
15816 SDValue RightShiftOperand = N1;
15817
15818 if (ShiftedAmount != 0) { // Right operand must be a right shift.
15819 if (N1->getOpcode() != ISD::SRL)
15820 return SDValue();
15821 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
15822 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
15823 return SDValue();
15824 RightShiftOperand = N1.getOperand(0);
15825 }
15826
15827 // At least one shift should have a single use.
15828 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
15829 return SDValue();
15830
15831 if (LeftShiftOperand != RightShiftOperand)
15832 return SDValue();
15833
15834 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
15835 Mask <<= ShiftedAmount;
15836 // Check that X has indeed the right shape (only the Y-th bit can be set in
15837 // every byte).
15838 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
15839 return SDValue();
15840
15841 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
15842}
15843
15844static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
15845 const RISCVSubtarget &Subtarget) {
15846 if (SDValue V = combineSubOfBoolean(N, DAG))
15847 return V;
15848
15849 EVT VT = N->getValueType(0);
15850 SDValue N0 = N->getOperand(0);
15851 SDValue N1 = N->getOperand(1);
15852 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
15853 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
15854 isNullConstant(N1.getOperand(1)) &&
15855 N1.getValueType() == N1.getOperand(0).getValueType()) {
15856 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15857 if (CCVal == ISD::SETLT) {
15858 SDLoc DL(N);
15859 unsigned ShAmt = N0.getValueSizeInBits() - 1;
15860 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
15861 DAG.getConstant(ShAmt, DL, VT));
15862 }
15863 }
15864
15865 if (SDValue V = combineBinOpOfZExt(N, DAG))
15866 return V;
15867 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
15868 return V;
15869
15870 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
15871 // (select lhs, rhs, cc, x, (sub x, y))
15872 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
15873}
15874
15875// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
15876// Legalizing setcc can introduce xors like this. Doing this transform reduces
15877// the number of xors and may allow the xor to fold into a branch condition.
15878static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
15879 SDValue N0 = N->getOperand(0);
15880 SDValue N1 = N->getOperand(1);
15881 bool IsAnd = N->getOpcode() == ISD::AND;
15882
15883 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
15884 return SDValue();
15885
15886 if (!N0.hasOneUse() || !N1.hasOneUse())
15887 return SDValue();
15888
15889 SDValue N01 = N0.getOperand(1);
15890 SDValue N11 = N1.getOperand(1);
15891
15892 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
15893 // (xor X, -1) based on the upper bits of the other operand being 0. If the
15894 // operation is And, allow one of the Xors to use -1.
15895 if (isOneConstant(N01)) {
15896 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
15897 return SDValue();
15898 } else if (isOneConstant(N11)) {
15899 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
15900 if (!(IsAnd && isAllOnesConstant(N01)))
15901 return SDValue();
15902 } else
15903 return SDValue();
15904
15905 EVT VT = N->getValueType(0);
15906
15907 SDValue N00 = N0.getOperand(0);
15908 SDValue N10 = N1.getOperand(0);
15909
15910 // The LHS of the xors needs to be 0/1.
15911 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15912 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
15913 return SDValue();
15914
15915 // Invert the opcode and insert a new xor.
15916 SDLoc DL(N);
15917 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15918 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
15919 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
15920}
15921
15922// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
15923// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
15924 // value to an unsigned value. This will be lowered to vmax and a series of
15925 // vnclipu instructions later. This can be extended to truncated types other
15926 // than i8 by replacing 256 and 255 with the equivalent constants for the
15927// type.
15928static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
15929 EVT VT = N->getValueType(0);
15930 SDValue N0 = N->getOperand(0);
15931 EVT SrcVT = N0.getValueType();
15932
15933 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15934 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
15935 return SDValue();
15936
15937 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
15938 return SDValue();
15939
15940 SDValue Cond = N0.getOperand(0);
15941 SDValue True = N0.getOperand(1);
15942 SDValue False = N0.getOperand(2);
15943
15944 if (Cond.getOpcode() != ISD::SETCC)
15945 return SDValue();
15946
15947 // FIXME: Support the version of this pattern with the select operands
15948 // swapped.
15949 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15950 if (CCVal != ISD::SETULT)
15951 return SDValue();
15952
15953 SDValue CondLHS = Cond.getOperand(0);
15954 SDValue CondRHS = Cond.getOperand(1);
15955
15956 if (CondLHS != True)
15957 return SDValue();
15958
15959 unsigned ScalarBits = VT.getScalarSizeInBits();
15960
15961 // FIXME: Support other constants.
15962 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
15963 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
15964 return SDValue();
15965
15966 if (False.getOpcode() != ISD::SIGN_EXTEND)
15967 return SDValue();
15968
15969 False = False.getOperand(0);
15970
15971 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
15972 return SDValue();
15973
15974 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
15975 if (!FalseRHSC || !FalseRHSC->isZero())
15976 return SDValue();
15977
15978 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
15979 if (CCVal2 != ISD::SETGT)
15980 return SDValue();
15981
15982 // Emit the signed to unsigned saturation pattern.
15983 SDLoc DL(N);
15984 SDValue Max =
15985 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
15986 SDValue Min =
15987 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
15988 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
15989 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
15990}
15991
15992static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
15993 const RISCVSubtarget &Subtarget) {
15994 SDValue N0 = N->getOperand(0);
15995 EVT VT = N->getValueType(0);
15996
15997 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
15998 // extending X. This is safe since we only need the LSB after the shift and
15999 // shift amounts larger than 31 would produce poison. If we wait until
16000 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16001 // to use a BEXT instruction.
16002 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
16003 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
16004 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16005 SDLoc DL(N0);
16006 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16007 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16008 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16009 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
16010 }
16011
16012 return combineTruncSelectToSMaxUSat(N, DAG);
16013}
16014
16015// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
16016// truncation. But RVV doesn't have truncation instructions for more than twice
16017// the bitwidth.
16018//
16019// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
16020//
16021// vsetvli a0, zero, e32, m2, ta, ma
16022// vnsrl.wi v12, v8, 0
16023// vsetvli zero, zero, e16, m1, ta, ma
16024// vnsrl.wi v8, v12, 0
16025// vsetvli zero, zero, e8, mf2, ta, ma
16026// vnsrl.wi v8, v8, 0
16027//
16028 // So reverse the combine so we generate a vmseq/vmsne again:
16029//
16030// and (lshr (trunc X), ShAmt), 1
16031// -->
16032// zext (icmp ne (and X, (1 << ShAmt)), 0)
16033//
16034// and (lshr (not (trunc X)), ShAmt), 1
16035// -->
16036// zext (icmp eq (and X, (1 << ShAmt)), 0)
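// e.g. with ShAmt = 2:
//   and (lshr (trunc X), 2), 1 --> zext (icmp ne (and X, 4), 0)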
16037static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
16038 const RISCVSubtarget &Subtarget) {
16039 using namespace SDPatternMatch;
16040 SDLoc DL(N);
16041
16042 if (!Subtarget.hasVInstructions())
16043 return SDValue();
16044
16045 EVT VT = N->getValueType(0);
16046 if (!VT.isVector())
16047 return SDValue();
16048
16049 APInt ShAmt;
16050 SDValue Inner;
16051 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
16052 m_One())))
16053 return SDValue();
16054
16055 SDValue X;
16056 bool IsNot;
16057 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
16058 IsNot = true;
16059 else if (sd_match(Inner, m_Trunc(m_Value(X))))
16060 IsNot = false;
16061 else
16062 return SDValue();
16063
16064 EVT WideVT = X.getValueType();
16065 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
16066 return SDValue();
16067
16068 SDValue Res =
16069 DAG.getNode(ISD::AND, DL, WideVT, X,
16070 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
16071 Res = DAG.getSetCC(DL,
16072 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16073 WideVT.getVectorElementCount()),
16074 Res, DAG.getConstant(0, DL, WideVT),
16075 IsNot ? ISD::SETEQ : ISD::SETNE);
16076 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
16077}
16078
16079static SDValue reduceANDOfAtomicLoad(SDNode *N,
16080 TargetLowering::DAGCombinerInfo &DCI) {
16081 SelectionDAG &DAG = DCI.DAG;
16082 if (N->getOpcode() != ISD::AND)
16083 return SDValue();
16084
16085 SDValue N0 = N->getOperand(0);
16086 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
16087 return SDValue();
16088 if (!N0.hasOneUse())
16089 return SDValue();
16090
16093 return SDValue();
16094
16095 EVT LoadedVT = ALoad->getMemoryVT();
16096 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
16097 if (!MaskConst)
16098 return SDValue();
16099 uint64_t Mask = MaskConst->getZExtValue();
16100 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
16101 if (Mask != ExpectedMask)
16102 return SDValue();
16103
16104 SDValue ZextLoad = DAG.getAtomicLoad(
16105 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
16106 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
16107 DCI.CombineTo(N, ZextLoad);
16108 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
16110 return SDValue(N, 0);
16111}
16112
16113 // Combines two comparison operations and a logic operation into one selection
16114 // operation (min, max) and a logic operation. Returns the newly constructed
16115 // node if the conditions for the optimization are satisfied.
16116static SDValue performANDCombine(SDNode *N,
16117 TargetLowering::DAGCombinerInfo &DCI,
16118 const RISCVSubtarget &Subtarget) {
16119 SelectionDAG &DAG = DCI.DAG;
16120
16121 SDValue N0 = N->getOperand(0);
16122 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
16123 // extending X. This is safe since we only need the LSB after the shift and
16124 // shift amounts larger than 31 would produce poison. If we wait until
16125 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16126 // to use a BEXT instruction.
16127 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16128 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
16129 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
16130 N0.hasOneUse()) {
16131 SDLoc DL(N);
16132 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16133 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16134 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16135 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
16136 DAG.getConstant(1, DL, MVT::i64));
16137 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16138 }
16139
16140 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
16141 return V;
16142
16143 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16144 return V;
16145 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16146 return V;
16147 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
16148 return V;
16149
16150 if (DCI.isAfterLegalizeDAG())
16151 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16152 return V;
16153
16154 // fold (and (select lhs, rhs, cc, -1, y), x) ->
16155 // (select lhs, rhs, cc, x, (and x, y))
16156 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
16157}
16158
16159// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
16160// FIXME: Generalize to other binary operators with same operand.
16161static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
16162 SelectionDAG &DAG) {
16163 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
16164
16165 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
16166 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
16167 !N0.hasOneUse() || !N1.hasOneUse())
16168 return SDValue();
16169
16170 // Should have the same condition.
16171 SDValue Cond = N0.getOperand(1);
16172 if (Cond != N1.getOperand(1))
16173 return SDValue();
16174
16175 SDValue TrueV = N0.getOperand(0);
16176 SDValue FalseV = N1.getOperand(0);
16177
16178 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
16179 TrueV.getOperand(1) != FalseV.getOperand(1) ||
16180 !isOneConstant(TrueV.getOperand(1)) ||
16181 !TrueV.hasOneUse() || !FalseV.hasOneUse())
16182 return SDValue();
16183
16184 EVT VT = N->getValueType(0);
16185 SDLoc DL(N);
16186
16187 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
16188 Cond);
16189 SDValue NewN1 =
16190 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
16191 SDValue NewOr =
16192 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
16193 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
16194}
16195
16196// (xor X, (xor (and X, C2), Y))
16197// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
16198// where C2 is a shifted mask with width = Width and shift = ShAmt
16199// qc_insb might become qc.insb or qc.insbi depending on the operands.
16200static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16201 const RISCVSubtarget &Subtarget) {
16202 if (!Subtarget.hasVendorXqcibm())
16203 return SDValue();
16204
16205 using namespace SDPatternMatch;
16206
16207 SDValue Base, Inserted;
16208 APInt CMask;
16209 if (!sd_match(N, m_Xor(m_Value(Base),
16210 m_OneUse(m_Xor(m_OneUse(m_And(m_Deferred(Base),
16211 m_ConstInt(CMask))),
16212 m_Value(Inserted))))))
16213 return SDValue();
16214
16215 if (N->getValueType(0) != MVT::i32)
16216 return SDValue();
16217
16218 unsigned Width, ShAmt;
16219 if (!CMask.isShiftedMask(ShAmt, Width))
16220 return SDValue();
16221
16222 // Check if all zero bits in CMask are also zero in Inserted
16223 if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
16224 return SDValue();
16225
16226 SDLoc DL(N);
16227
16228 // `Inserted` needs to be right shifted before it is put into the
16229 // instruction.
16230 Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
16231 DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
16232
16233 SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
16234 DAG.getConstant(ShAmt, DL, MVT::i32)};
16235 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16236}
16237
16238static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16239 const RISCVSubtarget &Subtarget) {
16240 SelectionDAG &DAG = DCI.DAG;
16241
16242 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16243 return V;
16244 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16245 return V;
16246
16247 if (DCI.isAfterLegalizeDAG())
16248 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16249 return V;
16250
16251 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
16252 // We may be able to pull a common operation out of the true and false value.
16253 SDValue N0 = N->getOperand(0);
16254 SDValue N1 = N->getOperand(1);
16255 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
16256 return V;
16257 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
16258 return V;
16259
16260 // fold (or (select cond, 0, y), x) ->
16261 // (select cond, x, (or x, y))
16262 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16263}
16264
16265static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
16266 const RISCVSubtarget &Subtarget) {
16267 SDValue N0 = N->getOperand(0);
16268 SDValue N1 = N->getOperand(1);
16269
16270 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
16271 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
16272 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
16273 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16274 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
16275 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
16276 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16277 SDLoc DL(N);
16278 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16279 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16280 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
16281 SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
16282 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
16283 }
16284
16285 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
16286 // NOTE: Assumes ROL being legal means ROLW is legal.
16287 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16288 if (N0.getOpcode() == RISCVISD::SLLW &&
16289 isOneConstant(N0.getOperand(0)) && isAllOnesConstant(N1) &&
16290 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
16291 SDLoc DL(N);
16292 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
16293 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
16294 }
16295
16296 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
16297 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
16298 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
16299 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
16300 if (ConstN00 && CC == ISD::SETLT) {
16301 EVT VT = N0.getValueType();
16302 SDLoc DL(N0);
16303 const APInt &Imm = ConstN00->getAPIntValue();
16304 if ((Imm + 1).isSignedIntN(12))
16305 return DAG.getSetCC(DL, VT, N0.getOperand(1),
16306 DAG.getConstant(Imm + 1, DL, VT), CC);
16307 }
16308 }
16309
16310 if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
16311 return V;
16312
16313 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16314 return V;
16315 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16316 return V;
16317
16318 // fold (xor (select cond, 0, y), x) ->
16319 // (select cond, x, (xor x, y))
16320 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16321}
16322
16323// Try to expand a multiply to a sequence of shifts and add/subs,
16324 // for a machine without a native mul instruction.
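// e.g. MulAmt = 7 has NAF 8 - 1, so X * 7 is built as (0 - X) + (X << 3).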
16325static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
16326 uint64_t MulAmt) {
16327 SDLoc DL(N);
16328 EVT VT = N->getValueType(0);
16329 const uint64_t BitWidth = VT.getFixedSizeInBits();
16330
16331 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
16332 SDValue N0 = N->getOperand(0);
16333
16334 // Find the Non-adjacent form of the multiplier.
16335 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
16336 if (E & 1) {
16337 bool IsAdd = (E & 3) == 1;
16338 E -= IsAdd ? 1 : -1;
16339 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
16340 DAG.getShiftAmountConstant(I, VT, DL));
16341 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
16342 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
16343 }
16344 }
16345
16346 return Result;
16347}
16348
16349// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
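// e.g. MulAmt = 24 = 32 - 8 -> (sub (shl X, 5), (shl X, 3)), and
//      MulAmt = 36 = 32 + 4 -> (add (shl X, 5), (shl X, 2)).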
16350static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
16351 uint64_t MulAmt) {
16352 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
16353 ISD::NodeType Op;
16354 uint64_t ShiftAmt1;
16355 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
16356 Op = ISD::SUB;
16357 ShiftAmt1 = MulAmt + MulAmtLowBit;
16358 } else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
16359 Op = ISD::ADD;
16360 ShiftAmt1 = MulAmt - MulAmtLowBit;
16361 } else {
16362 return SDValue();
16363 }
16364 EVT VT = N->getValueType(0);
16365 SDLoc DL(N);
16366 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16367 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
16368 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16369 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
16370 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
16371}
16372
16373// Try to expand a scalar multiply to a faster sequence.
16374static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
16375 TargetLowering::DAGCombinerInfo &DCI,
16376 const RISCVSubtarget &Subtarget) {
16377
16378 EVT VT = N->getValueType(0);
16379
16380 // LI + MUL is usually smaller than the alternative sequence.
16382 return SDValue();
16383
16384 if (VT != Subtarget.getXLenVT())
16385 return SDValue();
16386
16387 bool ShouldExpandMul =
16388 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
16389 !Subtarget.hasStdExtZmmul();
16390 if (!ShouldExpandMul)
16391 return SDValue();
16392
16393 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
16394 if (!CNode)
16395 return SDValue();
16396 uint64_t MulAmt = CNode->getZExtValue();
16397
16398 // Don't do this if the Xqciac extension is enabled and MulAmt is a simm12.
16399 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
16400 return SDValue();
16401
16402 // WARNING: The code below is knowingly incorrect with regard to undef semantics.
16403 // We're adding additional uses of X here, and in principle, we should be freezing
16404 // X before doing so. However, adding freeze here causes real regressions, and no
16405 // other target properly freezes X in these cases either.
16406 SDValue X = N->getOperand(0);
16407
16408 if (Subtarget.hasShlAdd(3)) {
16409 for (uint64_t Divisor : {3, 5, 9}) {
16410 if (MulAmt % Divisor != 0)
16411 continue;
16412 uint64_t MulAmt2 = MulAmt / Divisor;
16413 // 3/5/9 * 2^N -> shl (shXadd X, X), N
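      // e.g. MulAmt = 20 = 5 * 4: (sh2add X, X) gives X * 5, then shl by 2
      // gives X * 20.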
16414 if (isPowerOf2_64(MulAmt2)) {
16415 SDLoc DL(N);
16416 SDValue X = N->getOperand(0);
16417 // Put the shift first if we can fold a zext into the
16418 // shift forming a slli.uw.
16419 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
16420 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
16421 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
16422 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16423 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
16424 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
16425 Shl);
16426 }
16427 // Otherwise, put the shl second so that it can fold with the following
16428 // instructions (e.g. sext or add).
16429 SDValue Mul359 =
16430 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16431 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16432 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
16433 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16434 }
16435
16436 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
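      // e.g. MulAmt = 45 = 5 * 9: (sh2add X, X) gives X * 5, then
      // (sh3add T, T) on that result gives X * 45.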
16437 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
16438 SDLoc DL(N);
16439 SDValue Mul359 =
16440 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16441 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16442 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16443 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
16444 Mul359);
16445 }
16446 }
16447
16448 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
16449 // shXadd. First check if this is a sum of two powers of 2 because that's
16450 // easy. Then count the trailing zeros up to the first set bit.
16451 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
16452 unsigned ScaleShift = llvm::countr_zero(MulAmt);
16453 if (ScaleShift >= 1 && ScaleShift < 4) {
16454 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
16455 SDLoc DL(N);
16456 SDValue Shift1 =
16457 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16458 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16459 DAG.getConstant(ScaleShift, DL, VT), Shift1);
16460 }
16461 }
16462
16463 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
16464 // This is the two-instruction form; there are also three-instruction
16465 // variants we could implement, e.g.
16466 // (2^(1,2,3) * 3,5,9 + 1) << C2
16467 // 2^(C1>3) * 3,5,9 +/- 1
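    // e.g. MulAmt = 11 = 2 * 5 + 1: (sh2add X, X) gives X * 5, then
    // (sh1add T, X) gives X * 11.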
16468 for (uint64_t Divisor : {3, 5, 9}) {
16469 uint64_t C = MulAmt - 1;
16470 if (C <= Divisor)
16471 continue;
16472 unsigned TZ = llvm::countr_zero(C);
16473 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
16474 SDLoc DL(N);
16475 SDValue Mul359 =
16476 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16477 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16478 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16479 DAG.getConstant(TZ, DL, VT), X);
16480 }
16481 }
16482
16483 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
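    // e.g. MulAmt = 69 = 64 + 4 + 1: (add (shl X, 6), (sh2add X, X)),
    // i.e. 64*X + 5*X.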
16484 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
16485 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
16486 if (ScaleShift >= 1 && ScaleShift < 4) {
16487 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
16488 SDLoc DL(N);
16489 SDValue Shift1 =
16490 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16491 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
16492 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16493 DAG.getConstant(ScaleShift, DL, VT), X));
16494 }
16495 }
16496
16497 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
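    // e.g. MulAmt = 59 = 64 - 5: (sub (shl X, 6), (sh2add X, X)),
    // i.e. 64*X - 5*X.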
16498 for (uint64_t Offset : {3, 5, 9}) {
16499 if (isPowerOf2_64(MulAmt + Offset)) {
16500 unsigned ShAmt = Log2_64(MulAmt + Offset);
16501 if (ShAmt >= VT.getSizeInBits())
16502 continue;
16503 SDLoc DL(N);
16504 SDValue Shift1 =
16505 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
16506 SDValue Mul359 =
16507 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16508 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
16509 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
16510 }
16511 }
16512
16513 for (uint64_t Divisor : {3, 5, 9}) {
16514 if (MulAmt % Divisor != 0)
16515 continue;
16516 uint64_t MulAmt2 = MulAmt / Divisor;
16517 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
16518 // of 25 which happen to be quite common.
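      // e.g. MulAmt = 100 = 5 * 5 * 4: two sh2add steps give X * 25, then
      // shl by 2 gives X * 100.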
16519 for (uint64_t Divisor2 : {3, 5, 9}) {
16520 if (MulAmt2 % Divisor2 != 0)
16521 continue;
16522 uint64_t MulAmt3 = MulAmt2 / Divisor2;
16523 if (isPowerOf2_64(MulAmt3)) {
16524 SDLoc DL(N);
16525 SDValue Mul359A =
16526 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16527 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16528 SDValue Mul359B = DAG.getNode(
16529 RISCVISD::SHL_ADD, DL, VT, Mul359A,
16530 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
16531 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
16532 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
16533 }
16534 }
16535 }
16536 }
16537
16538 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
16539 return V;
16540
16541 if (!Subtarget.hasStdExtZmmul())
16542 return expandMulToNAFSequence(N, DAG, MulAmt);
16543
16544 return SDValue();
16545}
16546
16547// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
16548// (bitcast (sra (v2Xi16 (bitcast X)), 15))
16549// Same for other equivalent types with other equivalent constants.
16551 EVT VT = N->getValueType(0);
16552 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16553
16554 // Do this for legal vectors unless they are i1 or i8 vectors.
16555 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
16556 return SDValue();
16557
16558 if (N->getOperand(0).getOpcode() != ISD::AND ||
16559 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
16560 return SDValue();
16561
16562 SDValue And = N->getOperand(0);
16563 SDValue Srl = And.getOperand(0);
16564
16565 APInt V1, V2, V3;
16566 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
16567 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
16568 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
16569 return SDValue();
16570
16571 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
16572 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
16573 V3 != (HalfSize - 1))
16574 return SDValue();
16575
16576 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
16577 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
16578 VT.getVectorElementCount() * 2);
16579 SDLoc DL(N);
16580 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
16581 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
16582 DAG.getConstant(HalfSize - 1, DL, HalfVT));
16583 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
16584}
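A scalar model of one 32-bit lane of the combine above (an illustrative sketch, not the DAG form): multiplying the gathered sign bits by 0xffff produces the same lane value as arithmetically shifting each 16-bit half right by 15.

#include <cassert>
#include <cstdint>

uint32_t MulForm(uint32_t X) { return ((X >> 15) & 0x10001u) * 0xffffu; }

uint32_t SraForm(uint32_t X) {
  uint16_t Lo = uint16_t(int16_t(X & 0xffff) >> 15);  // sra low half by 15
  uint16_t Hi = uint16_t(int16_t(X >> 16) >> 15);     // sra high half by 15
  return (uint32_t(Hi) << 16) | Lo;
}

int main() {
  for (uint32_t X : {0u, 0x00008000u, 0x80000000u, 0xdeadbeefu, 0x7fff7fffu})
    assert(MulForm(X) == SraForm(X));
}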
16585
16586static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
16587 TargetLowering::DAGCombinerInfo &DCI,
16588 const RISCVSubtarget &Subtarget) {
16589 EVT VT = N->getValueType(0);
16590 if (!VT.isVector())
16591 return expandMul(N, DAG, DCI, Subtarget);
16592
16593 SDLoc DL(N);
16594 SDValue N0 = N->getOperand(0);
16595 SDValue N1 = N->getOperand(1);
16596 SDValue MulOper;
16597 unsigned AddSubOpc;
16598
16599 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
16600 // (mul x, add (y, 1)) -> (add x, (mul x, y))
16601 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
16602 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
16603 auto IsAddSubWith1 = [&](SDValue V) -> bool {
16604 AddSubOpc = V->getOpcode();
16605 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
16606 SDValue Opnd = V->getOperand(1);
16607 MulOper = V->getOperand(0);
16608 if (AddSubOpc == ISD::SUB)
16609 std::swap(Opnd, MulOper);
16610 if (isOneOrOneSplat(Opnd))
16611 return true;
16612 }
16613 return false;
16614 };
16615
16616 if (IsAddSubWith1(N0)) {
16617 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
16618 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
16619 }
16620
16621 if (IsAddSubWith1(N1)) {
16622 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
16623 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
16624 }
16625
16626 if (SDValue V = combineBinOpOfZExt(N, DAG))
16627 return V;
16628
16629 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
16630 return V;
16631
16632 return SDValue();
16633}
16634
16635/// According to the property that indexed load/store instructions zero-extend
16636/// their indices, try to narrow the type of the index operand.
16637static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
16638 if (isIndexTypeSigned(IndexType))
16639 return false;
16640
16641 if (!N->hasOneUse())
16642 return false;
16643
16644 EVT VT = N.getValueType();
16645 SDLoc DL(N);
16646
16647 // In general, what we're doing here is seeing if we can sink a truncate to
16648 // a smaller element type into the expression tree building our index.
16649 // TODO: We can generalize this and handle a bunch more cases if useful.
16650
16651 // Narrow a buildvector to the narrowest element type. This requires less
16652 // work and less register pressure at high LMUL, and creates smaller constants
16653 // which may be cheaper to materialize.
16654 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
16655 KnownBits Known = DAG.computeKnownBits(N);
16656 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
16657 LLVMContext &C = *DAG.getContext();
16658 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
16659 if (ResultVT.bitsLT(VT.getVectorElementType())) {
16660 N = DAG.getNode(ISD::TRUNCATE, DL,
16661 VT.changeVectorElementType(ResultVT), N);
16662 return true;
16663 }
16664 }
16665
16666 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
16667 if (N.getOpcode() != ISD::SHL)
16668 return false;
16669
16670 SDValue N0 = N.getOperand(0);
16671 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
16672 N0.getOpcode() != RISCVISD::VZEXT_VL)
16673 return false;
16674 if (!N0->hasOneUse())
16675 return false;
16676
16677 APInt ShAmt;
16678 SDValue N1 = N.getOperand(1);
16679 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
16680 return false;
16681
16682 SDValue Src = N0.getOperand(0);
16683 EVT SrcVT = Src.getValueType();
16684 unsigned SrcElen = SrcVT.getScalarSizeInBits();
16685 unsigned ShAmtV = ShAmt.getZExtValue();
16686 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
16687 NewElen = std::max(NewElen, 8U);
16688
16689 // Skip if NewElen is not narrower than the original extended type.
16690 if (NewElen >= N0.getValueType().getScalarSizeInBits())
16691 return false;
16692
16693 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
16694 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
16695
16696 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
16697 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
16698 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
16699 return true;
16700}
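A small standalone illustration of why the narrowing above is safe, assuming the usual zero-extension of indices: a left shift of a zero-extended i8 by 3 needs at most 11 significant bits, so it can be rebuilt in an i16 element without losing any bits.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X <= 0xff; ++X) {
    uint64_t WideIdx = uint64_t(X) << 3;              // (shl (zext i8 X to i64), 3)
    uint16_t NarrowIdx = uint16_t(uint16_t(X) << 3);  // same value in an i16 lane
    assert(WideIdx == NarrowIdx);                     // nothing was truncated
  }
}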
16701
16702/// Try to map an integer comparison with size > XLEN to vector instructions
16703/// before type legalization splits it up into chunks.
16704static SDValue
16705combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
16706 const SDLoc &DL, SelectionDAG &DAG,
16707 const RISCVSubtarget &Subtarget) {
16708 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
16709
16710 if (!Subtarget.hasVInstructions())
16711 return SDValue();
16712
16713 MVT XLenVT = Subtarget.getXLenVT();
16714 EVT OpVT = X.getValueType();
16715 // We're looking for an oversized integer equality comparison.
16716 if (!OpVT.isScalarInteger())
16717 return SDValue();
16718
16719 unsigned OpSize = OpVT.getSizeInBits();
16720 // The size should be larger than XLen and smaller than the maximum vector
16721 // size.
16722 if (OpSize <= Subtarget.getXLen() ||
16723 OpSize > Subtarget.getRealMinVLen() *
16724 Subtarget.getMaxLMULForFixedLengthVectors())
16725 return SDValue();
16726
16727 // Don't perform this combine if constructing the vector will be expensive.
16728 auto IsVectorBitCastCheap = [](SDValue X) {
16729 X = peekThroughBitcasts(X);
16730 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
16731 X.getOpcode() == ISD::LOAD;
16732 };
16733 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
16734 return SDValue();
16735
16736 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
16737 Attribute::NoImplicitFloat))
16738 return SDValue();
16739
16740 // Bail out for non-byte-sized types.
16741 if (!OpVT.isByteSized())
16742 return SDValue();
16743
16744 unsigned VecSize = OpSize / 8;
16745 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
16746 EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
16747
16748 SDValue VecX = DAG.getBitcast(VecVT, X);
16749 SDValue VecY = DAG.getBitcast(VecVT, Y);
16750 SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
16751 SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
16752
16753 SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
16754 DAG.getCondCode(ISD::SETNE), Mask, VL);
16755 return DAG.getSetCC(DL, VT,
16756 DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
16757 DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
16758 VL),
16759 DAG.getConstant(0, DL, XLenVT), CC);
16760}
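The portable intuition behind the combine above, written as a plain C++ sketch rather than DAG nodes: an oversized integer equality test is the same as asking whether any byte lane differs.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t A[32], B[32];                    // a 256-bit value, wider than XLEN
  std::memset(A, 0x5a, sizeof(A));
  std::memcpy(B, A, sizeof(B));
  B[17] ^= 1;                              // flip one bit in one lane
  bool AnyLaneDiffers = false;             // VP_SETCC SETNE feeding VP_REDUCE_OR
  for (unsigned I = 0; I < 32; ++I)
    AnyLaneDiffers |= (A[I] != B[I]);
  assert(AnyLaneDiffers == (std::memcmp(A, B, sizeof(A)) != 0));
}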
16761
16762static SDValue performSETCCCombine(SDNode *N,
16763 TargetLowering::DAGCombinerInfo &DCI,
16764 const RISCVSubtarget &Subtarget) {
16765 SelectionDAG &DAG = DCI.DAG;
16766 SDLoc dl(N);
16767 SDValue N0 = N->getOperand(0);
16768 SDValue N1 = N->getOperand(1);
16769 EVT VT = N->getValueType(0);
16770 EVT OpVT = N0.getValueType();
16771
16772 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
16773 // Looking for an equality compare.
16774 if (!isIntEqualitySetCC(Cond))
16775 return SDValue();
16776
16777 if (SDValue V =
16778 combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
16779 return V;
16780
16781 if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
16782 N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
16783 isa<ConstantSDNode>(N0.getOperand(1))) {
16784 const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
16785 // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
16786 if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
16787 AndRHSC.isNegatedPowerOf2()) {
16788 unsigned ShiftBits = AndRHSC.countr_zero();
16789 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
16790 DAG.getConstant(ShiftBits, dl, OpVT));
16791 return DAG.getSetCC(dl, VT, Shift, N1, Cond);
16792 }
16793
16794 // Similar to above but handling the lower 32 bits by using sraiw. Allow
16795 // comparing with constants other than 0 if the constant can be folded into
16796 // addi or xori after shifting.
16797 uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
16798 uint64_t AndRHSInt = AndRHSC.getZExtValue();
16799 if (OpVT == MVT::i64 && AndRHSInt <= 0xffffffff &&
16800 isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
16801 unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
16802 int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
16803 if (NewC >= -2048 && NewC <= 2048) {
16804 SDValue SExt =
16805 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
16806 DAG.getValueType(MVT::i32));
16807 SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
16808 DAG.getConstant(ShiftBits, dl, OpVT));
16809 return DAG.getSetCC(dl, VT, Shift,
16810 DAG.getSignedConstant(NewC, dl, OpVT), Cond);
16811 }
16812 }
16813 }
16814
16815 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
16816 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
16817 // bit 31. Same for setne. C1' may be cheaper to materialize and the
16818 // sext_inreg can become a sext.w instead of a shift pair.
16819 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
16820 return SDValue();
16821
16822 // RHS needs to be a constant.
16823 auto *N1C = dyn_cast<ConstantSDNode>(N1);
16824 if (!N1C)
16825 return SDValue();
16826
16827 // LHS needs to be (and X, 0xffffffff).
16828 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
16829 !isa<ConstantSDNode>(N0.getOperand(1)) ||
16830 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
16831 return SDValue();
16832
16833 // Don't do this if the sign bit is provably zero, it will be turned back into
16834 // an AND.
16835 APInt SignMask = APInt::getOneBitSet(64, 31);
16836 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
16837 return SDValue();
16838
16839 const APInt &C1 = N1C->getAPIntValue();
16840
16841 // If the constant is larger than 2^32 - 1 it is impossible for both sides
16842 // to be equal.
16843 if (C1.getActiveBits() > 32)
16844 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
16845
16846 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
16847 N0.getOperand(0), DAG.getValueType(MVT::i32));
16848 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
16849 dl, OpVT), Cond);
16850}
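A quick numeric check of the final rewrite, as a standalone sketch: when C1 fits in 32 bits, comparing the zero-extended low word of X against C1 gives the same answer as comparing their sign-extended-from-bit-31 forms.

#include <cassert>
#include <cstdint>

int64_t SExt32(uint64_t V) { return int64_t(int32_t(uint32_t(V))); }

int main() {
  uint64_t C1 = 0x80001234;  // fits in 32 bits
  for (uint64_t X : {0x180001234ull, 0x80001234ull, 0x12345ull, ~0ull})
    assert(((X & 0xffffffffull) == C1) == (SExt32(X) == SExt32(C1)));
}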
16851
16852static SDValue
16853performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16854 const RISCVSubtarget &Subtarget) {
16855 SelectionDAG &DAG = DCI.DAG;
16856 SDValue Src = N->getOperand(0);
16857 EVT VT = N->getValueType(0);
16858 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16859 unsigned Opc = Src.getOpcode();
16860 SDLoc DL(N);
16861
16862 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
16863 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
16864 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
16865 Subtarget.hasStdExtZfhmin())
16866 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
16867
16868 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
16869 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
16870 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
16871 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
16872 return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
16873 Src.getOperand(1));
16874
16875 // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
16876 if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
16877 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
16878
16879 // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
16880 if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
16881 isAllOnesConstant(Src.getOperand(1)) &&
16882 Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
16883 return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
16884 DAG.getAllOnesConstant(DL, VT));
16885
16886 return SDValue();
16887}
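Both i1 folds above come down to simple integer identities; a standalone sketch, where SetCC stands for a 0/1 comparison result:

#include <cassert>
#include <cstdint>

// sign_extend_inreg from i1 replicates bit 0 across the register.
int64_t SExtInRegI1(int64_t V) { return (V & 1) ? -1 : 0; }

int main() {
  for (int64_t SetCC : {int64_t(0), int64_t(1)}) {
    assert(SExtInRegI1(SetCC) == 0 - SetCC);        // (sub 0, (setcc))
    assert(SExtInRegI1(SetCC ^ -1) == SetCC + -1);  // (add (setcc), -1)
  }
}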
16888
16889namespace {
16890// Forward declaration of the structure holding the necessary information to
16891// apply a combine.
16892struct CombineResult;
16893
16894enum ExtKind : uint8_t {
16895 ZExt = 1 << 0,
16896 SExt = 1 << 1,
16897 FPExt = 1 << 2,
16898 BF16Ext = 1 << 3
16899};
16900/// Helper class for folding sign/zero extensions.
16901/// In particular, this class is used for the following combines:
16902/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
16903/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
16904/// mul | mul_vl -> vwmul(u) | vwmul_su
16905/// shl | shl_vl -> vwsll
16906/// fadd -> vfwadd | vfwadd_w
16907/// fsub -> vfwsub | vfwsub_w
16908/// fmul -> vfwmul
16909/// An object of this class represents an operand of the operation we want to
16910/// combine.
16911/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
16912/// NodeExtensionHelper for `a` and one for `b`.
16913///
16914/// This class abstracts away how the extension is materialized and
16915/// how its number of users affect the combines.
16916///
16917/// In particular:
16918/// - VWADD_W is conceptually == add(op0, sext(op1))
16919/// - VWADDU_W == add(op0, zext(op1))
16920/// - VWSUB_W == sub(op0, sext(op1))
16921/// - VWSUBU_W == sub(op0, zext(op1))
16922/// - VFWADD_W == fadd(op0, fpext(op1))
16923/// - VFWSUB_W == fsub(op0, fpext(op1))
16924/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
16925/// zext|sext(smaller_value).
16926struct NodeExtensionHelper {
16927 /// Records if this operand is like being zero extended.
16928 bool SupportsZExt;
16929 /// Records if this operand is like being sign extended.
16930 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
16931 /// instance, a splat constant (e.g., 3), would support being both sign and
16932 /// zero extended.
16933 bool SupportsSExt;
16934 /// Records if this operand is like being floating point extended.
16935 bool SupportsFPExt;
16936 /// Records if this operand is extended from bf16.
16937 bool SupportsBF16Ext;
16938 /// This boolean captures whether we care if this operand would still be
16939 /// around after the folding happens.
16940 bool EnforceOneUse;
16941 /// Original value that this NodeExtensionHelper represents.
16942 SDValue OrigOperand;
16943
16944 /// Get the value feeding the extension or the value itself.
16945 /// E.g., for zext(a), this would return a.
16946 SDValue getSource() const {
16947 switch (OrigOperand.getOpcode()) {
16948 case ISD::ZERO_EXTEND:
16949 case ISD::SIGN_EXTEND:
16950 case RISCVISD::VSEXT_VL:
16951 case RISCVISD::VZEXT_VL:
16952 case RISCVISD::FP_EXTEND_VL:
16953 return OrigOperand.getOperand(0);
16954 default:
16955 return OrigOperand;
16956 }
16957 }
16958
16959 /// Check if this instance represents a splat.
16960 bool isSplat() const {
16961 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
16962 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
16963 }
16964
16965 /// Get the extended opcode.
16966 unsigned getExtOpc(ExtKind SupportsExt) const {
16967 switch (SupportsExt) {
16968 case ExtKind::SExt:
16969 return RISCVISD::VSEXT_VL;
16970 case ExtKind::ZExt:
16971 return RISCVISD::VZEXT_VL;
16972 case ExtKind::FPExt:
16973 case ExtKind::BF16Ext:
16974 return RISCVISD::FP_EXTEND_VL;
16975 }
16976 llvm_unreachable("Unknown ExtKind enum");
16977 }
16978
16979 /// Get or create a value that can feed \p Root with the given extension \p
16980 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
16981 /// operand. \see ::getSource().
16982 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
16983 const RISCVSubtarget &Subtarget,
16984 std::optional<ExtKind> SupportsExt) const {
16985 if (!SupportsExt.has_value())
16986 return OrigOperand;
16987
16988 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
16989
16990 SDValue Source = getSource();
16991 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
16992 if (Source.getValueType() == NarrowVT)
16993 return Source;
16994
16995 unsigned ExtOpc = getExtOpc(*SupportsExt);
16996
16997 // If we need an extension, we should be changing the type.
16998 SDLoc DL(OrigOperand);
16999 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
17000 switch (OrigOperand.getOpcode()) {
17001 case ISD::ZERO_EXTEND:
17002 case ISD::SIGN_EXTEND:
17003 case RISCVISD::VSEXT_VL:
17004 case RISCVISD::VZEXT_VL:
17005 case RISCVISD::FP_EXTEND_VL:
17006 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
17007 case ISD::SPLAT_VECTOR:
17008 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
17009 case RISCVISD::VMV_V_X_VL:
17010 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
17011 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
17012 case RISCVISD::VFMV_V_F_VL:
17013 Source = Source.getOperand(1);
17014 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
17015 Source = Source.getOperand(0);
17016 assert(Source.getValueType() == NarrowVT.getVectorElementType());
17017 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
17018 DAG.getUNDEF(NarrowVT), Source, VL);
17019 default:
17020 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
17021 // and that operand should already have the right NarrowVT so no
17022 // extension should be required at this point.
17023 llvm_unreachable("Unsupported opcode");
17024 }
17025 }
17026
17027 /// Helper function to get the narrow type for \p Root.
17028 /// The narrow type is the type of \p Root where we divided the size of each
17029 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
17030 /// \pre Both the narrow type and the original type should be legal.
17031 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
17032 MVT VT = Root->getSimpleValueType(0);
17033
17034 // Determine the narrow size.
17035 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17036
17037 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
17038 : SupportsExt == ExtKind::FPExt
17039 ? MVT::getFloatingPointVT(NarrowSize)
17040 : MVT::getIntegerVT(NarrowSize);
17041
17042 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
17043 "Trying to extend something we can't represent");
17044 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
17045 return NarrowVT;
17046 }
17047
17048 /// Get the opcode to materialize:
17049 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
17050 static unsigned getSExtOpcode(unsigned Opcode) {
17051 switch (Opcode) {
17052 case ISD::ADD:
17053 case RISCVISD::ADD_VL:
17054 case RISCVISD::VWADD_W_VL:
17055 case RISCVISD::VWADDU_W_VL:
17056 case ISD::OR:
17057 case RISCVISD::OR_VL:
17058 return RISCVISD::VWADD_VL;
17059 case ISD::SUB:
17060 case RISCVISD::SUB_VL:
17061 case RISCVISD::VWSUB_W_VL:
17062 case RISCVISD::VWSUBU_W_VL:
17063 return RISCVISD::VWSUB_VL;
17064 case ISD::MUL:
17065 case RISCVISD::MUL_VL:
17066 return RISCVISD::VWMUL_VL;
17067 default:
17068 llvm_unreachable("Unexpected opcode");
17069 }
17070 }
17071
17072 /// Get the opcode to materialize:
17073 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
17074 static unsigned getZExtOpcode(unsigned Opcode) {
17075 switch (Opcode) {
17076 case ISD::ADD:
17077 case RISCVISD::ADD_VL:
17078 case RISCVISD::VWADD_W_VL:
17079 case RISCVISD::VWADDU_W_VL:
17080 case ISD::OR:
17081 case RISCVISD::OR_VL:
17082 return RISCVISD::VWADDU_VL;
17083 case ISD::SUB:
17084 case RISCVISD::SUB_VL:
17085 case RISCVISD::VWSUB_W_VL:
17086 case RISCVISD::VWSUBU_W_VL:
17087 return RISCVISD::VWSUBU_VL;
17088 case ISD::MUL:
17089 case RISCVISD::MUL_VL:
17090 return RISCVISD::VWMULU_VL;
17091 case ISD::SHL:
17092 case RISCVISD::SHL_VL:
17093 return RISCVISD::VWSLL_VL;
17094 default:
17095 llvm_unreachable("Unexpected opcode");
17096 }
17097 }
17098
17099 /// Get the opcode to materialize:
17100 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
17101 static unsigned getFPExtOpcode(unsigned Opcode) {
17102 switch (Opcode) {
17103 case RISCVISD::FADD_VL:
17104 case RISCVISD::VFWADD_W_VL:
17105 return RISCVISD::VFWADD_VL;
17106 case RISCVISD::FSUB_VL:
17107 case RISCVISD::VFWSUB_W_VL:
17108 return RISCVISD::VFWSUB_VL;
17109 case RISCVISD::FMUL_VL:
17110 return RISCVISD::VFWMUL_VL;
17111 case RISCVISD::VFMADD_VL:
17112 return RISCVISD::VFWMADD_VL;
17113 case RISCVISD::VFMSUB_VL:
17114 return RISCVISD::VFWMSUB_VL;
17115 case RISCVISD::VFNMADD_VL:
17116 return RISCVISD::VFWNMADD_VL;
17117 case RISCVISD::VFNMSUB_VL:
17118 return RISCVISD::VFWNMSUB_VL;
17119 default:
17120 llvm_unreachable("Unexpected opcode");
17121 }
17122 }
17123
17124 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
17125 /// newOpcode(a, b).
17126 static unsigned getSUOpcode(unsigned Opcode) {
17127 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
17128 "SU is only supported for MUL");
17129 return RISCVISD::VWMULSU_VL;
17130 }
17131
17132 /// Get the opcode to materialize
17133 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
17134 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
17135 switch (Opcode) {
17136 case ISD::ADD:
17137 case RISCVISD::ADD_VL:
17138 case ISD::OR:
17139 case RISCVISD::OR_VL:
17140 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
17141 : RISCVISD::VWADDU_W_VL;
17142 case ISD::SUB:
17143 case RISCVISD::SUB_VL:
17144 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
17145 : RISCVISD::VWSUBU_W_VL;
17146 case RISCVISD::FADD_VL:
17147 return RISCVISD::VFWADD_W_VL;
17148 case RISCVISD::FSUB_VL:
17149 return RISCVISD::VFWSUB_W_VL;
17150 default:
17151 llvm_unreachable("Unexpected opcode");
17152 }
17153 }
17154
17155 using CombineToTry = std::function<std::optional<CombineResult>(
17156 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
17157 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
17158 const RISCVSubtarget &)>;
17159
17160 /// Check if this node needs to be fully folded or extended for all users.
17161 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
17162
17163 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
17164 const RISCVSubtarget &Subtarget) {
17165 unsigned Opc = OrigOperand.getOpcode();
17166 MVT VT = OrigOperand.getSimpleValueType();
17167
17168 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
17169 "Unexpected Opcode");
17170
17171 // The passthru must be undef for tail agnostic.
17172 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
17173 return;
17174
17175 // Get the scalar value.
17176 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
17177 : OrigOperand.getOperand(1);
17178
17179 // See if we have enough sign bits or zero bits in the scalar to use a
17180 // widening opcode by splatting to smaller element size.
17181 unsigned EltBits = VT.getScalarSizeInBits();
17182 unsigned ScalarBits = Op.getValueSizeInBits();
17183 // If we're not getting all bits from the element, we need special handling.
17184 if (ScalarBits < EltBits) {
17185 // This should only occur on RV32.
17186 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
17187 !Subtarget.is64Bit() && "Unexpected splat");
17188 // vmv.v.x sign extends narrow inputs.
17189 SupportsSExt = true;
17190
17191 // If the input is positive, then sign extend is also zero extend.
17192 if (DAG.SignBitIsZero(Op))
17193 SupportsZExt = true;
17194
17195 EnforceOneUse = false;
17196 return;
17197 }
17198
17199 unsigned NarrowSize = EltBits / 2;
17200 // If the narrow type cannot be expressed with a legal VMV,
17201 // this is not a valid candidate.
17202 if (NarrowSize < 8)
17203 return;
17204
17205 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
17206 SupportsSExt = true;
17207
17208 if (DAG.MaskedValueIsZero(Op,
17209 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
17210 SupportsZExt = true;
17211
17212 EnforceOneUse = false;
17213 }
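For example, as a standalone sketch of the bit-counting checks above: a splat of 3 in a 32-bit element has few enough significant bits that it round-trips through a 16-bit splat under either extension, so both the signed and unsigned widening forms remain valid.

#include <cassert>
#include <cstdint>

int main() {
  int32_t Splat = 3;
  assert(int32_t(int16_t(Splat)) == Splat);   // <= 16 significant bits: sext-safe
  assert(int32_t(uint16_t(Splat)) == Splat);  // upper 16 bits known zero: zext-safe
}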
17214
17215 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17216 return (NarrowEltVT == MVT::f32 ||
17217 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
17218 }
17219
17220 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17221 return NarrowEltVT == MVT::bf16 && Subtarget.hasStdExtZvfbfwma();
17222 }
17223
17224 /// Helper method to set the various fields of this struct based on the
17225 /// type of \p Root.
17226 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
17227 const RISCVSubtarget &Subtarget) {
17228 SupportsZExt = false;
17229 SupportsSExt = false;
17230 SupportsFPExt = false;
17231 SupportsBF16Ext = false;
17232 EnforceOneUse = true;
17233 unsigned Opc = OrigOperand.getOpcode();
17234 // For the nodes we handle below, we end up using their inputs directly: see
17235 // getSource(). However since they either don't have a passthru or we check
17236 // that their passthru is undef, we can safely ignore their mask and VL.
17237 switch (Opc) {
17238 case ISD::ZERO_EXTEND:
17239 case ISD::SIGN_EXTEND: {
17240 MVT VT = OrigOperand.getSimpleValueType();
17241 if (!VT.isVector())
17242 break;
17243
17244 SDValue NarrowElt = OrigOperand.getOperand(0);
17245 MVT NarrowVT = NarrowElt.getSimpleValueType();
17246 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
17247 if (NarrowVT.getVectorElementType() == MVT::i1)
17248 break;
17249
17250 SupportsZExt = Opc == ISD::ZERO_EXTEND;
17251 SupportsSExt = Opc == ISD::SIGN_EXTEND;
17252 break;
17253 }
17254 case RISCVISD::VZEXT_VL:
17255 SupportsZExt = true;
17256 break;
17257 case RISCVISD::VSEXT_VL:
17258 SupportsSExt = true;
17259 break;
17260 case RISCVISD::FP_EXTEND_VL: {
17261 MVT NarrowEltVT =
17262 OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
17263 if (isSupportedFPExtend(NarrowEltVT, Subtarget))
17264 SupportsFPExt = true;
17265 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
17266 SupportsBF16Ext = true;
17267
17268 break;
17269 }
17270 case ISD::SPLAT_VECTOR:
17271 case RISCVISD::VMV_V_X_VL:
17272 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
17273 break;
17274 case RISCVISD::VFMV_V_F_VL: {
17275 MVT VT = OrigOperand.getSimpleValueType();
17276
17277 if (!OrigOperand.getOperand(0).isUndef())
17278 break;
17279
17280 SDValue Op = OrigOperand.getOperand(1);
17281 if (Op.getOpcode() != ISD::FP_EXTEND)
17282 break;
17283
17284 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17285 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
17286 if (NarrowSize != ScalarBits)
17287 break;
17288
17289 if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
17290 SupportsFPExt = true;
17291 if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
17292 Subtarget))
17293 SupportsBF16Ext = true;
17294 break;
17295 }
17296 default:
17297 break;
17298 }
17299 }
17300
17301 /// Check if \p Root supports any extension folding combines.
17302 static bool isSupportedRoot(const SDNode *Root,
17303 const RISCVSubtarget &Subtarget) {
17304 switch (Root->getOpcode()) {
17305 case ISD::ADD:
17306 case ISD::SUB:
17307 case ISD::MUL: {
17308 return Root->getValueType(0).isScalableVector();
17309 }
17310 case ISD::OR: {
17311 return Root->getValueType(0).isScalableVector() &&
17312 Root->getFlags().hasDisjoint();
17313 }
17314 // Vector Widening Integer Add/Sub/Mul Instructions
17315 case RISCVISD::ADD_VL:
17316 case RISCVISD::MUL_VL:
17317 case RISCVISD::VWADD_W_VL:
17318 case RISCVISD::VWADDU_W_VL:
17319 case RISCVISD::SUB_VL:
17320 case RISCVISD::VWSUB_W_VL:
17321 case RISCVISD::VWSUBU_W_VL:
17322 // Vector Widening Floating-Point Add/Sub/Mul Instructions
17323 case RISCVISD::FADD_VL:
17324 case RISCVISD::FSUB_VL:
17325 case RISCVISD::FMUL_VL:
17326 case RISCVISD::VFWADD_W_VL:
17327 case RISCVISD::VFWSUB_W_VL:
17328 return true;
17329 case RISCVISD::OR_VL:
17330 return Root->getFlags().hasDisjoint();
17331 case ISD::SHL:
17332 return Root->getValueType(0).isScalableVector() &&
17333 Subtarget.hasStdExtZvbb();
17334 case RISCVISD::SHL_VL:
17335 return Subtarget.hasStdExtZvbb();
17336 case RISCVISD::VFMADD_VL:
17337 case RISCVISD::VFNMSUB_VL:
17338 case RISCVISD::VFNMADD_VL:
17339 case RISCVISD::VFMSUB_VL:
17340 return true;
17341 default:
17342 return false;
17343 }
17344 }
17345
17346 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
17347 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
17348 const RISCVSubtarget &Subtarget) {
17349 assert(isSupportedRoot(Root, Subtarget) &&
17350 "Trying to build an helper with an "
17351 "unsupported root");
17352 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
17354 OrigOperand = Root->getOperand(OperandIdx);
17355
17356 unsigned Opc = Root->getOpcode();
17357 switch (Opc) {
17358 // We consider
17359 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
17360 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
17361 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
17362 case RISCVISD::VWADD_W_VL:
17363 case RISCVISD::VWADDU_W_VL:
17364 case RISCVISD::VWSUB_W_VL:
17365 case RISCVISD::VWSUBU_W_VL:
17366 case RISCVISD::VFWADD_W_VL:
17367 case RISCVISD::VFWSUB_W_VL:
17368 // Operand 1 can't be changed.
17369 if (OperandIdx == 1)
17370 break;
17371 [[fallthrough]];
17372 default:
17373 fillUpExtensionSupport(Root, DAG, Subtarget);
17374 break;
17375 }
17376 }
17377
17378 /// Helper function to get the Mask and VL from \p Root.
17379 static std::pair<SDValue, SDValue>
17380 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
17381 const RISCVSubtarget &Subtarget) {
17382 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
17383 switch (Root->getOpcode()) {
17384 case ISD::ADD:
17385 case ISD::SUB:
17386 case ISD::MUL:
17387 case ISD::OR:
17388 case ISD::SHL: {
17389 SDLoc DL(Root);
17390 MVT VT = Root->getSimpleValueType(0);
17391 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
17392 }
17393 default:
17394 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
17395 }
17396 }
17397
17398 /// Helper function to check if \p N is commutative with respect to the
17399 /// foldings that are supported by this class.
17400 static bool isCommutative(const SDNode *N) {
17401 switch (N->getOpcode()) {
17402 case ISD::ADD:
17403 case ISD::MUL:
17404 case ISD::OR:
17405 case RISCVISD::ADD_VL:
17406 case RISCVISD::MUL_VL:
17407 case RISCVISD::OR_VL:
17408 case RISCVISD::FADD_VL:
17409 case RISCVISD::FMUL_VL:
17410 case RISCVISD::VFMADD_VL:
17411 case RISCVISD::VFNMSUB_VL:
17412 case RISCVISD::VFNMADD_VL:
17413 case RISCVISD::VFMSUB_VL:
17414 return true;
17415 case RISCVISD::VWADD_W_VL:
17416 case RISCVISD::VWADDU_W_VL:
17417 case ISD::SUB:
17418 case RISCVISD::SUB_VL:
17419 case RISCVISD::VWSUB_W_VL:
17420 case RISCVISD::VWSUBU_W_VL:
17421 case RISCVISD::VFWADD_W_VL:
17422 case RISCVISD::FSUB_VL:
17423 case RISCVISD::VFWSUB_W_VL:
17424 case ISD::SHL:
17425 case RISCVISD::SHL_VL:
17426 return false;
17427 default:
17428 llvm_unreachable("Unexpected opcode");
17429 }
17430 }
17431
17432 /// Get a list of combine to try for folding extensions in \p Root.
17433 /// Note that each returned CombineToTry function doesn't actually modify
17434 /// anything. Instead they produce an optional CombineResult that if not None,
17435 /// need to be materialized for the combine to be applied.
17436 /// \see CombineResult::materialize.
17437 /// If the related CombineToTry function returns std::nullopt, that means the
17438 /// combine didn't match.
17439 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
17440};
17441
17442/// Helper structure that holds all the necessary information to materialize a
17443/// combine that does some extension folding.
17444struct CombineResult {
17445 /// Opcode to be generated when materializing the combine.
17446 unsigned TargetOpcode;
17447 // No value means no extension is needed.
17448 std::optional<ExtKind> LHSExt;
17449 std::optional<ExtKind> RHSExt;
17450 /// Root of the combine.
17451 SDNode *Root;
17452 /// LHS of the TargetOpcode.
17453 NodeExtensionHelper LHS;
17454 /// RHS of the TargetOpcode.
17455 NodeExtensionHelper RHS;
17456
17457 CombineResult(unsigned TargetOpcode, SDNode *Root,
17458 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
17459 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
17460 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
17461 LHS(LHS), RHS(RHS) {}
17462
17463 /// Return a value that uses TargetOpcode and that can be used to replace
17464 /// Root.
17465 /// The actual replacement is *not* done in that method.
17466 SDValue materialize(SelectionDAG &DAG,
17467 const RISCVSubtarget &Subtarget) const {
17468 SDValue Mask, VL, Passthru;
17469 std::tie(Mask, VL) =
17470 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
17471 switch (Root->getOpcode()) {
17472 default:
17473 Passthru = Root->getOperand(2);
17474 break;
17475 case ISD::ADD:
17476 case ISD::SUB:
17477 case ISD::MUL:
17478 case ISD::OR:
17479 case ISD::SHL:
17480 Passthru = DAG.getUNDEF(Root->getValueType(0));
17481 break;
17482 }
17483 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
17484 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
17485 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
17486 Passthru, Mask, VL);
17487 }
17488};
17489
17490/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17491/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17492/// are zext) and LHS and RHS can be folded into Root.
17493/// AllowExtMask define which form `ext` can take in this pattern.
17494///
17495/// \note If the pattern can match with both zext and sext, the returned
17496/// CombineResult will feature the zext result.
17497///
17498/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17499/// can be used to apply the pattern.
17500static std::optional<CombineResult>
17501canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
17502 const NodeExtensionHelper &RHS,
17503 uint8_t AllowExtMask, SelectionDAG &DAG,
17504 const RISCVSubtarget &Subtarget) {
17505 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
17506 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17507 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17508 /*RHSExt=*/{ExtKind::ZExt});
17509 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
17510 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17511 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17512 /*RHSExt=*/{ExtKind::SExt});
17513 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
17514 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17515 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17516 /*RHSExt=*/{ExtKind::FPExt});
17517 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
17518 RHS.SupportsBF16Ext)
17519 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17520 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
17521 /*RHSExt=*/{ExtKind::BF16Ext});
17522 return std::nullopt;
17523}
17524
17525/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17526/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17527/// are zext) and LHS and RHS can be folded into Root.
17528///
17529/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17530/// can be used to apply the pattern.
17531static std::optional<CombineResult>
17532canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
17533 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17534 const RISCVSubtarget &Subtarget) {
17535 return canFoldToVWWithSameExtensionImpl(
17536 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
17537 Subtarget);
17538}
17539
17540/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
17541///
17542/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17543/// can be used to apply the pattern.
17544static std::optional<CombineResult>
17545canFoldToVWWithSameExtZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17546 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17547 const RISCVSubtarget &Subtarget) {
17548 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
17549 Subtarget);
17550}
17551
17552/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
17553///
17554/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17555/// can be used to apply the pattern.
17556static std::optional<CombineResult>
17557canFoldToVWWithSameExtBF16(SDNode *Root, const NodeExtensionHelper &LHS,
17558 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17559 const RISCVSubtarget &Subtarget) {
17560 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
17561 Subtarget);
17562}
17563
17564/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
17565///
17566/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17567/// can be used to apply the pattern.
17568static std::optional<CombineResult>
17569canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
17570 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17571 const RISCVSubtarget &Subtarget) {
17572 if (RHS.SupportsFPExt)
17573 return CombineResult(
17574 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
17575 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
17576
17577 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
17578 // sext/zext?
17579 // Control this behavior behind an option (AllowSplatInVW_W) for testing
17580 // purposes.
17581 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
17582 return CombineResult(
17583 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
17584 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
17585 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
17586 return CombineResult(
17587 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
17588 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
17589 return std::nullopt;
17590}
17591
17592/// Check if \p Root follows a pattern Root(sext(LHS), RHS)
17593///
17594/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17595/// can be used to apply the pattern.
17596static std::optional<CombineResult>
17597canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17598 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17599 const RISCVSubtarget &Subtarget) {
17600 if (LHS.SupportsSExt)
17601 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17602 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17603 /*RHSExt=*/std::nullopt);
17604 return std::nullopt;
17605}
17606
17607/// Check if \p Root follows a pattern Root(zext(LHS), RHS)
17608///
17609/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17610/// can be used to apply the pattern.
17611static std::optional<CombineResult>
17612canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17613 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17614 const RISCVSubtarget &Subtarget) {
17615 if (LHS.SupportsZExt)
17616 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17617 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17618 /*RHSExt=*/std::nullopt);
17619 return std::nullopt;
17620}
17621
17622/// Check if \p Root follows a pattern Root(fpext(LHS), RHS)
17623///
17624/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17625/// can be used to apply the pattern.
17626static std::optional<CombineResult>
17627canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17628 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17629 const RISCVSubtarget &Subtarget) {
17630 if (LHS.SupportsFPExt)
17631 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17632 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17633 /*RHSExt=*/std::nullopt);
17634 return std::nullopt;
17635}
17636
17637/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
17638///
17639/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17640/// can be used to apply the pattern.
17641static std::optional<CombineResult>
17642canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
17643 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17644 const RISCVSubtarget &Subtarget) {
17645
17646 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
17647 return std::nullopt;
17648 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
17649 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17650 /*RHSExt=*/{ExtKind::ZExt});
17651}
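The mixed-sign case relies on the product of a sign-extended and a zero-extended narrow value fitting in the doubled element width; a minimal standalone check for the i8 -> i16 case:

#include <cassert>
#include <cstdint>

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = 0; B <= 255; ++B) {
      int32_t Wide = A * B;                            // sext(a) * zext(b)
      assert(Wide >= INT16_MIN && Wide <= INT16_MAX);  // fits the i16 result lane
    }
}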
17652
17653SmallVector<NodeExtensionHelper::CombineToTry>
17654NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
17655 SmallVector<CombineToTry> Strategies;
17656 switch (Root->getOpcode()) {
17657 case ISD::ADD:
17658 case ISD::SUB:
17659 case ISD::OR:
17660 case RISCVISD::ADD_VL:
17661 case RISCVISD::SUB_VL:
17662 case RISCVISD::OR_VL:
17663 case RISCVISD::FADD_VL:
17664 case RISCVISD::FSUB_VL:
17665 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
17666 Strategies.push_back(canFoldToVWWithSameExtension);
17667 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
17668 Strategies.push_back(canFoldToVW_W);
17669 break;
17670 case RISCVISD::FMUL_VL:
17671 case RISCVISD::VFMADD_VL:
17672 case RISCVISD::VFMSUB_VL:
17673 case RISCVISD::VFNMADD_VL:
17674 case RISCVISD::VFNMSUB_VL:
17675 Strategies.push_back(canFoldToVWWithSameExtension);
17676 if (Root->getOpcode() == RISCVISD::VFMADD_VL)
17677 Strategies.push_back(canFoldToVWWithSameExtBF16);
17678 break;
17679 case ISD::MUL:
17680 case RISCVISD::MUL_VL:
17681 // mul -> vwmul(u)
17682 Strategies.push_back(canFoldToVWWithSameExtension);
17683 // mul -> vwmulsu
17684 Strategies.push_back(canFoldToVW_SU);
17685 break;
17686 case ISD::SHL:
17687 case RISCVISD::SHL_VL:
17688 // shl -> vwsll
17689 Strategies.push_back(canFoldToVWWithSameExtZEXT);
17690 break;
17691 case RISCVISD::VWADD_W_VL:
17692 case RISCVISD::VWSUB_W_VL:
17693 // vwadd_w|vwsub_w -> vwadd|vwsub
17694 Strategies.push_back(canFoldToVWWithSEXT);
17695 break;
17696 case RISCVISD::VWADDU_W_VL:
17697 case RISCVISD::VWSUBU_W_VL:
17698 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
17699 Strategies.push_back(canFoldToVWWithZEXT);
17700 break;
17701 case RISCVISD::VFWADD_W_VL:
17702 case RISCVISD::VFWSUB_W_VL:
17703 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
17704 Strategies.push_back(canFoldToVWWithFPEXT);
17705 break;
17706 default:
17707 llvm_unreachable("Unexpected opcode");
17708 }
17709 return Strategies;
17710}
17711} // End anonymous namespace.
17712
17714 // TODO: Extend this to other binops using generic identity logic
17715 assert(N->getOpcode() == RISCVISD::ADD_VL);
17716 SDValue A = N->getOperand(0);
17717 SDValue B = N->getOperand(1);
17718 SDValue Passthru = N->getOperand(2);
17719 if (!Passthru.isUndef())
17720 // TODO: This could be a vmerge instead
17721 return SDValue();
17722 ;
17723 if (ISD::isConstantSplatVectorAllZeros(B.getNode()))
17724 return A;
17725 // Peek through fixed to scalable
17726 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
17727 ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
17728 return A;
17729 return SDValue();
17730}
17731
17732/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
17733/// The supported combines are:
17734/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
17735/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17736/// mul | mul_vl -> vwmul(u) | vwmul_su
17737/// shl | shl_vl -> vwsll
17738/// fadd_vl -> vfwadd | vfwadd_w
17739/// fsub_vl -> vfwsub | vfwsub_w
17740/// fmul_vl -> vfwmul
17741/// vwadd_w(u) -> vwadd(u)
17742/// vwsub_w(u) -> vwsub(u)
17743/// vfwadd_w -> vfwadd
17744/// vfwsub_w -> vfwsub
17745static SDValue combineOp_VLToVWOp_VL(SDNode *N,
17746 TargetLowering::DAGCombinerInfo &DCI,
17747 const RISCVSubtarget &Subtarget) {
17748 SelectionDAG &DAG = DCI.DAG;
17749 if (DCI.isBeforeLegalize())
17750 return SDValue();
17751
17752 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
17753 return SDValue();
17754
17755 SmallVector<SDNode *> Worklist;
17756 SmallPtrSet<SDNode *, 8> Inserted;
17757 Worklist.push_back(N);
17758 Inserted.insert(N);
17759 SmallVector<CombineResult> CombinesToApply;
17760
17761 while (!Worklist.empty()) {
17762 SDNode *Root = Worklist.pop_back_val();
17763
17764 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
17765 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
17766 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
17767 &Inserted](const NodeExtensionHelper &Op) {
17768 if (Op.needToPromoteOtherUsers()) {
17769 for (SDUse &Use : Op.OrigOperand->uses()) {
17770 SDNode *TheUser = Use.getUser();
17771 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
17772 return false;
17773 // We only support the first 2 operands of FMA.
17774 if (Use.getOperandNo() >= 2)
17775 return false;
17776 if (Inserted.insert(TheUser).second)
17777 Worklist.push_back(TheUser);
17778 }
17779 }
17780 return true;
17781 };
17782
17783 // Control the compile time by limiting the number of nodes we look at in
17784 // total.
17785 if (Inserted.size() > ExtensionMaxWebSize)
17786 return SDValue();
17787
17788 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
17789 NodeExtensionHelper::getSupportedFoldings(Root);
17790
17791 assert(!FoldingStrategies.empty() && "Nothing to be folded");
17792 bool Matched = false;
17793 for (int Attempt = 0;
17794 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
17795 ++Attempt) {
17796
17797 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
17798 FoldingStrategies) {
17799 std::optional<CombineResult> Res =
17800 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
17801 if (Res) {
17802 Matched = true;
17803 CombinesToApply.push_back(*Res);
17804 // All the inputs that are extended need to be folded, otherwise
17805 // we would be leaving the old input (since it may still be used),
17806 // and the new one.
17807 if (Res->LHSExt.has_value())
17808 if (!AppendUsersIfNeeded(LHS))
17809 return SDValue();
17810 if (Res->RHSExt.has_value())
17811 if (!AppendUsersIfNeeded(RHS))
17812 return SDValue();
17813 break;
17814 }
17815 }
17816 std::swap(LHS, RHS);
17817 }
17818 // Right now we do an all or nothing approach.
17819 if (!Matched)
17820 return SDValue();
17821 }
17822 // Store the value for the replacement of the input node separately.
17823 SDValue InputRootReplacement;
17824 // We do the RAUW after we materialize all the combines, because some replaced
17825 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
17826 // some of these nodes may appear in the NodeExtensionHelpers of some of the
17827 // yet-to-be-visited CombinesToApply roots.
17829 ValuesToReplace.reserve(CombinesToApply.size());
17830 for (CombineResult Res : CombinesToApply) {
17831 SDValue NewValue = Res.materialize(DAG, Subtarget);
17832 if (!InputRootReplacement) {
17833 assert(Res.Root == N &&
17834 "First element is expected to be the current node");
17835 InputRootReplacement = NewValue;
17836 } else {
17837 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
17838 }
17839 }
17840 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
17841 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
17842 DCI.AddToWorklist(OldNewValues.second.getNode());
17843 }
17844 return InputRootReplacement;
17845}
17846
17847// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
17848// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
17849// y will be the Passthru and cond will be the Mask.
17850static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
17851 unsigned Opc = N->getOpcode();
17852 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17853 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17854
17855 SDValue Y = N->getOperand(0);
17856 SDValue MergeOp = N->getOperand(1);
17857 unsigned MergeOpc = MergeOp.getOpcode();
17858
17859 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
17860 return SDValue();
17861
17862 SDValue X = MergeOp->getOperand(1);
17863
17864 if (!MergeOp.hasOneUse())
17865 return SDValue();
17866
17867 // Passthru should be undef
17868 SDValue Passthru = N->getOperand(2);
17869 if (!Passthru.isUndef())
17870 return SDValue();
17871
17872 // Mask should be all ones
17873 SDValue Mask = N->getOperand(3);
17874 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
17875 return SDValue();
17876
17877 // False value of MergeOp should be all zeros
17878 SDValue Z = MergeOp->getOperand(2);
17879
17880 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
17881 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
17882 Z = Z.getOperand(1);
17883
17884 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
17885 return SDValue();
17886
17887 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
17888 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
17889 N->getFlags());
17890}
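Per lane, the fold above is just the observation that adding a merged-in zero is the same as leaving the lane untouched; a scalar sketch with Cond standing in for the mask bit:

#include <cassert>
#include <cstdint>

int main() {
  for (bool Cond : {false, true}) {
    int64_t Y = 100, X = 7;
    int64_t MergeThenAdd = Y + (Cond ? X : 0);  // vwadd.wv y, (vmerge cond, x, 0)
    int64_t MaskedAdd = Cond ? (Y + X) : Y;     // masked vwadd.wv, passthru = y
    assert(MergeThenAdd == MaskedAdd);
  }
}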
17891
17892static SDValue performVWADDSUBW_VLCombine(SDNode *N,
17893 TargetLowering::DAGCombinerInfo &DCI,
17894 const RISCVSubtarget &Subtarget) {
17895 [[maybe_unused]] unsigned Opc = N->getOpcode();
17896 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17897 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17898
17899 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17900 return V;
17901
17902 return combineVWADDSUBWSelect(N, DCI.DAG);
17903}
17904
17905// Helper function for performMemPairCombine.
17906// Try to combine the memory loads/stores LSNode1 and LSNode2
17907// into a single memory pair operation.
17908static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
17909 LSBaseSDNode *LSNode2, SDValue BasePtr,
17910 uint64_t Imm) {
17911 SmallPtrSet<const SDNode *, 32> Visited;
17912 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
17913
17914 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
17915 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
17916 return SDValue();
17917
17918 MachineFunction &MF = DAG.getMachineFunction();
17919 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17920
17921 // The new operation has twice the width.
17922 MVT XLenVT = Subtarget.getXLenVT();
17923 EVT MemVT = LSNode1->getMemoryVT();
17924 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
17925 MachineMemOperand *MMO = LSNode1->getMemOperand();
17926 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
17927 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
17928
17929 if (LSNode1->getOpcode() == ISD::LOAD) {
17930 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
17931 unsigned Opcode;
17932 if (MemVT == MVT::i32)
17933 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
17934 else
17935 Opcode = RISCVISD::TH_LDD;
17936
17937 SDValue Res = DAG.getMemIntrinsicNode(
17938 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
17939 {LSNode1->getChain(), BasePtr,
17940 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
17941 NewMemVT, NewMMO);
17942
17943 SDValue Node1 =
17944 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
17945 SDValue Node2 =
17946 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
17947
17948 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
17949 return Node1;
17950 } else {
17951 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
17952
17953 SDValue Res = DAG.getMemIntrinsicNode(
17954 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
17955 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
17956 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
17957 NewMemVT, NewMMO);
17958
17959 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
17960 return Res;
17961 }
17962}
17963
17964// Try to combine two adjacent loads/stores to a single pair instruction from
17965// the XTHeadMemPair vendor extension.
17966static SDValue performMemPairCombine(SDNode *N,
17967 TargetLowering::DAGCombinerInfo &DCI) {
17968 SelectionDAG &DAG = DCI.DAG;
17969 MachineFunction &MF = DAG.getMachineFunction();
17970 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17971
17972 // Target does not support load/store pair.
17973 if (!Subtarget.hasVendorXTHeadMemPair())
17974 return SDValue();
17975
17976 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
17977 EVT MemVT = LSNode1->getMemoryVT();
17978 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
17979
17980 // No volatile, indexed or atomic loads/stores.
17981 if (!LSNode1->isSimple() || LSNode1->isIndexed())
17982 return SDValue();
17983
17984 // Function to get a base + constant representation from a memory value.
17985 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
17986 if (Ptr->getOpcode() == ISD::ADD)
17987 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
17988 return {Ptr->getOperand(0), C1->getZExtValue()};
17989 return {Ptr, 0};
17990 };
17991
17992 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
17993
17994 SDValue Chain = N->getOperand(0);
17995 for (SDUse &Use : Chain->uses()) {
17996 if (Use.getUser() != N && Use.getResNo() == 0 &&
17997 Use.getUser()->getOpcode() == N->getOpcode()) {
17998 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
17999
18000 // No volatile, indexed or atomic loads/stores.
18001 if (!LSNode2->isSimple() || LSNode2->isIndexed())
18002 continue;
18003
18004 // Check if LSNode1 and LSNode2 have the same type and extension.
18005 if (LSNode1->getOpcode() == ISD::LOAD)
18006 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
18007 cast<LoadSDNode>(LSNode1)->getExtensionType())
18008 continue;
18009
18010 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
18011 continue;
18012
18013 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
18014
18015 // Check if the base pointer is the same for both instructions.
18016 if (Base1 != Base2)
18017 continue;
18018
18019 // Check if the offsets match the XTHeadMemPair encoding constraints.
18020 bool Valid = false;
18021 if (MemVT == MVT::i32) {
18022 // Check for adjacent i32 values and a 2-bit index.
18023 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
18024 Valid = true;
18025 } else if (MemVT == MVT::i64) {
18026 // Check for adjacent i64 values and a 2-bit index.
18027 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
18028 Valid = true;
18029 }
18030
18031 if (!Valid)
18032 continue;
18033
18034 // Try to combine.
18035 if (SDValue Res =
18036 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
18037 return Res;
18038 }
18039 }
18040
18041 return SDValue();
18042}
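The offset checks above only accept a 2-bit index scaled by the access size; a small standalone model of isShiftedUInt<2, 3> (the i32 case), with the valid first offsets and a few invalid ones spelled out:

#include <cassert>
#include <cstdint>

// A 2-bit unsigned value shifted left by 3: only bits 3 and 4 may be set.
bool IsShiftedUInt2_3(uint64_t V) { return (V & ~UINT64_C(0x18)) == 0; }

int main() {
  for (uint64_t Off : {UINT64_C(0), UINT64_C(8), UINT64_C(16), UINT64_C(24)})
    assert(IsShiftedUInt2_3(Off));      // legal first offsets for an i32 pair
  for (uint64_t Off : {UINT64_C(4), UINT64_C(12), UINT64_C(32)})
    assert(!IsShiftedUInt2_3(Off));
}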
18043
18044// Fold
18045// (fp_to_int (froundeven X)) -> fcvt X, rne
18046// (fp_to_int (ftrunc X)) -> fcvt X, rtz
18047// (fp_to_int (ffloor X)) -> fcvt X, rdn
18048// (fp_to_int (fceil X)) -> fcvt X, rup
18049// (fp_to_int (fround X)) -> fcvt X, rmm
18050// (fp_to_int (frint X)) -> fcvt X
18051static SDValue performFP_TO_INTCombine(SDNode *N,
18052 TargetLowering::DAGCombinerInfo &DCI,
18053 const RISCVSubtarget &Subtarget) {
18054 SelectionDAG &DAG = DCI.DAG;
18055 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18056 MVT XLenVT = Subtarget.getXLenVT();
18057
18058 SDValue Src = N->getOperand(0);
18059
18060 // Don't do this for strict-fp Src.
18061 if (Src->isStrictFPOpcode())
18062 return SDValue();
18063
18064 // Ensure the FP type is legal.
18065 if (!TLI.isTypeLegal(Src.getValueType()))
18066 return SDValue();
18067
18068 // Don't do this for f16 with Zfhmin and not Zfh.
18069 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18070 return SDValue();
18071
18072 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18073 // If the result is invalid, we didn't find a foldable instruction.
18074 if (FRM == RISCVFPRndMode::Invalid)
18075 return SDValue();
18076
18077 SDLoc DL(N);
18078 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
18079 EVT VT = N->getValueType(0);
18080
18081 if (VT.isVector() && TLI.isTypeLegal(VT)) {
18082 MVT SrcVT = Src.getSimpleValueType();
18083 MVT SrcContainerVT = SrcVT;
18084 MVT ContainerVT = VT.getSimpleVT();
18085 SDValue XVal = Src.getOperand(0);
18086
18087 // For widening and narrowing conversions we just combine it into a
18088 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
18089 // end up getting lowered to their appropriate pseudo instructions based on
18090 // their operand types
18091 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
18092 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
18093 return SDValue();
18094
18095 // Make fixed-length vectors scalable first
18096 if (SrcVT.isFixedLengthVector()) {
18097 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
18098 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
18099 ContainerVT =
18100 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
18101 }
18102
18103 auto [Mask, VL] =
18104 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
18105
18106 SDValue FpToInt;
18107 if (FRM == RISCVFPRndMode::RTZ) {
18108 // Use the dedicated trunc static rounding mode if we're truncating so we
18109 // don't need to generate calls to fsrmi/fsrm
18110 unsigned Opc =
18111 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
18112 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
18113 } else {
18114 unsigned Opc =
18115 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
18116 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
18117 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
18118 }
18119
18120 // If converted from fixed-length to scalable, convert back
18121 if (VT.isFixedLengthVector())
18122 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
18123
18124 return FpToInt;
18125 }
18126
18127 // Only handle XLen or i32 types. Other types narrower than XLen will
18128 // eventually be legalized to XLenVT.
18129 if (VT != MVT::i32 && VT != XLenVT)
18130 return SDValue();
18131
18132 unsigned Opc;
18133 if (VT == XLenVT)
18134 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18135 else
18136 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18137
18138 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
18139 DAG.getTargetConstant(FRM, DL, XLenVT));
18140 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
18141}
18142
18143// Fold
18144// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
18145// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
18146// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
18147// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
18148// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
18149// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
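// The extra select is needed because a plain fcvt saturates a NaN input to
// the largest representable value rather than 0; the unordered (X != X)
// compare built below picks 0 explicitly in that case (illustrative summary
// of the code that follows).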
18150static SDValue performFP_TO_INT_SATCombine(SDNode *N,
18151 TargetLowering::DAGCombinerInfo &DCI,
18152 const RISCVSubtarget &Subtarget) {
18153 SelectionDAG &DAG = DCI.DAG;
18154 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18155 MVT XLenVT = Subtarget.getXLenVT();
18156
18157 // Only handle XLen types. Other types narrower than XLen will eventually be
18158 // legalized to XLenVT.
18159 EVT DstVT = N->getValueType(0);
18160 if (DstVT != XLenVT)
18161 return SDValue();
18162
18163 SDValue Src = N->getOperand(0);
18164
18165 // Don't do this for strict-fp Src.
18166 if (Src->isStrictFPOpcode())
18167 return SDValue();
18168
18169 // Ensure the FP type is also legal.
18170 if (!TLI.isTypeLegal(Src.getValueType()))
18171 return SDValue();
18172
18173 // Don't do this for f16 with Zfhmin and not Zfh.
18174 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18175 return SDValue();
18176
18177 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18178
18179 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18180 if (FRM == RISCVFPRndMode::Invalid)
18181 return SDValue();
18182
18183 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
18184
18185 unsigned Opc;
18186 if (SatVT == DstVT)
18187 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18188 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
18189 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18190 else
18191 return SDValue();
18192 // FIXME: Support other SatVTs by clamping before or after the conversion.
18193
18194 Src = Src.getOperand(0);
18195
18196 SDLoc DL(N);
18197 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
18198 DAG.getTargetConstant(FRM, DL, XLenVT));
18199
18200 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
18201 // extend.
18202 if (Opc == RISCVISD::FCVT_WU_RV64)
18203 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
18204
18205 // RISC-V FP-to-int conversions saturate to the destination register size, but
18206 // don't produce 0 for nan.
18207 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
18208 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
18209}
18210
18211// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
18212// smaller than XLenVT.
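// E.g. for i32 X: bswap reverses the byte order and bitreverse then reverses
// all 32 bits, so the net effect is that only the bits within each byte are
// reversed, which is exactly what brev8 computes.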
18213static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
18214 const RISCVSubtarget &Subtarget) {
18215 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
18216
18217 SDValue Src = N->getOperand(0);
18218 if (Src.getOpcode() != ISD::BSWAP)
18219 return SDValue();
18220
18221 EVT VT = N->getValueType(0);
18222 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
18223 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
18224 return SDValue();
18225
18226 SDLoc DL(N);
18227 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
18228}
18229
18230static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
18231 const RISCVSubtarget &Subtarget) {
18232 // Fold:
18233 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
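// A sketch of the resulting access (assuming EVL elements of EW bytes each):
//   vp.strided.load(ADDR + (EVL-1)*EW, stride = -EW, MASK)
// which reads the elements directly in reverse order.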
18234
18235 // Check if its first operand is a vp.load.
18236 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
18237 if (!VPLoad)
18238 return SDValue();
18239
18240 EVT LoadVT = VPLoad->getValueType(0);
18241 // We do not have a strided_load version for masks, and the evl of vp.reverse
18242 // and vp.load should always be the same.
18243 if (!LoadVT.getVectorElementType().isByteSized() ||
18244 N->getOperand(2) != VPLoad->getVectorLength() ||
18245 !N->getOperand(0).hasOneUse())
18246 return SDValue();
18247
18248 // Check that the mask of the outer vp.reverse is all ones.
18249 if (!isOneOrOneSplat(N->getOperand(1)))
18250 return SDValue();
18251
18252 SDValue LoadMask = VPLoad->getMask();
18253 // If Mask is all ones, then the load is unmasked and can be reversed.
18254 if (!isOneOrOneSplat(LoadMask)) {
18255 // If the mask is not all ones, we can reverse the load if the mask was also
18256 // reversed by an unmasked vp.reverse with the same EVL.
18257 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18258 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
18259 LoadMask.getOperand(2) != VPLoad->getVectorLength())
18260 return SDValue();
18261 LoadMask = LoadMask.getOperand(0);
18262 }
18263
18264 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
18265 SDLoc DL(N);
18266 MVT XLenVT = Subtarget.getXLenVT();
18267 SDValue NumElem = VPLoad->getVectorLength();
18268 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
18269
18270 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18271 DAG.getConstant(1, DL, XLenVT));
18272 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18273 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18274 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
18275 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18276
18277
18278 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
18279 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
18280 PtrInfo, VPLoad->getMemOperand()->getFlags(),
18281 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
18282
18283 SDValue Ret = DAG.getStridedLoadVP(
18284 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
18285 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
18286
18287 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
18288
18289 return Ret;
18290}
18291
18292static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG,
18293 const RISCVSubtarget &Subtarget) {
18294 // Fold:
18295 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
18296 // -1, MASK)
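// Sketch (assuming EVL elements of EW bytes each): the value is stored with
//   vp.strided.store(VAL, ADDR + (EVL-1)*EW, stride = -EW, MASK)
// so the reverse never has to be materialized.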
18297 auto *VPStore = cast<VPStoreSDNode>(N);
18298
18299 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
18300 return SDValue();
18301
18302 SDValue VPReverse = VPStore->getValue();
18303 EVT ReverseVT = VPReverse->getValueType(0);
18304
18305 // We do not have a strided_store version for masks, and the evl of vp.reverse
18306 // and vp.store should always be the same.
18307 if (!ReverseVT.getVectorElementType().isByteSized() ||
18308 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
18309 !VPReverse.hasOneUse())
18310 return SDValue();
18311
18312 SDValue StoreMask = VPStore->getMask();
18313 // If Mask is all ones, then the store is unmasked and can be reversed.
18314 if (!isOneOrOneSplat(StoreMask)) {
18315 // If the mask is not all ones, we can reverse the store if the mask was
18316 // also reversed by an unmasked vp.reverse with the same EVL.
18317 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18318 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
18319 StoreMask.getOperand(2) != VPStore->getVectorLength())
18320 return SDValue();
18321 StoreMask = StoreMask.getOperand(0);
18322 }
18323
18324 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
18325 SDLoc DL(N);
18326 MVT XLenVT = Subtarget.getXLenVT();
18327 SDValue NumElem = VPStore->getVectorLength();
18328 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
18329
18330 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18331 DAG.getConstant(1, DL, XLenVT));
18332 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18333 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18334 SDValue Base =
18335 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
18336 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18337
18338
18339 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
18340 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
18341 PtrInfo, VPStore->getMemOperand()->getFlags(),
18342 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
18343
18344 return DAG.getStridedStoreVP(
18345 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
18346 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
18347 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
18348 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
18349}
18350
18351// Peephole avgceil pattern.
18352// %1 = zext <N x i8> %a to <N x i32>
18353// %2 = zext <N x i8> %b to <N x i32>
18354// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
18355// %4 = add nuw nsw <N x i32> %3, %2
18356// %5 = lshr <N x i32> %4, splat (i32 1)
18357// %6 = trunc <N x i32> %5 to <N x i8>
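// On RVV this whole sequence can be selected as a single vaaddu (with the
// fixed-point rounding mode set to rnu), since that instruction computes
// (a + b + 1) >> 1 element-wise without the intermediate widening.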
18358static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG,
18359 const RISCVSubtarget &Subtarget) {
18360 EVT VT = N->getValueType(0);
18361
18362 // Ignore fixed vectors.
18363 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18364 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
18365 return SDValue();
18366
18367 SDValue In = N->getOperand(0);
18368 SDValue Mask = N->getOperand(1);
18369 SDValue VL = N->getOperand(2);
18370
18371 // Input should be a vp_srl with same mask and VL.
18372 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
18373 In.getOperand(3) != VL)
18374 return SDValue();
18375
18376 // Shift amount should be 1.
18377 if (!isOneOrOneSplat(In.getOperand(1)))
18378 return SDValue();
18379
18380 // Shifted value should be a vp_add with same mask and VL.
18381 SDValue LHS = In.getOperand(0);
18382 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
18383 LHS.getOperand(3) != VL)
18384 return SDValue();
18385
18386 SDValue Operands[3];
18387
18388 // Matches another VP_ADD with same VL and Mask.
18389 auto FindAdd = [&](SDValue V, SDValue Other) {
18390 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
18391 V.getOperand(3) != VL)
18392 return false;
18393
18394 Operands[0] = Other;
18395 Operands[1] = V.getOperand(1);
18396 Operands[2] = V.getOperand(0);
18397 return true;
18398 };
18399
18400 // We need to find another VP_ADD in one of the operands.
18401 SDValue LHS0 = LHS.getOperand(0);
18402 SDValue LHS1 = LHS.getOperand(1);
18403 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
18404 return SDValue();
18405
18406 // Now we have three operands of two additions. Check that one of them is a
18407 // constant vector with ones.
18408 auto I = llvm::find_if(Operands,
18409 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
18410 if (I == std::end(Operands))
18411 return SDValue();
18412 // We found a vector of ones; move it to the end of the Operands array.
18413 std::swap(*I, Operands[2]);
18414
18415 // Make sure the other 2 operands can be promoted from the result type.
18416 for (SDValue Op : drop_end(Operands)) {
18417 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
18418 Op.getOperand(2) != VL)
18419 return SDValue();
18420 // Input must be the same size or smaller than our result.
18421 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
18422 return SDValue();
18423 }
18424
18425 // Pattern is detected.
18426 // Rebuild the zero extends in case the inputs are smaller than our result.
18427 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
18428 Operands[0].getOperand(0), Mask, VL);
18429 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
18430 Operands[1].getOperand(0), Mask, VL);
18431 // Build an AVGCEILU_VL, which will be selected as a VAADDU with RNU rounding
18432 // mode.
18433 SDLoc DL(N);
18434 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
18435 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
18436}
18437
18438// Convert from one FMA opcode to another based on whether we are negating the
18439// multiply result and/or the accumulator.
18440// NOTE: Only supports RVV operations with VL.
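// For example, negating the multiply result of VFMADD_VL (a*b + c) yields
// -(a*b) + c, which is VFNMSUB_VL; additionally negating the accumulator
// yields -(a*b) - c, which is VFNMADD_VL.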
18441static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
18442 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
18443 if (NegMul) {
18444 // clang-format off
18445 switch (Opcode) {
18446 default: llvm_unreachable("Unexpected opcode");
18447 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18448 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18449 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18450 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18451 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18452 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18453 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18454 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18455 }
18456 // clang-format on
18457 }
18458
18459 // Negating the accumulator changes ADD<->SUB.
18460 if (NegAcc) {
18461 // clang-format off
18462 switch (Opcode) {
18463 default: llvm_unreachable("Unexpected opcode");
18464 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18465 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18466 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18467 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18468 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18469 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18470 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18471 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18472 }
18473 // clang-format on
18474 }
18475
18476 return Opcode;
18477}
18478
18479static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
18480 // Fold FNEG_VL into FMA opcodes.
18481 // The first operand of strict-fp is chain.
18482 bool IsStrict =
18483 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
18484 unsigned Offset = IsStrict ? 1 : 0;
18485 SDValue A = N->getOperand(0 + Offset);
18486 SDValue B = N->getOperand(1 + Offset);
18487 SDValue C = N->getOperand(2 + Offset);
18488 SDValue Mask = N->getOperand(3 + Offset);
18489 SDValue VL = N->getOperand(4 + Offset);
18490
18491 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
18492 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
18493 V.getOperand(2) == VL) {
18494 // Return the negated input.
18495 V = V.getOperand(0);
18496 return true;
18497 }
18498
18499 return false;
18500 };
18501
18502 bool NegA = invertIfNegative(A);
18503 bool NegB = invertIfNegative(B);
18504 bool NegC = invertIfNegative(C);
18505
18506 // If no operands are negated, we're done.
18507 if (!NegA && !NegB && !NegC)
18508 return SDValue();
18509
18510 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
18511 if (IsStrict)
18512 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
18513 {N->getOperand(0), A, B, C, Mask, VL});
18514 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
18515 VL);
18516}
18517
18518static SDValue performVFMADD_VLCombine(SDNode *N,
18519 TargetLowering::DAGCombinerInfo &DCI,
18520 const RISCVSubtarget &Subtarget) {
18521 SelectionDAG &DAG = DCI.DAG;
18522
18523 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
18524 return V;
18525
18526 // FIXME: Ignore strict opcodes for now.
18527 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
18528 return SDValue();
18529
18530 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18531}
18532
18533static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
18534 const RISCVSubtarget &Subtarget) {
18535 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
18536
18537 EVT VT = N->getValueType(0);
18538
18539 if (VT != Subtarget.getXLenVT())
18540 return SDValue();
18541
18542 if (!isa<ConstantSDNode>(N->getOperand(1)))
18543 return SDValue();
18544 uint64_t ShAmt = N->getConstantOperandVal(1);
18545
18546 SDValue N0 = N->getOperand(0);
18547
18548 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
18549 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
18550 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
18551 unsigned ExtSize =
18552 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
18553 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
18554 N0.getOperand(0).hasOneUse() &&
18555 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
18556 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
18557 if (LShAmt < ExtSize) {
18558 unsigned Size = VT.getSizeInBits();
18559 SDLoc ShlDL(N0.getOperand(0));
18560 SDValue Shl =
18561 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
18562 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
18563 SDLoc DL(N);
18564 return DAG.getNode(ISD::SRA, DL, VT, Shl,
18565 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
18566 }
18567 }
18568 }
18569
18570 if (ShAmt > 32 || VT != MVT::i64)
18571 return SDValue();
18572
18573 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
18574 // FIXME: Should this be a generic combine? There's a similar combine on X86.
18575 //
18576 // Also try these folds where an add or sub is in the middle.
18577 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
18578 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
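// For instance (illustrative, C = 5): (sra (shl X, 32), 27) becomes
// (shl (sext_inreg X, i32), 5), i.e. a sext.w followed by a slli, leaving a
// sext_inreg that later combines can often fold or share.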
18579 SDValue Shl;
18580 ConstantSDNode *AddC = nullptr;
18581
18582 // We might have an ADD or SUB between the SRA and SHL.
18583 bool IsAdd = N0.getOpcode() == ISD::ADD;
18584 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
18585 // Other operand needs to be a constant we can modify.
18586 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
18587 if (!AddC)
18588 return SDValue();
18589
18590 // AddC needs to have at least 32 trailing zeros.
18591 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
18592 return SDValue();
18593
18594 // All users should be a shift by constant less than or equal to 32. This
18595 // ensures we'll do this optimization for each of them to produce an
18596 // add/sub+sext_inreg they can all share.
18597 for (SDNode *U : N0->users()) {
18598 if (U->getOpcode() != ISD::SRA ||
18599 !isa<ConstantSDNode>(U->getOperand(1)) ||
18600 U->getConstantOperandVal(1) > 32)
18601 return SDValue();
18602 }
18603
18604 Shl = N0.getOperand(IsAdd ? 0 : 1);
18605 } else {
18606 // Not an ADD or SUB.
18607 Shl = N0;
18608 }
18609
18610 // Look for a shift left by 32.
18611 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
18612 Shl.getConstantOperandVal(1) != 32)
18613 return SDValue();
18614
18615 // If we didn't look through an add/sub, then the shl should have one use.
18616 // If we did look through an add/sub, the sext_inreg we create is free so
18617 // we're only creating 2 new instructions. It's enough to only remove the
18618 // original sra+add/sub.
18619 if (!AddC && !Shl.hasOneUse())
18620 return SDValue();
18621
18622 SDLoc DL(N);
18623 SDValue In = Shl.getOperand(0);
18624
18625 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
18626 // constant.
18627 if (AddC) {
18628 SDValue ShiftedAddC =
18629 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
18630 if (IsAdd)
18631 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
18632 else
18633 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
18634 }
18635
18636 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
18637 DAG.getValueType(MVT::i32));
18638 if (ShAmt == 32)
18639 return SExt;
18640
18641 return DAG.getNode(
18642 ISD::SHL, DL, MVT::i64, SExt,
18643 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
18644}
18645
18646// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
18647// the result is used as the condition of a br_cc or select_cc we can invert,
18648// inverting the setcc is free, and Z is 0/1. Caller will invert the
18649// br_cc/select_cc.
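// E.g. (illustrative, with Z known to be 0/1):
//   br_cc (and (seteq X, Y), (xor Z, 1)), 0, ne
// becomes
//   br_cc (or (setne X, Y), Z), 0, eq
// once the caller flips the branch condition.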
18650static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
18651 bool IsAnd = Cond.getOpcode() == ISD::AND;
18652 if (!IsAnd && Cond.getOpcode() != ISD::OR)
18653 return SDValue();
18654
18655 if (!Cond.hasOneUse())
18656 return SDValue();
18657
18658 SDValue Setcc = Cond.getOperand(0);
18659 SDValue Xor = Cond.getOperand(1);
18660 // Canonicalize setcc to LHS.
18661 if (Setcc.getOpcode() != ISD::SETCC)
18662 std::swap(Setcc, Xor);
18663 // LHS should be a setcc and RHS should be an xor.
18664 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
18665 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
18666 return SDValue();
18667
18668 // If the condition is an And, SimplifyDemandedBits may have changed
18669 // (xor Z, 1) to (not Z).
18670 SDValue Xor1 = Xor.getOperand(1);
18671 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
18672 return SDValue();
18673
18674 EVT VT = Cond.getValueType();
18675 SDValue Xor0 = Xor.getOperand(0);
18676
18677 // The LHS of the xor needs to be 0/1.
18678 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
18679 if (!DAG.MaskedValueIsZero(Xor0, Mask))
18680 return SDValue();
18681
18682 // We can only invert integer setccs.
18683 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
18684 if (!SetCCOpVT.isScalarInteger())
18685 return SDValue();
18686
18687 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
18688 if (ISD::isIntEqualitySetCC(CCVal)) {
18689 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
18690 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
18691 Setcc.getOperand(1), CCVal);
18692 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
18693 // Invert (setlt 0, X) by converting to (setlt X, 1).
18694 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
18695 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
18696 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
18697 // Invert (setlt X, 1) by converting to (setlt 0, X).
18698 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
18699 DAG.getConstant(0, SDLoc(Setcc), VT),
18700 Setcc.getOperand(0), CCVal);
18701 } else
18702 return SDValue();
18703
18704 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
18705 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
18706}
18707
18708// Perform common combines for BR_CC and SELECT_CC conditions.
18709static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
18710 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
18711 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
18712
18713 // Since an arithmetic right shift always preserves the sign bit,
18714 // the shift can be omitted when comparing against zero.
18715 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
18716 // setge (sra X, N), 0 -> setge X, 0
18717 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
18718 LHS.getOpcode() == ISD::SRA) {
18719 LHS = LHS.getOperand(0);
18720 return true;
18721 }
18722
18723 if (!ISD::isIntEqualitySetCC(CCVal))
18724 return false;
18725
18726 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
18727 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
18728 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
18729 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
18730 // If we're looking for eq 0 instead of ne 0, we need to invert the
18731 // condition.
18732 bool Invert = CCVal == ISD::SETEQ;
18733 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
18734 if (Invert)
18735 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18736
18737 RHS = LHS.getOperand(1);
18738 LHS = LHS.getOperand(0);
18739 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
18740
18741 CC = DAG.getCondCode(CCVal);
18742 return true;
18743 }
18744
18745 // If XOR is reused and has an immediate that will fit in XORI,
18746 // do not fold.
18747 auto isXorImmediate = [](const SDValue &Op) -> bool {
18748 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
18749 return isInt<12>(XorCnst->getSExtValue());
18750 return false;
18751 };
18752 // Fold (X(i1) ^ 1) == 0 -> X != 0
18753 auto singleBitOp = [&DAG](const SDValue &VarOp,
18754 const SDValue &ConstOp) -> bool {
18755 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
18756 const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
18757 return (XorCnst->getSExtValue() == 1) &&
18758 DAG.MaskedValueIsZero(VarOp, Mask);
18759 }
18760 return false;
18761 };
18762 auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
18763 for (const SDNode *UserNode : Op->users()) {
18764 const unsigned Opcode = UserNode->getOpcode();
18765 if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
18766 return false;
18767 }
18768 return true;
18769 };
18770 auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
18771 const SDValue &LHS, const SDValue &RHS) -> bool {
18772 return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
18773 (!isXorImmediate(LHS.getOperand(1)) ||
18774 singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
18775 onlyUsedBySelectOrBR(LHS));
18776 };
18777 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
18778 if (isFoldableXorEq(LHS, RHS)) {
18779 RHS = LHS.getOperand(1);
18780 LHS = LHS.getOperand(0);
18781 return true;
18782 }
18783 // Fold ((sext (xor X, C)), 0, eq/ne) -> ((sext X), (sext C), eq/ne)
18784 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
18785 const SDValue LHS0 = LHS.getOperand(0);
18786 if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
18787 // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y)))
18788 RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18789 LHS0.getOperand(1), LHS.getOperand(1));
18790 LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18791 LHS0.getOperand(0), LHS.getOperand(1));
18792 return true;
18793 }
18794 }
18795
18796 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
18797 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
18798 LHS.getOperand(1).getOpcode() == ISD::Constant) {
18799 SDValue LHS0 = LHS.getOperand(0);
18800 if (LHS0.getOpcode() == ISD::AND &&
18801 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
18802 uint64_t Mask = LHS0.getConstantOperandVal(1);
18803 uint64_t ShAmt = LHS.getConstantOperandVal(1);
18804 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
18805 // XAndesPerf supports branch on test bit.
18806 if (Subtarget.hasVendorXAndesPerf()) {
18807 LHS =
18808 DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
18809 DAG.getConstant(Mask, DL, LHS.getValueType()));
18810 return true;
18811 }
18812
18813 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
18814 CC = DAG.getCondCode(CCVal);
18815
18816 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
18817 LHS = LHS0.getOperand(0);
18818 if (ShAmt != 0)
18819 LHS =
18820 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
18821 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
18822 return true;
18823 }
18824 }
18825 }
18826
18827 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
18828 // This can occur when legalizing some floating point comparisons.
18829 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
18830 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
18831 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18832 CC = DAG.getCondCode(CCVal);
18833 RHS = DAG.getConstant(0, DL, LHS.getValueType());
18834 return true;
18835 }
18836
18837 if (isNullConstant(RHS)) {
18838 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
18839 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18840 CC = DAG.getCondCode(CCVal);
18841 LHS = NewCond;
18842 return true;
18843 }
18844 }
18845
18846 return false;
18847}
18848
18849// Fold
18850// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
18851// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
18852// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
18853// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
18854// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
18855// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
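// These hold because 0 is the identity (or a no-op amount) for the folded
// operation, e.g. (select C, (add Y, X), Y) == (add Y, (select C, X, 0)).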
18856static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
18857 SDValue TrueVal, SDValue FalseVal,
18858 bool Swapped) {
18859 bool Commutative = true;
18860 unsigned Opc = TrueVal.getOpcode();
18861 switch (Opc) {
18862 default:
18863 return SDValue();
18864 case ISD::SHL:
18865 case ISD::SRA:
18866 case ISD::SRL:
18867 case ISD::SUB:
18868 case ISD::ROTL:
18869 case ISD::ROTR:
18870 Commutative = false;
18871 break;
18872 case ISD::ADD:
18873 case ISD::OR:
18874 case ISD::XOR:
18875 case ISD::UMIN:
18876 case ISD::UMAX:
18877 break;
18878 }
18879
18880 if (!TrueVal.hasOneUse())
18881 return SDValue();
18882
18883 unsigned OpToFold;
18884 if (FalseVal == TrueVal.getOperand(0))
18885 OpToFold = 0;
18886 else if (Commutative && FalseVal == TrueVal.getOperand(1))
18887 OpToFold = 1;
18888 else
18889 return SDValue();
18890
18891 EVT VT = N->getValueType(0);
18892 SDLoc DL(N);
18893 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
18894 EVT OtherOpVT = OtherOp.getValueType();
18895 SDValue IdentityOperand =
18896 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
18897 if (!Commutative)
18898 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
18899 assert(IdentityOperand && "No identity operand!");
18900
18901 if (Swapped)
18902 std::swap(OtherOp, IdentityOperand);
18903 SDValue NewSel =
18904 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
18905 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
18906}
18907
18908// This tries to get rid of the `select` and `icmp` that are being used to
18909// handle targets that do not support `cttz(0)`/`ctlz(0)`.
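// E.g. (illustrative, i32 X):
//   (select (seteq X, 0), 0, (cttz X)) ==> (and (cttz X), 31)
// since ISD::CTTZ of zero is defined to return the bit width (32), and
// 32 & 31 == 0.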
18910static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
18911 SDValue Cond = N->getOperand(0);
18912
18913 // This represents either CTTZ or CTLZ instruction.
18914 SDValue CountZeroes;
18915
18916 SDValue ValOnZero;
18917
18918 if (Cond.getOpcode() != ISD::SETCC)
18919 return SDValue();
18920
18921 if (!isNullConstant(Cond->getOperand(1)))
18922 return SDValue();
18923
18924 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
18925 if (CCVal == ISD::CondCode::SETEQ) {
18926 CountZeroes = N->getOperand(2);
18927 ValOnZero = N->getOperand(1);
18928 } else if (CCVal == ISD::CondCode::SETNE) {
18929 CountZeroes = N->getOperand(1);
18930 ValOnZero = N->getOperand(2);
18931 } else {
18932 return SDValue();
18933 }
18934
18935 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
18936 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
18937 CountZeroes = CountZeroes.getOperand(0);
18938
18939 if (CountZeroes.getOpcode() != ISD::CTTZ &&
18940 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
18941 CountZeroes.getOpcode() != ISD::CTLZ &&
18942 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
18943 return SDValue();
18944
18945 if (!isNullConstant(ValOnZero))
18946 return SDValue();
18947
18948 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
18949 if (Cond->getOperand(0) != CountZeroesArgument)
18950 return SDValue();
18951
18952 unsigned BitWidth = CountZeroes.getValueSizeInBits();
18953 if (!isPowerOf2_32(BitWidth))
18954 return SDValue();
18955
18956 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
18957 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
18958 CountZeroes.getValueType(), CountZeroesArgument);
18959 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
18960 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
18961 CountZeroes.getValueType(), CountZeroesArgument);
18962 }
18963
18964 SDValue BitWidthMinusOne =
18965 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
18966
18967 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
18968 CountZeroes, BitWidthMinusOne);
18969 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
18970}
18971
18972static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
18973 const RISCVSubtarget &Subtarget) {
18974 SDValue Cond = N->getOperand(0);
18975 SDValue True = N->getOperand(1);
18976 SDValue False = N->getOperand(2);
18977 SDLoc DL(N);
18978 EVT VT = N->getValueType(0);
18979 EVT CondVT = Cond.getValueType();
18980
18981 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
18982 return SDValue();
18983
18984 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
18985 // BEXTI, where C is a power of 2.
18986 if (Subtarget.hasBEXTILike() && VT.isScalarInteger() &&
18987 (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
18988 SDValue LHS = Cond.getOperand(0);
18989 SDValue RHS = Cond.getOperand(1);
18990 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18991 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
18992 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
18993 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
18994 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
18995 return DAG.getSelect(DL, VT,
18996 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
18997 False, True);
18998 }
18999 }
19000 return SDValue();
19001}
19002
19003static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
19004 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
19005 return false;
19006
19007 SwapCC = false;
19008 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
19009 std::swap(TrueVal, FalseVal);
19010 SwapCC = true;
19011 }
19012
19013 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
19014 return false;
19015
19016 SDValue A = FalseVal.getOperand(0);
19017 SDValue B = FalseVal.getOperand(1);
19018 // Add is commutative, so check both orders
19019 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
19020 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
19021}
19022
19023/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
19024/// This allows us to match a vadd.vv fed by a masked vrsub, which reduces
19025/// register pressure compared to an add followed by a masked vsub sequence.
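/// E.g. (illustrative): (vselect M, (add a, b), (sub a, b))
///   ==> (add a, (vselect M, b, (neg b)))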
19026static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
19027 SDLoc DL(N);
19028 EVT VT = N->getValueType(0);
19029 SDValue CC = N->getOperand(0);
19030 SDValue TrueVal = N->getOperand(1);
19031 SDValue FalseVal = N->getOperand(2);
19032
19033 bool SwapCC;
19034 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
19035 return SDValue();
19036
19037 SDValue Sub = SwapCC ? TrueVal : FalseVal;
19038 SDValue A = Sub.getOperand(0);
19039 SDValue B = Sub.getOperand(1);
19040
19041 // Arrange the select such that we can match a masked
19042 // vrsub.vi to perform the conditional negate
19043 SDValue NegB = DAG.getNegative(B, DL, VT);
19044 if (!SwapCC)
19045 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19046 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19047 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19048}
19049
19050static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
19051 const RISCVSubtarget &Subtarget) {
19052 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
19053 return Folded;
19054
19055 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
19056 return V;
19057
19058 if (Subtarget.hasConditionalMoveFusion())
19059 return SDValue();
19060
19061 SDValue TrueVal = N->getOperand(1);
19062 SDValue FalseVal = N->getOperand(2);
19063 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
19064 return V;
19065 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
19066}
19067
19068/// If we have a build_vector where each lane is binop X, C, where C
19069/// is a constant (but not necessarily the same constant on all lanes),
19070/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
19071/// We assume that materializing a constant build vector will be no more
19072/// expensive than performing O(n) binops.
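/// E.g. (illustrative):
///   build_vector (add x0, 1), (add x1, 2), (add x2, 3), (add x3, 4)
///   ==> add (build_vector x0, x1, x2, x3), (build_vector 1, 2, 3, 4)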
19073static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
19074 const RISCVSubtarget &Subtarget,
19075 const RISCVTargetLowering &TLI) {
19076 SDLoc DL(N);
19077 EVT VT = N->getValueType(0);
19078
19079 assert(!VT.isScalableVector() && "unexpected build vector");
19080
19081 if (VT.getVectorNumElements() == 1)
19082 return SDValue();
19083
19084 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
19085 if (!TLI.isBinOp(Opcode))
19086 return SDValue();
19087
19088 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
19089 return SDValue();
19090
19091 // This BUILD_VECTOR involves an implicit truncation, and sinking
19092 // truncates through binops is non-trivial.
19093 if (N->op_begin()->getValueType() != VT.getVectorElementType())
19094 return SDValue();
19095
19096 SmallVector<SDValue> LHSOps;
19097 SmallVector<SDValue> RHSOps;
19098 for (SDValue Op : N->ops()) {
19099 if (Op.isUndef()) {
19100 // We can't form a divide or remainder from undef.
19101 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
19102 return SDValue();
19103
19104 LHSOps.push_back(Op);
19105 RHSOps.push_back(Op);
19106 continue;
19107 }
19108
19109 // TODO: We can handle operations which have a neutral RHS value
19110 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
19111 // of profit in a more explicit manner.
19112 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
19113 return SDValue();
19114
19115 LHSOps.push_back(Op.getOperand(0));
19116 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
19117 !isa<ConstantFPSDNode>(Op.getOperand(1)))
19118 return SDValue();
19119 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19120 // have different LHS and RHS types.
19121 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
19122 return SDValue();
19123
19124 RHSOps.push_back(Op.getOperand(1));
19125 }
19126
19127 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
19128 DAG.getBuildVector(VT, DL, RHSOps));
19129}
19130
19131static MVT getQDOTXResultType(MVT OpVT) {
19132 ElementCount OpEC = OpVT.getVectorElementCount();
19133 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
19134 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
19135}
19136
19137/// Given fixed length vectors A and B with equal element types, but possibly
19138/// different number of elements, return A + B where either A or B is zero
19139/// padded to the larger number of elements.
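/// E.g. (illustrative) with A : v2i32 and B : v8i32 this produces
///   (insert_subvector B, (add A, (extract_subvector v2i32 B, 0)), 0)
/// so only the low lanes are actually added.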
19140static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B,
19141 SelectionDAG &DAG) {
19142 // NOTE: Manually doing the extract/add/insert scheme produces
19143 // significantly better codegen than the naive pad with zeros
19144 // and add scheme.
19145 EVT AVT = A.getValueType();
19146 EVT BVT = B.getValueType();
19148 if (AVT.getVectorNumElements() > BVT.getVectorNumElements()) {
19149 std::swap(A, B);
19150 std::swap(AVT, BVT);
19151 }
19152
19153 SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
19154 SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
19155 return DAG.getInsertSubvector(DL, B, Res, 0);
19156}
19157
19158static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
19159 SelectionDAG &DAG,
19160 const RISCVSubtarget &Subtarget,
19161 const RISCVTargetLowering &TLI) {
19162 using namespace SDPatternMatch;
19163 // Note: We intentionally do not check the legality of the reduction type.
19164 // We want to handle the m4/m8 *src* types, and thus need to let illegal
19165 // intermediate types flow through here.
19166 if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
19168 return SDValue();
19169
19170 // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
19171 // form).
19172 SDValue A, B;
19173 if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
19174 SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
19175 SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
19176 if (AOpt || BOpt) {
19177 if (AOpt)
19178 A = AOpt;
19179 if (BOpt)
19180 B = BOpt;
19181 // From here, we're doing A + B with mixed types, implicitly zero
19182 // padded to the wider type. Note that we *don't* need the result
19183 // type to be the original VT, and in fact prefer narrower ones
19184 // if possible.
19185 return getZeroPaddedAdd(DL, A, B, DAG);
19186 }
19187 }
19188
19189 // zext a <--> partial_reduce_umla 0, a, 1
19190 // sext a <--> partial_reduce_smla 0, a, 1
19191 if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
19192 InVec.getOpcode() == ISD::SIGN_EXTEND) {
19193 SDValue A = InVec.getOperand(0);
19194 EVT OpVT = A.getValueType();
19195 if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
19196 return SDValue();
19197
19198 MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
19199 SDValue B = DAG.getConstant(0x1, DL, OpVT);
19200 bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
19201 unsigned Opc =
19202 IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
19203 return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
19204 }
19205
19206 // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
19207 // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
19208 // mul (sext a, zext b) -> partial_reduce_sumla 0, a, b
19209 // mul (zext a, sext b) -> partial_reduce_sumla 0, b, a (swapped)
19210 if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
19211 return SDValue();
19212
19213 if (!ISD::isExtOpcode(A.getOpcode()))
19214 return SDValue();
19215
19216 EVT OpVT = A.getOperand(0).getValueType();
19217 if (OpVT.getVectorElementType() != MVT::i8 ||
19218 OpVT != B.getOperand(0).getValueType() ||
19219 !TLI.isTypeLegal(A.getValueType()))
19220 return SDValue();
19221
19222 unsigned Opc;
19223 if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
19224 Opc = ISD::PARTIAL_REDUCE_SMLA;
19225 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19226 B.getOpcode() == ISD::ZERO_EXTEND)
19227 Opc = ISD::PARTIAL_REDUCE_UMLA;
19228 else if (A.getOpcode() == ISD::SIGN_EXTEND &&
19229 B.getOpcode() == ISD::ZERO_EXTEND)
19230 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19231 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19232 B.getOpcode() == ISD::SIGN_EXTEND) {
19233 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19234 std::swap(A, B);
19235 } else
19236 return SDValue();
19237
19238 MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
19239 return DAG.getNode(
19240 Opc, DL, ResVT,
19241 {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
19242}
19243
19244static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG,
19245 const RISCVSubtarget &Subtarget,
19246 const RISCVTargetLowering &TLI) {
19247 if (!Subtarget.hasStdExtZvqdotq())
19248 return SDValue();
19249
19250 SDLoc DL(N);
19251 EVT VT = N->getValueType(0);
19252 SDValue InVec = N->getOperand(0);
19253 if (SDValue V = foldReduceOperandViaVQDOT(InVec, DL, DAG, Subtarget, TLI))
19254 return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
19255 return SDValue();
19256}
19257
19258static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
19259 const RISCVSubtarget &Subtarget,
19260 const RISCVTargetLowering &TLI) {
19261 SDValue InVec = N->getOperand(0);
19262 SDValue InVal = N->getOperand(1);
19263 SDValue EltNo = N->getOperand(2);
19264 SDLoc DL(N);
19265
19266 EVT VT = InVec.getValueType();
19267 if (VT.isScalableVector())
19268 return SDValue();
19269
19270 if (!InVec.hasOneUse())
19271 return SDValue();
19272
19273 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
19274 // move the insert_vector_elts into the arms of the binop. Note that
19275 // the new RHS must be a constant.
19276 const unsigned InVecOpcode = InVec->getOpcode();
19277 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
19278 InVal.hasOneUse()) {
19279 SDValue InVecLHS = InVec->getOperand(0);
19280 SDValue InVecRHS = InVec->getOperand(1);
19281 SDValue InValLHS = InVal->getOperand(0);
19282 SDValue InValRHS = InVal->getOperand(1);
19283
19284 if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))
19285 return SDValue();
19286 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
19287 return SDValue();
19288 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19289 // have different LHS and RHS types.
19290 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
19291 return SDValue();
19292 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19293 InVecLHS, InValLHS, EltNo);
19294 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19295 InVecRHS, InValRHS, EltNo);
19296 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
19297 }
19298
19299 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
19300 // move the insert_vector_elt to the source operand of the concat_vector.
19301 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
19302 return SDValue();
19303
19304 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19305 if (!IndexC)
19306 return SDValue();
19307 unsigned Elt = IndexC->getZExtValue();
19308
19309 EVT ConcatVT = InVec.getOperand(0).getValueType();
19310 if (ConcatVT.getVectorElementType() != InVal.getValueType())
19311 return SDValue();
19312 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19313 unsigned NewIdx = Elt % ConcatNumElts;
19314
19315 unsigned ConcatOpIdx = Elt / ConcatNumElts;
19316 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
19317 ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);
19318
19319 SmallVector<SDValue> ConcatOps(InVec->ops());
19320 ConcatOps[ConcatOpIdx] = ConcatOp;
19321 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19322}
19323
19324// If we're concatenating a series of vector loads like
19325// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
19326// Then we can turn this into a strided load by widening the vector elements
19327// vlse32 p, stride=n
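// E.g. (illustrative): four v4i8 loads from p, p+n, p+2n, p+3n sharing a
// chain become one v4i32 strided load (vlse32) from p with stride n, which is
// then bitcast back to v16i8.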
19328static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
19329 const RISCVSubtarget &Subtarget,
19330 const RISCVTargetLowering &TLI) {
19331 SDLoc DL(N);
19332 EVT VT = N->getValueType(0);
19333
19334 // Only perform this combine on legal MVTs.
19335 if (!TLI.isTypeLegal(VT))
19336 return SDValue();
19337
19338 // TODO: Potentially extend this to scalable vectors
19339 if (VT.isScalableVector())
19340 return SDValue();
19341
19342 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
19343 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
19344 !SDValue(BaseLd, 0).hasOneUse())
19345 return SDValue();
19346
19347 EVT BaseLdVT = BaseLd->getValueType(0);
19348
19349 // Go through the loads and check that they're strided
19350 SmallVector<LoadSDNode *> Lds;
19351 Lds.push_back(BaseLd);
19352 Align Align = BaseLd->getAlign();
19353 for (SDValue Op : N->ops().drop_front()) {
19354 auto *Ld = dyn_cast<LoadSDNode>(Op);
19355 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
19356 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
19357 Ld->getValueType(0) != BaseLdVT)
19358 return SDValue();
19359
19360 Lds.push_back(Ld);
19361
19362 // The common alignment is the most restrictive (smallest) of all the loads
19363 Align = std::min(Align, Ld->getAlign());
19364 }
19365
19366 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
19367 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
19368 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
19369 // If the load ptrs can be decomposed into a common (Base + Index) with a
19370 // common constant stride, then return the constant stride.
19371 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
19372 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
19373 if (BIO1.equalBaseIndex(BIO2, DAG))
19374 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
19375
19376 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
19377 SDValue P1 = Ld1->getBasePtr();
19378 SDValue P2 = Ld2->getBasePtr();
19379 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
19380 return {{P2.getOperand(1), false}};
19381 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
19382 return {{P1.getOperand(1), true}};
19383
19384 return std::nullopt;
19385 };
19386
19387 // Get the distance between the first and second loads
19388 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
19389 if (!BaseDiff)
19390 return SDValue();
19391
19392 // Check all the loads are the same distance apart
19393 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
19394 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
19395 return SDValue();
19396
19397 // TODO: At this point, we've successfully matched a generalized gather
19398 // load. Maybe we should emit that, and then move the specialized
19399 // matchers above and below into a DAG combine?
19400
19401 // Get the widened scalar type, e.g. v4i8 -> i64
19402 unsigned WideScalarBitWidth =
19403 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
19404 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
19405
19406 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
19407 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
19408 if (!TLI.isTypeLegal(WideVecVT))
19409 return SDValue();
19410
19411 // Check that the operation is legal
19412 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
19413 return SDValue();
19414
19415 auto [StrideVariant, MustNegateStride] = *BaseDiff;
19416 SDValue Stride =
19417 std::holds_alternative<SDValue>(StrideVariant)
19418 ? std::get<SDValue>(StrideVariant)
19419 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
19420 Lds[0]->getOffset().getValueType());
19421 if (MustNegateStride)
19422 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
19423
19424 SDValue AllOneMask =
19425 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
19426 DAG.getConstant(1, DL, MVT::i1));
19427
19428 uint64_t MemSize;
19429 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
19430 ConstStride && ConstStride->getSExtValue() >= 0)
19431 // total size = (elsize * n) + (stride - elsize) * (n-1)
19432 // = elsize + stride * (n-1)
19433 MemSize = WideScalarVT.getSizeInBits() +
19434 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
19435 else
19436 // If Stride isn't constant, then we can't know how much it will load
19437 MemSize = MemoryLocation::UnknownSize;
19438
19439 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
19440 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
19441 Align);
19442
19443 SDValue StridedLoad = DAG.getStridedLoadVP(
19444 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
19445 AllOneMask,
19446 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
19447
19448 for (SDValue Ld : N->ops())
19449 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
19450
19451 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
19452}
19453
19454static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
19455 const RISCVSubtarget &Subtarget,
19456 const RISCVTargetLowering &TLI) {
19457 SDLoc DL(N);
19458 EVT VT = N->getValueType(0);
19459 const unsigned ElementSize = VT.getScalarSizeInBits();
19460 const unsigned NumElts = VT.getVectorNumElements();
19461 SDValue V1 = N->getOperand(0);
19462 SDValue V2 = N->getOperand(1);
19463 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
19464 MVT XLenVT = Subtarget.getXLenVT();
19465
19466 // Recognize a disguised select of add/sub.
19467 bool SwapCC;
19468 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
19469 matchSelectAddSub(V1, V2, SwapCC)) {
19470 SDValue Sub = SwapCC ? V1 : V2;
19471 SDValue A = Sub.getOperand(0);
19472 SDValue B = Sub.getOperand(1);
19473
19474 SmallVector<SDValue> MaskVals;
19475 for (int MaskIndex : Mask) {
19476 bool SelectMaskVal = (MaskIndex < (int)NumElts);
19477 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
19478 }
19479 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
19480 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
19481 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
19482
19483 // Arrange the select such that we can match a masked
19484 // vrsub.vi to perform the conditional negate
19485 SDValue NegB = DAG.getNegative(B, DL, VT);
19486 if (!SwapCC)
19487 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19488 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19489 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19490 }
19491
19492 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
19493 // during the combine phase before type legalization, and relies on
19494 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
19495 // for the source mask.
19496 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
19497 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
19498 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
19499 return SDValue();
19500
19501 SmallVector<int, 8> NewMask;
19502 narrowShuffleMaskElts(2, Mask, NewMask);
19503
19504 LLVMContext &C = *DAG.getContext();
19505 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
19506 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
19507 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
19508 DAG.getBitcast(NewVT, V2), NewMask);
19509 return DAG.getBitcast(VT, Res);
19510}
19511
19512static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
19513 const RISCVSubtarget &Subtarget) {
19514 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19515
19516 if (N->getValueType(0).isFixedLengthVector())
19517 return SDValue();
19518
19519 SDValue Addend = N->getOperand(0);
19520 SDValue MulOp = N->getOperand(1);
19521
19522 if (N->getOpcode() == RISCVISD::ADD_VL) {
19523 SDValue AddPassthruOp = N->getOperand(2);
19524 if (!AddPassthruOp.isUndef())
19525 return SDValue();
19526 }
19527
19528 auto IsVWMulOpc = [](unsigned Opc) {
19529 switch (Opc) {
19530 case RISCVISD::VWMUL_VL:
19531 case RISCVISD::VWMULU_VL:
19532 case RISCVISD::VWMULSU_VL:
19533 return true;
19534 default:
19535 return false;
19536 }
19537 };
19538
19539 if (!IsVWMulOpc(MulOp.getOpcode()))
19540 std::swap(Addend, MulOp);
19541
19542 if (!IsVWMulOpc(MulOp.getOpcode()))
19543 return SDValue();
19544
19545 SDValue MulPassthruOp = MulOp.getOperand(2);
19546
19547 if (!MulPassthruOp.isUndef())
19548 return SDValue();
19549
19550 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19551 const RISCVSubtarget &Subtarget) {
19552 if (N->getOpcode() == ISD::ADD) {
19553 SDLoc DL(N);
19554 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19555 Subtarget);
19556 }
19557 return std::make_pair(N->getOperand(3), N->getOperand(4));
19558 }(N, DAG, Subtarget);
19559
19560 SDValue MulMask = MulOp.getOperand(3);
19561 SDValue MulVL = MulOp.getOperand(4);
19562
19563 if (AddMask != MulMask || AddVL != MulVL)
19564 return SDValue();
19565
19566 const auto &TSInfo =
19567 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
19568 unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());
19569
19570 SDLoc DL(N);
19571 EVT VT = N->getValueType(0);
19572 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
19573 AddVL};
19574 return DAG.getNode(Opc, DL, VT, Ops);
19575}
19576
19577static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG,
19578 const RISCVSubtarget &Subtarget) {
19579
19580 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19581
19582 if (!N->getValueType(0).isVector())
19583 return SDValue();
19584
19585 SDValue Addend = N->getOperand(0);
19586 SDValue DotOp = N->getOperand(1);
19587
19588 if (N->getOpcode() == RISCVISD::ADD_VL) {
19589 SDValue AddPassthruOp = N->getOperand(2);
19590 if (!AddPassthruOp.isUndef())
19591 return SDValue();
19592 }
19593
19594 auto IsVqdotqOpc = [](unsigned Opc) {
19595 switch (Opc) {
19596 case RISCVISD::VQDOT_VL:
19597 case RISCVISD::VQDOTU_VL:
19598 case RISCVISD::VQDOTSU_VL:
19599 return true;
19600 default:
19601 return false;
19602 }
19603 };
19604
19605 if (!IsVqdotqOpc(DotOp.getOpcode()))
19606 std::swap(Addend, DotOp);
19607
19608 if (!IsVqdotqOpc(DotOp.getOpcode()))
19609 return SDValue();
19610
19611 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19612 const RISCVSubtarget &Subtarget) {
19613 if (N->getOpcode() == ISD::ADD) {
19614 SDLoc DL(N);
19615 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19616 Subtarget);
19617 }
19618 return std::make_pair(N->getOperand(3), N->getOperand(4));
19619 }(N, DAG, Subtarget);
19620
19621 SDValue MulVL = DotOp.getOperand(4);
19622 if (AddVL != MulVL)
19623 return SDValue();
19624
19625 if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
19626 AddMask.getOperand(0) != MulVL)
19627 return SDValue();
19628
19629 SDValue AccumOp = DotOp.getOperand(2);
19630 SDLoc DL(N);
19631 EVT VT = N->getValueType(0);
19632 Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
19633 DAG.getUNDEF(VT), AddMask, AddVL);
19634
19635 SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
19636 DotOp.getOperand(3), DotOp->getOperand(4)};
19637 return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
19638}
19639
19640static bool
19641legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
19642 ISD::MemIndexType &IndexType,
19643 RISCVTargetLowering::DAGCombinerInfo &DCI) {
19644 if (!DCI.isBeforeLegalize())
19645 return false;
19646
19647 SelectionDAG &DAG = DCI.DAG;
19648 const MVT XLenVT =
19649 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
19650
19651 const EVT IndexVT = Index.getValueType();
19652
19653 // RISC-V indexed loads only support the "unsigned unscaled" addressing
19654 // mode, so anything else must be manually legalized.
19655 if (!isIndexTypeSigned(IndexType))
19656 return false;
19657
19658 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
19659 // Any index legalization should first promote to XLenVT, so we don't lose
19660 // bits when scaling. This may create an illegal index type so we let
19661 // LLVM's legalization take care of the splitting.
19662 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
19663 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
19664 IndexVT.changeVectorElementType(XLenVT), Index);
19665 }
19666 IndexType = ISD::UNSIGNED_SCALED;
19667 return true;
19668}
19669
19670/// Match the index vector of a scatter or gather node as the shuffle mask
19671/// which performs the rearrangement if possible. Will only match if
19672/// all lanes are touched, and thus replacing the scatter or gather with
19673/// a unit strided access and shuffle is legal.
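/// For illustration (added annotation): a v4i32 gather with an all-ones mask
/// and constant byte offsets {4, 0, 12, 8} touches every lane exactly once,
/// so it can be rewritten as a unit-strided access followed by a shuffle with
/// mask {1, 0, 3, 2}.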
19674static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
19675 SmallVector<int> &ShuffleMask) {
19676 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19677 return false;
19678 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19679 return false;
19680
19681 const unsigned ElementSize = VT.getScalarStoreSize();
19682 const unsigned NumElems = VT.getVectorNumElements();
19683
19684 // Create the shuffle mask and check all bits active
19685 assert(ShuffleMask.empty());
19686 BitVector ActiveLanes(NumElems);
19687 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19688 // TODO: We've found an active bit of UB, and could be
19689 // more aggressive here if desired.
19690 if (Index->getOperand(i)->isUndef())
19691 return false;
19692 uint64_t C = Index->getConstantOperandVal(i);
19693 if (C % ElementSize != 0)
19694 return false;
19695 C = C / ElementSize;
19696 if (C >= NumElems)
19697 return false;
19698 ShuffleMask.push_back(C);
19699 ActiveLanes.set(C);
19700 }
19701 return ActiveLanes.all();
19702}
19703
19704/// Match the index of a gather or scatter operation as an operation
19705/// with twice the element width and half the number of elements. This is
19706/// generally profitable (if legal) because these operations are linear
19707/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
19708/// come out ahead.
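/// For illustration (added annotation): a v4i32 gather with an all-ones mask
/// and byte offsets {0, 4, 16, 20} pairs adjacent elements, so it can be
/// treated as a v2i64 gather with offsets {0, 16}, assuming ELEN >= 64 and
/// sufficient base alignment (or unaligned vector memory support).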
19709static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
19710 Align BaseAlign, const RISCVSubtarget &ST) {
19711 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19712 return false;
19713 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19714 return false;
19715
19716 // Attempt a doubling. If we can use an element type 4x or 8x in
19717 // size, this will happen via multiple iterations of the transform.
19718 const unsigned NumElems = VT.getVectorNumElements();
19719 if (NumElems % 2 != 0)
19720 return false;
19721
19722 const unsigned ElementSize = VT.getScalarStoreSize();
19723 const unsigned WiderElementSize = ElementSize * 2;
19724 if (WiderElementSize > ST.getELen()/8)
19725 return false;
19726
19727 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
19728 return false;
19729
19730 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19731 // TODO: We've found an active bit of UB, and could be
19732 // more aggressive here if desired.
19733 if (Index->getOperand(i)->isUndef())
19734 return false;
19735 // TODO: This offset check is too strict if we support fully
19736 // misaligned memory operations.
19737 uint64_t C = Index->getConstantOperandVal(i);
19738 if (i % 2 == 0) {
19739 if (C % WiderElementSize != 0)
19740 return false;
19741 continue;
19742 }
19743 uint64_t Last = Index->getConstantOperandVal(i-1);
19744 if (C != Last + ElementSize)
19745 return false;
19746 }
19747 return true;
19748}
19749
19750// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
19751// This benefits cases where X and Y are both low-precision vectors of the
19752// same value type. Since the truncate would be lowered into n levels of
19753// TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction,
19754// such a pattern would otherwise be expanded into a series of "vsetvli"
19755// and "vnsrl" instructions later on.
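// Illustrative example (added annotation): with X and Y of type nxv4i8, a
// two-level TRUNCATE_VECTOR_VL chain from i32 back down to i8 applied to
// (sra (sext X to i32), (zext Y to i32)) becomes (sra X, (smin Y, 7)), since
// any shift amount of 8 or more would only shift in copies of the sign bit.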
19756static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
19757 SDValue Mask = N->getOperand(1);
19758 SDValue VL = N->getOperand(2);
19759
19760 bool IsVLMAX = isAllOnesConstant(VL) ||
19761 (isa<RegisterSDNode>(VL) &&
19762 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
19763 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
19764 Mask.getOperand(0) != VL)
19765 return SDValue();
19766
19767 auto IsTruncNode = [&](SDValue V) {
19768 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19769 V.getOperand(1) == Mask && V.getOperand(2) == VL;
19770 };
19771
19772 SDValue Op = N->getOperand(0);
19773
19774 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
19775 // to distinguish such pattern.
19776 while (IsTruncNode(Op)) {
19777 if (!Op.hasOneUse())
19778 return SDValue();
19779 Op = Op.getOperand(0);
19780 }
19781
19782 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
19783 return SDValue();
19784
19785 SDValue N0 = Op.getOperand(0);
19786 SDValue N1 = Op.getOperand(1);
19787 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
19788 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
19789 return SDValue();
19790
19791 SDValue N00 = N0.getOperand(0);
19792 SDValue N10 = N1.getOperand(0);
19793 if (!N00.getValueType().isVector() ||
19794 N00.getValueType() != N10.getValueType() ||
19795 N->getValueType(0) != N10.getValueType())
19796 return SDValue();
19797
19798 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
19799 SDValue SMin =
19800 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
19801 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
19802 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
19803}
19804
19805// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
19806// maximum value for the truncated type.
19807// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
19808// is the signed maximum value for the truncated type and C2 is the signed
19809// minimum value.
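// Illustrative example (added annotation): when truncating nxv4i16 to nxv4i8,
//   (trunc (umin X, 255))              -> TRUNCATE_VECTOR_VL_USAT (vnclipu)
//   (trunc (smin (smax X, -128), 127)) -> TRUNCATE_VECTOR_VL_SSAT (vnclip)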
19810static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
19811 const RISCVSubtarget &Subtarget) {
19812 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
19813
19814 MVT VT = N->getSimpleValueType(0);
19815
19816 SDValue Mask = N->getOperand(1);
19817 SDValue VL = N->getOperand(2);
19818
19819 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
19820 APInt &SplatVal) {
19821 if (V.getOpcode() != Opc &&
19822 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
19823 V.getOperand(3) == Mask && V.getOperand(4) == VL))
19824 return SDValue();
19825
19826 SDValue Op = V.getOperand(1);
19827
19828 // Peek through conversion between fixed and scalable vectors.
19829 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
19830 isNullConstant(Op.getOperand(2)) &&
19831 Op.getOperand(1).getValueType().isFixedLengthVector() &&
19832 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19833 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
19834 isNullConstant(Op.getOperand(1).getOperand(1)))
19835 Op = Op.getOperand(1).getOperand(0);
19836
19837 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
19838 return V.getOperand(0);
19839
19840 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
19841 Op.getOperand(2) == VL) {
19842 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
19843 SplatVal =
19844 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
19845 return V.getOperand(0);
19846 }
19847 }
19848
19849 return SDValue();
19850 };
19851
19852 SDLoc DL(N);
19853
19854 auto DetectUSatPattern = [&](SDValue V) {
19855 APInt LoC, HiC;
19856
19857 // Simple case, V is a UMIN.
19858 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
19859 if (HiC.isMask(VT.getScalarSizeInBits()))
19860 return UMinOp;
19861
19862 // If we have an SMAX that removes negative numbers first, then we can match
19863 // SMIN instead of UMIN.
19864 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19865 if (SDValue SMaxOp =
19866 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19867 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
19868 return SMinOp;
19869
19870 // If we have an SMIN before an SMAX and the SMAX constant is less than or
19871 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
19872 // first.
19873 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19874 if (SDValue SMinOp =
19875 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19876 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
19877 HiC.uge(LoC))
19878 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
19879 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
19880 Mask, VL);
19881
19882 return SDValue();
19883 };
19884
19885 auto DetectSSatPattern = [&](SDValue V) {
19886 unsigned NumDstBits = VT.getScalarSizeInBits();
19887 unsigned NumSrcBits = V.getScalarValueSizeInBits();
19888 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
19889 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
19890
19891 APInt HiC, LoC;
19892 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19893 if (SDValue SMaxOp =
19894 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19895 if (HiC == SignedMax && LoC == SignedMin)
19896 return SMaxOp;
19897
19898 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19899 if (SDValue SMinOp =
19900 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19901 if (HiC == SignedMax && LoC == SignedMin)
19902 return SMinOp;
19903
19904 return SDValue();
19905 };
19906
19907 SDValue Src = N->getOperand(0);
19908
19909 // Look through multiple layers of truncates.
19910 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19911 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
19912 Src.hasOneUse())
19913 Src = Src.getOperand(0);
19914
19915 SDValue Val;
19916 unsigned ClipOpc;
19917 if ((Val = DetectUSatPattern(Src)))
19918 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
19919 else if ((Val = DetectSSatPattern(Src)))
19920 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
19921 else
19922 return SDValue();
19923
19924 MVT ValVT = Val.getSimpleValueType();
19925
19926 do {
19927 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
19928 ValVT = ValVT.changeVectorElementType(ValEltVT);
19929 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
19930 } while (ValVT != VT);
19931
19932 return Val;
19933}
19934
19935// Convert
19936// (iX ctpop (bitcast (vXi1 A)))
19937// ->
19938// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19939// and
19940// (iN reduce.add (zext (vXi1 A to vXiN))
19941// ->
19942// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19943// FIXME: It's complicated to match all the variations of this after type
19944// legalization so we only handle the pre-type legalization pattern, but that
19945// requires the fixed vector type to be legal.
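// Illustrative example (added annotation): for a legal mask vector such as
// v8i1 A,
//   (i32 ctpop (i8 bitcast A))            -> (zext (vcpop.m A'))
//   (i32 vecreduce.add (zext A to v8i32)) -> (zext (vcpop.m A'))
// where A' is A placed into a scalable mask container.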
19946static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
19947 const RISCVSubtarget &Subtarget) {
19948 unsigned Opc = N->getOpcode();
19949 assert((Opc == ISD::CTPOP || Opc == ISD::VECREDUCE_ADD) &&
19950 "Unexpected opcode");
19951 EVT VT = N->getValueType(0);
19952 if (!VT.isScalarInteger())
19953 return SDValue();
19954
19955 SDValue Src = N->getOperand(0);
19956
19957 if (Opc == ISD::CTPOP) {
19958 // Peek through zero_extend. It doesn't change the count.
19959 if (Src.getOpcode() == ISD::ZERO_EXTEND)
19960 Src = Src.getOperand(0);
19961
19962 if (Src.getOpcode() != ISD::BITCAST)
19963 return SDValue();
19964 Src = Src.getOperand(0);
19965 } else if (Opc == ISD::VECREDUCE_ADD) {
19966 if (Src.getOpcode() != ISD::ZERO_EXTEND)
19967 return SDValue();
19968 Src = Src.getOperand(0);
19969 }
19970
19971 EVT SrcEVT = Src.getValueType();
19972 if (!SrcEVT.isSimple())
19973 return SDValue();
19974
19975 MVT SrcMVT = SrcEVT.getSimpleVT();
19976 // Make sure the input is an i1 vector.
19977 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
19978 return SDValue();
19979
19980 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19981 if (!TLI.isTypeLegal(SrcMVT))
19982 return SDValue();
19983
19984 // Check that destination type is large enough to hold result without
19985 // overflow.
19986 if (Opc == ISD::VECREDUCE_ADD) {
19987 unsigned EltSize = SrcMVT.getScalarSizeInBits();
19988 unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
19989 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
19990 unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
19991 ? SrcMVT.getVectorNumElements()
19992 : RISCVTargetLowering::computeVLMAX(
19993 VectorBitsMax, EltSize, MinSize);
19994 if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
19995 return SDValue();
19996 }
19997
19998 MVT ContainerVT = SrcMVT;
19999 if (SrcMVT.isFixedLengthVector()) {
20000 ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
20001 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
20002 }
20003
20004 SDLoc DL(N);
20005 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
20006
20007 MVT XLenVT = Subtarget.getXLenVT();
20008 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
20009 return DAG.getZExtOrTrunc(Pop, DL, VT);
20010}
20011
20012static SDValue performSHLCombine(SDNode *N,
20013 TargetLowering::DAGCombinerInfo &DCI,
20014 const RISCVSubtarget &Subtarget) {
20015 // (shl (zext x), y) -> (vwsll x, y)
20016 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20017 return V;
20018
20019 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
20020 // (shl (zext x), C) -> (vwmulu x, 1u << C)
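  // For example (illustrative): (shl (zext nxv4i8 X to nxv4i16), splat 3)
  // becomes (vwmulu_vl X, splat 8). A shift amount of 1 is intentionally left
  // alone so it can become vwadd/vwaddu instead.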
20021
20022 if (!DCI.isAfterLegalizeDAG())
20023 return SDValue();
20024
20025 SDValue LHS = N->getOperand(0);
20026 if (!LHS.hasOneUse())
20027 return SDValue();
20028 unsigned Opcode;
20029 switch (LHS.getOpcode()) {
20030 case ISD::SIGN_EXTEND:
20031 case RISCVISD::VSEXT_VL:
20032 Opcode = RISCVISD::VWMULSU_VL;
20033 break;
20034 case ISD::ZERO_EXTEND:
20035 case RISCVISD::VZEXT_VL:
20036 Opcode = RISCVISD::VWMULU_VL;
20037 break;
20038 default:
20039 return SDValue();
20040 }
20041
20042 SDValue RHS = N->getOperand(1);
20043 APInt ShAmt;
20044 uint64_t ShAmtInt;
20045 if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
20046 ShAmtInt = ShAmt.getZExtValue();
20047 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
20048 RHS.getOperand(1).getOpcode() == ISD::Constant)
20049 ShAmtInt = RHS.getConstantOperandVal(1);
20050 else
20051 return SDValue();
20052
20053 // Better foldings:
20054 // (shl (sext x), 1) -> (vwadd x, x)
20055 // (shl (zext x), 1) -> (vwaddu x, x)
20056 if (ShAmtInt <= 1)
20057 return SDValue();
20058
20059 SDValue NarrowOp = LHS.getOperand(0);
20060 MVT NarrowVT = NarrowOp.getSimpleValueType();
20061 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
20062 if (ShAmtInt >= NarrowBits)
20063 return SDValue();
20064 MVT VT = N->getSimpleValueType(0);
20065 if (NarrowBits * 2 != VT.getScalarSizeInBits())
20066 return SDValue();
20067
20068 SelectionDAG &DAG = DCI.DAG;
20069 SDLoc DL(N);
20070 SDValue Passthru, Mask, VL;
20071 switch (N->getOpcode()) {
20072 case ISD::SHL:
20073 Passthru = DAG.getUNDEF(VT);
20074 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
20075 break;
20076 case RISCVISD::SHL_VL:
20077 Passthru = N->getOperand(2);
20078 Mask = N->getOperand(3);
20079 VL = N->getOperand(4);
20080 break;
20081 default:
20082 llvm_unreachable("Expected SHL");
20083 }
20084 return DAG.getNode(Opcode, DL, VT, NarrowOp,
20085 DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
20086 Passthru, Mask, VL);
20087}
20088
20089SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
20090 DAGCombinerInfo &DCI) const {
20091 SelectionDAG &DAG = DCI.DAG;
20092 const MVT XLenVT = Subtarget.getXLenVT();
20093 SDLoc DL(N);
20094
20095 // Helper to call SimplifyDemandedBits on an operand of N where only some low
20096 // bits are demanded. N will be added to the Worklist if it was not deleted.
20097 // Caller should return SDValue(N, 0) if this returns true.
20098 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
20099 SDValue Op = N->getOperand(OpNo);
20100 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
20101 if (!SimplifyDemandedBits(Op, Mask, DCI))
20102 return false;
20103
20104 if (N->getOpcode() != ISD::DELETED_NODE)
20105 DCI.AddToWorklist(N);
20106 return true;
20107 };
20108
20109 switch (N->getOpcode()) {
20110 default:
20111 break;
20112 case RISCVISD::SplitF64: {
20113 SDValue Op0 = N->getOperand(0);
20114 // If the input to SplitF64 is just BuildPairF64 then the operation is
20115 // redundant. Instead, use BuildPairF64's operands directly.
20116 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
20117 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
20118
20119 if (Op0->isUndef()) {
20120 SDValue Lo = DAG.getUNDEF(MVT::i32);
20121 SDValue Hi = DAG.getUNDEF(MVT::i32);
20122 return DCI.CombineTo(N, Lo, Hi);
20123 }
20124
20125 // It's cheaper to materialise two 32-bit integers than to load a double
20126 // from the constant pool and transfer it to integer registers through the
20127 // stack.
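    // For example (illustrative): the f64 constant 1.0 has the bit pattern
    // 0x3FF0000000000000, so it is rebuilt as Lo = 0x00000000 and
    // Hi = 0x3FF00000, each a cheap 32-bit materialization.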
20128 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
20129 APInt V = C->getValueAPF().bitcastToAPInt();
20130 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
20131 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
20132 return DCI.CombineTo(N, Lo, Hi);
20133 }
20134
20135 // This is a target-specific version of a DAGCombine performed in
20136 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20137 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20138 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20139 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20140 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
20141 break;
20142 SDValue NewSplitF64 =
20143 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
20144 Op0.getOperand(0));
20145 SDValue Lo = NewSplitF64.getValue(0);
20146 SDValue Hi = NewSplitF64.getValue(1);
20147 APInt SignBit = APInt::getSignMask(32);
20148 if (Op0.getOpcode() == ISD::FNEG) {
20149 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
20150 DAG.getConstant(SignBit, DL, MVT::i32));
20151 return DCI.CombineTo(N, Lo, NewHi);
20152 }
20153 assert(Op0.getOpcode() == ISD::FABS);
20154 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
20155 DAG.getConstant(~SignBit, DL, MVT::i32));
20156 return DCI.CombineTo(N, Lo, NewHi);
20157 }
20158 case RISCVISD::SLLW:
20159 case RISCVISD::SRAW:
20160 case RISCVISD::SRLW:
20161 case RISCVISD::RORW:
20162 case RISCVISD::ROLW: {
20163 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
20164 if (SimplifyDemandedLowBitsHelper(0, 32) ||
20165 SimplifyDemandedLowBitsHelper(1, 5))
20166 return SDValue(N, 0);
20167
20168 break;
20169 }
20170 case RISCVISD::CLZW:
20171 case RISCVISD::CTZW: {
20172 // Only the lower 32 bits of the first operand are read
20173 if (SimplifyDemandedLowBitsHelper(0, 32))
20174 return SDValue(N, 0);
20175 break;
20176 }
20177 case RISCVISD::FMV_W_X_RV64: {
20178 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
20179 // conversion is unnecessary and can be replaced with the
20180 // FMV_X_ANYEXTW_RV64 operand.
20181 SDValue Op0 = N->getOperand(0);
20182 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
20183 return Op0.getOperand(0);
20184 break;
20185 }
20186 case RISCVISD::FMV_X_ANYEXTH:
20187 case RISCVISD::FMV_X_ANYEXTW_RV64: {
20188 SDLoc DL(N);
20189 SDValue Op0 = N->getOperand(0);
20190 MVT VT = N->getSimpleValueType(0);
20191
20192 // Constant fold.
20193 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
20194 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
20195 return DAG.getConstant(Val, DL, VT);
20196 }
20197
20198 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
20199 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
20200 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
20201 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
20202 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
20203 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
20204 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
20205 assert(Op0.getOperand(0).getValueType() == VT &&
20206 "Unexpected value type!");
20207 return Op0.getOperand(0);
20208 }
20209
20210 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
20211 cast<LoadSDNode>(Op0)->isSimple()) {
20212 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
20213 auto *LN0 = cast<LoadSDNode>(Op0);
20214 SDValue Load =
20215 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
20216 LN0->getBasePtr(), IVT, LN0->getMemOperand());
20217 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
20218 return Load;
20219 }
20220
20221 // This is a target-specific version of a DAGCombine performed in
20222 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20223 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20224 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20225 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20226 !Op0.getNode()->hasOneUse())
20227 break;
20228 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
20229 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
20230 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
20231 if (Op0.getOpcode() == ISD::FNEG)
20232 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
20233 DAG.getConstant(SignBit, DL, VT));
20234
20235 assert(Op0.getOpcode() == ISD::FABS);
20236 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
20237 DAG.getConstant(~SignBit, DL, VT));
20238 }
20239 case ISD::ABS: {
20240 EVT VT = N->getValueType(0);
20241 SDValue N0 = N->getOperand(0);
20242 // abs (sext) -> zext (abs)
20243 // abs (zext) -> zext (handled elsewhere)
20244 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
20245 SDValue Src = N0.getOperand(0);
20246 SDLoc DL(N);
20247 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
20248 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
20249 }
20250 break;
20251 }
20252 case ISD::ADD: {
20253 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20254 return V;
20255 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
20256 return V;
20257 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20258 return V;
20259 return performADDCombine(N, DCI, Subtarget);
20260 }
20261 case ISD::SUB: {
20262 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20263 return V;
20264 return performSUBCombine(N, DAG, Subtarget);
20265 }
20266 case ISD::AND:
20267 return performANDCombine(N, DCI, Subtarget);
20268 case ISD::OR: {
20269 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20270 return V;
20271 return performORCombine(N, DCI, Subtarget);
20272 }
20273 case ISD::XOR:
20274 return performXORCombine(N, DAG, Subtarget);
20275 case ISD::MUL:
20276 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20277 return V;
20278 return performMULCombine(N, DAG, DCI, Subtarget);
20279 case ISD::SDIV:
20280 case ISD::UDIV:
20281 case ISD::SREM:
20282 case ISD::UREM:
20283 if (SDValue V = combineBinOpOfZExt(N, DAG))
20284 return V;
20285 break;
20286 case ISD::FMUL: {
20287 using namespace SDPatternMatch;
20288 SDLoc DL(N);
20289 EVT VT = N->getValueType(0);
20290 SDValue X, Y;
20291 // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
20292 // hoistFNegAboveFMulFDiv.
20293 // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
20294 if (sd_match(N, m_FMul(m_Value(X), m_OneUse(m_FNeg(m_Value(Y))))))
20295 return DAG.getNode(ISD::FNEG, DL, VT,
20296 DAG.getNode(ISD::FMUL, DL, VT, X, Y));
20297
20298 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
20299 SDValue N0 = N->getOperand(0);
20300 SDValue N1 = N->getOperand(1);
20301 if (N0->getOpcode() != ISD::FCOPYSIGN)
20302 std::swap(N0, N1);
20303 if (N0->getOpcode() != ISD::FCOPYSIGN)
20304 return SDValue();
20305 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
20306 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
20307 return SDValue();
20308 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
20309 return SDValue();
20310 SDValue Sign = N0->getOperand(1);
20311 if (Sign.getValueType() != VT)
20312 return SDValue();
20313 return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
20314 }
20315 case ISD::FADD:
20316 case ISD::UMAX:
20317 case ISD::UMIN:
20318 case ISD::SMAX:
20319 case ISD::SMIN:
20320 case ISD::FMAXNUM:
20321 case ISD::FMINNUM: {
20322 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
20323 return V;
20324 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
20325 return V;
20326 return SDValue();
20327 }
20328 case ISD::SETCC:
20329 return performSETCCCombine(N, DCI, Subtarget);
20330 case ISD::SIGN_EXTEND_INREG:
20331 return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
20332 case ISD::ZERO_EXTEND:
20333 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
20334 // type legalization. This is safe because fp_to_uint produces poison if
20335 // it overflows.
20336 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
20337 SDValue Src = N->getOperand(0);
20338 if (Src.getOpcode() == ISD::FP_TO_UINT &&
20339 isTypeLegal(Src.getOperand(0).getValueType()))
20340 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
20341 Src.getOperand(0));
20342 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
20343 isTypeLegal(Src.getOperand(1).getValueType())) {
20344 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
20345 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
20346 Src.getOperand(0), Src.getOperand(1));
20347 DCI.CombineTo(N, Res);
20348 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
20349 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
20350 return SDValue(N, 0); // Return N so it doesn't get rechecked.
20351 }
20352 }
20353 return SDValue();
20354 case RISCVISD::TRUNCATE_VECTOR_VL:
20355 if (SDValue V = combineTruncOfSraSext(N, DAG))
20356 return V;
20357 return combineTruncToVnclip(N, DAG, Subtarget);
20358 case ISD::VP_TRUNCATE:
20359 return performVP_TRUNCATECombine(N, DAG, Subtarget);
20360 case ISD::TRUNCATE:
20361 return performTRUNCATECombine(N, DAG, Subtarget);
20362 case ISD::SELECT:
20363 return performSELECTCombine(N, DAG, Subtarget);
20364 case ISD::VSELECT:
20365 return performVSELECTCombine(N, DAG);
20366 case RISCVISD::CZERO_EQZ:
20367 case RISCVISD::CZERO_NEZ: {
20368 SDValue Val = N->getOperand(0);
20369 SDValue Cond = N->getOperand(1);
20370
20371 unsigned Opc = N->getOpcode();
20372
20373 // czero_eqz x, x -> x
20374 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
20375 return Val;
20376
20377 unsigned InvOpc =
20378 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
20379
20380 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
20381 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
20382 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
20383 SDValue NewCond = Cond.getOperand(0);
20384 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
20385 if (DAG.MaskedValueIsZero(NewCond, Mask))
20386 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
20387 }
20388 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
20389 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
20390 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
20391 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
20392 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
20393 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
20394 if (ISD::isIntEqualitySetCC(CCVal))
20395 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
20396 N->getValueType(0), Val, Cond.getOperand(0));
20397 }
20398 return SDValue();
20399 }
20400 case RISCVISD::SELECT_CC: {
20401 // Transform
20402 SDValue LHS = N->getOperand(0);
20403 SDValue RHS = N->getOperand(1);
20404 SDValue CC = N->getOperand(2);
20405 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
20406 SDValue TrueV = N->getOperand(3);
20407 SDValue FalseV = N->getOperand(4);
20408 SDLoc DL(N);
20409 EVT VT = N->getValueType(0);
20410
20411 // If the True and False values are the same, we don't need a select_cc.
20412 if (TrueV == FalseV)
20413 return TrueV;
20414
20415 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
20416 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
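    // For example (illustrative, RV64): (select_cc x, 0, setlt, 5, 2) becomes
    // ((x >> 63) & (5 - 2)) + 2, since the arithmetic shift yields all-ones
    // when x is negative and zero otherwise.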
20417 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
20418 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
20419 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
20420 if (CCVal == ISD::CondCode::SETGE)
20421 std::swap(TrueV, FalseV);
20422
20423 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
20424 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
20425 // Only handle simm12; if the constant is not in that range, it can be
20426 // treated as a register operand.
20427 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
20428 isInt<12>(TrueSImm - FalseSImm)) {
20429 SDValue SRA =
20430 DAG.getNode(ISD::SRA, DL, VT, LHS,
20431 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
20432 SDValue AND =
20433 DAG.getNode(ISD::AND, DL, VT, SRA,
20434 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
20435 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
20436 }
20437
20438 if (CCVal == ISD::CondCode::SETGE)
20439 std::swap(TrueV, FalseV);
20440 }
20441
20442 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20443 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
20444 {LHS, RHS, CC, TrueV, FalseV});
20445
20446 if (!Subtarget.hasConditionalMoveFusion()) {
20447 // (select c, -1, y) -> -c | y
20448 if (isAllOnesConstant(TrueV)) {
20449 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20450 SDValue Neg = DAG.getNegative(C, DL, VT);
20451 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
20452 }
20453 // (select c, y, -1) -> -!c | y
20454 if (isAllOnesConstant(FalseV)) {
20455 SDValue C =
20456 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20457 SDValue Neg = DAG.getNegative(C, DL, VT);
20458 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
20459 }
20460
20461 // (select c, 0, y) -> -!c & y
20462 if (isNullConstant(TrueV)) {
20463 SDValue C =
20464 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20465 SDValue Neg = DAG.getNegative(C, DL, VT);
20466 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
20467 }
20468 // (select c, y, 0) -> -c & y
20469 if (isNullConstant(FalseV)) {
20470 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20471 SDValue Neg = DAG.getNegative(C, DL, VT);
20472 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
20473 }
20474 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
20475 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
20476 if (((isOneConstant(FalseV) && LHS == TrueV &&
20477 CCVal == ISD::CondCode::SETNE) ||
20478 (isOneConstant(TrueV) && LHS == FalseV &&
20479 CCVal == ISD::CondCode::SETEQ)) &&
20480 isNullConstant(RHS)) {
20481 // freeze it to be safe.
20482 LHS = DAG.getFreeze(LHS);
20483 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
20484 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
20485 }
20486 }
20487
20488 // If both true/false are an xor with 1, pull through the select.
20489 // This can occur after op legalization if both operands are setccs that
20490 // require an xor to invert.
20491 // FIXME: Generalize to other binary ops with identical operand?
20492 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
20493 TrueV.getOperand(1) == FalseV.getOperand(1) &&
20494 isOneConstant(TrueV.getOperand(1)) &&
20495 TrueV.hasOneUse() && FalseV.hasOneUse()) {
20496 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
20497 TrueV.getOperand(0), FalseV.getOperand(0));
20498 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
20499 }
20500
20501 return SDValue();
20502 }
20503 case RISCVISD::BR_CC: {
20504 SDValue LHS = N->getOperand(1);
20505 SDValue RHS = N->getOperand(2);
20506 SDValue CC = N->getOperand(3);
20507 SDLoc DL(N);
20508
20509 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20510 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
20511 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
20512
20513 return SDValue();
20514 }
20515 case ISD::BITREVERSE:
20516 return performBITREVERSECombine(N, DAG, Subtarget);
20517 case ISD::FP_TO_SINT:
20518 case ISD::FP_TO_UINT:
20519 return performFP_TO_INTCombine(N, DCI, Subtarget);
20520 case ISD::FP_TO_SINT_SAT:
20521 case ISD::FP_TO_UINT_SAT:
20522 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
20523 case ISD::FCOPYSIGN: {
20524 EVT VT = N->getValueType(0);
20525 if (!VT.isVector())
20526 break;
20527 // There is a form of VFSGNJ which injects the negated sign of its second
20528 // operand. Try and bubble any FNEG up after the extend/round to produce
20529 // this optimized pattern. Avoid modifying cases where FP_ROUND has
20530 // TRUNC=1 (a truncating round).
20531 SDValue In2 = N->getOperand(1);
20532 // Avoid cases where the extend/round has multiple uses, as duplicating
20533 // those is typically more expensive than removing a fneg.
20534 if (!In2.hasOneUse())
20535 break;
20536 if (In2.getOpcode() != ISD::FP_EXTEND &&
20537 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
20538 break;
20539 In2 = In2.getOperand(0);
20540 if (In2.getOpcode() != ISD::FNEG)
20541 break;
20542 SDLoc DL(N);
20543 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
20544 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
20545 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
20546 }
20547 case ISD::MGATHER: {
20548 const auto *MGN = cast<MaskedGatherSDNode>(N);
20549 const EVT VT = N->getValueType(0);
20550 SDValue Index = MGN->getIndex();
20551 SDValue ScaleOp = MGN->getScale();
20552 ISD::MemIndexType IndexType = MGN->getIndexType();
20553 assert(!MGN->isIndexScaled() &&
20554 "Scaled gather/scatter should not be formed");
20555
20556 SDLoc DL(N);
20557 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20558 return DAG.getMaskedGather(
20559 N->getVTList(), MGN->getMemoryVT(), DL,
20560 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20561 MGN->getBasePtr(), Index, ScaleOp},
20562 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20563
20564 if (narrowIndex(Index, IndexType, DAG))
20565 return DAG.getMaskedGather(
20566 N->getVTList(), MGN->getMemoryVT(), DL,
20567 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20568 MGN->getBasePtr(), Index, ScaleOp},
20569 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20570
20571 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
20572 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
20573 // The sequence will be XLenVT, not the type of Index. Tell
20574 // isSimpleVIDSequence this so we avoid overflow.
20575 if (std::optional<VIDSequence> SimpleVID =
20576 isSimpleVIDSequence(Index, Subtarget.getXLen());
20577 SimpleVID && SimpleVID->StepDenominator == 1) {
20578 const int64_t StepNumerator = SimpleVID->StepNumerator;
20579 const int64_t Addend = SimpleVID->Addend;
20580
20581 // Note: We don't need to check alignment here since (by assumption
20582 // from the existence of the gather), our offsets must be sufficiently
20583 // aligned.
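          // For example (illustrative): constant byte offsets {4, 12, 20, 28}
          // have Addend = 4 and StepNumerator = 8, so the gather becomes a
          // strided VP load from BasePtr + 4 with stride 8.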
20584
20585 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
20586 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
20587 assert(IndexType == ISD::UNSIGNED_SCALED);
20588 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
20589 DAG.getSignedConstant(Addend, DL, PtrVT));
20590
20591 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
20592 VT.getVectorElementCount());
20593 SDValue StridedLoad = DAG.getStridedLoadVP(
20594 VT, DL, MGN->getChain(), BasePtr,
20595 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
20596 EVL, MGN->getMemOperand());
20597 SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad,
20598 MGN->getPassThru());
20599 return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)},
20600 DL);
20601 }
20602 }
20603
20604 SmallVector<int> ShuffleMask;
20605 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20606 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
20607 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
20608 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
20609 MGN->getMask(), DAG.getUNDEF(VT),
20610 MGN->getMemoryVT(), MGN->getMemOperand(),
20611 ISD::UNINDEXED, ISD::NON_EXTLOAD);
20612 SDValue Shuffle =
20613 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
20614 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
20615 }
20616
20617 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20618 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
20619 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
20620 SmallVector<SDValue> NewIndices;
20621 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
20622 NewIndices.push_back(Index.getOperand(i));
20623 EVT IndexVT = Index.getValueType()
20624 .getHalfNumVectorElementsVT(*DAG.getContext());
20625 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
20626
20627 unsigned ElementSize = VT.getScalarStoreSize();
20628 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
20629 auto EltCnt = VT.getVectorElementCount();
20630 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
20631 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
20632 EltCnt.divideCoefficientBy(2));
20633 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
20634 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
20635 EltCnt.divideCoefficientBy(2));
20636 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
20637
20638 SDValue Gather =
20639 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
20640 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
20641 Index, ScaleOp},
20642 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
20643 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
20644 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
20645 }
20646 break;
20647 }
20648 case ISD::MSCATTER:{
20649 const auto *MSN = cast<MaskedScatterSDNode>(N);
20650 SDValue Index = MSN->getIndex();
20651 SDValue ScaleOp = MSN->getScale();
20652 ISD::MemIndexType IndexType = MSN->getIndexType();
20653 assert(!MSN->isIndexScaled() &&
20654 "Scaled gather/scatter should not be formed");
20655
20656 SDLoc DL(N);
20657 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20658 return DAG.getMaskedScatter(
20659 N->getVTList(), MSN->getMemoryVT(), DL,
20660 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20661 Index, ScaleOp},
20662 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20663
20664 if (narrowIndex(Index, IndexType, DAG))
20665 return DAG.getMaskedScatter(
20666 N->getVTList(), MSN->getMemoryVT(), DL,
20667 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20668 Index, ScaleOp},
20669 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20670
20671 EVT VT = MSN->getValue()->getValueType(0);
20672 SmallVector<int> ShuffleMask;
20673 if (!MSN->isTruncatingStore() &&
20674 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
20675 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
20676 DAG.getUNDEF(VT), ShuffleMask);
20677 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
20678 DAG.getUNDEF(XLenVT), MSN->getMask(),
20679 MSN->getMemoryVT(), MSN->getMemOperand(),
20680 ISD::UNINDEXED, false);
20681 }
20682 break;
20683 }
20684 case ISD::VP_GATHER: {
20685 const auto *VPGN = cast<VPGatherSDNode>(N);
20686 SDValue Index = VPGN->getIndex();
20687 SDValue ScaleOp = VPGN->getScale();
20688 ISD::MemIndexType IndexType = VPGN->getIndexType();
20689 assert(!VPGN->isIndexScaled() &&
20690 "Scaled gather/scatter should not be formed");
20691
20692 SDLoc DL(N);
20693 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20694 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20695 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20696 ScaleOp, VPGN->getMask(),
20697 VPGN->getVectorLength()},
20698 VPGN->getMemOperand(), IndexType);
20699
20700 if (narrowIndex(Index, IndexType, DAG))
20701 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20702 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20703 ScaleOp, VPGN->getMask(),
20704 VPGN->getVectorLength()},
20705 VPGN->getMemOperand(), IndexType);
20706
20707 break;
20708 }
20709 case ISD::VP_SCATTER: {
20710 const auto *VPSN = cast<VPScatterSDNode>(N);
20711 SDValue Index = VPSN->getIndex();
20712 SDValue ScaleOp = VPSN->getScale();
20713 ISD::MemIndexType IndexType = VPSN->getIndexType();
20714 assert(!VPSN->isIndexScaled() &&
20715 "Scaled gather/scatter should not be formed");
20716
20717 SDLoc DL(N);
20718 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20719 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20720 {VPSN->getChain(), VPSN->getValue(),
20721 VPSN->getBasePtr(), Index, ScaleOp,
20722 VPSN->getMask(), VPSN->getVectorLength()},
20723 VPSN->getMemOperand(), IndexType);
20724
20725 if (narrowIndex(Index, IndexType, DAG))
20726 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20727 {VPSN->getChain(), VPSN->getValue(),
20728 VPSN->getBasePtr(), Index, ScaleOp,
20729 VPSN->getMask(), VPSN->getVectorLength()},
20730 VPSN->getMemOperand(), IndexType);
20731 break;
20732 }
20733 case RISCVISD::SHL_VL:
20734 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20735 return V;
20736 [[fallthrough]];
20737 case RISCVISD::SRA_VL:
20738 case RISCVISD::SRL_VL: {
20739 SDValue ShAmt = N->getOperand(1);
20740 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20741 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20742 SDLoc DL(N);
20743 SDValue VL = N->getOperand(4);
20744 EVT VT = N->getValueType(0);
20745 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20746 ShAmt.getOperand(1), VL);
20747 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
20748 N->getOperand(2), N->getOperand(3), N->getOperand(4));
20749 }
20750 break;
20751 }
20752 case ISD::SRA:
20753 if (SDValue V = performSRACombine(N, DAG, Subtarget))
20754 return V;
20755 [[fallthrough]];
20756 case ISD::SRL:
20757 case ISD::SHL: {
20758 if (N->getOpcode() == ISD::SHL) {
20759 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20760 return V;
20761 }
20762 SDValue ShAmt = N->getOperand(1);
20763 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20764 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20765 SDLoc DL(N);
20766 EVT VT = N->getValueType(0);
20767 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20768 ShAmt.getOperand(1),
20769 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
20770 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
20771 }
20772 break;
20773 }
20774 case RISCVISD::ADD_VL:
20775 if (SDValue V = simplifyOp_VL(N))
20776 return V;
20777 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20778 return V;
20779 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20780 return V;
20781 return combineToVWMACC(N, DAG, Subtarget);
20782 case RISCVISD::VWADD_W_VL:
20783 case RISCVISD::VWADDU_W_VL:
20784 case RISCVISD::VWSUB_W_VL:
20785 case RISCVISD::VWSUBU_W_VL:
20786 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
20787 case RISCVISD::OR_VL:
20788 case RISCVISD::SUB_VL:
20789 case RISCVISD::MUL_VL:
20790 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20791 case RISCVISD::VFMADD_VL:
20792 case RISCVISD::VFNMADD_VL:
20793 case RISCVISD::VFMSUB_VL:
20794 case RISCVISD::VFNMSUB_VL:
20795 case RISCVISD::STRICT_VFMADD_VL:
20796 case RISCVISD::STRICT_VFNMADD_VL:
20797 case RISCVISD::STRICT_VFMSUB_VL:
20798 case RISCVISD::STRICT_VFNMSUB_VL:
20799 return performVFMADD_VLCombine(N, DCI, Subtarget);
20800 case RISCVISD::FADD_VL:
20801 case RISCVISD::FSUB_VL:
20802 case RISCVISD::FMUL_VL:
20803 case RISCVISD::VFWADD_W_VL:
20804 case RISCVISD::VFWSUB_W_VL:
20805 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20806 case ISD::LOAD:
20807 case ISD::STORE: {
20808 if (DCI.isAfterLegalizeDAG())
20809 if (SDValue V = performMemPairCombine(N, DCI))
20810 return V;
20811
20812 if (N->getOpcode() != ISD::STORE)
20813 break;
20814
20815 auto *Store = cast<StoreSDNode>(N);
20816 SDValue Chain = Store->getChain();
20817 EVT MemVT = Store->getMemoryVT();
20818 SDValue Val = Store->getValue();
20819 SDLoc DL(N);
20820
20821 bool IsScalarizable =
20822 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
20823 Store->isSimple() &&
20824 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
20825 isPowerOf2_64(MemVT.getSizeInBits()) &&
20826 MemVT.getSizeInBits() <= Subtarget.getXLen();
20827
20828 // If sufficiently aligned we can scalarize stores of constant vectors of
20829 // any power-of-two size up to XLen bits, provided that they aren't too
20830 // expensive to materialize.
20831 // vsetivli zero, 2, e8, m1, ta, ma
20832 // vmv.v.i v8, 4
20833 // vse64.v v8, (a0)
20834 // ->
20835 // li a1, 1028
20836 // sh a1, 0(a0)
20837 if (DCI.isBeforeLegalize() && IsScalarizable &&
20838 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
20839 // Get the constant vector bits
20840 APInt NewC(Val.getValueSizeInBits(), 0);
20841 uint64_t EltSize = Val.getScalarValueSizeInBits();
20842 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
20843 if (Val.getOperand(i).isUndef())
20844 continue;
20845 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
20846 i * EltSize);
20847 }
20848 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20849
20850 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
20851 true) <= 2 &&
20852 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20853 NewVT, *Store->getMemOperand())) {
20854 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
20855 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
20856 Store->getPointerInfo(), Store->getBaseAlign(),
20857 Store->getMemOperand()->getFlags());
20858 }
20859 }
20860
20861 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
20862 // vsetivli zero, 2, e16, m1, ta, ma
20863 // vle16.v v8, (a0)
20864 // vse16.v v8, (a1)
20865 if (auto *L = dyn_cast<LoadSDNode>(Val);
20866 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
20867 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
20868 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
20869 L->getMemoryVT() == MemVT) {
20870 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20871 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20872 NewVT, *Store->getMemOperand()) &&
20873 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20874 NewVT, *L->getMemOperand())) {
20875 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
20876 L->getPointerInfo(), L->getBaseAlign(),
20877 L->getMemOperand()->getFlags());
20878 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
20879 Store->getPointerInfo(), Store->getBaseAlign(),
20880 Store->getMemOperand()->getFlags());
20881 }
20882 }
20883
20884 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
20885 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
20886 // any illegal types.
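    // For example (illustrative): (store i32 (vmv.x.s nxv2i32 v)) becomes a
    // VP store of v itself with VL = 1, saving the vector-to-scalar move.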
20887 if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
20888 (DCI.isAfterLegalizeDAG() &&
20889 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20890 isNullConstant(Val.getOperand(1)))) &&
20891 Val.hasOneUse()) {
20892 SDValue Src = Val.getOperand(0);
20893 MVT VecVT = Src.getSimpleValueType();
20894 // VecVT should be scalable and memory VT should match the element type.
20895 if (!Store->isIndexed() && VecVT.isScalableVector() &&
20896 MemVT == VecVT.getVectorElementType()) {
20897 SDLoc DL(N);
20898 MVT MaskVT = getMaskTypeFor(VecVT);
20899 return DAG.getStoreVP(
20900 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
20901 DAG.getConstant(1, DL, MaskVT),
20902 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
20903 Store->getMemOperand(), Store->getAddressingMode(),
20904 Store->isTruncatingStore(), /*IsCompress*/ false);
20905 }
20906 }
20907
20908 break;
20909 }
20910 case ISD::SPLAT_VECTOR: {
20911 EVT VT = N->getValueType(0);
20912 // Only perform this combine on legal MVT types.
20913 if (!isTypeLegal(VT))
20914 break;
20915 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
20916 DAG, Subtarget))
20917 return Gather;
20918 break;
20919 }
20920 case ISD::BUILD_VECTOR:
20921 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
20922 return V;
20923 break;
20924 case ISD::CONCAT_VECTORS:
20925 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
20926 return V;
20927 break;
20928 case ISD::VECTOR_SHUFFLE:
20929 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
20930 return V;
20931 break;
20932 case ISD::INSERT_VECTOR_ELT:
20933 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
20934 return V;
20935 break;
20936 case RISCVISD::VFMV_V_F_VL: {
20937 const MVT VT = N->getSimpleValueType(0);
20938 SDValue Passthru = N->getOperand(0);
20939 SDValue Scalar = N->getOperand(1);
20940 SDValue VL = N->getOperand(2);
20941
20942 // If VL is 1, we can use vfmv.s.f.
20943 if (isOneConstant(VL))
20944 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
20945 break;
20946 }
20947 case RISCVISD::VMV_V_X_VL: {
20948 const MVT VT = N->getSimpleValueType(0);
20949 SDValue Passthru = N->getOperand(0);
20950 SDValue Scalar = N->getOperand(1);
20951 SDValue VL = N->getOperand(2);
20952
20953 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
20954 // scalar input.
20955 unsigned ScalarSize = Scalar.getValueSizeInBits();
20956 unsigned EltWidth = VT.getScalarSizeInBits();
20957 if (ScalarSize > EltWidth && Passthru.isUndef())
20958 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
20959 return SDValue(N, 0);
20960
20961 // If VL is 1 and the scalar value won't benefit from immediate, we can
20962 // use vmv.s.x.
20963 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
20964 if (isOneConstant(VL) &&
20965 (!Const || Const->isZero() ||
20966 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
20967 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
20968
20969 break;
20970 }
20971 case RISCVISD::VFMV_S_F_VL: {
20972 SDValue Src = N->getOperand(1);
20973 // Try to remove vector->scalar->vector if the scalar->vector is inserting
20974 // into an undef vector.
20975 // TODO: Could use a vslide or vmv.v.v for non-undef.
20976 if (N->getOperand(0).isUndef() &&
20977 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20978 isNullConstant(Src.getOperand(1)) &&
20979 Src.getOperand(0).getValueType().isScalableVector()) {
20980 EVT VT = N->getValueType(0);
20981 SDValue EVSrc = Src.getOperand(0);
20982 EVT EVSrcVT = EVSrc.getValueType();
20984 // Widths match, just return the original vector.
20985 if (EVSrcVT == VT)
20986 return EVSrc;
20987 SDLoc DL(N);
20988 // The source is narrower; widen it with insert_subvector.
20989 if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
20990 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
20991 EVSrc,
20992 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
20993 }
20994 // The source is wider; take the low part with extract_subvector.
20995 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
20996 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
20997 }
20998 [[fallthrough]];
20999 }
21000 case RISCVISD::VMV_S_X_VL: {
21001 const MVT VT = N->getSimpleValueType(0);
21002 SDValue Passthru = N->getOperand(0);
21003 SDValue Scalar = N->getOperand(1);
21004 SDValue VL = N->getOperand(2);
21005
21006 // The vmv.s.x instruction copies the scalar integer register to element 0
21007 // of the destination vector register. If SEW < XLEN, the least-significant
21008 // bits are copied and the upper XLEN-SEW bits are ignored.
21009 unsigned ScalarSize = Scalar.getValueSizeInBits();
21010 unsigned EltWidth = VT.getScalarSizeInBits();
21011 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
21012 return SDValue(N, 0);
21013
21014 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
21015 Scalar.getOperand(0).getValueType() == N->getValueType(0))
21016 return Scalar.getOperand(0);
21017
21018 // Use M1 or smaller to avoid over constraining register allocation
21019 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
21020 if (M1VT.bitsLT(VT)) {
21021 SDValue M1Passthru = DAG.getExtractSubvector(DL, M1VT, Passthru, 0);
21022 SDValue Result =
21023 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
21024 Result = DAG.getInsertSubvector(DL, Passthru, Result, 0);
21025 return Result;
21026 }
21027
21028 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
21029 // higher would involve overly constraining the register allocator for
21030 // no purpose.
21031 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
21032 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
21033 VT.bitsLE(RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
21034 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
21035
21036 break;
21037 }
21038 case RISCVISD::VMV_X_S: {
21039 SDValue Vec = N->getOperand(0);
21040 MVT VecVT = N->getOperand(0).getSimpleValueType();
21041 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
21042 if (M1VT.bitsLT(VecVT)) {
21043 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, 0);
21044 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
21045 }
21046 break;
21047 }
21048 case ISD::INTRINSIC_VOID:
21049 case ISD::INTRINSIC_W_CHAIN:
21050 case ISD::INTRINSIC_WO_CHAIN: {
21051 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
21052 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
21053 switch (IntNo) {
21054 // By default we do not combine any intrinsic.
21055 default:
21056 return SDValue();
21057 case Intrinsic::riscv_vcpop:
21058 case Intrinsic::riscv_vcpop_mask:
21059 case Intrinsic::riscv_vfirst:
21060 case Intrinsic::riscv_vfirst_mask: {
21061 SDValue VL = N->getOperand(2);
21062 if (IntNo == Intrinsic::riscv_vcpop_mask ||
21063 IntNo == Intrinsic::riscv_vfirst_mask)
21064 VL = N->getOperand(3);
21065 if (!isNullConstant(VL))
21066 return SDValue();
21067 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
21068 SDLoc DL(N);
21069 EVT VT = N->getValueType(0);
21070 if (IntNo == Intrinsic::riscv_vfirst ||
21071 IntNo == Intrinsic::riscv_vfirst_mask)
21072 return DAG.getAllOnesConstant(DL, VT);
21073 return DAG.getConstant(0, DL, VT);
21074 }
21075 case Intrinsic::riscv_vsseg2_mask:
21076 case Intrinsic::riscv_vsseg3_mask:
21077 case Intrinsic::riscv_vsseg4_mask:
21078 case Intrinsic::riscv_vsseg5_mask:
21079 case Intrinsic::riscv_vsseg6_mask:
21080 case Intrinsic::riscv_vsseg7_mask:
21081 case Intrinsic::riscv_vsseg8_mask: {
21082 SDValue Tuple = N->getOperand(2);
21083 unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21084
21085 if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
21086 Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
21087 !Tuple.getOperand(0).isUndef())
21088 return SDValue();
21089
21090 SDValue Val = Tuple.getOperand(1);
21091 unsigned Idx = Tuple.getConstantOperandVal(2);
21092
21093 unsigned SEW = Val.getValueType().getScalarSizeInBits();
21094 assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
21095 "Type mismatch without bitcast?");
21096 unsigned Stride = SEW / 8 * NF;
21097 unsigned Offset = SEW / 8 * Idx;
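      // For example (illustrative, assuming NF = 3 and SEW = 32): the segment
      // layout is interleaved in memory, so Stride = 12 bytes and the field at
      // Idx = 1 starts at Offset = 4; that single field can then be written
      // with a masked strided store instead of a full segment store.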
21098
21099 SDValue Ops[] = {
21100 /*Chain=*/N->getOperand(0),
21101 /*IntID=*/
21102 DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
21103 /*StoredVal=*/Val,
21104 /*Ptr=*/
21105 DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
21106 DAG.getConstant(Offset, DL, XLenVT)),
21107 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21108 /*Mask=*/N->getOperand(4),
21109 /*VL=*/N->getOperand(5)};
21110
21111 auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
21112 // Match getTgtMemIntrinsic for non-unit stride case
21113 EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
21114 MachineFunction &MF = DAG.getMachineFunction();
21115 MachineMemOperand *MMO = MF.getMachineMemOperand(
21116 OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21117
21118 SDVTList VTs = DAG.getVTList(MVT::Other);
21119 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
21120 MMO);
21121 }
21122 }
21123 }
21124 case ISD::EXPERIMENTAL_VP_REVERSE:
21125 return performVP_REVERSECombine(N, DAG, Subtarget);
21126 case ISD::VP_STORE:
21127 return performVP_STORECombine(N, DAG, Subtarget);
21128 case ISD::BITCAST: {
21129 assert(Subtarget.useRVVForFixedLengthVectors());
21130 SDValue N0 = N->getOperand(0);
21131 EVT VT = N->getValueType(0);
21132 EVT SrcVT = N0.getValueType();
21133 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
21134 unsigned NF = VT.getRISCVVectorTupleNumFields();
21135 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
21136 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
21137 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
21138
21139 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
21140
21141 SDValue Result = DAG.getUNDEF(VT);
21142 for (unsigned i = 0; i < NF; ++i)
21143 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
21144 DAG.getTargetConstant(i, DL, MVT::i32));
21145 return Result;
21146 }
21147 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
21148 // type, widen both sides to avoid a trip through memory.
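    // For example (illustrative): bitcasting v2i1 to i2 concatenates the
    // source with three undef v2i1 vectors to form v8i1, bitcasts that to i8,
    // and truncates the result back down to i2.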
21149 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
21150 VT.isScalarInteger()) {
21151 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
21152 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
21153 Ops[0] = N0;
21154 SDLoc DL(N);
21155 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
21156 N0 = DAG.getBitcast(MVT::i8, N0);
21157 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
21158 }
21159
21160 return SDValue();
21161 }
21162 case ISD::VECREDUCE_ADD:
21163 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, *this))
21164 return V;
21165 [[fallthrough]];
21166 case ISD::CTPOP:
21167 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
21168 return V;
21169 break;
21170 case RISCVISD::VRGATHER_VX_VL: {
21171 // Note this assumes that out of bounds indices produce poison
21172    // and can thus be replaced without having to prove them in bounds.
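    // Two cases are handled below: a gather from a splat is the splat
    // itself, and a gather of lane 0 from a vmv.s.x/vfmv.s.f with no
    // passthru is just a splat of that scalar. For example (illustrative
    // registers), the pair
    //   vmv.s.x     v8, a0
    //   vrgather.vx v9, v8, zero
    // is replaced by roughly
    //   vmv.v.x     v9, a0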
21173 EVT VT = N->getValueType(0);
21174 SDValue Src = N->getOperand(0);
21175 SDValue Idx = N->getOperand(1);
21176 SDValue Passthru = N->getOperand(2);
21177 SDValue VL = N->getOperand(4);
21178
21179 // Warning: Unlike most cases we strip an insert_subvector, this one
21180 // does not require the first operand to be undef.
21181 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
21182 isNullConstant(Src.getOperand(2)))
21183 Src = Src.getOperand(1);
21184
21185 switch (Src.getOpcode()) {
21186 default:
21187 break;
21188 case RISCVISD::VMV_V_X_VL:
21189 case RISCVISD::VFMV_V_F_VL:
21190 // Drop a redundant vrgather_vx.
21191 // TODO: Remove the type restriction if we find a motivating
21192 // test case?
21193 if (Passthru.isUndef() && VL == Src.getOperand(2) &&
21194 Src.getValueType() == VT)
21195 return Src;
21196 break;
21197 case RISCVISD::VMV_S_X_VL:
21198 case RISCVISD::VFMV_S_F_VL:
21199 // If this use only demands lane zero from the source vmv.s.x, and
21200 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
21201 // a vmv.v.x. Note that there can be other uses of the original
21202 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
21203 if (isNullConstant(Idx) && Passthru.isUndef() &&
21204 VL == Src.getOperand(2)) {
21205 unsigned Opc =
21206 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
21207 return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
21208 VL);
21209 }
21210 break;
21211 }
21212 break;
21213 }
21214 case RISCVISD::TUPLE_EXTRACT: {
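    // If the only use of a masked segment load (vlseg<NF>) is extracting a
    // single field, and the target has no fast segment loads, the load is
    // narrowed below to a strided load (vlse) of just that field, with
    // stride = NF * SEW/8 bytes and a base offset of Idx * SEW/8 bytes.
    // For example (illustrative registers, SEW=64, NF=2, Idx=1):
    //   addi a1, a0, 8          # base + Idx * SEW/8
    //   li   a2, 16             # stride = NF * SEW/8
    //   vlse64.v v8, (a1), a2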
21215 EVT VT = N->getValueType(0);
21216 SDValue Tuple = N->getOperand(0);
21217 unsigned Idx = N->getConstantOperandVal(1);
21218 if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
21219 break;
21220
21221 unsigned NF = 0;
21222 switch (Tuple.getConstantOperandVal(1)) {
21223 default:
21224 break;
21225 case Intrinsic::riscv_vlseg2_mask:
21226 case Intrinsic::riscv_vlseg3_mask:
21227 case Intrinsic::riscv_vlseg4_mask:
21228 case Intrinsic::riscv_vlseg5_mask:
21229 case Intrinsic::riscv_vlseg6_mask:
21230 case Intrinsic::riscv_vlseg7_mask:
21231 case Intrinsic::riscv_vlseg8_mask:
21232 NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21233 break;
21234 }
21235
21236 if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
21237 break;
21238
21239 unsigned SEW = VT.getScalarSizeInBits();
21240 assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
21241 "Type mismatch without bitcast?");
21242 unsigned Stride = SEW / 8 * NF;
21243 unsigned Offset = SEW / 8 * Idx;
21244
21245 SDValue Ops[] = {
21246 /*Chain=*/Tuple.getOperand(0),
21247 /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
21248 /*Passthru=*/Tuple.getOperand(2),
21249 /*Ptr=*/
21250 DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
21251 DAG.getConstant(Offset, DL, XLenVT)),
21252 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21253 /*Mask=*/Tuple.getOperand(4),
21254 /*VL=*/Tuple.getOperand(5),
21255 /*Policy=*/Tuple.getOperand(6)};
21256
21257 auto *TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
21258 // Match getTgtMemIntrinsic for non-unit stride case
21259 EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
21260      MachineFunction &MF = DAG.getMachineFunction();
21261      MachineMemOperand *MMO = MF.getMachineMemOperand(
21262          TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21263
21264 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
21265      SDValue Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
21266                                               Ops, MemVT, MMO);
21267 DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
21268 return Result.getValue(0);
21269 }
21270 case RISCVISD::TUPLE_INSERT: {
21271 // tuple_insert tuple, undef, idx -> tuple
21272 if (N->getOperand(1).isUndef())
21273 return N->getOperand(0);
21274 break;
21275 }
21276 case RISCVISD::VSLIDE1UP_VL:
21277 case RISCVISD::VFSLIDE1UP_VL: {
21278 using namespace SDPatternMatch;
21279 SDValue SrcVec;
21280 SDLoc DL(N);
21281 MVT VT = N->getSimpleValueType(0);
21282 // If the scalar we're sliding in was extracted from the first element of a
21283 // vector, we can use that vector as the passthru in a normal slideup of 1.
21284 // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
21285 if (!N->getOperand(0).isUndef() ||
21286 !sd_match(N->getOperand(2),
21287 m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
21288 m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
21289 break;
21290
21291 MVT SrcVecVT = SrcVec.getSimpleValueType();
21292 if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
21293 break;
21294 // Adapt the value type of source vector.
21295 if (SrcVecVT.isFixedLengthVector()) {
21296 SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
21297 SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
21298 }
21300 SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
21301 else
21302 SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
21303
21304 return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
21305 DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
21306 N->getOperand(4));
21307 }
21308 }
21309
21310 return SDValue();
21311}
21312
21313  bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
21314      EVT XVT, unsigned KeptBits) const {
21315    // For vectors, we don't have a preference.
21316 if (XVT.isVector())
21317 return false;
21318
21319 if (XVT != MVT::i32 && XVT != MVT::i64)
21320 return false;
21321
21322 // We can use sext.w for RV64 or an srai 31 on RV32.
21323 if (KeptBits == 32 || KeptBits == 64)
21324 return true;
21325
21326 // With Zbb we can use sext.h/sext.b.
21327 return Subtarget.hasStdExtZbb() &&
21328 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
21329 KeptBits == 16);
21330}
21331
21332  bool RISCVTargetLowering::isDesirableToCommuteWithShift(
21333      const SDNode *N, CombineLevel Level) const {
21334 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
21335 N->getOpcode() == ISD::SRL) &&
21336 "Expected shift op");
21337
21338 // The following folds are only desirable if `(OP _, c1 << c2)` can be
21339 // materialised in fewer instructions than `(OP _, c1)`:
21340 //
21341 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
21342 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
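  // For example, with c1 = 2047 and c2 = 2 the shifted constant 8188 no
  // longer fits in an `addi` immediate, so the fold is rejected below and
  // the cheap `addi` form is kept; with c1 = 1 and c2 = 3 the shifted
  // constant 8 is still a valid `addi` immediate and the fold is allowed.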
21343 SDValue N0 = N->getOperand(0);
21344 EVT Ty = N0.getValueType();
21345
21346    // Loads and stores can fold a constant offset into their addressing mode,
21347    // so if AddNode is only used by loads/stores, the fold above is still
21348    // worthwhile even though the add has additional uses.
21349 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
21350 for (SDNode *Use : X->users()) {
21351 // This use is the one we're on right now. Skip it
21352 if (Use == User || Use->getOpcode() == ISD::SELECT)
21353 continue;
21354        if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
21355          return false;
21356 }
21357 return true;
21358 };
21359
21360 if (Ty.isScalarInteger() &&
21361 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
21362 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
21363 return isUsedByLdSt(N0.getNode(), N);
21364
21365 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
21366 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
21367
21368 // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
21369 if (C2 && Subtarget.hasShlAdd(C2->getZExtValue()) && N->hasOneUse() &&
21370 N->user_begin()->getOpcode() == ISD::ADD &&
21371 !isUsedByLdSt(*N->user_begin(), nullptr) &&
21372 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
21373 return false;
21374
21375 if (C1 && C2) {
21376 const APInt &C1Int = C1->getAPIntValue();
21377 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
21378
21379 // We can materialise `c1 << c2` into an add immediate, so it's "free",
21380 // and the combine should happen, to potentially allow further combines
21381 // later.
21382 if (ShiftedC1Int.getSignificantBits() <= 64 &&
21383 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
21384 return true;
21385
21386 // We can materialise `c1` in an add immediate, so it's "free", and the
21387 // combine should be prevented.
21388 if (C1Int.getSignificantBits() <= 64 &&
21389          isLegalAddImmediate(C1Int.getSExtValue()))
21390        return false;
21391
21392 // Neither constant will fit into an immediate, so find materialisation
21393 // costs.
21394 int C1Cost =
21395 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
21396 /*CompressionCost*/ true);
21397 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
21398 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
21399 /*CompressionCost*/ true);
21400
21401 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
21402 // combine should be prevented.
21403 if (C1Cost < ShiftedC1Cost)
21404 return false;
21405 }
21406 }
21407
21408 if (!N0->hasOneUse())
21409 return false;
21410
21411 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
21412 N0->getOperand(0)->getOpcode() == ISD::ADD &&
21413 !N0->getOperand(0)->hasOneUse())
21414 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
21415
21416 return true;
21417}
21418
21419  bool RISCVTargetLowering::targetShrinkDemandedConstant(
21420      SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
21421 TargetLoweringOpt &TLO) const {
21422 // Delay this optimization as late as possible.
21423 if (!TLO.LegalOps)
21424 return false;
21425
21426 EVT VT = Op.getValueType();
21427 if (VT.isVector())
21428 return false;
21429
21430 unsigned Opcode = Op.getOpcode();
21431 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
21432 return false;
21433
21434 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
21435 if (!C)
21436 return false;
21437
21438 const APInt &Mask = C->getAPIntValue();
21439
21440 // Clear all non-demanded bits initially.
21441 APInt ShrunkMask = Mask & DemandedBits;
21442
21443 // Try to make a smaller immediate by setting undemanded bits.
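  // For example, if only bits 4 and above of (and X, 0xfff0) are demanded,
  // the mask can be widened to 0xffff, which is a single zext.h (or a
  // slli/srli pair) instead of an lui/addi constant materialization.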
21444
21445 APInt ExpandedMask = Mask | ~DemandedBits;
21446
21447 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
21448 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
21449 };
21450 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
21451 if (NewMask == Mask)
21452 return true;
21453 SDLoc DL(Op);
21454 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
21455 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
21456 Op.getOperand(0), NewC);
21457 return TLO.CombineTo(Op, NewOp);
21458 };
21459
21460 // If the shrunk mask fits in sign extended 12 bits, let the target
21461 // independent code apply it.
21462 if (ShrunkMask.isSignedIntN(12))
21463 return false;
21464
21465 // And has a few special cases for zext.
21466 if (Opcode == ISD::AND) {
21467 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
21468 // otherwise use SLLI + SRLI.
21469 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
21470 if (IsLegalMask(NewMask))
21471 return UseMask(NewMask);
21472
21473 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
21474 if (VT == MVT::i64) {
21475 APInt NewMask = APInt(64, 0xffffffff);
21476 if (IsLegalMask(NewMask))
21477 return UseMask(NewMask);
21478 }
21479 }
21480
21481 // For the remaining optimizations, we need to be able to make a negative
21482 // number through a combination of mask and undemanded bits.
21483 if (!ExpandedMask.isNegative())
21484 return false;
21485
21486    // What is the fewest number of bits we need to represent the negative number?
21487 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
21488
21489 // Try to make a 12 bit negative immediate. If that fails try to make a 32
21490 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
21491 // If we can't create a simm12, we shouldn't change opaque constants.
21492 APInt NewMask = ShrunkMask;
21493 if (MinSignedBits <= 12)
21494 NewMask.setBitsFrom(11);
21495 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
21496 NewMask.setBitsFrom(31);
21497 else
21498 return false;
21499
21500 // Check that our new mask is a subset of the demanded mask.
21501 assert(IsLegalMask(NewMask));
21502 return UseMask(NewMask);
21503}
21504
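// Emulate the generalized bit-reverse (GREV) and generalized OR-combine
// (GORC) operations from the draft Zbp extension. With a control value of 7
// they act within each byte: GREV(x, 7) reverses the bits of every byte
// (brev8) and GORC(x, 7) ORs each byte's set bits into every bit of that
// byte (orc.b). For example, computeGREVOrGORC(0x01, 7, /*IsGORC=*/false)
// == 0x80 and computeGREVOrGORC(0x01, 7, /*IsGORC=*/true) == 0xff.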
21505static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
21506 static const uint64_t GREVMasks[] = {
21507 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
21508 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
21509
21510 for (unsigned Stage = 0; Stage != 6; ++Stage) {
21511 unsigned Shift = 1 << Stage;
21512 if (ShAmt & Shift) {
21513 uint64_t Mask = GREVMasks[Stage];
21514 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
21515 if (IsGORC)
21516 Res |= x;
21517 x = Res;
21518 }
21519 }
21520
21521 return x;
21522}
21523
21524  void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
21525                                                          KnownBits &Known,
21526 const APInt &DemandedElts,
21527 const SelectionDAG &DAG,
21528 unsigned Depth) const {
21529 unsigned BitWidth = Known.getBitWidth();
21530 unsigned Opc = Op.getOpcode();
21531    assert((Opc >= ISD::BUILTIN_OP_END ||
21532            Opc == ISD::INTRINSIC_WO_CHAIN ||
21533            Opc == ISD::INTRINSIC_W_CHAIN ||
21534            Opc == ISD::INTRINSIC_VOID) &&
21535           "Should use MaskedValueIsZero if you don't know whether Op"
21536 " is a target node!");
21537
21538 Known.resetAll();
21539 switch (Opc) {
21540 default: break;
21541 case RISCVISD::SELECT_CC: {
21542 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
21543 // If we don't know any bits, early out.
21544 if (Known.isUnknown())
21545 break;
21546 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
21547
21548 // Only known if known in both the LHS and RHS.
21549 Known = Known.intersectWith(Known2);
21550 break;
21551 }
21552 case RISCVISD::VCPOP_VL: {
21553 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
21554 Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
21555 break;
21556 }
21557 case RISCVISD::CZERO_EQZ:
21558 case RISCVISD::CZERO_NEZ:
21559 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21560 // Result is either all zero or operand 0. We can propagate zeros, but not
21561 // ones.
21562 Known.One.clearAllBits();
21563 break;
21564 case RISCVISD::REMUW: {
21565 KnownBits Known2;
21566 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21567 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21568 // We only care about the lower 32 bits.
21569 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
21570 // Restore the original width by sign extending.
21571 Known = Known.sext(BitWidth);
21572 break;
21573 }
21574 case RISCVISD::DIVUW: {
21575 KnownBits Known2;
21576 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21577 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21578 // We only care about the lower 32 bits.
21579 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
21580 // Restore the original width by sign extending.
21581 Known = Known.sext(BitWidth);
21582 break;
21583 }
21584 case RISCVISD::SLLW: {
21585 KnownBits Known2;
21586 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21587 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21588 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
21589 // Restore the original width by sign extending.
21590 Known = Known.sext(BitWidth);
21591 break;
21592 }
21593 case RISCVISD::SRLW: {
21594 KnownBits Known2;
21595 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21596 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21597 Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32));
21598 // Restore the original width by sign extending.
21599 Known = Known.sext(BitWidth);
21600 break;
21601 }
21602 case RISCVISD::SRAW: {
21603 KnownBits Known2;
21604 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21605 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21606 Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
21607 // Restore the original width by sign extending.
21608 Known = Known.sext(BitWidth);
21609 break;
21610 }
21611 case RISCVISD::SHL_ADD: {
21612 KnownBits Known2;
21613 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21614 unsigned ShAmt = Op.getConstantOperandVal(1);
21615 Known <<= ShAmt;
21616 Known.Zero.setLowBits(ShAmt); // the <<= operator left these bits unknown
21617 Known2 = DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
21618 Known = KnownBits::add(Known, Known2);
21619 break;
21620 }
21621 case RISCVISD::CTZW: {
21622 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21623 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
21624 unsigned LowBits = llvm::bit_width(PossibleTZ);
21625 Known.Zero.setBitsFrom(LowBits);
21626 break;
21627 }
21628 case RISCVISD::CLZW: {
21629 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21630 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
21631 unsigned LowBits = llvm::bit_width(PossibleLZ);
21632 Known.Zero.setBitsFrom(LowBits);
21633 break;
21634 }
21635 case RISCVISD::BREV8:
21636 case RISCVISD::ORC_B: {
21637 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
21638 // control value of 7 is equivalent to brev8 and orc.b.
21639 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21640 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21641 // To compute zeros for ORC_B, we need to invert the value and invert it
21642 // back after. This inverting is harmless for BREV8.
21643 Known.Zero =
21644 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
21645 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
21646 break;
21647 }
21648 case RISCVISD::READ_VLENB: {
21649 // We can use the minimum and maximum VLEN values to bound VLENB. We
21650 // know VLEN must be a power of two.
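    // For example, with a guaranteed VLEN of at least 128 and at most 65536,
    // VLENB is a power of two in [16, 8192], so bits [3:0] and all bits
    // above bit 13 are known to be zero.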
21651 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
21652 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
21653 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
21654 Known.Zero.setLowBits(Log2_32(MinVLenB));
21655 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
21656 if (MaxVLenB == MinVLenB)
21657 Known.One.setBit(Log2_32(MinVLenB));
21658 break;
21659 }
21660 case RISCVISD::FCLASS: {
21661 // fclass will only set one of the low 10 bits.
21662 Known.Zero.setBitsFrom(10);
21663 break;
21664 }
21665    case ISD::INTRINSIC_W_CHAIN:
21666    case ISD::INTRINSIC_WO_CHAIN: {
21667      unsigned IntNo =
21668 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
21669 switch (IntNo) {
21670 default:
21671 // We can't do anything for most intrinsics.
21672 break;
21673 case Intrinsic::riscv_vsetvli:
21674 case Intrinsic::riscv_vsetvlimax: {
21675 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
21676 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
21677 RISCVVType::VLMUL VLMUL =
21678 static_cast<RISCVVType::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
21679 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
21680 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
21681 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
21682 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
21683
21684      // The result of vsetvli must not be larger than AVL.
21685 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
21686 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
21687
21688 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
21689 if (BitWidth > KnownZeroFirstBit)
21690 Known.Zero.setBitsFrom(KnownZeroFirstBit);
21691 break;
21692 }
21693 }
21694 break;
21695 }
21696 }
21697}
21698
21699  unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
21700      SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21701 unsigned Depth) const {
21702 switch (Op.getOpcode()) {
21703 default:
21704 break;
21705 case RISCVISD::SELECT_CC: {
21706 unsigned Tmp =
21707 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
21708 if (Tmp == 1) return 1; // Early out.
21709 unsigned Tmp2 =
21710 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
21711 return std::min(Tmp, Tmp2);
21712 }
21713 case RISCVISD::CZERO_EQZ:
21714 case RISCVISD::CZERO_NEZ:
21715 // Output is either all zero or operand 0. We can propagate sign bit count
21716 // from operand 0.
21717 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21718 case RISCVISD::ABSW: {
21719 // We expand this at isel to negw+max. The result will have 33 sign bits
21720 // if the input has at least 33 sign bits.
21721 unsigned Tmp =
21722 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21723 if (Tmp < 33) return 1;
21724 return 33;
21725 }
21726 case RISCVISD::SRAW: {
21727 unsigned Tmp =
21728 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21729 // sraw produces at least 33 sign bits. If the input already has more than
21730      // 33 sign bits, sraw will preserve them.
21731 // TODO: A more precise answer could be calculated depending on known bits
21732 // in the shift amount.
21733 return std::max(Tmp, 33U);
21734 }
21735 case RISCVISD::SLLW:
21736 case RISCVISD::SRLW:
21737 case RISCVISD::DIVW:
21738 case RISCVISD::DIVUW:
21739 case RISCVISD::REMUW:
21740 case RISCVISD::ROLW:
21741 case RISCVISD::RORW:
21742 case RISCVISD::FCVT_W_RV64:
21743 case RISCVISD::FCVT_WU_RV64:
21744 case RISCVISD::STRICT_FCVT_W_RV64:
21745 case RISCVISD::STRICT_FCVT_WU_RV64:
21746 // TODO: As the result is sign-extended, this is conservatively correct.
21747 return 33;
21748 case RISCVISD::VMV_X_S: {
21749 // The number of sign bits of the scalar result is computed by obtaining the
21750 // element type of the input vector operand, subtracting its width from the
21751 // XLEN, and then adding one (sign bit within the element type). If the
21752 // element type is wider than XLen, the least-significant XLEN bits are
21753 // taken.
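    // For example, reading an i16 element on RV64 yields at least
    // 64 - 16 + 1 = 49 sign bits; an element wider than XLEN provides no
    // target-specific information here (the default answer of 1 is used).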
21754 unsigned XLen = Subtarget.getXLen();
21755 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
21756 if (EltBits <= XLen)
21757 return XLen - EltBits + 1;
21758 break;
21759 }
21760    case ISD::INTRINSIC_W_CHAIN: {
21761      unsigned IntNo = Op.getConstantOperandVal(1);
21762 switch (IntNo) {
21763 default:
21764 break;
21765 case Intrinsic::riscv_masked_atomicrmw_xchg:
21766 case Intrinsic::riscv_masked_atomicrmw_add:
21767 case Intrinsic::riscv_masked_atomicrmw_sub:
21768 case Intrinsic::riscv_masked_atomicrmw_nand:
21769 case Intrinsic::riscv_masked_atomicrmw_max:
21770 case Intrinsic::riscv_masked_atomicrmw_min:
21771 case Intrinsic::riscv_masked_atomicrmw_umax:
21772 case Intrinsic::riscv_masked_atomicrmw_umin:
21773 case Intrinsic::riscv_masked_cmpxchg:
21774 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
21775 // narrow atomic operation. These are implemented using atomic
21776 // operations at the minimum supported atomicrmw/cmpxchg width whose
21777 // result is then sign extended to XLEN. With +A, the minimum width is
21778        // 32 for both RV64 and RV32.
21780 assert(Subtarget.hasStdExtA());
21781 return Op.getValueSizeInBits() - 31;
21782 }
21783 break;
21784 }
21785 }
21786
21787 return 1;
21788}
21789
21790  bool RISCVTargetLowering::SimplifyDemandedBitsForTargetNode(
21791      SDValue Op, const APInt &OriginalDemandedBits,
21792 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
21793 unsigned Depth) const {
21794 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
21795
21796 switch (Op.getOpcode()) {
21797 case RISCVISD::BREV8:
21798 case RISCVISD::ORC_B: {
21799 KnownBits Known2;
21800 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21801 // For BREV8, we need to do BREV8 on the demanded bits.
21802      // For ORC_B, any bit in the output demands all bits from the same byte.
21803 // So we need to do ORC_B on the demanded bits.
21804      APInt DemandedBits =
21805          APInt(BitWidth, computeGREVOrGORC(OriginalDemandedBits.getZExtValue(),
21806 7, IsGORC));
21807 if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits,
21808 OriginalDemandedElts, Known2, TLO, Depth + 1))
21809 return true;
21810
21811 // To compute zeros for ORC_B, we need to invert the value and invert it
21812 // back after. This inverting is harmless for BREV8.
21813 Known.Zero = ~computeGREVOrGORC(~Known2.Zero.getZExtValue(), 7, IsGORC);
21814 Known.One = computeGREVOrGORC(Known2.One.getZExtValue(), 7, IsGORC);
21815 return false;
21816 }
21817 }
21818
21819    return TargetLowering::SimplifyDemandedBitsForTargetNode(
21820        Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
21821}
21822
21823  bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
21824      SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21825 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
21826
21827 // TODO: Add more target nodes.
21828 switch (Op.getOpcode()) {
21829 case RISCVISD::SLLW:
21830 case RISCVISD::SRAW:
21831 case RISCVISD::SRLW:
21832 case RISCVISD::RORW:
21833 case RISCVISD::ROLW:
21834 // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
21835      // amount is in bounds.
21836 return false;
21837 case RISCVISD::SELECT_CC:
21838 // Integer comparisons cannot create poison.
21839 assert(Op.getOperand(0).getValueType().isInteger() &&
21840 "RISCVISD::SELECT_CC only compares integers");
21841 return false;
21842 }
21843    return TargetLowering::canCreateUndefOrPoisonForTargetNode(
21844        Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
21845}
21846
21847const Constant *
21848  RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
21849    assert(Ld && "Unexpected null LoadSDNode");
21850 if (!ISD::isNormalLoad(Ld))
21851 return nullptr;
21852
21853 SDValue Ptr = Ld->getBasePtr();
21854
21855 // Only constant pools with no offset are supported.
21856 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
21857 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
21858 if (!CNode || CNode->isMachineConstantPoolEntry() ||
21859 CNode->getOffset() != 0)
21860 return nullptr;
21861
21862 return CNode;
21863 };
21864
21865 // Simple case, LLA.
21866 if (Ptr.getOpcode() == RISCVISD::LLA) {
21867 auto *CNode = GetSupportedConstantPool(Ptr.getOperand(0));
21868 if (!CNode || CNode->getTargetFlags() != 0)
21869 return nullptr;
21870
21871 return CNode->getConstVal();
21872 }
21873
21874 // Look for a HI and ADD_LO pair.
21875 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
21876 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
21877 return nullptr;
21878
21879 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
21880 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
21881
21882 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
21883 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
21884 return nullptr;
21885
21886 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
21887 return nullptr;
21888
21889 return CNodeLo->getConstVal();
21890}
21891
21892  static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
21893                                                      MachineBasicBlock *BB) {
21894 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
21895
21896 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
21897 // Should the count have wrapped while it was being read, we need to try
21898 // again.
21899 // For example:
21900 // ```
21901 // read:
21902 // csrrs x3, counterh # load high word of counter
21903 // csrrs x2, counter # load low word of counter
21904 // csrrs x4, counterh # load high word of counter
21905 // bne x3, x4, read # check if high word reads match, otherwise try again
21906 // ```
21907
21908 MachineFunction &MF = *BB->getParent();
21909 const BasicBlock *LLVMBB = BB->getBasicBlock();
21910    MachineFunction::iterator It = ++BB->getIterator();
21911
21912 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
21913 MF.insert(It, LoopMBB);
21914
21915 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
21916 MF.insert(It, DoneMBB);
21917
21918 // Transfer the remainder of BB and its successor edges to DoneMBB.
21919 DoneMBB->splice(DoneMBB->begin(), BB,
21920 std::next(MachineBasicBlock::iterator(MI)), BB->end());
21921    DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
21922
21923 BB->addSuccessor(LoopMBB);
21924
21925    MachineRegisterInfo &RegInfo = MF.getRegInfo();
21926    Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
21927 Register LoReg = MI.getOperand(0).getReg();
21928 Register HiReg = MI.getOperand(1).getReg();
21929 int64_t LoCounter = MI.getOperand(2).getImm();
21930 int64_t HiCounter = MI.getOperand(3).getImm();
21931 DebugLoc DL = MI.getDebugLoc();
21932
21933    const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
21934    BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
21935 .addImm(HiCounter)
21936 .addReg(RISCV::X0);
21937 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
21938 .addImm(LoCounter)
21939 .addReg(RISCV::X0);
21940 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
21941 .addImm(HiCounter)
21942 .addReg(RISCV::X0);
21943
21944 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
21945 .addReg(HiReg)
21946 .addReg(ReadAgainReg)
21947 .addMBB(LoopMBB);
21948
21949 LoopMBB->addSuccessor(LoopMBB);
21950 LoopMBB->addSuccessor(DoneMBB);
21951
21952 MI.eraseFromParent();
21953
21954 return DoneMBB;
21955}
21956
21957  static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
21958                                               MachineBasicBlock *BB,
21959                                               const RISCVSubtarget &Subtarget) {
21960 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
21961
21962 MachineFunction &MF = *BB->getParent();
21963 DebugLoc DL = MI.getDebugLoc();
21964    const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
21965    const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
21966    Register LoReg = MI.getOperand(0).getReg();
21967 Register HiReg = MI.getOperand(1).getReg();
21968 Register SrcReg = MI.getOperand(2).getReg();
21969
21970 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
21971 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
21972
21973 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
21974 RI, Register());
21975    MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
21976    MachineMemOperand *MMOLo =
21977        MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
21978    MachineMemOperand *MMOHi = MF.getMachineMemOperand(
21979        MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
21980 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
21981 .addFrameIndex(FI)
21982 .addImm(0)
21983 .addMemOperand(MMOLo);
21984 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
21985 .addFrameIndex(FI)
21986 .addImm(4)
21987 .addMemOperand(MMOHi);
21988 MI.eraseFromParent(); // The pseudo instruction is gone now.
21989 return BB;
21990}
21991
21992  static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
21993                                                   MachineBasicBlock *BB,
21994                                                   const RISCVSubtarget &Subtarget) {
21995 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
21996 "Unexpected instruction");
21997
21998 MachineFunction &MF = *BB->getParent();
21999 DebugLoc DL = MI.getDebugLoc();
22000    const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
22001    const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
22002    Register DstReg = MI.getOperand(0).getReg();
22003 Register LoReg = MI.getOperand(1).getReg();
22004 Register HiReg = MI.getOperand(2).getReg();
22005
22006 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
22007 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
22008
22009    MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
22010    MachineMemOperand *MMOLo =
22011        MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
22012    MachineMemOperand *MMOHi = MF.getMachineMemOperand(
22013        MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
22014 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
22015 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
22016 .addFrameIndex(FI)
22017 .addImm(0)
22018 .addMemOperand(MMOLo);
22019 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
22020 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
22021 .addFrameIndex(FI)
22022 .addImm(4)
22023 .addMemOperand(MMOHi);
22024 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
22025 MI.eraseFromParent(); // The pseudo instruction is gone now.
22026 return BB;
22027}
22028
22029  static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
22030                                          unsigned RelOpcode, unsigned EqOpcode,
22031 const RISCVSubtarget &Subtarget) {
22032 DebugLoc DL = MI.getDebugLoc();
22033 Register DstReg = MI.getOperand(0).getReg();
22034 Register Src1Reg = MI.getOperand(1).getReg();
22035 Register Src2Reg = MI.getOperand(2).getReg();
22036    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
22037    Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22038    const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
22039
22040 // Save the current FFLAGS.
22041 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
22042
22043 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
22044 .addReg(Src1Reg)
22045 .addReg(Src2Reg);
22046    if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22047      MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
22048
22049 // Restore the FFLAGS.
22050 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22051 .addReg(SavedFFlags, RegState::Kill);
22052
22053 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
22054 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
22055 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
22056 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
22057    if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22058      MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
22059
22060 // Erase the pseudoinstruction.
22061 MI.eraseFromParent();
22062 return BB;
22063}
22064
22065static MachineBasicBlock *
22066  EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
22067                            MachineBasicBlock *ThisMBB,
22068 const RISCVSubtarget &Subtarget) {
22069 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
22070 // Without this, custom-inserter would have generated:
22071 //
22072 // A
22073 // | \
22074 // | B
22075 // | /
22076 // C
22077 // | \
22078 // | D
22079 // | /
22080 // E
22081 //
22082 // A: X = ...; Y = ...
22083 // B: empty
22084 // C: Z = PHI [X, A], [Y, B]
22085 // D: empty
22086 // E: PHI [X, C], [Z, D]
22087 //
22088 // If we lower both Select_FPRX_ in a single step, we can instead generate:
22089 //
22090 // A
22091 // | \
22092 // | C
22093 // | /|
22094 // |/ |
22095 // | |
22096 // | D
22097 // | /
22098 // E
22099 //
22100 // A: X = ...; Y = ...
22101 // D: empty
22102 // E: PHI [X, A], [X, C], [Y, D]
22103
22104 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22105 const DebugLoc &DL = First.getDebugLoc();
22106 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
22107 MachineFunction *F = ThisMBB->getParent();
22108 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
22109 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
22110 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
22111 MachineFunction::iterator It = ++ThisMBB->getIterator();
22112 F->insert(It, FirstMBB);
22113 F->insert(It, SecondMBB);
22114 F->insert(It, SinkMBB);
22115
22116 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
22117 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
22118                    std::next(MachineBasicBlock::iterator(First)),
22119                    ThisMBB->end());
22120 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
22121
22122 // Fallthrough block for ThisMBB.
22123 ThisMBB->addSuccessor(FirstMBB);
22124 // Fallthrough block for FirstMBB.
22125 FirstMBB->addSuccessor(SecondMBB);
22126 ThisMBB->addSuccessor(SinkMBB);
22127 FirstMBB->addSuccessor(SinkMBB);
22128 // This is fallthrough.
22129 SecondMBB->addSuccessor(SinkMBB);
22130
22131 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
22132 Register FLHS = First.getOperand(1).getReg();
22133 Register FRHS = First.getOperand(2).getReg();
22134 // Insert appropriate branch.
22135 BuildMI(FirstMBB, DL, TII.get(RISCVCC::getBrCond(FirstCC, First.getOpcode())))
22136 .addReg(FLHS)
22137 .addReg(FRHS)
22138 .addMBB(SinkMBB);
22139
22140 Register SLHS = Second.getOperand(1).getReg();
22141 Register SRHS = Second.getOperand(2).getReg();
22142 Register Op1Reg4 = First.getOperand(4).getReg();
22143 Register Op1Reg5 = First.getOperand(5).getReg();
22144
22145 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
22146 // Insert appropriate branch.
22147 BuildMI(ThisMBB, DL,
22148 TII.get(RISCVCC::getBrCond(SecondCC, Second.getOpcode())))
22149 .addReg(SLHS)
22150 .addReg(SRHS)
22151 .addMBB(SinkMBB);
22152
22153 Register DestReg = Second.getOperand(0).getReg();
22154 Register Op2Reg4 = Second.getOperand(4).getReg();
22155 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
22156 .addReg(Op2Reg4)
22157 .addMBB(ThisMBB)
22158 .addReg(Op1Reg4)
22159 .addMBB(FirstMBB)
22160 .addReg(Op1Reg5)
22161 .addMBB(SecondMBB);
22162
22163 // Now remove the Select_FPRX_s.
22164 First.eraseFromParent();
22165 Second.eraseFromParent();
22166 return SinkMBB;
22167}
22168
22169  static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
22170                                             MachineBasicBlock *BB,
22171                                             const RISCVSubtarget &Subtarget) {
22172 // To "insert" Select_* instructions, we actually have to insert the triangle
22173 // control-flow pattern. The incoming instructions know the destination vreg
22174 // to set, the condition code register to branch on, the true/false values to
22175 // select between, and the condcode to use to select the appropriate branch.
22176 //
22177 // We produce the following control flow:
22178 // HeadMBB
22179 // | \
22180 // | IfFalseMBB
22181 // | /
22182 // TailMBB
22183 //
22184 // When we find a sequence of selects we attempt to optimize their emission
22185 // by sharing the control flow. Currently we only handle cases where we have
22186 // multiple selects with the exact same condition (same LHS, RHS and CC).
22187 // The selects may be interleaved with other instructions if the other
22188 // instructions meet some requirements we deem safe:
22189 // - They are not pseudo instructions.
22190 // - They are debug instructions. Otherwise,
22191 // - They do not have side-effects, do not access memory and their inputs do
22192 // not depend on the results of the select pseudo-instructions.
22193 // The TrueV/FalseV operands of the selects cannot depend on the result of
22194 // previous selects in the sequence.
22195 // These conditions could be further relaxed. See the X86 target for a
22196 // related approach and more information.
22197 //
22198 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22199 // is checked here and handled by a separate function -
22200 // EmitLoweredCascadedSelect.
22201
22202 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
22203 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
22204 MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
22205 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
22206 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
22207 Next->getOperand(5).isKill())
22208 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
22209
22210 Register LHS = MI.getOperand(1).getReg();
22211 Register RHS;
22212 if (MI.getOperand(2).isReg())
22213 RHS = MI.getOperand(2).getReg();
22214 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
22215
22216 SmallVector<MachineInstr *, 4> SelectDebugValues;
22217 SmallSet<Register, 4> SelectDests;
22218 SelectDests.insert(MI.getOperand(0).getReg());
22219
22220 MachineInstr *LastSelectPseudo = &MI;
22221 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
22222 SequenceMBBI != E; ++SequenceMBBI) {
22223 if (SequenceMBBI->isDebugInstr())
22224 continue;
22225 if (RISCVInstrInfo::isSelectPseudo(*SequenceMBBI)) {
22226 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
22227 !SequenceMBBI->getOperand(2).isReg() ||
22228 SequenceMBBI->getOperand(2).getReg() != RHS ||
22229 SequenceMBBI->getOperand(3).getImm() != CC ||
22230 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
22231 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
22232 break;
22233 LastSelectPseudo = &*SequenceMBBI;
22234 SequenceMBBI->collectDebugValues(SelectDebugValues);
22235 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
22236 continue;
22237 }
22238 if (SequenceMBBI->hasUnmodeledSideEffects() ||
22239 SequenceMBBI->mayLoadOrStore() ||
22240 SequenceMBBI->usesCustomInsertionHook())
22241 break;
22242 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
22243 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
22244 }))
22245 break;
22246 }
22247
22248 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22249 const BasicBlock *LLVM_BB = BB->getBasicBlock();
22250 DebugLoc DL = MI.getDebugLoc();
22251    MachineFunction::iterator I = ++BB->getIterator();
22252
22253 MachineBasicBlock *HeadMBB = BB;
22254 MachineFunction *F = BB->getParent();
22255 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
22256 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
22257
22258 F->insert(I, IfFalseMBB);
22259 F->insert(I, TailMBB);
22260
22261 // Set the call frame size on entry to the new basic blocks.
22262 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
22263 IfFalseMBB->setCallFrameSize(CallFrameSize);
22264 TailMBB->setCallFrameSize(CallFrameSize);
22265
22266 // Transfer debug instructions associated with the selects to TailMBB.
22267 for (MachineInstr *DebugInstr : SelectDebugValues) {
22268 TailMBB->push_back(DebugInstr->removeFromParent());
22269 }
22270
22271 // Move all instructions after the sequence to TailMBB.
22272 TailMBB->splice(TailMBB->end(), HeadMBB,
22273 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
22274 // Update machine-CFG edges by transferring all successors of the current
22275 // block to the new block which will contain the Phi nodes for the selects.
22276 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
22277 // Set the successors for HeadMBB.
22278 HeadMBB->addSuccessor(IfFalseMBB);
22279 HeadMBB->addSuccessor(TailMBB);
22280
22281 // Insert appropriate branch.
22282 if (MI.getOperand(2).isImm())
22283 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22284 .addReg(LHS)
22285 .addImm(MI.getOperand(2).getImm())
22286 .addMBB(TailMBB);
22287 else
22288 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22289 .addReg(LHS)
22290 .addReg(RHS)
22291 .addMBB(TailMBB);
22292
22293 // IfFalseMBB just falls through to TailMBB.
22294 IfFalseMBB->addSuccessor(TailMBB);
22295
22296 // Create PHIs for all of the select pseudo-instructions.
22297 auto SelectMBBI = MI.getIterator();
22298 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
22299 auto InsertionPoint = TailMBB->begin();
22300 while (SelectMBBI != SelectEnd) {
22301 auto Next = std::next(SelectMBBI);
22302 if (RISCVInstrInfo::isSelectPseudo(*SelectMBBI)) {
22303 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
22304 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
22305 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
22306 .addReg(SelectMBBI->getOperand(4).getReg())
22307 .addMBB(HeadMBB)
22308 .addReg(SelectMBBI->getOperand(5).getReg())
22309 .addMBB(IfFalseMBB);
22310 SelectMBBI->eraseFromParent();
22311 }
22312 SelectMBBI = Next;
22313 }
22314
22315 F->getProperties().resetNoPHIs();
22316 return TailMBB;
22317}
22318
22319// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
22320static const RISCV::RISCVMaskedPseudoInfo *
22321lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW) {
22322    const RISCVVInversePseudosTable::PseudoInfo *Inverse =
22323        RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
22324 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
22325    const RISCV::RISCVMaskedPseudoInfo *Masked =
22326        RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
22327 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
22328 return Masked;
22329}
22330
22331  static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
22332                                                      MachineBasicBlock *BB,
22333                                                      unsigned CVTXOpc) {
22334 DebugLoc DL = MI.getDebugLoc();
22335
22336    const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
22337
22338    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
22339    Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22340
22341 // Save the old value of FFLAGS.
22342 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
22343
22344 assert(MI.getNumOperands() == 7);
22345
22346 // Emit a VFCVT_X_F
22347 const TargetRegisterInfo *TRI =
22348        BB->getParent()->getSubtarget().getRegisterInfo();
22349    const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
22350 Register Tmp = MRI.createVirtualRegister(RC);
22351 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
22352 .add(MI.getOperand(1))
22353 .add(MI.getOperand(2))
22354 .add(MI.getOperand(3))
22355 .add(MachineOperand::CreateImm(7)) // frm = DYN
22356 .add(MI.getOperand(4))
22357 .add(MI.getOperand(5))
22358 .add(MI.getOperand(6))
22359 .add(MachineOperand::CreateReg(RISCV::FRM,
22360 /*IsDef*/ false,
22361 /*IsImp*/ true));
22362
22363 // Emit a VFCVT_F_X
22364 RISCVVType::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
22365 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
22366 // There is no E8 variant for VFCVT_F_X.
22367 assert(Log2SEW >= 4);
22368 unsigned CVTFOpc =
22369 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
22370 ->MaskedPseudo;
22371
22372 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
22373 .add(MI.getOperand(0))
22374 .add(MI.getOperand(1))
22375 .addReg(Tmp)
22376 .add(MI.getOperand(3))
22377 .add(MachineOperand::CreateImm(7)) // frm = DYN
22378 .add(MI.getOperand(4))
22379 .add(MI.getOperand(5))
22380 .add(MI.getOperand(6))
22381 .add(MachineOperand::CreateReg(RISCV::FRM,
22382 /*IsDef*/ false,
22383 /*IsImp*/ true));
22384
22385 // Restore FFLAGS.
22386 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22387 .addReg(SavedFFLAGS, RegState::Kill);
22388
22389 // Erase the pseudoinstruction.
22390 MI.eraseFromParent();
22391 return BB;
22392}
22393
22394  static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
22395                                       const RISCVSubtarget &Subtarget) {
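  // PseudoFROUND rounds to an integer value in floating-point: if |src| is
  // at least the power-of-two threshold in MaxReg, the value is already an
  // integer and is returned unchanged; otherwise it is converted to an
  // integer and back using the rounding mode in FRM, and the original sign
  // is restored with fsgnj so that signed zeros are preserved.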
22396 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
22397 const TargetRegisterClass *RC;
22398 switch (MI.getOpcode()) {
22399 default:
22400 llvm_unreachable("Unexpected opcode");
22401 case RISCV::PseudoFROUND_H:
22402 CmpOpc = RISCV::FLT_H;
22403 F2IOpc = RISCV::FCVT_W_H;
22404 I2FOpc = RISCV::FCVT_H_W;
22405 FSGNJOpc = RISCV::FSGNJ_H;
22406 FSGNJXOpc = RISCV::FSGNJX_H;
22407 RC = &RISCV::FPR16RegClass;
22408 break;
22409 case RISCV::PseudoFROUND_H_INX:
22410 CmpOpc = RISCV::FLT_H_INX;
22411 F2IOpc = RISCV::FCVT_W_H_INX;
22412 I2FOpc = RISCV::FCVT_H_W_INX;
22413 FSGNJOpc = RISCV::FSGNJ_H_INX;
22414 FSGNJXOpc = RISCV::FSGNJX_H_INX;
22415 RC = &RISCV::GPRF16RegClass;
22416 break;
22417 case RISCV::PseudoFROUND_S:
22418 CmpOpc = RISCV::FLT_S;
22419 F2IOpc = RISCV::FCVT_W_S;
22420 I2FOpc = RISCV::FCVT_S_W;
22421 FSGNJOpc = RISCV::FSGNJ_S;
22422 FSGNJXOpc = RISCV::FSGNJX_S;
22423 RC = &RISCV::FPR32RegClass;
22424 break;
22425 case RISCV::PseudoFROUND_S_INX:
22426 CmpOpc = RISCV::FLT_S_INX;
22427 F2IOpc = RISCV::FCVT_W_S_INX;
22428 I2FOpc = RISCV::FCVT_S_W_INX;
22429 FSGNJOpc = RISCV::FSGNJ_S_INX;
22430 FSGNJXOpc = RISCV::FSGNJX_S_INX;
22431 RC = &RISCV::GPRF32RegClass;
22432 break;
22433 case RISCV::PseudoFROUND_D:
22434 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22435 CmpOpc = RISCV::FLT_D;
22436 F2IOpc = RISCV::FCVT_L_D;
22437 I2FOpc = RISCV::FCVT_D_L;
22438 FSGNJOpc = RISCV::FSGNJ_D;
22439 FSGNJXOpc = RISCV::FSGNJX_D;
22440 RC = &RISCV::FPR64RegClass;
22441 break;
22442 case RISCV::PseudoFROUND_D_INX:
22443 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22444 CmpOpc = RISCV::FLT_D_INX;
22445 F2IOpc = RISCV::FCVT_L_D_INX;
22446 I2FOpc = RISCV::FCVT_D_L_INX;
22447 FSGNJOpc = RISCV::FSGNJ_D_INX;
22448 FSGNJXOpc = RISCV::FSGNJX_D_INX;
22449 RC = &RISCV::GPRRegClass;
22450 break;
22451 }
22452
22453 const BasicBlock *BB = MBB->getBasicBlock();
22454 DebugLoc DL = MI.getDebugLoc();
22455 MachineFunction::iterator I = ++MBB->getIterator();
22456
22457 MachineFunction *F = MBB->getParent();
22458 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
22459 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
22460
22461 F->insert(I, CvtMBB);
22462 F->insert(I, DoneMBB);
22463 // Move all instructions after the sequence to DoneMBB.
22464 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
22465 MBB->end());
22466 // Update machine-CFG edges by transferring all successors of the current
22467 // block to the new block which will contain the Phi nodes for the selects.
22468    DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
22469    // Set the successors for MBB.
22470 MBB->addSuccessor(CvtMBB);
22471 MBB->addSuccessor(DoneMBB);
22472
22473 Register DstReg = MI.getOperand(0).getReg();
22474 Register SrcReg = MI.getOperand(1).getReg();
22475 Register MaxReg = MI.getOperand(2).getReg();
22476 int64_t FRM = MI.getOperand(3).getImm();
22477
22478 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22479 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
22480
22481 Register FabsReg = MRI.createVirtualRegister(RC);
22482 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
22483
22484 // Compare the FP value to the max value.
22485 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22486 auto MIB =
22487 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
22488    if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22489      MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
22490
22491 // Insert branch.
22492 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
22493 .addReg(CmpReg)
22494 .addReg(RISCV::X0)
22495 .addMBB(DoneMBB);
22496
22497 CvtMBB->addSuccessor(DoneMBB);
22498
22499 // Convert to integer.
22500 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22501 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
22502    if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22503      MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
22504
22505 // Convert back to FP.
22506 Register I2FReg = MRI.createVirtualRegister(RC);
22507 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
22508    if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22509      MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
22510
22511 // Restore the sign bit.
22512 Register CvtReg = MRI.createVirtualRegister(RC);
22513 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
22514
22515 // Merge the results.
22516 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
22517 .addReg(SrcReg)
22518 .addMBB(MBB)
22519 .addReg(CvtReg)
22520 .addMBB(CvtMBB);
22521
22522 MI.eraseFromParent();
22523 return DoneMBB;
22524}
22525
22526  MachineBasicBlock *
22527  RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
22528                                                   MachineBasicBlock *BB) const {
22529 switch (MI.getOpcode()) {
22530 default:
22531 llvm_unreachable("Unexpected instr type to insert");
22532 case RISCV::ReadCounterWide:
22533 assert(!Subtarget.is64Bit() &&
22534 "ReadCounterWide is only to be used on riscv32");
22535 return emitReadCounterWidePseudo(MI, BB);
22536 case RISCV::Select_GPR_Using_CC_GPR:
22537 case RISCV::Select_GPR_Using_CC_Imm5_Zibi:
22538 case RISCV::Select_GPR_Using_CC_SImm5_CV:
22539 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
22540 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
22541 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
22542 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
22543 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
22544 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
22545 case RISCV::Select_FPR16_Using_CC_GPR:
22546 case RISCV::Select_FPR16INX_Using_CC_GPR:
22547 case RISCV::Select_FPR32_Using_CC_GPR:
22548 case RISCV::Select_FPR32INX_Using_CC_GPR:
22549 case RISCV::Select_FPR64_Using_CC_GPR:
22550 case RISCV::Select_FPR64INX_Using_CC_GPR:
22551 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
22552 return emitSelectPseudo(MI, BB, Subtarget);
22553 case RISCV::BuildPairF64Pseudo:
22554 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
22555 case RISCV::SplitF64Pseudo:
22556 return emitSplitF64Pseudo(MI, BB, Subtarget);
22557 case RISCV::PseudoQuietFLE_H:
22558 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
22559 case RISCV::PseudoQuietFLE_H_INX:
22560 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
22561 case RISCV::PseudoQuietFLT_H:
22562 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
22563 case RISCV::PseudoQuietFLT_H_INX:
22564 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
22565 case RISCV::PseudoQuietFLE_S:
22566 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
22567 case RISCV::PseudoQuietFLE_S_INX:
22568 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
22569 case RISCV::PseudoQuietFLT_S:
22570 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
22571 case RISCV::PseudoQuietFLT_S_INX:
22572 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
22573 case RISCV::PseudoQuietFLE_D:
22574 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
22575 case RISCV::PseudoQuietFLE_D_INX:
22576 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
22577 case RISCV::PseudoQuietFLE_D_IN32X:
22578 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
22579 Subtarget);
22580 case RISCV::PseudoQuietFLT_D:
22581 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
22582 case RISCV::PseudoQuietFLT_D_INX:
22583 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
22584 case RISCV::PseudoQuietFLT_D_IN32X:
22585 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
22586 Subtarget);
22587
22588 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
22589 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
22590 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
22591 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
22592 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
22593 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
22594 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
22595 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
22596 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
22597 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
22598 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
22599 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
22600 case RISCV::PseudoFROUND_H:
22601 case RISCV::PseudoFROUND_H_INX:
22602 case RISCV::PseudoFROUND_S:
22603 case RISCV::PseudoFROUND_S_INX:
22604 case RISCV::PseudoFROUND_D:
22605 case RISCV::PseudoFROUND_D_INX:
22606 case RISCV::PseudoFROUND_D_IN32X:
22607 return emitFROUND(MI, BB, Subtarget);
22608 case RISCV::PROBED_STACKALLOC_DYN:
22609 return emitDynamicProbedAlloc(MI, BB);
22610 case TargetOpcode::STATEPOINT:
22611      // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
22612      // while the jal call instruction (into which the statepoint is lowered at
22613      // the end) has an implicit def. This def is early-clobber as it will be
22614      // set at the moment of the call, earlier than any use is read.
22615 // Add this implicit dead def here as a workaround.
22616 MI.addOperand(*MI.getMF(),
22617                  MachineOperand::CreateReg(
22618                      RISCV::X1, /*isDef*/ true,
22619 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
22620 /*isUndef*/ false, /*isEarlyClobber*/ true));
22621 [[fallthrough]];
22622 case TargetOpcode::STACKMAP:
22623 case TargetOpcode::PATCHPOINT:
22624 if (!Subtarget.is64Bit())
22625 reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only "
22626 "supported on 64-bit targets");
22627 return emitPatchPoint(MI, BB);
22628 }
22629}
22630
22631  void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
22632                                                          SDNode *Node) const {
22633    // If the instruction defines an FRM operand, conservatively set it as non-dead to
22634 // express data dependency with FRM users and prevent incorrect instruction
22635 // reordering.
22636 if (auto *FRMDef = MI.findRegisterDefOperand(RISCV::FRM, /*TRI=*/nullptr)) {
22637 FRMDef->setIsDead(false);
22638 return;
22639 }
22640 // Add FRM dependency to any instructions with dynamic rounding mode.
22641 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
22642 if (Idx < 0) {
22643 // Vector pseudos have FRM index indicated by TSFlags.
22644 Idx = RISCVII::getFRMOpNum(MI.getDesc());
22645 if (Idx < 0)
22646 return;
22647 }
22648 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
22649 return;
22650 // If the instruction already reads FRM, don't add another read.
22651 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
22652 return;
22653 MI.addOperand(
22654 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
22655}
22656
22657void RISCVTargetLowering::analyzeInputArgs(
22658 MachineFunction &MF, CCState &CCInfo,
22659 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
22660 RISCVCCAssignFn Fn) const {
22661 for (const auto &[Idx, In] : enumerate(Ins)) {
22662 MVT ArgVT = In.VT;
22663 ISD::ArgFlagsTy ArgFlags = In.Flags;
22664
22665 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22666 In.OrigTy)) {
22667 LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
22668 << ArgVT << '\n');
22669 llvm_unreachable(nullptr);
22670 }
22671 }
22672}
22673
22674void RISCVTargetLowering::analyzeOutputArgs(
22675 MachineFunction &MF, CCState &CCInfo,
22676 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
22677 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
22678 for (const auto &[Idx, Out] : enumerate(Outs)) {
22679 MVT ArgVT = Out.VT;
22680 ISD::ArgFlagsTy ArgFlags = Out.Flags;
22681
22682 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22683 Out.OrigTy)) {
22684 LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
22685 << ArgVT << "\n");
22686 llvm_unreachable(nullptr);
22687 }
22688 }
22689}
22690
22691// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
22692// values.
22693  static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
22694                                     const CCValAssign &VA, const SDLoc &DL,
22695 const RISCVSubtarget &Subtarget) {
22696 if (VA.needsCustom()) {
22697 if (VA.getLocVT().isInteger() &&
22698 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22699 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
22700 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
22701 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
22702      if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
22703        return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
22704 llvm_unreachable("Unexpected Custom handling.");
22705 }
22706
22707 switch (VA.getLocInfo()) {
22708 default:
22709 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22710 case CCValAssign::Full:
22711 break;
22712 case CCValAssign::BCvt:
22713 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
22714 break;
22715 }
22716 return Val;
22717}
22718
22719// The caller is responsible for loading the full value if the argument is
22720// passed with CCValAssign::Indirect.
22721 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
22722 const CCValAssign &VA, const SDLoc &DL,
22723 const ISD::InputArg &In,
22724 const RISCVTargetLowering &TLI) {
22725 MachineFunction &MF = DAG.getMachineFunction();
22726 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22727 EVT LocVT = VA.getLocVT();
22728 SDValue Val;
22729 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
22730 Register VReg = RegInfo.createVirtualRegister(RC);
22731 RegInfo.addLiveIn(VA.getLocReg(), VReg);
22732 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
22733
22734 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
22735 if (In.isOrigArg()) {
22736 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
22737 if (OrigArg->getType()->isIntegerTy()) {
22738 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
22739 // An input zero extended from i31 can also be considered sign extended.
22740 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
22741 (BitWidth < 32 && In.Flags.isZExt())) {
22742 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
22743 RVFI->addSExt32Register(VReg);
22744 }
22745 }
22746 }
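// For instance (illustrative): a zeroext i8 argument has bits 8 and above
// known to be zero, so bit 31 is zero and the value is equally valid as one
// sign-extended from 32 bits; recording the vreg lets the SExtWRemoval logic
// drop redundant sext.w instructions later.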
22747
22748 if (VA.getLocInfo() == CCValAssign::Indirect)
22749 return Val;
22750
22751 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
22752}
22753
22754 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
22755 const CCValAssign &VA, const SDLoc &DL,
22756 const RISCVSubtarget &Subtarget) {
22757 EVT LocVT = VA.getLocVT();
22758
22759 if (VA.needsCustom()) {
22760 if (LocVT.isInteger() &&
22761 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22762 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
22763 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
22764 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
22765 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
22766 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
22767 llvm_unreachable("Unexpected Custom handling.");
22768 }
22769
22770 switch (VA.getLocInfo()) {
22771 default:
22772 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22773 case CCValAssign::Full:
22774 break;
22775 case CCValAssign::BCvt:
22776 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
22777 break;
22778 }
22779 return Val;
22780}
22781
22782// The caller is responsible for loading the full value if the argument is
22783// passed with CCValAssign::Indirect.
22784 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
22785 const CCValAssign &VA, const SDLoc &DL) {
22786 MachineFunction &MF = DAG.getMachineFunction();
22787 MachineFrameInfo &MFI = MF.getFrameInfo();
22788 EVT LocVT = VA.getLocVT();
22789 EVT ValVT = VA.getValVT();
22790 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
22791 if (VA.getLocInfo() == CCValAssign::Indirect) {
22792 // When the value is a scalable vector, we save the pointer which points to
22793 // the scalable vector value in the stack. The ValVT will be the pointer
22794 // type, instead of the scalable vector type.
22795 ValVT = LocVT;
22796 }
22797 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
22798 /*IsImmutable=*/true);
22799 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22800 SDValue Val;
22801
22802 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
22803 switch (VA.getLocInfo()) {
22804 default:
22805 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22806 case CCValAssign::Full:
22807 case CCValAssign::Indirect:
22808 case CCValAssign::BCvt:
22809 break;
22810 }
22811 Val = DAG.getExtLoad(
22812 ExtType, DL, LocVT, Chain, FIN,
22813 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
22814 return Val;
22815}
22816
22817 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
22818 const CCValAssign &VA,
22819 const CCValAssign &HiVA,
22820 const SDLoc &DL) {
22821 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
22822 "Unexpected VA");
22823 MachineFunction &MF = DAG.getMachineFunction();
22824 MachineFrameInfo &MFI = MF.getFrameInfo();
22825 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22826
22827 assert(VA.isRegLoc() && "Expected register VA assignment");
22828
22829 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22830 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
22831 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
22832 SDValue Hi;
22833 if (HiVA.isMemLoc()) {
22834 // Second half of f64 is passed on the stack.
22835 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
22836 /*IsImmutable=*/true);
22837 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
22838 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
22840 } else {
22841 // Second half of f64 is passed in another GPR.
22842 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22843 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
22844 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
22845 }
22846 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
22847}
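// Worked example (illustrative, assuming the ilp32 ABI): if only a7 is left
// for an f64 argument, its low 32 bits arrive in a7 and its high 32 bits in
// the first stack slot; the two i32 halves are then recombined with
// RISCVISD::BuildPairF64 above.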
22848
22849// Transform physical registers into virtual registers.
22850 SDValue RISCVTargetLowering::LowerFormalArguments(
22851 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
22852 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
22853 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
22854
22855 MachineFunction &MF = DAG.getMachineFunction();
22856
22857 switch (CallConv) {
22858 default:
22859 reportFatalUsageError("Unsupported calling convention");
22860 case CallingConv::C:
22861 case CallingConv::Fast:
22864 case CallingConv::GRAAL:
22865 case CallingConv::RISCV_VectorCall:
22866 #define CC_VLS_CASE(ABI_VLEN) case CallingConv::RISCV_VLSCall_##ABI_VLEN:
22867 CC_VLS_CASE(32)
22868 CC_VLS_CASE(64)
22869 CC_VLS_CASE(128)
22870 CC_VLS_CASE(256)
22871 CC_VLS_CASE(512)
22872 CC_VLS_CASE(1024)
22873 CC_VLS_CASE(2048)
22874 CC_VLS_CASE(4096)
22875 CC_VLS_CASE(8192)
22876 CC_VLS_CASE(16384)
22877 CC_VLS_CASE(32768)
22878 CC_VLS_CASE(65536)
22879 #undef CC_VLS_CASE
22880 break;
22881 case CallingConv::GHC:
22882 if (Subtarget.hasStdExtE())
22883 reportFatalUsageError("GHC calling convention is not supported on RVE!");
22884 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
22885 reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and "
22886 "(Zdinx/D) instruction set extensions");
22887 }
22888
22889 const Function &Func = MF.getFunction();
22890 if (Func.hasFnAttribute("interrupt")) {
22891 if (!Func.arg_empty())
22892 reportFatalUsageError(
22893 "Functions with the interrupt attribute cannot have arguments!");
22894
22895 StringRef Kind =
22896 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
22897
22898 constexpr StringLiteral SupportedInterruptKinds[] = {
22899 "machine",
22900 "supervisor",
22901 "rnmi",
22902 "qci-nest",
22903 "qci-nonest",
22904 "SiFive-CLIC-preemptible",
22905 "SiFive-CLIC-stack-swap",
22906 "SiFive-CLIC-preemptible-stack-swap",
22907 };
22908 if (!llvm::is_contained(SupportedInterruptKinds, Kind))
22909 reportFatalUsageError(
22910 "Function interrupt attribute argument not supported!");
22911
22912 if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint())
22913 reportFatalUsageError(
22914 "'qci-*' interrupt kinds require Xqciint extension");
22915
22916 if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
22917 reportFatalUsageError(
22918 "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");
22919
22920 if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
22921 reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension");
22922 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
22923 if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
22924 reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
22925 "have a frame pointer");
22926 }
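// Source-level example (illustrative): the attribute validated above is what
// Clang emits for
//   __attribute__((interrupt("machine"))) void isr(void) { ... }
// i.e. the IR function attribute "interrupt"="machine"; kinds such as
// "qci-nest" or "SiFive-CLIC-stack-swap" additionally require the matching
// vendor extension checks performed here.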
22927
22928 EVT PtrVT = getPointerTy(DAG.getDataLayout());
22929 MVT XLenVT = Subtarget.getXLenVT();
22930 unsigned XLenInBytes = Subtarget.getXLen() / 8;
22931 // Used with varargs to accumulate store chains.
22932 std::vector<SDValue> OutChains;
22933
22934 // Assign locations to all of the incoming arguments.
22935 SmallVector<CCValAssign, 16> ArgLocs;
22936 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
22937
22938 if (CallConv == CallingConv::GHC)
22939 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
22940 else
22941 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
22942 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
22943 : CC_RISCV);
22944
22945 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
22946 CCValAssign &VA = ArgLocs[i];
22947 SDValue ArgValue;
22948 // Passing f64 on RV32D with a soft float ABI must be handled as a special
22949 // case.
22950 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
22951 assert(VA.needsCustom());
22952 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
22953 } else if (VA.isRegLoc())
22954 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
22955 else
22956 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
22957
22958 if (VA.getLocInfo() == CCValAssign::Indirect) {
22959 // If the original argument was split and passed by reference (e.g. i128
22960 // on RV32), we need to load all parts of it here (using the same
22961 // address). Vectors may be partly split to registers and partly to the
22962 // stack, in which case the base address is partly offset and subsequent
22963 // stores are relative to that.
22964 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
22965 MachinePointerInfo()));
22966 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
22967 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
22968 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
22969 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
22970 CCValAssign &PartVA = ArgLocs[i + 1];
22971 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
22972 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
22973 if (PartVA.getValVT().isScalableVector())
22974 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
22975 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
22976 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
22977 MachinePointerInfo()));
22978 ++i;
22979 ++InsIdx;
22980 }
22981 continue;
22982 }
22983 InVals.push_back(ArgValue);
22984 }
22985
22986 if (any_of(ArgLocs,
22987 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
22988 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
22989
22990 if (IsVarArg) {
22991 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
22992 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
22993 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
22994 MachineFrameInfo &MFI = MF.getFrameInfo();
22995 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22996 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
22997
22998 // Size of the vararg save area. For now, the varargs save area is either
22999 // zero or large enough to hold a0-a7.
23000 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
23001 int FI;
23002
23003 // If all registers are allocated, then all varargs must be passed on the
23004 // stack and we don't need to save any argregs.
23005 if (VarArgsSaveSize == 0) {
23006 int VaArgOffset = CCInfo.getStackSize();
23007 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
23008 } else {
23009 int VaArgOffset = -VarArgsSaveSize;
23010 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
23011
23012 // If saving an odd number of registers then create an extra stack slot to
23013 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
23014 // offsets to even-numbered registers remain 2*XLEN-aligned.
23015 if (Idx % 2) {
23016 MFI.CreateFixedObject(
23017 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
23018 VarArgsSaveSize += XLenInBytes;
23019 }
23020
23021 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
23022
23023 // Copy the integer registers that may have been used for passing varargs
23024 // to the vararg save area.
23025 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
23026 const Register Reg = RegInfo.createVirtualRegister(RC);
23027 RegInfo.addLiveIn(ArgRegs[I], Reg);
23028 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
23029 SDValue Store = DAG.getStore(
23030 Chain, DL, ArgValue, FIN,
23031 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
23032 OutChains.push_back(Store);
23033 FIN =
23034 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
23035 }
23036 }
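// Worked example (illustrative, rv64 lp64 ABI): with three named scalar
// arguments, Idx is 3 (a0-a2 consumed), so a3-a7 are spilled and
// VarArgsSaveSize starts at 8 * 5 = 40 bytes; because Idx is odd, one extra
// 8-byte slot is created, giving a 48-byte save area whose end stays
// 16-byte aligned.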
23037
23038 // Record the frame index of the first variable argument
23039 // which is a value necessary to VASTART.
23040 RVFI->setVarArgsFrameIndex(FI);
23041 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
23042 }
23043
23044 // All stores are grouped in one node to allow the matching between
23045 // the size of Ins and InVals. This only happens for vararg functions.
23046 if (!OutChains.empty()) {
23047 OutChains.push_back(Chain);
23048 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
23049 }
23050
23051 return Chain;
23052}
23053
23054/// isEligibleForTailCallOptimization - Check whether the call is eligible
23055/// for tail call optimization.
23056/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
23057bool RISCVTargetLowering::isEligibleForTailCallOptimization(
23058 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
23059 const SmallVector<CCValAssign, 16> &ArgLocs) const {
23060
23061 auto CalleeCC = CLI.CallConv;
23062 auto &Outs = CLI.Outs;
23063 auto &Caller = MF.getFunction();
23064 auto CallerCC = Caller.getCallingConv();
23065
23066 // Exception-handling functions need a special set of instructions to
23067 // indicate a return to the hardware. Tail-calling another function would
23068 // probably break this.
23069 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
23070 // should be expanded as new function attributes are introduced.
23071 if (Caller.hasFnAttribute("interrupt"))
23072 return false;
23073
23074 // Do not tail call opt if the stack is used to pass parameters.
23075 if (CCInfo.getStackSize() != 0)
23076 return false;
23077
23078 // Do not tail call opt if any parameters need to be passed indirectly.
23079 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
23080 // passed indirectly. So the address of the value will be passed in a
23081 // register, or if not available, then the address is put on the stack. In
23082 // order to pass indirectly, space on the stack often needs to be allocated
23083 // in order to store the value. In this case the CCInfo.getNextStackOffset()
23084 // in order to store the value. In this case the CCInfo.getStackSize()
23085 // != 0 check is not enough and we need to check if any CCValAssign ArgLocs
23086 for (auto &VA : ArgLocs)
23087 if (VA.getLocInfo() == CCValAssign::Indirect)
23088 return false;
23089
23090 // Do not tail call opt if either caller or callee uses struct return
23091 // semantics.
23092 auto IsCallerStructRet = Caller.hasStructRetAttr();
23093 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
23094 if (IsCallerStructRet || IsCalleeStructRet)
23095 return false;
23096
23097 // The callee has to preserve all registers the caller needs to preserve.
23098 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
23099 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
23100 if (CalleeCC != CallerCC) {
23101 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
23102 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
23103 return false;
23104 }
23105
23106 // Byval parameters hand the function a pointer directly into the stack area
23107 // we want to reuse during a tail call. Working around this *is* possible
23108 // but less efficient and uglier in LowerCall.
23109 for (auto &Arg : Outs)
23110 if (Arg.Flags.isByVal())
23111 return false;
23112
23113 return true;
23114}
23115
23116 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
23117 return DAG.getDataLayout().getPrefTypeAlign(
23118 VT.getTypeForEVT(*DAG.getContext()));
23119}
23120
23121// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
23122// and output parameter nodes.
23123 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
23124 SmallVectorImpl<SDValue> &InVals) const {
23125 SelectionDAG &DAG = CLI.DAG;
23126 SDLoc &DL = CLI.DL;
23127 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
23128 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
23129 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
23130 SDValue Chain = CLI.Chain;
23131 SDValue Callee = CLI.Callee;
23132 bool &IsTailCall = CLI.IsTailCall;
23133 CallingConv::ID CallConv = CLI.CallConv;
23134 bool IsVarArg = CLI.IsVarArg;
23135 EVT PtrVT = getPointerTy(DAG.getDataLayout());
23136 MVT XLenVT = Subtarget.getXLenVT();
23137 const CallBase *CB = CLI.CB;
23138
23139 MachineFunction &MF = DAG.getMachineFunction();
23140 MachineFunction::CallSiteInfo CSInfo;
23141
23142 // Set type id for call site info.
23143 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
23144 CSInfo = MachineFunction::CallSiteInfo(*CB);
23145
23146 // Analyze the operands of the call, assigning locations to each operand.
23147 SmallVector<CCValAssign, 16> ArgLocs;
23148 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
23149
23150 if (CallConv == CallingConv::GHC) {
23151 if (Subtarget.hasStdExtE())
23152 reportFatalUsageError("GHC calling convention is not supported on RVE!");
23153 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
23154 } else
23155 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
23156 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
23157 : CC_RISCV);
23158
23159 // Check if it's really possible to do a tail call.
23160 if (IsTailCall)
23161 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
23162
23163 if (IsTailCall)
23164 ++NumTailCalls;
23165 else if (CLI.CB && CLI.CB->isMustTailCall())
23166 reportFatalInternalError("failed to perform tail call elimination on a "
23167 "call site marked musttail");
23168
23169 // Get a count of how many bytes are to be pushed on the stack.
23170 unsigned NumBytes = ArgCCInfo.getStackSize();
23171
23172 // Create local copies for byval args
23173 SmallVector<SDValue, 8> ByValArgs;
23174 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23175 ISD::ArgFlagsTy Flags = Outs[i].Flags;
23176 if (!Flags.isByVal())
23177 continue;
23178
23179 SDValue Arg = OutVals[i];
23180 unsigned Size = Flags.getByValSize();
23181 Align Alignment = Flags.getNonZeroByValAlign();
23182
23183 int FI =
23184 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
23185 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
23186 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
23187
23188 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
23189 /*IsVolatile=*/false,
23190 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
23192 ByValArgs.push_back(FIPtr);
23193 }
23194
23195 if (!IsTailCall)
23196 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
23197
23198 // Copy argument values to their designated locations.
23199 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
23200 SmallVector<SDValue, 8> MemOpChains;
23201 SDValue StackPtr;
23202 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
23203 ++i, ++OutIdx) {
23204 CCValAssign &VA = ArgLocs[i];
23205 SDValue ArgValue = OutVals[OutIdx];
23206 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
23207
23208 // Handle passing f64 on RV32D with a soft float ABI as a special case.
23209 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23210 assert(VA.isRegLoc() && "Expected register VA assignment");
23211 assert(VA.needsCustom());
23212 SDValue SplitF64 = DAG.getNode(
23213 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
23214 SDValue Lo = SplitF64.getValue(0);
23215 SDValue Hi = SplitF64.getValue(1);
23216
23217 Register RegLo = VA.getLocReg();
23218 RegsToPass.push_back(std::make_pair(RegLo, Lo));
23219
23220 // Get the CCValAssign for the Hi part.
23221 CCValAssign &HiVA = ArgLocs[++i];
23222
23223 if (HiVA.isMemLoc()) {
23224 // Second half of f64 is passed on the stack.
23225 if (!StackPtr.getNode())
23226 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23227 SDValue Address =
23228 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23229 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
23230 // Emit the store.
23231 MemOpChains.push_back(DAG.getStore(
23232 Chain, DL, Hi, Address,
23234 } else {
23235 // Second half of f64 is passed in another GPR.
23236 Register RegHigh = HiVA.getLocReg();
23237 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
23238 }
23239 continue;
23240 }
23241
23242 // Promote the value if needed.
23243 // For now, only handle fully promoted and indirect arguments.
23244 if (VA.getLocInfo() == CCValAssign::Indirect) {
23245 // Store the argument in a stack slot and pass its address.
23246 Align StackAlign =
23247 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
23248 getPrefTypeAlign(ArgValue.getValueType(), DAG));
23249 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
23250 // If the original argument was split (e.g. i128), we need
23251 // to store the required parts of it here (and pass just one address).
23252 // Vectors may be partly split to registers and partly to the stack, in
23253 // which case the base address is partly offset and subsequent stores are
23254 // relative to that.
23255 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
23256 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
23257 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23258 // Calculate the total size to store. We don't have access to what we're
23259 // actually storing other than performing the loop and collecting the
23260 // info.
23261 SmallVector<std::pair<SDValue, SDValue>> Parts;
23262 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
23263 SDValue PartValue = OutVals[OutIdx + 1];
23264 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
23265 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23266 EVT PartVT = PartValue.getValueType();
23267 if (PartVT.isScalableVector())
23268 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23269 StoredSize += PartVT.getStoreSize();
23270 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
23271 Parts.push_back(std::make_pair(PartValue, Offset));
23272 ++i;
23273 ++OutIdx;
23274 }
23275 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
23276 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
23277 MemOpChains.push_back(
23278 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
23280 for (const auto &Part : Parts) {
23281 SDValue PartValue = Part.first;
23282 SDValue PartOffset = Part.second;
23283 SDValue Address =
23284 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
23285 MemOpChains.push_back(
23286 DAG.getStore(Chain, DL, PartValue, Address,
23288 }
23289 ArgValue = SpillSlot;
23290 } else {
23291 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
23292 }
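// Worked example (illustrative): an i128 argument on rv32 is assigned
// CCValAssign::Indirect, so its four i32 parts are stored into one aligned
// stack temporary here and only the temporary's address is handed to the
// callee in a register (or on the stack).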
23293
23294 // Use local copy if it is a byval arg.
23295 if (Flags.isByVal())
23296 ArgValue = ByValArgs[j++];
23297
23298 if (VA.isRegLoc()) {
23299 // Queue up the argument copies and emit them at the end.
23300 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
23301
23302 const TargetOptions &Options = DAG.getTarget().Options;
23303 if (Options.EmitCallSiteInfo)
23304 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
23305 } else {
23306 assert(VA.isMemLoc() && "Argument not register or memory");
23307 assert(!IsTailCall && "Tail call not allowed if stack is used "
23308 "for passing parameters");
23309
23310 // Work out the address of the stack slot.
23311 if (!StackPtr.getNode())
23312 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23313 SDValue Address =
23314 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23315 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
23316
23317 // Emit the store.
23318 MemOpChains.push_back(
23319 DAG.getStore(Chain, DL, ArgValue, Address,
23321 }
23322 }
23323
23324 // Join the stores, which are independent of one another.
23325 if (!MemOpChains.empty())
23326 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
23327
23328 SDValue Glue;
23329
23330 // Build a sequence of copy-to-reg nodes, chained and glued together.
23331 for (auto &Reg : RegsToPass) {
23332 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
23333 Glue = Chain.getValue(1);
23334 }
23335
23336 // Validate that none of the argument registers have been marked as
23337 // reserved, if so report an error. Do the same for the return address if this
23338 // is not a tailcall.
23339 validateCCReservedRegs(RegsToPass, MF);
23340 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
23341 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
23342 MF.getFunction(),
23343 "Return address register required, but has been reserved."});
23344
23345 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
23346 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
23347 // split it and then direct call can be matched by PseudoCALL.
23348 bool CalleeIsLargeExternalSymbol = false;
23349 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
23350 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
23351 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
23352 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23353 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
23354 CalleeIsLargeExternalSymbol = true;
23355 }
23356 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
23357 const GlobalValue *GV = S->getGlobal();
23358 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
23359 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23360 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
23361 }
23362
23363 // The first call operand is the chain and the second is the target address.
23364 SmallVector<SDValue, 8> Ops;
23365 Ops.push_back(Chain);
23366 Ops.push_back(Callee);
23367
23368 // Add argument registers to the end of the list so that they are
23369 // known live into the call.
23370 for (auto &Reg : RegsToPass)
23371 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
23372
23373 // Add a register mask operand representing the call-preserved registers.
23374 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
23375 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
23376 assert(Mask && "Missing call preserved mask for calling convention");
23377 Ops.push_back(DAG.getRegisterMask(Mask));
23378
23379 // Glue the call to the argument copies, if any.
23380 if (Glue.getNode())
23381 Ops.push_back(Glue);
23382
23383 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
23384 "Unexpected CFI type for a direct call");
23385
23386 // Emit the call.
23387 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
23388
23389 // Use a software-guarded branch for large code model non-indirect calls.
23390 // A tail call to an external symbol will have a null CLI.CB, so we need
23391 // another way to determine the call site type.
23392 bool NeedSWGuarded = false;
23393 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
23394 Subtarget.hasStdExtZicfilp() &&
23395 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
23396 NeedSWGuarded = true;
23397
23398 if (IsTailCall) {
23399 MF.getFrameInfo().setHasTailCall();
23400 unsigned CallOpc =
23401 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
23402 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23403 if (CLI.CFIType)
23404 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23405 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
23406 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
23407 return Ret;
23408 }
23409
23410 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
23411 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23412 if (CLI.CFIType)
23413 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23414
23415 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
23416 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
23417 Glue = Chain.getValue(1);
23418
23419 // Mark the end of the call, which is glued to the call itself.
23420 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
23421 Glue = Chain.getValue(1);
23422
23423 // Assign locations to each value returned by this call.
23424 SmallVector<CCValAssign, 16> RVLocs;
23425 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
23426 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
23427
23428 // Copy all of the result registers out of their specified physreg.
23429 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
23430 auto &VA = RVLocs[i];
23431 // Copy the value out
23432 SDValue RetValue =
23433 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
23434 // Glue the RetValue to the end of the call sequence
23435 Chain = RetValue.getValue(1);
23436 Glue = RetValue.getValue(2);
23437
23438 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23439 assert(VA.needsCustom());
23440 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
23441 MVT::i32, Glue);
23442 Chain = RetValue2.getValue(1);
23443 Glue = RetValue2.getValue(2);
23444 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
23445 RetValue2);
23446 } else
23447 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
23448
23449 InVals.push_back(RetValue);
23450 }
23451
23452 return Chain;
23453}
23454
23455 bool RISCVTargetLowering::CanLowerReturn(
23456 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
23457 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
23458 const Type *RetTy) const {
23459 SmallVector<CCValAssign, 16> RVLocs;
23460 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
23461
23462 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23463 MVT VT = Outs[i].VT;
23464 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
23465 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
23466 /*IsRet=*/true, Outs[i].OrigTy))
23467 return false;
23468 }
23469 return true;
23470}
23471
23472SDValue
23473 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
23474 bool IsVarArg,
23475 const SmallVectorImpl<ISD::OutputArg> &Outs,
23476 const SmallVectorImpl<SDValue> &OutVals,
23477 const SDLoc &DL, SelectionDAG &DAG) const {
23478 MachineFunction &MF = DAG.getMachineFunction();
23479
23480 // Stores the assignment of the return value to a location.
23481 SmallVector<CCValAssign, 16> RVLocs;
23482
23483 // Info about the registers and stack slot.
23484 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
23485 *DAG.getContext());
23486
23487 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
23488 nullptr, CC_RISCV);
23489
23490 if (CallConv == CallingConv::GHC && !RVLocs.empty())
23491 reportFatalUsageError("GHC functions return void only");
23492
23493 SDValue Glue;
23494 SmallVector<SDValue, 4> RetOps(1, Chain);
23495
23496 // Copy the result values into the output registers.
23497 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
23498 SDValue Val = OutVals[OutIdx];
23499 CCValAssign &VA = RVLocs[i];
23500 assert(VA.isRegLoc() && "Can only return in registers!");
23501
23502 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23503 // Handle returning f64 on RV32D with a soft float ABI.
23504 assert(VA.isRegLoc() && "Expected return via registers");
23505 assert(VA.needsCustom());
23506 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
23507 DAG.getVTList(MVT::i32, MVT::i32), Val);
23508 SDValue Lo = SplitF64.getValue(0);
23509 SDValue Hi = SplitF64.getValue(1);
23510 Register RegLo = VA.getLocReg();
23511 Register RegHi = RVLocs[++i].getLocReg();
23512
23513 if (Subtarget.isRegisterReservedByUser(RegLo) ||
23514 Subtarget.isRegisterReservedByUser(RegHi))
23515 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
23516 MF.getFunction(),
23517 "Return value register required, but has been reserved."});
23518
23519 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
23520 Glue = Chain.getValue(1);
23521 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
23522 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
23523 Glue = Chain.getValue(1);
23524 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
23525 } else {
23526 // Handle a 'normal' return.
23527 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
23528 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
23529
23530 if (Subtarget.isRegisterReservedByUser(VA.getLocReg()))
23531 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
23532 MF.getFunction(),
23533 "Return value register required, but has been reserved."});
23534
23535 // Guarantee that all emitted copies are stuck together.
23536 Glue = Chain.getValue(1);
23537 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
23538 }
23539 }
23540
23541 RetOps[0] = Chain; // Update chain.
23542
23543 // Add the glue node if we have it.
23544 if (Glue.getNode()) {
23545 RetOps.push_back(Glue);
23546 }
23547
23548 if (any_of(RVLocs,
23549 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
23550 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
23551
23552 unsigned RetOpc = RISCVISD::RET_GLUE;
23553 // Interrupt service routines use different return instructions.
23554 const Function &Func = DAG.getMachineFunction().getFunction();
23555 if (Func.hasFnAttribute("interrupt")) {
23556 if (!Func.getReturnType()->isVoidTy())
23557 reportFatalUsageError(
23558 "Functions with the interrupt attribute must have void return type!");
23559
23561 StringRef Kind =
23562 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
23563
23564 if (Kind == "supervisor")
23565 RetOpc = RISCVISD::SRET_GLUE;
23566 else if (Kind == "rnmi") {
23567 assert(Subtarget.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
23568 "Need Smrnmi extension for rnmi");
23569 RetOpc = RISCVISD::MNRET_GLUE;
23570 } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
23571 assert(Subtarget.hasFeature(RISCV::FeatureVendorXqciint) &&
23572 "Need Xqciint for qci-(no)nest");
23573 RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
23574 } else
23575 RetOpc = RISCVISD::MRET_GLUE;
23576 }
23577
23578 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
23579}
23580
23581void RISCVTargetLowering::validateCCReservedRegs(
23582 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
23583 MachineFunction &MF) const {
23584 const Function &F = MF.getFunction();
23585
23586 if (llvm::any_of(Regs, [this](auto Reg) {
23587 return Subtarget.isRegisterReservedByUser(Reg.first);
23588 }))
23589 F.getContext().diagnose(DiagnosticInfoUnsupported{
23590 F, "Argument register required, but has been reserved."});
23591}
23592
23593// Check if the result of the node is only used as a return value, as
23594// otherwise we can't perform a tail-call.
23595 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
23596 if (N->getNumValues() != 1)
23597 return false;
23598 if (!N->hasNUsesOfValue(1, 0))
23599 return false;
23600
23601 SDNode *Copy = *N->user_begin();
23602
23603 if (Copy->getOpcode() == ISD::BITCAST) {
23604 return isUsedByReturnOnly(Copy, Chain);
23605 }
23606
23607 // TODO: Handle additional opcodes in order to support tail-calling libcalls
23608 // with soft float ABIs.
23609 if (Copy->getOpcode() != ISD::CopyToReg) {
23610 return false;
23611 }
23612
23613 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
23614 // isn't safe to perform a tail call.
23615 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
23616 return false;
23617
23618 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
23619 bool HasRet = false;
23620 for (SDNode *Node : Copy->users()) {
23621 if (Node->getOpcode() != RISCVISD::RET_GLUE)
23622 return false;
23623 HasRet = true;
23624 }
23625 if (!HasRet)
23626 return false;
23627
23628 Chain = Copy->getOperand(0);
23629 return true;
23630}
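// Schematic DAG shape that this accepts (illustrative, values elided):
//   tN:   ch = CopyToReg Chain, Register $x10, <libcall result>
//   tN+1: ch = RISCVISD::RET_GLUE tN, Register $x10, ...
// i.e. the node's single value feeds one CopyToReg whose only users are
// RET_GLUE nodes, so the libcall may be tail-called.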
23631
23632 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
23633 return CI->isTailCall();
23634}
23635
23636/// getConstraintType - Given a constraint letter, return the type of
23637/// constraint it is for this target.
23638 RISCVTargetLowering::ConstraintType
23639 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
23640 if (Constraint.size() == 1) {
23641 switch (Constraint[0]) {
23642 default:
23643 break;
23644 case 'f':
23645 case 'R':
23646 return C_RegisterClass;
23647 case 'I':
23648 case 'J':
23649 case 'K':
23650 return C_Immediate;
23651 case 'A':
23652 return C_Memory;
23653 case 's':
23654 case 'S': // A symbolic address
23655 return C_Other;
23656 }
23657 } else {
23658 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
23659 return C_RegisterClass;
23660 if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
23661 return C_RegisterClass;
23662 }
23663 return TargetLowering::getConstraintType(Constraint);
23664}
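// Usage example (illustrative C, constraint letters as classified above):
//   int r;
//   asm ("addi %0, %1, %2" : "=r"(r) : "r"(x), "I"(12));
// 'I' is a 12-bit signed immediate, 'J' the constant zero, 'K' a 5-bit
// unsigned immediate, 'A' a memory address held in a register, and 'f'/"vr"
// select FP and vector register classes.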
23665
23666std::pair<unsigned, const TargetRegisterClass *>
23667 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
23668 StringRef Constraint,
23669 MVT VT) const {
23670 // First, see if this is a constraint that directly corresponds to a RISC-V
23671 // register class.
23672 if (Constraint.size() == 1) {
23673 switch (Constraint[0]) {
23674 case 'r':
23675 // TODO: Support fixed vectors up to XLen for P extension?
23676 if (VT.isVector())
23677 break;
23678 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23679 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23680 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23681 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23682 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23683 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23684 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23685 case 'f':
23686 if (VT == MVT::f16) {
23687 if (Subtarget.hasStdExtZfhmin())
23688 return std::make_pair(0U, &RISCV::FPR16RegClass);
23689 if (Subtarget.hasStdExtZhinxmin())
23690 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23691 } else if (VT == MVT::f32) {
23692 if (Subtarget.hasStdExtF())
23693 return std::make_pair(0U, &RISCV::FPR32RegClass);
23694 if (Subtarget.hasStdExtZfinx())
23695 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23696 } else if (VT == MVT::f64) {
23697 if (Subtarget.hasStdExtD())
23698 return std::make_pair(0U, &RISCV::FPR64RegClass);
23699 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23700 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23701 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23702 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23703 }
23704 break;
23705 case 'R':
23706 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23707 (VT == MVT::i128 && Subtarget.is64Bit()))
23708 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23709 break;
23710 default:
23711 break;
23712 }
23713 } else if (Constraint == "vr") {
23714 for (const auto *RC :
23715 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
23716 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
23717 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
23718 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
23719 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
23720 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
23721 &RISCV::VRN2M4RegClass}) {
23722 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23723 return std::make_pair(0U, RC);
23724
23725 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23726 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23727 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23728 return std::make_pair(0U, RC);
23729 }
23730 }
23731 } else if (Constraint == "vd") {
23732 for (const auto *RC :
23733 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
23734 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
23735 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
23736 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
23737 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
23738 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
23739 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
23740 &RISCV::VRN2M4NoV0RegClass}) {
23741 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23742 return std::make_pair(0U, RC);
23743
23744 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23745 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23746 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23747 return std::make_pair(0U, RC);
23748 }
23749 }
23750 } else if (Constraint == "vm") {
23751 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
23752 return std::make_pair(0U, &RISCV::VMV0RegClass);
23753
23754 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23755 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23756 // VT here might be coerced to vector with i8 elements, so we need to
23757 // check if this is an M1 register here instead of checking VMV0RegClass.
23758 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT))
23759 return std::make_pair(0U, &RISCV::VMV0RegClass);
23760 }
23761 } else if (Constraint == "cr") {
23762 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23763 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23764 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23765 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23766 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23767 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23768 if (!VT.isVector())
23769 return std::make_pair(0U, &RISCV::GPRCRegClass);
23770 } else if (Constraint == "cR") {
23771 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23772 (VT == MVT::i128 && Subtarget.is64Bit()))
23773 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23774 } else if (Constraint == "cf") {
23775 if (VT == MVT::f16) {
23776 if (Subtarget.hasStdExtZfhmin())
23777 return std::make_pair(0U, &RISCV::FPR16CRegClass);
23778 if (Subtarget.hasStdExtZhinxmin())
23779 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23780 } else if (VT == MVT::f32) {
23781 if (Subtarget.hasStdExtF())
23782 return std::make_pair(0U, &RISCV::FPR32CRegClass);
23783 if (Subtarget.hasStdExtZfinx())
23784 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23785 } else if (VT == MVT::f64) {
23786 if (Subtarget.hasStdExtD())
23787 return std::make_pair(0U, &RISCV::FPR64CRegClass);
23788 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23789 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23790 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23791 return std::make_pair(0U, &RISCV::GPRCRegClass);
23792 }
23793 }
23794
23795 // Clang will correctly decode the usage of register name aliases into their
23796 // official names. However, other frontends like `rustc` do not. This allows
23797 // users of these frontends to use the ABI names for registers in LLVM-style
23798 // register constraints.
23799 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
23800 .Case("{zero}", RISCV::X0)
23801 .Case("{ra}", RISCV::X1)
23802 .Case("{sp}", RISCV::X2)
23803 .Case("{gp}", RISCV::X3)
23804 .Case("{tp}", RISCV::X4)
23805 .Case("{t0}", RISCV::X5)
23806 .Case("{t1}", RISCV::X6)
23807 .Case("{t2}", RISCV::X7)
23808 .Cases("{s0}", "{fp}", RISCV::X8)
23809 .Case("{s1}", RISCV::X9)
23810 .Case("{a0}", RISCV::X10)
23811 .Case("{a1}", RISCV::X11)
23812 .Case("{a2}", RISCV::X12)
23813 .Case("{a3}", RISCV::X13)
23814 .Case("{a4}", RISCV::X14)
23815 .Case("{a5}", RISCV::X15)
23816 .Case("{a6}", RISCV::X16)
23817 .Case("{a7}", RISCV::X17)
23818 .Case("{s2}", RISCV::X18)
23819 .Case("{s3}", RISCV::X19)
23820 .Case("{s4}", RISCV::X20)
23821 .Case("{s5}", RISCV::X21)
23822 .Case("{s6}", RISCV::X22)
23823 .Case("{s7}", RISCV::X23)
23824 .Case("{s8}", RISCV::X24)
23825 .Case("{s9}", RISCV::X25)
23826 .Case("{s10}", RISCV::X26)
23827 .Case("{s11}", RISCV::X27)
23828 .Case("{t3}", RISCV::X28)
23829 .Case("{t4}", RISCV::X29)
23830 .Case("{t5}", RISCV::X30)
23831 .Case("{t6}", RISCV::X31)
23832 .Default(RISCV::NoRegister);
23833 if (XRegFromAlias != RISCV::NoRegister)
23834 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
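// Illustrative use from a frontend that passes ABI names straight through:
// an inline-asm register constraint written as "{a0}" is accepted here and
// resolved to X10 in the GPR register class.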
23835
23836 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
23837 // TableGen record rather than the AsmName to choose registers for InlineAsm
23838 // constraints, plus we want to match those names to the widest floating point
23839 // register type available, manually select floating point registers here.
23840 //
23841 // The second case is the ABI name of the register, so that frontends can also
23842 // use the ABI names in register constraint lists.
23843 if (Subtarget.hasStdExtF()) {
23844 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
23845 .Cases("{f0}", "{ft0}", RISCV::F0_F)
23846 .Cases("{f1}", "{ft1}", RISCV::F1_F)
23847 .Cases("{f2}", "{ft2}", RISCV::F2_F)
23848 .Cases("{f3}", "{ft3}", RISCV::F3_F)
23849 .Cases("{f4}", "{ft4}", RISCV::F4_F)
23850 .Cases("{f5}", "{ft5}", RISCV::F5_F)
23851 .Cases("{f6}", "{ft6}", RISCV::F6_F)
23852 .Cases("{f7}", "{ft7}", RISCV::F7_F)
23853 .Cases("{f8}", "{fs0}", RISCV::F8_F)
23854 .Cases("{f9}", "{fs1}", RISCV::F9_F)
23855 .Cases("{f10}", "{fa0}", RISCV::F10_F)
23856 .Cases("{f11}", "{fa1}", RISCV::F11_F)
23857 .Cases("{f12}", "{fa2}", RISCV::F12_F)
23858 .Cases("{f13}", "{fa3}", RISCV::F13_F)
23859 .Cases("{f14}", "{fa4}", RISCV::F14_F)
23860 .Cases("{f15}", "{fa5}", RISCV::F15_F)
23861 .Cases("{f16}", "{fa6}", RISCV::F16_F)
23862 .Cases("{f17}", "{fa7}", RISCV::F17_F)
23863 .Cases("{f18}", "{fs2}", RISCV::F18_F)
23864 .Cases("{f19}", "{fs3}", RISCV::F19_F)
23865 .Cases("{f20}", "{fs4}", RISCV::F20_F)
23866 .Cases("{f21}", "{fs5}", RISCV::F21_F)
23867 .Cases("{f22}", "{fs6}", RISCV::F22_F)
23868 .Cases("{f23}", "{fs7}", RISCV::F23_F)
23869 .Cases("{f24}", "{fs8}", RISCV::F24_F)
23870 .Cases("{f25}", "{fs9}", RISCV::F25_F)
23871 .Cases("{f26}", "{fs10}", RISCV::F26_F)
23872 .Cases("{f27}", "{fs11}", RISCV::F27_F)
23873 .Cases("{f28}", "{ft8}", RISCV::F28_F)
23874 .Cases("{f29}", "{ft9}", RISCV::F29_F)
23875 .Cases("{f30}", "{ft10}", RISCV::F30_F)
23876 .Cases("{f31}", "{ft11}", RISCV::F31_F)
23877 .Default(RISCV::NoRegister);
23878 if (FReg != RISCV::NoRegister) {
23879 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
23880 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
23881 unsigned RegNo = FReg - RISCV::F0_F;
23882 unsigned DReg = RISCV::F0_D + RegNo;
23883 return std::make_pair(DReg, &RISCV::FPR64RegClass);
23884 }
23885 if (VT == MVT::f32 || VT == MVT::Other)
23886 return std::make_pair(FReg, &RISCV::FPR32RegClass);
23887 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
23888 unsigned RegNo = FReg - RISCV::F0_F;
23889 unsigned HReg = RISCV::F0_H + RegNo;
23890 return std::make_pair(HReg, &RISCV::FPR16RegClass);
23891 }
23892 }
23893 }
23894
23895 if (Subtarget.hasVInstructions()) {
23896 Register VReg = StringSwitch<Register>(Constraint.lower())
23897 .Case("{v0}", RISCV::V0)
23898 .Case("{v1}", RISCV::V1)
23899 .Case("{v2}", RISCV::V2)
23900 .Case("{v3}", RISCV::V3)
23901 .Case("{v4}", RISCV::V4)
23902 .Case("{v5}", RISCV::V5)
23903 .Case("{v6}", RISCV::V6)
23904 .Case("{v7}", RISCV::V7)
23905 .Case("{v8}", RISCV::V8)
23906 .Case("{v9}", RISCV::V9)
23907 .Case("{v10}", RISCV::V10)
23908 .Case("{v11}", RISCV::V11)
23909 .Case("{v12}", RISCV::V12)
23910 .Case("{v13}", RISCV::V13)
23911 .Case("{v14}", RISCV::V14)
23912 .Case("{v15}", RISCV::V15)
23913 .Case("{v16}", RISCV::V16)
23914 .Case("{v17}", RISCV::V17)
23915 .Case("{v18}", RISCV::V18)
23916 .Case("{v19}", RISCV::V19)
23917 .Case("{v20}", RISCV::V20)
23918 .Case("{v21}", RISCV::V21)
23919 .Case("{v22}", RISCV::V22)
23920 .Case("{v23}", RISCV::V23)
23921 .Case("{v24}", RISCV::V24)
23922 .Case("{v25}", RISCV::V25)
23923 .Case("{v26}", RISCV::V26)
23924 .Case("{v27}", RISCV::V27)
23925 .Case("{v28}", RISCV::V28)
23926 .Case("{v29}", RISCV::V29)
23927 .Case("{v30}", RISCV::V30)
23928 .Case("{v31}", RISCV::V31)
23929 .Default(RISCV::NoRegister);
23930 if (VReg != RISCV::NoRegister) {
23931 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
23932 return std::make_pair(VReg, &RISCV::VMRegClass);
23933 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
23934 return std::make_pair(VReg, &RISCV::VRRegClass);
23935 for (const auto *RC :
23936 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
23937 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
23938 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
23939 return std::make_pair(VReg, RC);
23940 }
23941 }
23942 }
23943 }
23944
23945 std::pair<Register, const TargetRegisterClass *> Res =
23946 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
23947
23948 // If we picked one of the Zfinx register classes, remap it to the GPR class.
23949 // FIXME: When Zfinx is supported in CodeGen this will need to take the
23950 // Subtarget into account.
23951 if (Res.second == &RISCV::GPRF16RegClass ||
23952 Res.second == &RISCV::GPRF32RegClass ||
23953 Res.second == &RISCV::GPRPairRegClass)
23954 return std::make_pair(Res.first, &RISCV::GPRRegClass);
23955
23956 return Res;
23957}
23958
23959 InlineAsm::ConstraintCode
23960 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
23961 // Currently only support length 1 constraints.
23962 if (ConstraintCode.size() == 1) {
23963 switch (ConstraintCode[0]) {
23964 case 'A':
23965 return InlineAsm::ConstraintCode::A;
23966 default:
23967 break;
23968 }
23969 }
23970
23971 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
23972}
23973
23974 void RISCVTargetLowering::LowerAsmOperandForConstraint(
23975 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
23976 SelectionDAG &DAG) const {
23977 // Currently only support length 1 constraints.
23978 if (Constraint.size() == 1) {
23979 switch (Constraint[0]) {
23980 case 'I':
23981 // Validate & create a 12-bit signed immediate operand.
23982 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
23983 uint64_t CVal = C->getSExtValue();
23984 if (isInt<12>(CVal))
23985 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
23986 Subtarget.getXLenVT()));
23987 }
23988 return;
23989 case 'J':
23990 // Validate & create an integer zero operand.
23991 if (isNullConstant(Op))
23992 Ops.push_back(
23993 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
23994 return;
23995 case 'K':
23996 // Validate & create a 5-bit unsigned immediate operand.
23997 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
23998 uint64_t CVal = C->getZExtValue();
23999 if (isUInt<5>(CVal))
24000 Ops.push_back(
24001 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
24002 }
24003 return;
24004 case 'S':
24005 TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
24006 return;
24007 default:
24008 break;
24009 }
24010 }
24011 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
24012 }
24013
24014 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
24015 Instruction *Inst,
24016 AtomicOrdering Ord) const {
24017 if (Subtarget.hasStdExtZtso()) {
24018 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
24019 return Builder.CreateFence(Ord);
24020 return nullptr;
24021 }
24022
24023 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
24024 return Builder.CreateFence(Ord);
24025 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
24026 return Builder.CreateFence(AtomicOrdering::Release);
24027 return nullptr;
24028}
24029
24030 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
24031 Instruction *Inst,
24032 AtomicOrdering Ord) const {
24033 if (Subtarget.hasStdExtZtso()) {
24034 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
24035 return Builder.CreateFence(Ord);
24036 return nullptr;
24037 }
24038
24039 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
24040 return Builder.CreateFence(AtomicOrdering::Acquire);
24041 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
24042 Ord == AtomicOrdering::SequentiallyConsistent)
24043 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
24044 return nullptr;
24045}
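// Net effect on a WMO (non-Ztso) target, shown for illustration: a seq_cst
// load is bracketed as "fence rw,rw; l{w,d}; fence r,rw", and a seq_cst store
// becomes "fence rw,w; s{w,d}" (plus an optional trailing "fence rw,rw" when
// enableTrailingSeqCstFence() is set).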
24046
24047 TargetLowering::AtomicExpansionKind
24048 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
24049 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
24050 // point operations can't be used in an lr/sc sequence without breaking the
24051 // forward-progress guarantee.
24052 if (AI->isFloatingPointOperation() ||
24053 AI->getOperation() == AtomicRMWInst::UIncWrap ||
24054 AI->getOperation() == AtomicRMWInst::UDecWrap ||
24055 AI->getOperation() == AtomicRMWInst::USubCond ||
24056 AI->getOperation() == AtomicRMWInst::USubSat)
24057 return AtomicExpansionKind::CmpXChg;
24058
24059 // Don't expand forced atomics, we want to have __sync libcalls instead.
24060 if (Subtarget.hasForcedAtomics())
24061 return AtomicExpansionKind::None;
24062
24063 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
24064 if (AI->getOperation() == AtomicRMWInst::Nand) {
24065 if (Subtarget.hasStdExtZacas() &&
24066 (Size >= 32 || Subtarget.hasStdExtZabha()))
24067 return AtomicExpansionKind::CmpXChg;
24068 if (Size < 32)
24069 return AtomicExpansionKind::MaskedIntrinsic;
24070 }
24071
24072 if (Size < 32 && !Subtarget.hasStdExtZabha())
24073 return AtomicExpansionKind::MaskedIntrinsic;
24074
24075 return AtomicExpansionKind::None;
24076 }
24077
24078static Intrinsic::ID
24079 getIntrinsicForMaskedAtomicRMWBinOp(AtomicRMWInst::BinOp BinOp) {
24080 switch (BinOp) {
24081 default:
24082 llvm_unreachable("Unexpected AtomicRMW BinOp");
24083 case AtomicRMWInst::Xchg:
24084 return Intrinsic::riscv_masked_atomicrmw_xchg;
24085 case AtomicRMWInst::Add:
24086 return Intrinsic::riscv_masked_atomicrmw_add;
24087 case AtomicRMWInst::Sub:
24088 return Intrinsic::riscv_masked_atomicrmw_sub;
24089 case AtomicRMWInst::Nand:
24090 return Intrinsic::riscv_masked_atomicrmw_nand;
24091 case AtomicRMWInst::Max:
24092 return Intrinsic::riscv_masked_atomicrmw_max;
24093 case AtomicRMWInst::Min:
24094 return Intrinsic::riscv_masked_atomicrmw_min;
24095 case AtomicRMWInst::UMax:
24096 return Intrinsic::riscv_masked_atomicrmw_umax;
24097 case AtomicRMWInst::UMin:
24098 return Intrinsic::riscv_masked_atomicrmw_umin;
24099 }
24100}
24101
24102 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
24103 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
24104 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
24105 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
24106 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
24107 // mask, as this produces better code than the LR/SC loop emitted by
24108 // int_riscv_masked_atomicrmw_xchg.
24109 if (AI->getOperation() == AtomicRMWInst::Xchg &&
24110 isa<ConstantInt>(AI->getValOperand())) {
24111 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
24112 if (CVal->isZero())
24113 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
24114 Builder.CreateNot(Mask, "Inv_Mask"),
24115 AI->getAlign(), Ord);
24116 if (CVal->isMinusOne())
24117 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
24118 AI->getAlign(), Ord);
24119 }
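// For example (illustrative IR): "atomicrmw xchg ptr %p, i8 0 monotonic"
// becomes an "atomicrmw and" on the containing aligned word with ~Mask, and
// an xchg of -1 becomes an "atomicrmw or" with Mask, avoiding an LR/SC loop.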
24120
24121 unsigned XLen = Subtarget.getXLen();
24122 Value *Ordering =
24123 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
24124 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24126 AI->getModule(),
24128
24129 if (XLen == 64) {
24130 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
24131 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24132 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
24133 }
24134
24135 Value *Result;
24136
24137 // Must pass the shift amount needed to sign extend the loaded value prior
24138 // to performing a signed comparison for min/max. ShiftAmt is the number of
24139 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
24140 // is the number of bits to left+right shift the value in order to
24141 // sign-extend.
24142 if (AI->getOperation() == AtomicRMWInst::Min ||
24143 AI->getOperation() == AtomicRMWInst::Max) {
24144 const DataLayout &DL = AI->getDataLayout();
24145 unsigned ValWidth =
24146 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
24147 Value *SextShamt =
24148 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
24149 Result = Builder.CreateCall(LrwOpScwLoop,
24150 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
24151 } else {
24152 Result =
24153 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
24154 }
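// Worked example (illustrative, XLen = 32): for an i8 field that the masking
// code placed at bit 8 of the aligned word, ShiftAmt = 8 and ValWidth = 8, so
// SextShamt = 32 - 8 - 8 = 16; shifting the word left by 16 and arithmetically
// right by 16 sign-extends the field in place for the signed min/max compare.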
24155
24156 if (XLen == 64)
24157 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24158 return Result;
24159}
24160
24161 TargetLowering::AtomicExpansionKind
24162 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
24163 AtomicCmpXchgInst *CI) const {
24164 // Don't expand forced atomics, we want to have __sync libcalls instead.
24165 if (Subtarget.hasForcedAtomics())
24166 return AtomicExpansionKind::None;
24167
24168 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
24169 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
24170 (Size == 8 || Size == 16))
24171 return AtomicExpansionKind::MaskedIntrinsic;
24172 return AtomicExpansionKind::None;
24173 }
24174
24175 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
24176 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
24177 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
24178 unsigned XLen = Subtarget.getXLen();
24179 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
24180 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg;
24181 if (XLen == 64) {
24182 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
24183 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
24184 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24185 }
24186 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24187 Value *Result = Builder.CreateIntrinsic(
24188 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
24189 if (XLen == 64)
24190 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24191 return Result;
24192}
24193
24194 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
24195 EVT DataVT) const {
24196 // We have indexed loads for all supported EEW types. Indices are always
24197 // zero extended.
24198 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
24199 isTypeLegal(Extend.getValueType()) &&
24200 isTypeLegal(Extend.getOperand(0).getValueType()) &&
24201 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
24202}
24203
24204 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
24205 EVT VT) const {
24206 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
24207 return false;
24208
24209 switch (FPVT.getSimpleVT().SimpleTy) {
24210 case MVT::f16:
24211 return Subtarget.hasStdExtZfhmin();
24212 case MVT::f32:
24213 return Subtarget.hasStdExtF();
24214 case MVT::f64:
24215 return Subtarget.hasStdExtD();
24216 default:
24217 return false;
24218 }
24219}
24220
24221 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
24222 // If we are using the small code model, we can reduce size of jump table
24223 // entry to 4 bytes.
24224 if (Subtarget.is64Bit() && !isPositionIndependent() &&
24225 getTargetMachine().getCodeModel() == CodeModel::Small) {
24226 return MachineJumpTableInfo::EK_Custom32;
24227 }
24228 return TargetLowering::getJumpTableEncoding();
24229 }
24230
24231 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
24232 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
24233 unsigned uid, MCContext &Ctx) const {
24234 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
24235 getTargetMachine().getCodeModel() == CodeModel::Small);
24236 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
24237}
24238
24239 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
24240 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
24241 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
24242 // a power of two as well.
24243 // FIXME: This doesn't work for zve32, but that's already broken
24244 // elsewhere for the same reason.
24245 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
24246 static_assert(RISCV::RVVBitsPerBlock == 64,
24247 "RVVBitsPerBlock changed, audit needed");
24248 return true;
24249}
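// Worked example (illustrative): with VLEN = 128 and RVVBitsPerBlock = 64,
// vscale is 128 / 64 = 2; any legal VLEN (a power of two >= 64) likewise
// yields a power-of-two vscale.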
24250
24251 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
24252 SDValue &Offset,
24253 ISD::MemIndexedMode &AM,
24254 SelectionDAG &DAG) const {
24255 // Target does not support indexed loads.
24256 if (!Subtarget.hasVendorXTHeadMemIdx())
24257 return false;
24258
24259 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
24260 return false;
24261
24262 Base = Op->getOperand(0);
24263 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
24264 int64_t RHSC = RHS->getSExtValue();
24265 if (Op->getOpcode() == ISD::SUB)
24266 RHSC = -(uint64_t)RHSC;
24267
24268 // The constants that can be encoded in the THeadMemIdx instructions
24269 // are of the form (sign_extend(imm5) << imm2).
24270 bool isLegalIndexedOffset = false;
24271 for (unsigned i = 0; i < 4; i++)
24272 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
24273 isLegalIndexedOffset = true;
24274 break;
24275 }
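// Worked examples (illustrative): an offset of 24 is accepted as 3 << 3 and
// -64 as -8 << 3, while 17 is rejected because no shift amount in [0,3]
// leaves a properly aligned value that fits in a signed 5-bit immediate.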
24276
24277 if (!isLegalIndexedOffset)
24278 return false;
24279
24280 Offset = Op->getOperand(1);
24281 return true;
24282 }
24283
24284 return false;
24285}
24286
24287bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
24288 SDValue &Offset,
24289 ISD::MemIndexedMode &AM,
24290 SelectionDAG &DAG) const {
24291 EVT VT;
24292 SDValue Ptr;
24293 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24294 VT = LD->getMemoryVT();
24295 Ptr = LD->getBasePtr();
24296 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24297 VT = ST->getMemoryVT();
24298 Ptr = ST->getBasePtr();
24299 } else
24300 return false;
24301
24302 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
24303 return false;
24304
24305 AM = ISD::PRE_INC;
24306 return true;
24307}
24308
24309bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
24310 SDValue &Base,
24311 SDValue &Offset,
24312 ISD::MemIndexedMode &AM,
24313 SelectionDAG &DAG) const {
24314 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
24315 if (Op->getOpcode() != ISD::ADD)
24316 return false;
24317
24318 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
24319 Base = LS->getBasePtr();
24320 else
24321 return false;
24322
24323 if (Base == Op->getOperand(0))
24324 Offset = Op->getOperand(1);
24325 else if (Base == Op->getOperand(1))
24326 Offset = Op->getOperand(0);
24327 else
24328 return false;
24329
24330 AM = ISD::POST_INC;
24331 return true;
24332 }
24333
24334 EVT VT;
24335 SDValue Ptr;
24336 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24337 VT = LD->getMemoryVT();
24338 Ptr = LD->getBasePtr();
24339 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24340 VT = ST->getMemoryVT();
24341 Ptr = ST->getBasePtr();
24342 } else
24343 return false;
24344
24345 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
24346 return false;
24347 // Post-indexing updates the base, so it's not a valid transform
24348 // if that's not the same as the load's pointer.
24349 if (Ptr != Base)
24350 return false;
24351
24352 AM = ISD::POST_INC;
24353 return true;
24354}
24355
24356bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
24357 EVT VT) const {
24358 EVT SVT = VT.getScalarType();
24359
24360 if (!SVT.isSimple())
24361 return false;
24362
24363 switch (SVT.getSimpleVT().SimpleTy) {
24364 case MVT::f16:
24365 return VT.isVector() ? Subtarget.hasVInstructionsF16()
24366 : Subtarget.hasStdExtZfhOrZhinx();
24367 case MVT::f32:
24368 return Subtarget.hasStdExtFOrZfinx();
24369 case MVT::f64:
24370 return Subtarget.hasStdExtDOrZdinx();
24371 default:
24372 break;
24373 }
24374
24375 return false;
24376}
24377
24378ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
24379 // Zacas will use amocas.w which does not require extension.
24380 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
24381}
24382
24383Register RISCVTargetLowering::getExceptionPointerRegister(
24384 const Constant *PersonalityFn) const {
24385 return RISCV::X10;
24386}
24387
24388Register RISCVTargetLowering::getExceptionSelectorRegister(
24389 const Constant *PersonalityFn) const {
24390 return RISCV::X11;
24391}
24392
24393bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
24394 // Return false to suppress the unnecessary extensions if the LibCall
24395 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
24396 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
24397 Type.getSizeInBits() < Subtarget.getXLen()))
24398 return false;
24399
24400 return true;
24401}
24402
24403bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty,
24404 bool IsSigned) const {
24405 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
24406 return true;
24407
24408 return IsSigned;
24409}
24410
24411bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
24412 SDValue C) const {
24413 // Check integral scalar types.
24414 if (!VT.isScalarInteger())
24415 return false;
24416
24417 // Omit the optimization if the subtarget has the Zmmul extension and the
24418 // data size exceeds XLen.
24419 const bool HasZmmul = Subtarget.hasStdExtZmmul();
24420 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
24421 return false;
24422
24423 auto *ConstNode = cast<ConstantSDNode>(C);
24424 const APInt &Imm = ConstNode->getAPIntValue();
24425
24426 // Don't do this if the Xqciac extension is enabled and the Imm is a simm12.
24427 if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
24428 return false;
24429
24430 // Break the MUL to a SLLI and an ADD/SUB.
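 // Illustrative example: x * 17 becomes (x << 4) + x, and x * 15 becomes
 // (x << 4) - x.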
24431 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
24432 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
24433 return true;
24434
24435 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
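 // Illustrative example (e.g. with Zba): x * 4100 can be formed as
 // sh2add x, (x << 12), i.e. (x << 2) + (x << 12), since 4100 - 4 == 4096 is
 // a power of two and 4100 does not fit in a simm12.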
24436 if (Subtarget.hasShlAdd(3) && !Imm.isSignedIntN(12) &&
24437 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
24438 (Imm - 8).isPowerOf2()))
24439 return true;
24440
24441 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
24442 // a pair of LUI/ADDI.
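 // Illustrative example: Imm = 6144 = 3 << 11 would need LUI+ADDI to
 // materialize, but x * 6144 can instead be computed as (x + (x << 1)) << 11,
 // i.e. two SLLIs and one ADD.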
24443 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
24444 ConstNode->hasOneUse()) {
24445 APInt ImmS = Imm.ashr(Imm.countr_zero());
24446 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
24447 (1 - ImmS).isPowerOf2())
24448 return true;
24449 }
24450
24451 return false;
24452}
24453
24454bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
24455 SDValue ConstNode) const {
24456 // Let the DAGCombiner decide for vectors.
24457 EVT VT = AddNode.getValueType();
24458 if (VT.isVector())
24459 return true;
24460
24461 // Let the DAGCombiner decide for larger types.
24462 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
24463 return true;
24464
24465 // It is worse if c1 is simm12 while c1*c2 is not.
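 // Illustrative example: with c1 = 8 and c2 = 1000, c1 fits in a simm12 but
 // c1 * c2 = 8000 does not, so rewriting (x + 8) * 1000 as x * 1000 + 8000
 // would force the larger constant to be materialized separately.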
24466 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
24467 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
24468 const APInt &C1 = C1Node->getAPIntValue();
24469 const APInt &C2 = C2Node->getAPIntValue();
24470 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
24471 return false;
24472
24473 // Default to true and let the DAGCombiner decide.
24474 return true;
24475}
24476
24477bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
24478 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
24479 unsigned *Fast) const {
24480 if (!VT.isVector()) {
24481 if (Fast)
24482 *Fast = Subtarget.enableUnalignedScalarMem();
24483 return Subtarget.enableUnalignedScalarMem();
24484 }
24485
24486 // All vector implementations must support element alignment
24487 EVT ElemVT = VT.getVectorElementType();
24488 if (Alignment >= ElemVT.getStoreSize()) {
24489 if (Fast)
24490 *Fast = 1;
24491 return true;
24492 }
24493
24494 // Note: We lower an unmasked unaligned vector access to an equally sized
24495 // e8 element type access. Given this, we effectively support all unmasked
24496 // misaligned accesses. TODO: Work through the codegen implications of
24497 // allowing such accesses to be formed, and considered fast.
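 // Illustrative example: an unmasked load of v4i32 with 1-byte alignment can
 // be performed as an equally sized 16-byte e8 access when unaligned vector
 // memory is enabled.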
24498 if (Fast)
24499 *Fast = Subtarget.enableUnalignedVectorMem();
24500 return Subtarget.enableUnalignedVectorMem();
24501}
24502
24503EVT RISCVTargetLowering::getOptimalMemOpType(
24504 LLVMContext &Context, const MemOp &Op,
24505 const AttributeList &FuncAttributes) const {
24506 if (!Subtarget.hasVInstructions())
24507 return MVT::Other;
24508
24509 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
24510 return MVT::Other;
24511
24512 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
24513 // has an expansion threshold, and we want the number of hardware memory
24514 // operations to correspond roughly to that threshold. LMUL>1 operations
24515 // are typically expanded linearly internally, and thus correspond to more
24516 // than one actual memory operation. Note that store merging and load
24517 // combining will typically form larger LMUL operations from the LMUL1
24518 // operations emitted here, and that's okay because combining isn't
24519 // introducing new memory operations; it's just merging existing ones.
24520 // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
24521 const unsigned MinVLenInBytes =
24522 std::min(Subtarget.getRealMinVLen() / 8, 1024U);
24523
24524 if (Op.size() < MinVLenInBytes)
24525 // TODO: Figure out short memops. For the moment, do the default thing
24526 // which ends up using scalar sequences.
24527 return MVT::Other;
24528
24529 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
24530 // fixed vectors.
24531 if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
24532 return MVT::Other;
24533
24534 // Prefer i8 for non-zero memset as it allows us to avoid materializing
24535 // a large scalar constant and instead use vmv.v.x/i to do the
24536 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
24537 // maximize the chance we can encode the size in the vsetvli.
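 // Illustrative example (assuming sufficient alignment): with VLEN = 128
 // (MinVLenInBytes = 16) and ELEN = 64, a large memcpy gets v2i64, while a
 // non-zero memset gets v16i8.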
24538 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
24539 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
24540
24541 // Do we have sufficient alignment for our preferred VT? If not, revert
24542 // to largest size allowed by our alignment criteria.
24543 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
24544 Align RequiredAlign(PreferredVT.getStoreSize());
24545 if (Op.isFixedDstAlign())
24546 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
24547 if (Op.isMemcpy())
24548 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
24549 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
24550 }
24551 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
24552}
24553
24554bool RISCVTargetLowering::splitValueIntoRegisterParts(
24555 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
24556 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
24557 bool IsABIRegCopy = CC.has_value();
24558 EVT ValueVT = Val.getValueType();
24559
24560 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24561 if ((ValueVT == PairVT ||
24562 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24563 ValueVT == MVT::f64)) &&
24564 NumParts == 1 && PartVT == MVT::Untyped) {
24565 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24566 MVT XLenVT = Subtarget.getXLenVT();
24567 if (ValueVT == MVT::f64)
24568 Val = DAG.getBitcast(MVT::i64, Val);
24569 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
24570 // Always creating an MVT::Untyped part, so always use
24571 // RISCVISD::BuildGPRPair.
24572 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
24573 return true;
24574 }
24575
24576 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24577 PartVT == MVT::f32) {
24578 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
24579 // nan, and cast to f32.
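 // Illustrative example: the f16 value 1.0 (bits 0x3C00) becomes the f32 bit
 // pattern 0xFFFF3C00, a NaN whose low 16 bits still hold the original half.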
24580 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
24581 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
24582 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
24583 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
24584 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24585 Parts[0] = Val;
24586 return true;
24587 }
24588
24589 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
24590#ifndef NDEBUG
24591 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
24592 [[maybe_unused]] unsigned ValLMUL =
24593 divideCeil(ValueVT.getSizeInBits().getKnownMinValue(),
24594 ValNF * RISCV::RVVBitsPerBlock);
24595 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
24596 [[maybe_unused]] unsigned PartLMUL =
24597 divideCeil(PartVT.getSizeInBits().getKnownMinValue(),
24598 PartNF * RISCV::RVVBitsPerBlock);
24599 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
24600 "RISC-V vector tuple type only accepts same register class type "
24601 "TUPLE_INSERT");
24602#endif
24603
24604 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
24605 Val, DAG.getTargetConstant(0, DL, MVT::i32));
24606 Parts[0] = Val;
24607 return true;
24608 }
24609
24610 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24611 PartVT.isScalableVector()) {
24612 if (ValueVT.isFixedLengthVector()) {
24613 ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
24614 Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
24615 }
24616 LLVMContext &Context = *DAG.getContext();
24617 EVT ValueEltVT = ValueVT.getVectorElementType();
24618 EVT PartEltVT = PartVT.getVectorElementType();
24619 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24620 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24621 if (PartVTBitSize % ValueVTBitSize == 0) {
24622 assert(PartVTBitSize >= ValueVTBitSize);
24623 // If the element types are different, bitcast to the same element type of
24624 // PartVT first.
24625 // For example, suppose we want to copy a <vscale x 1 x i8> value to
24626 // <vscale x 4 x i16>.
24627 // We need to widen <vscale x 1 x i8> to <vscale x 8 x i8> by inserting a
24628 // subvector, and then we can bitcast to <vscale x 4 x i16>.
24629 if (ValueEltVT != PartEltVT) {
24630 if (PartVTBitSize > ValueVTBitSize) {
24631 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24632 assert(Count != 0 && "The number of element should not be zero.");
24633 EVT SameEltTypeVT =
24634 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24635 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(SameEltTypeVT), Val, 0);
24636 }
24637 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24638 } else {
24639 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(PartVT), Val, 0);
24640 }
24641 Parts[0] = Val;
24642 return true;
24643 }
24644 }
24645
24646 return false;
24647}
24648
24649SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
24650 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
24651 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
24652 bool IsABIRegCopy = CC.has_value();
24653
24654 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24655 if ((ValueVT == PairVT ||
24656 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24657 ValueVT == MVT::f64)) &&
24658 NumParts == 1 && PartVT == MVT::Untyped) {
24659 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24660 MVT XLenVT = Subtarget.getXLenVT();
24661
24662 SDValue Val = Parts[0];
24663 // Always starting with an MVT::Untyped part, so always use
24664 // RISCVISD::SplitGPRPair
24665 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
24666 Val);
24667 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
24668 Val.getValue(1));
24669 if (ValueVT == MVT::f64)
24670 Val = DAG.getBitcast(ValueVT, Val);
24671 return Val;
24672 }
24673
24674 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24675 PartVT == MVT::f32) {
24676 SDValue Val = Parts[0];
24677
24678 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
24679 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
24680 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
24681 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
24682 return Val;
24683 }
24684
24685 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24686 PartVT.isScalableVector()) {
24687 LLVMContext &Context = *DAG.getContext();
24688 SDValue Val = Parts[0];
24689 EVT ValueEltVT = ValueVT.getVectorElementType();
24690 EVT PartEltVT = PartVT.getVectorElementType();
24691 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24692 if (ValueVT.isFixedLengthVector())
24693 ValueVTBitSize = getContainerForFixedLengthVector(ValueVT.getSimpleVT())
24694 .getSizeInBits()
24695 .getKnownMinValue();
24696 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24697 if (PartVTBitSize % ValueVTBitSize == 0) {
24698 assert(PartVTBitSize >= ValueVTBitSize);
24699 EVT SameEltTypeVT = ValueVT;
24700 // If the element types are different, convert it to the same element type
24701 // of PartVT.
24702 // For example, suppose we want to copy a <vscale x 1 x i8> value out of a
24703 // <vscale x 4 x i16>.
24704 // We need to bitcast <vscale x 4 x i16> to <vscale x 8 x i8> first, and
24705 // then we can extract the <vscale x 1 x i8>.
24706 if (ValueEltVT != PartEltVT) {
24707 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24708 assert(Count != 0 && "The number of element should not be zero.");
24709 SameEltTypeVT =
24710 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24711 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
24712 }
24713 if (ValueVT.isFixedLengthVector())
24714 Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget);
24715 else
24716 Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
24717 return Val;
24718 }
24719 }
24720 return SDValue();
24721}
24722
24723bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
24724 // When aggressively optimizing for code size, we prefer to use a div
24725 // instruction, as it is usually smaller than the alternative sequence.
24726 // TODO: Add vector division?
24727 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
24728 return OptSize && !VT.isVector();
24729}
24730
24732 // Scalarizing zero_ext and sign_ext might stop them from matching a widening
24733 // instruction in some situations.
24734 unsigned Opc = N->getOpcode();
24735 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
24736 return false;
24737 return true;
24738}
24739
24740static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
24741 Module *M = IRB.GetInsertBlock()->getModule();
24742 Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration(
24743 M, Intrinsic::thread_pointer, IRB.getPtrTy());
24744 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
24745 IRB.CreateCall(ThreadPointerFunc), Offset);
24746}
24747
24748Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
24749 // Fuchsia provides a fixed TLS slot for the stack cookie.
24750 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
24751 if (Subtarget.isTargetFuchsia())
24752 return useTpOffset(IRB, -0x10);
24753
24754 // Android provides a fixed TLS slot for the stack cookie. See the definition
24755 // of TLS_SLOT_STACK_GUARD in
24756 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
24757 if (Subtarget.isTargetAndroid())
24758 return useTpOffset(IRB, -0x18);
24759
24760 Module *M = IRB.GetInsertBlock()->getModule();
24761
24762 if (M->getStackProtectorGuard() == "tls") {
24763 // Users must specify the offset explicitly
24764 int Offset = M->getStackProtectorGuardOffset();
24765 return useTpOffset(IRB, Offset);
24766 }
24767
24768 return TargetLowering::getIRStackGuard(IRB);
24769}
24770
24771bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
24772 Align Alignment) const {
24773 if (!Subtarget.hasVInstructions())
24774 return false;
24775
24776 // Only support fixed vectors if we know the minimum vector size.
24777 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
24778 return false;
24779
24780 EVT ScalarType = DataType.getScalarType();
24781 if (!isLegalElementTypeForRVV(ScalarType))
24782 return false;
24783
24784 if (!Subtarget.enableUnalignedVectorMem() &&
24785 Alignment < ScalarType.getStoreSize())
24786 return false;
24787
24788 return true;
24789}
24790
24791MachineInstr *
24792RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
24793 MachineBasicBlock::instr_iterator &MBBI,
24794 const TargetInstrInfo *TII) const {
24795 assert(MBBI->isCall() && MBBI->getCFIType() &&
24796 "Invalid call instruction for a KCFI check");
24797 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
24798 MBBI->getOpcode()));
24799
24800 MachineOperand &Target = MBBI->getOperand(0);
24801 Target.setIsRenamable(false);
24802
24803 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
24804 .addReg(Target.getReg())
24805 .addImm(MBBI->getCFIType())
24806 .getInstr();
24807}
24808
24809#define GET_REGISTER_MATCHER
24810#include "RISCVGenAsmMatcher.inc"
24811
24814 const MachineFunction &MF) const {
24815 Register Reg = MatchRegisterAltName(RegName);
24816 if (!Reg)
24817 Reg = MatchRegisterName(RegName);
24818 if (!Reg)
24819 return Reg;
24820
24821 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
24822 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
24823 reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" +
24824 StringRef(RegName) + "\"."));
24825 return Reg;
24826}
24827
24828MachineMemOperand::Flags
24829RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
24830 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
24831
24832 if (NontemporalInfo == nullptr)
24833 return MachineMemOperand::MONone;
24834
24835 // 1 -> default value, works as __RISCV_NTLH_ALL
24836 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
24837 // 3 -> __RISCV_NTLH_ALL_PRIVATE
24838 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
24839 // 5 -> __RISCV_NTLH_ALL
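 // Illustrative example: level 5 (__RISCV_NTLH_ALL) becomes 3 after the
 // adjustment below, setting both MONontemporalBit0 and MONontemporalBit1,
 // while level 2 becomes 0 and sets neither bit.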
24840 int NontemporalLevel = 5;
24841 const MDNode *RISCVNontemporalInfo =
24842 I.getMetadata("riscv-nontemporal-domain");
24843 if (RISCVNontemporalInfo != nullptr)
24844 NontemporalLevel =
24845 cast<ConstantInt>(
24846 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
24847 ->getValue())
24848 ->getZExtValue();
24849
24850 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
24851 "RISC-V target doesn't support this non-temporal domain.");
24852
24853 NontemporalLevel -= 2;
24854 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
24855 if (NontemporalLevel & 0b1)
24856 Flags |= MONontemporalBit0;
24857 if (NontemporalLevel & 0b10)
24858 Flags |= MONontemporalBit1;
24859
24860 return Flags;
24861}
24862
24863MachineMemOperand::Flags
24864RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
24865
24866 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
24867 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
24868 TargetFlags |= (NodeFlags & MONontemporalBit0);
24869 TargetFlags |= (NodeFlags & MONontemporalBit1);
24870 return TargetFlags;
24871}
24872
24874 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
24875 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
24876}
24877
24878bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
24879 if (VT.isVector()) {
24880 EVT SVT = VT.getVectorElementType();
24881 // If the element type is legal we can use cpop.v if it is enabled.
24882 if (isLegalElementTypeForRVV(SVT))
24883 return Subtarget.hasStdExtZvbb();
24884 // Don't consider it fast if the type needs to be legalized or scalarized.
24885 return false;
24886 }
24887
24888 return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
24889}
24890
24891unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
24892 ISD::CondCode Cond) const {
24893 return isCtpopFast(VT) ? 0 : 1;
24894}
24895
24896bool RISCVTargetLowering::shouldInsertFencesForAtomic(
24897 const Instruction *I) const {
24898 if (Subtarget.hasStdExtZalasr()) {
24899 if (Subtarget.hasStdExtZtso()) {
24900 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
24901 // should be lowered to plain load/store. The easiest way to do this is
24902 // to say we should insert fences for them, and the fence insertion code
24903 // will just not insert any fences
24904 auto *LI = dyn_cast<LoadInst>(I);
24905 auto *SI = dyn_cast<StoreInst>(I);
24906 if ((LI &&
24907 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
24908 (SI &&
24909 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
24910 // Here, this is a load or store which is seq_cst and needs a .aq or
24911 // .rl, so we shouldn't try to insert fences.
24912 return false;
24913 }
24914 // Here, we are a TSO inst that isn't a seq_cst load/store
24915 return isa<LoadInst>(I) || isa<StoreInst>(I);
24916 }
24917 return false;
24918 }
24919 // Note that one specific case requires fence insertion for an
24920 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
24921 // than this hook due to limitations in the interface here.
24922 return isa<LoadInst>(I) || isa<StoreInst>(I);
24923}
24924
24925bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
24926
24927 // GISel support is in progress or complete for these opcodes.
24928 unsigned Op = Inst.getOpcode();
24929 if (Op == Instruction::Add || Op == Instruction::Sub ||
24930 Op == Instruction::And || Op == Instruction::Or ||
24931 Op == Instruction::Xor || Op == Instruction::InsertElement ||
24932 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
24933 Op == Instruction::Freeze || Op == Instruction::Store)
24934 return false;
24935
24936 if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
24937 // Mark RVV intrinsic as supported.
24938 if (RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(II->getIntrinsicID()))
24939 return false;
24940 }
24941
24942 if (Inst.getType()->isScalableTy())
24943 return true;
24944
24945 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
24946 if (Inst.getOperand(i)->getType()->isScalableTy() &&
24947 !isa<ReturnInst>(&Inst))
24948 return true;
24949
24950 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
24951 if (AI->getAllocatedType()->isScalableTy())
24952 return true;
24953 }
24954
24955 return false;
24956}
24957
24958SDValue
24959RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
24960 SelectionDAG &DAG,
24961 SmallVectorImpl<SDNode *> &Created) const {
24962 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
24963 if (isIntDivCheap(N->getValueType(0), Attr))
24964 return SDValue(N, 0); // Lower SDIV as SDIV
24965
24966 // Only perform this transform if short forward branch opt is supported.
24967 if (!Subtarget.hasShortForwardBranchOpt())
24968 return SDValue();
24969 EVT VT = N->getValueType(0);
24970 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
24971 return SDValue();
24972
24973 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
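 // Illustrative example: dividing by 1024 only needs to add 1023 to negative
 // inputs, which fits in one addi, whereas dividing by 4096 would need 4095,
 // which exceeds the 12-bit signed immediate range.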
24974 if (Divisor.sgt(2048) || Divisor.slt(-2048))
24975 return SDValue();
24976 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
24977}
24978
24979bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
24980 EVT VT, const APInt &AndMask) const {
24981 if (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())
24982 return !Subtarget.hasBEXTILike() && AndMask.ugt(1024);
24983 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
24984}
24985
24986unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
24987 return Subtarget.getMinimumJumpTableEntries();
24988}
24989
24990SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
24991 SDValue Value, SDValue Addr,
24992 int JTI,
24993 SelectionDAG &DAG) const {
24994 if (Subtarget.hasStdExtZicfilp()) {
24995 // When Zicfilp enabled, we need to use software guarded branch for jump
24996 // table branch.
24997 SDValue Chain = Value;
24998 // Jump table debug info is only needed if CodeView is enabled.
24999 if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF())
25000 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
25001 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
25002 }
25003 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
25004}
25005
25006// If an output pattern produces multiple instructions tablegen may pick an
25007 // arbitrary type from an instruction's destination register class to use for the
25008// VT of that MachineSDNode. This VT may be used to look up the representative
25009// register class. If the type isn't legal, the default implementation will
25010// not find a register class.
25011//
25012// Some integer types smaller than XLen are listed in the GPR register class to
25013// support isel patterns for GISel, but are not legal in SelectionDAG. The
25014// arbitrary type tablegen picks may be one of these smaller types.
25015//
25016// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
25017// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
25018std::pair<const TargetRegisterClass *, uint8_t>
25019RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
25020 MVT VT) const {
25021 switch (VT.SimpleTy) {
25022 default:
25023 break;
25024 case MVT::i8:
25025 case MVT::i16:
25026 case MVT::i32:
25027 return std::make_pair(&RISCV::GPRRegClass, 1);
25028 case MVT::bf16:
25029 case MVT::f16:
25030 return std::make_pair(&RISCV::FPR16RegClass, 1);
25031 }
25032
25033 return TargetLowering::findRepresentativeClass(TRI, VT);
25034}
25035
25036namespace llvm::RISCVVIntrinsicsTable {
25037
25038#define GET_RISCVVIntrinsicsTable_IMPL
25039#include "RISCVGenSearchableTables.inc"
25040
25041} // namespace llvm::RISCVVIntrinsicsTable
25042
25043bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
25044
25045 // If the function specifically requests inline stack probes, emit them.
25046 if (MF.getFunction().hasFnAttribute("probe-stack"))
25047 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
25048 "inline-asm";
25049
25050 return false;
25051}
25052
25053unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
25054 Align StackAlign) const {
25055 // The default stack probe size is 4096 if the function has no
25056 // stack-probe-size attribute.
25057 const Function &Fn = MF.getFunction();
25058 unsigned StackProbeSize =
25059 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
25060 // Round down to the stack alignment.
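 // Illustrative example: a "stack-probe-size" of 5000 with a 16-byte stack
 // alignment is rounded down to 4992.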
25061 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
25062 return StackProbeSize ? StackProbeSize : StackAlign.value();
25063}
25064
25065SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
25066 SelectionDAG &DAG) const {
25067 MachineFunction &MF = DAG.getMachineFunction();
25068 if (!hasInlineStackProbe(MF))
25069 return SDValue();
25070
25071 MVT XLenVT = Subtarget.getXLenVT();
25072 // Get the inputs.
25073 SDValue Chain = Op.getOperand(0);
25074 SDValue Size = Op.getOperand(1);
25075
25076 MaybeAlign Align =
25077 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
25078 SDLoc dl(Op);
25079 EVT VT = Op.getValueType();
25080
25081 // Construct the new SP value in a GPR.
25082 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
25083 Chain = SP.getValue(1);
25084 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
25085 if (Align)
25086 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
25087 DAG.getSignedConstant(-Align->value(), dl, VT));
25088
25089 // Set the real SP to the new value with a probing loop.
25090 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
25091 return DAG.getMergeValues({SP, Chain}, dl);
25092}
25093
25094MachineBasicBlock *
25095RISCVTargetLowering::emitDynamicProbedAlloc(MachineInstr &MI,
25096 MachineBasicBlock *MBB) const {
25097 MachineFunction &MF = *MBB->getParent();
25098 MachineBasicBlock::iterator MBBI = MI.getIterator();
25099 DebugLoc DL = MBB->findDebugLoc(MBBI);
25100 Register TargetReg = MI.getOperand(0).getReg();
25101
25102 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
25103 bool IsRV64 = Subtarget.is64Bit();
25104 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
25105 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
25106 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
25107
25108 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
25109 MachineBasicBlock *LoopTestMBB =
25110 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25111 MF.insert(MBBInsertPoint, LoopTestMBB);
25112 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25113 MF.insert(MBBInsertPoint, ExitMBB);
25114 Register SPReg = RISCV::X2;
25115 Register ScratchReg =
25116 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
25117
25118 // ScratchReg = ProbeSize
25119 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
25120
25121 // LoopTest:
25122 // SUB SP, SP, ProbeSize
25123 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
25124 .addReg(SPReg)
25125 .addReg(ScratchReg);
25126
25127 // s[d|w] zero, 0(sp)
25128 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
25129 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
25130 .addReg(RISCV::X0)
25131 .addReg(SPReg)
25132 .addImm(0);
25133
25134 // BLT TargetReg, SP, LoopTest
25135 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
25136 .addReg(TargetReg)
25137 .addReg(SPReg)
25138 .addMBB(LoopTestMBB);
25139
25140 // Adjust with: MV SP, TargetReg.
25141 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
25142 .addReg(TargetReg)
25143 .addImm(0);
25144
25145 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
25146 ExitMBB->transferSuccessorsAndUpdatePHIs(MBB);
25147
25148 LoopTestMBB->addSuccessor(ExitMBB);
25149 LoopTestMBB->addSuccessor(LoopTestMBB);
25150 MBB->addSuccessor(LoopTestMBB);
25151
25152 MI.eraseFromParent();
25153 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
25154 return ExitMBB->begin()->getParent();
25155}
25156
25157ArrayRef<MCPhysReg> RISCVTargetLowering::getRoundingControlRegisters() const {
25158 if (Subtarget.hasStdExtFOrZfinx()) {
25159 static const MCPhysReg RCRegs[] = {RISCV::FRM, RISCV::FFLAGS};
25160 return RCRegs;
25161 }
25162 return {};
25163}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Definition CostModel.cpp:74
#define Check(C,...)
#define DEBUG_TYPE
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define CC_VLS_CASE(ABIVlen)
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
mir Rename Register Operands
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr MCPhysReg SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue OrigOp, MutableArrayRef< SDValue > Operands, SelectionDAG &DAG)
static bool isLowSourceShuffle(ArrayRef< int > Mask, int Span)
Is this mask only using elements from the first span of the input?
static bool isZipOdd(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipodd idiom.
static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2)
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static bool isLegalBitRotate(ArrayRef< int > Mask, EVT VT, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static SDValue simplifyOp_VL(SDNode *N)
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isAlternating(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned Factor, bool RequiredPolarity)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< bool > ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, cl::desc("Swap add and addi in cases where the add may " "be combined with a shift"), cl::init(true))
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static void promoteVCIXScalar(SDValue Op, MutableArrayRef< SDValue > Operands, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue reduceANDOfAtomicLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool isLocalRepeatingShuffle(ArrayRef< int > Mask, int Span)
Is this mask local (i.e.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static bool isSpanSplatShuffle(ArrayRef< int > Mask, int Span)
Return true for a mask which performs an arbitrary shuffle within the first span, and then repeats th...
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue getVCIXISDNodeVOID(SDValue Op, SelectionDAG &DAG, unsigned Type)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
const uint64_t ModeMask64
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, SelectionDAG &DAG)
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static bool isValidVisniInsertExtractIndex(SDValue Idx)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B, SelectionDAG &DAG)
Given fixed length vectors A and B with equal element types, but possibly different number of element...
const uint32_t ModeMask32
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVCIXISDNodeWCHAIN(SDValue Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerVectorXRINT_XROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSimm12Constant(SDValue V)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static bool isElementRotate(const std::array< std::pair< int, int >, 2 > &SrcInfo, unsigned NumElts)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match a single source shuffle which is an identity except that some particular element is repeated.
static bool isNonZeroAVL(SDValue AVL)
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getQDOTXResultType(MVT OpVT)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue foldConcatVector(SDValue V1, SDValue V2)
If concat_vector(V1,V2) could be folded away to some existing vector source, return it.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isZipEven(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipeven idiom.
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
#define ROTR(x, n)
Definition SHA256.cpp:32
static bool isCommutative(Instruction *I, Value *ValWithUses)
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1332
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1109
Class for arbitrary precision integers.
Definition APInt.h:78
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the bit at the position given by "bitPosition" to 1.
Definition APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition APInt.cpp:1644
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1396
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:510
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
bool isMask(unsigned numBits) const
Definition APInt.h:488
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:334
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
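The APInt helpers above are plain value types, so they can be exercised outside of any lowering context. A minimal standalone sketch (not taken from this file) using a few of the listed operations - getLowBitsSet, isMask, isShiftedMask, countr_zero and sext:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  // The low 8 bits of a 32-bit value: 0x000000FF.
  APInt Mask = APInt::getLowBitsSet(32, 8);
  outs() << "isMask(8): " << Mask.isMask(8) << "\n";

  // 0x0F00 is a contiguous run of ones shifted up by 8 bits.
  APInt Shifted(16, 0x0F00);
  outs() << "isShiftedMask: " << Shifted.isShiftedMask()
         << ", trailing zeros: " << Shifted.countr_zero() << "\n";

  // Sign-extend a 12-bit all-ones immediate (i.e. -1) to 64 bits.
  APInt Imm(12, 0xFFF);
  outs() << "sext to i64: " << Imm.sext(64).getSExtValue() << "\n";
  return 0;
}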
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
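A short sketch, not from this file, of how the BinOp values above are chosen when emitting read-modify-write operations through IRBuilder; the helper name and the ordering choices are illustrative only:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Emit *Ptr = *Ptr + V and *Ptr = umax(*Ptr, V); each call returns the old value.
static void emitAtomicOps(IRBuilder<> &B, Value *Ptr, Value *V) {
  // BinOp::Add: *p = old + v.
  B.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, V, MaybeAlign(),
                    AtomicOrdering::SequentiallyConsistent);
  // BinOp::UMax: *p = old >unsigned v ? old : v.
  B.CreateAtomicRMW(AtomicRMWInst::UMax, Ptr, V, MaybeAlign(),
                    AtomicOrdering::Monotonic);
}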
This is an SDNode representing atomic operations.
const SDValue & getBasePtr() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool test(unsigned Idx) const
Definition BitVector.h:461
BitVector & set()
Definition BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
int64_t getLocMemOffset() const
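A minimal sketch, not from this file, of the usual pattern behind argument lowering with the CCState/CCValAssign API listed above; the AssignFn parameter stands in for whatever CCAssignFn the target actually uses:

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static void analyzeIncomingArgs(MachineFunction &MF, SelectionDAG &DAG,
                                CallingConv::ID CallConv, bool IsVarArg,
                                const SmallVectorImpl<ISD::InputArg> &Ins,
                                CCAssignFn AssignFn) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, AssignFn);
  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      (void)VA.getLocReg();       // the argument arrives in a physical register
    } else {
      (void)VA.getLocMemOffset(); // the argument arrives on the stack
    }
  }
}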
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:214
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, plus methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:388
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:229
unsigned size() const
Definition DenseMap.h:108
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:205
Implements a dense probed hash-table based set.
Definition DenseSet.h:269
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Tagged union holding either a T or a Error.
Definition Error.h:485
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Argument * getArg(unsigned i) const
Definition Function.h:884
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
Helper struct to store a base, index and offset that forms an address.
bool isDSOLocal() const
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition IRBuilder.h:1936
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2508
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:605
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
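A small standalone example exercising the IRBuilder helpers listed above (getInt8Ty, getArg, CreateConstGEP1_32); the module name, function name and the 4-byte offset are arbitrary choices for illustration:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  // void @f(ptr %p)
  auto *FTy = FunctionType::get(Type::getVoidTy(Ctx),
                                {PointerType::getUnqual(Ctx)}, false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);
  // &p[4] over i8 elements, i.e. a 4-byte offset from the incoming pointer.
  Value *Gep = B.CreateConstGEP1_32(B.getInt8Ty(), F->getArg(0), 4, "off");
  B.CreateStore(B.getInt8(0), Gep);
  B.CreateRetVoid();
  M.print(outs(), nullptr);
  return 0;
}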
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
MCContext & getContext() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1077
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1445
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the vector's element count is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
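The MVT queries above can also be tried standalone; a minimal sketch (the header path is the one used by recent LLVM releases) building a fixed v4i32 and a scalable nxv2i64, the kind of types RVV lowering deals in:

#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  // A fixed-length vector of four i32 elements (v4i32).
  MVT Fixed = MVT::getVectorVT(MVT::i32, 4);
  outs() << "v4i32 bits: " << Fixed.getSizeInBits().getFixedValue() << "\n";

  // A scalable vector with two i64 elements per vscale unit (nxv2i64).
  MVT Scalable = MVT::getScalableVectorVT(MVT::i64, 2);
  outs() << "scalable: " << Scalable.isScalableVector()
         << ", min bits: " << Scalable.getSizeInBits().getKnownMinValue() << "\n";
  return 0;
}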
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
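A hedged sketch, not from this file, of the MachineInstrBuilder chaining pattern the entries above support; the opcode and registers are placeholders supplied by the caller:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

static MachineInstr *emitRegImmOp(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const DebugLoc &DL,
                                  const TargetInstrInfo &TII, unsigned Opcode,
                                  Register DstReg, Register SrcReg) {
  return BuildMI(MBB, MBBI, DL, TII.get(Opcode), DstReg)
      .addReg(SrcReg) // register operand
      .addImm(16)     // immediate operand
      .getInstr();    // recover the MachineInstr from the builder
}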
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
Definition Module.cpp:353
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtZfhOrZhinx() const
bool hasShlAdd(int64_t ShAmt) const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasVInstructionsBF16Minimal() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
bool hasBEXTILike() const
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
bool hasCZEROLike() const
unsigned getELen() const
unsigned getFLen() const
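A minimal sketch of how lowering code typically consults the RISCVSubtarget queries listed above; the helper and its element-width check are illustrative, not part of this file:

#include "RISCVSubtarget.h"
using namespace llvm;

// Can a scalable vector with NeedEltBits-wide elements be used at all?
static bool canUseScalableElt(const RISCVSubtarget &ST, unsigned NeedEltBits) {
  // Vector extension present, and ELEN wide enough for the element type.
  return ST.hasVInstructions() && ST.getELen() >= NeedEltBits;
}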
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type SrcTy to type DstTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
static unsigned getRegClassIDForLMUL(RISCVVType::VLMUL LMul)
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from SrcVT to DstVT is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Match a mask which "spreads" the leading elements of a vector evenly across the result.
static RISCVVType::VLMUL getLMUL(MVT VT)
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
std::optional< APInt > bitcastToAPInt() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
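A sketch (illustrative helper, not from this file) of the SDValue accessors above used the way DAG combines usually use them, here recognizing (add X, C) with a one-use constant right-hand side:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isAddWithConstantRHS(SDValue V, uint64_t &CVal) {
  if (V.getOpcode() != ISD::ADD || !V.hasOneUse())
    return false;
  if (!isa<ConstantSDNode>(V.getOperand(1)))
    return false;
  CVal = V.getConstantOperandVal(1);
  return true;
}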
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
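A minimal sketch, not from this file, combining several of the SelectionDAG builders listed here (getConstant, getSetCC, getSelect) the way a lowering hook would, to form select(X == 0, A, B); the helper name is illustrative:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static SDValue buildSelectOnZero(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                 SDValue X, SDValue A, SDValue B,
                                 const TargetLowering &TLI) {
  // The setcc result type depends on the target (e.g. XLenVT for scalars).
  EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue Cond = DAG.getSetCC(DL, CCVT, X, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, Cond, A, B);
}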
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
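As a small illustrative sketch (LHS, RHS and VT are assumed to come from the surrounding lowering code), most DAG construction funnels through this entry point:
  SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, LHS, RHS);          // integer add
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT,
                            DAG.getConstant(0, DL, VT), Sum);      // 0 - Sum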
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static LLVM_ABI bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static LLVM_ABI bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
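A minimal usage sketch (requires llvm/ADT/SmallSet.h; the register numbers are illustrative):
  SmallSet<unsigned, 4> SeenRegs;        // no heap allocation while it stays small
  SeenRegs.insert(5);
  bool AlreadySeen = SeenRegs.count(5);  // count() returns 1 if present, 0 otherwise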
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:862
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
LLVM_ABI std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
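A hedged sketch of the usual pattern (requires llvm/ADT/StringSwitch.h; Name is an assumed StringRef and the name-to-code mapping is made up for illustration):
  int Code = llvm::StringSwitch<int>(Name)
                 .Case("rne", 0)
                 .Case("rtz", 1)
                 .Cases("rdn", "rup", 2)
                 .Default(-1);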
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
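A sketch of how a target constructor typically uses this hook; the opcode and type choices below are illustrative, not a statement of what this backend actually configures:
  // Inside a TargetLowering subclass constructor:
  setOperationAction(ISD::SELECT, MVT::i32, Custom);  // route through LowerOperation
  setOperationAction(ISD::ROTL, MVT::i32, Expand);    // let legalization rewrite it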
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...

virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual MVT getVPExplicitVectorLengthTy() const
Returns the type to be used for the EVL/AVL operand of VP nodes: ISD::VP_ADD, ISD::VP_SUB,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetOptions Options
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:774
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
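A small illustrative sketch of the fixed/scalable distinction (the bit counts are arbitrary):
  TypeSize Fixed  = TypeSize::getFixed(128);       // exactly 128 bits
  TypeSize Scaled = TypeSize::getScalable(64);     // 64 * vscale bits
  bool Is128 = Fixed.getKnownMinValue() == 128;    // true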
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:62
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr bool isZero() const
Definition TypeSize.h:154
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
self_iterator getIterator()
Definition ilist_node.h:130
#define INT64_MIN
Definition DataTypes.h:74
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:622
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:682
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition ISDOpcodes.h:952
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:627
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition ISDOpcodes.h:672
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:648
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:690
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition ISDOpcodes.h:611
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:853
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:857
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
unsigned getBrCond(CondCode CC, unsigned SelectOpc=0)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
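A hedged sketch of querying the integer-materialization helper; Imm is an assumed int64_t and Subtarget the enclosing RISCVSubtarget (usable where an MCSubtargetInfo is expected):
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
  unsigned NumInstrs = Seq.size();  // how many instructions materializing Imm would take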
static VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static unsigned encodeSEW(unsigned SEW)
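As a worked example of the SEW encoding (SEW is encoded as log2(SEW) - 3):
  unsigned EncSEW = RISCVVType::encodeSEW(32);      // 2
  unsigned SEW    = RISCVVType::decodeVSEW(EncSEW); // back to 32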
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
static constexpr unsigned RVVBytesPerBlock
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
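A hedged sketch of how these lookups pair with makeLibCall inside a TargetLowering member function; Op, Chain, DAG and DL are assumed to come from the surrounding lowering code:
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f64, MVT::i32);
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "no libcall for this type pair");
  MakeLibCallOptions CallOptions;
  std::pair<SDValue, SDValue> Res =
      makeLibCall(DAG, LC, MVT::i32, Op, CallOptions, DL, Chain);
  // Res.first is the converted value, Res.second the output chain.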
@ Kill
The last use of a register.
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
auto m_Node(unsigned Opcode, const OpndPreds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
initializer< Ty > init(const Ty &Val)
uint32_t read32le(const void *P)
Definition Endian.h:428
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition DWP.cpp:477
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
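A worked example of the signed-range check (12 bits is the width used for RISC-V I-type immediates):
  static_assert(isInt<12>(2047), "largest value that fits in a signed 12-bit field");
  static_assert(!isInt<12>(2048), "one past the maximum no longer fits");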
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2452
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isStrongerThanMonotonic(AtomicOrdering AO)
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
static const MachineMemOperand::Flags MONontemporalBit0
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
Definition MathExtras.h:557
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
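A small illustrative pairing with the log helper, as used when strength-reducing multiplies:
  bool IsPow2   = isPowerOf2_64(64);  // true
  unsigned Sh   = Log2_64(64);        // 6: a multiply by 64 becomes a left shift by 6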
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1589
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:186
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1948
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:325
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:405
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
CombineLevel
Definition DAGCombine.h:15
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
@ Xor
Bitwise or logical XOR of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1941
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:583
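A worked example, interpreting the low 12 bits of a value as a signed immediate:
  int64_t A = SignExtend64<12>(0x7FF);  // 2047
  int64_t B = SignExtend64<12>(0xFFF);  // -1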
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:86
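A small illustrative use, building a low-bits mask:
  uint64_t Low12 = maskTrailingOnes<uint64_t>(12);   // 0xFFF
  uint64_t Hi    = ~maskTrailingOnes<uint64_t>(12);  // clears the low 12 bits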
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:207
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
#define NC
Definition regutils.h:42
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:324
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
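A hedged sketch of building fixed and scalable vector types (Ctx is an assumed LLVMContext reference):
  EVT FixedVT    = EVT::getVectorVT(Ctx, MVT::i32, 4);
  EVT ScalableVT = EVT::getVectorVT(Ctx, MVT::i32, 4, /*IsScalable=*/true);
  unsigned NumElts = FixedVT.getVectorNumElements();  // 4
  bool Scalable    = ScalableVT.isScalableVector();   // true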
uint64_t getScalarStoreSize() const
Definition ValueTypes.h:402
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition ValueTypes.h:364
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:419
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:292
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
static LLVM_ABI KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:274
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:161
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition KnownBits.h:347
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:280
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
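A hedged sketch of combining known-bits facts; the width and known-zero masks below are made up for illustration:
  KnownBits LHS(32), RHS(32);
  LHS.Zero.setHighBits(16);                             // top half of LHS known zero
  RHS.Zero.setHighBits(16);                             // same for RHS
  KnownBits Sum = KnownBits::add(LHS, RHS);
  unsigned MinLZ = Sum.countMinLeadingZeros();          // >= 15: both addends fit in 16 bits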
Matching combinators.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
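A minimal sketch of describing a stack-slot access; MF and FI are an assumed MachineFunction and frame index from the surrounding code:
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachinePointerInfo Off8    = PtrInfo.getWithOffset(8);  // same slot, 8 bytes further in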
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...