RISCVISelLowering.cpp
1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/IntrinsicsRISCV.h"
46#include "llvm/Support/Debug.h"
52#include <optional>
53
54using namespace llvm;
55
56#define DEBUG_TYPE "riscv-lower"
57
58STATISTIC(NumTailCalls, "Number of tail calls");
59
60static cl::opt<unsigned> ExtensionMaxWebSize(
61 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
62 cl::desc("Give the maximum size (in number of nodes) of the web of "
63 "instructions that we will consider for VW expansion"),
64 cl::init(18));
65
66static cl::opt<bool>
67 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
68 cl::desc("Allow the formation of VW_W operations (e.g., "
69 "VWADD_W) with splat constants"),
70 cl::init(false));
71
72static cl::opt<unsigned> NumRepeatedDivisors(
73 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
74 cl::desc("Set the minimum number of repetitions of a divisor to allow "
75 "transformation to multiplications by the reciprocal"),
76 cl::init(2));
77
78static cl::opt<int>
79 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
80 cl::desc("Give the maximum number of instructions that we will "
81 "use for creating a floating-point immediate value"),
82 cl::init(2));
83
84static cl::opt<bool>
85 ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
86 cl::desc("Swap add and addi in cases where the add may "
87 "be combined with a shift"),
88 cl::init(true));
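// A minimal usage sketch (assuming the standard llvm::cl option machinery and
// the DEBUG_TYPE prefix above): each of these options is registered under the
// "riscv-lower" prefix, so they can be tuned when invoking llc, e.g.
//   llc -mtriple=riscv64 -riscv-lower-ext-max-web-size=24 \
//       -riscv-lower-form-vw-w-with-splat test.ll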
89
90RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
91 const RISCVSubtarget &STI)
92 : TargetLowering(TM), Subtarget(STI) {
93
94 RISCVABI::ABI ABI = Subtarget.getTargetABI();
95 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
96
97 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
98 !Subtarget.hasStdExtF()) {
99 errs() << "Hard-float 'f' ABI can't be used for a target that "
100 "doesn't support the F instruction set extension (ignoring "
101 "target-abi)\n";
102 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
103 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
104 !Subtarget.hasStdExtD()) {
105 errs() << "Hard-float 'd' ABI can't be used for a target that "
106 "doesn't support the D instruction set extension (ignoring "
107 "target-abi)\n";
108 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
109 }
110
111 switch (ABI) {
112 default:
113 reportFatalUsageError("Don't know how to lower this ABI");
122 break;
123 }
124
125 MVT XLenVT = Subtarget.getXLenVT();
126
127 // Set up the register classes.
128 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
129
130 if (Subtarget.hasStdExtZfhmin())
131 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
132 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
133 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
134 if (Subtarget.hasStdExtF())
135 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
136 if (Subtarget.hasStdExtD())
137 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
138 if (Subtarget.hasStdExtZhinxmin())
139 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
140 if (Subtarget.hasStdExtZfinx())
141 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
142 if (Subtarget.hasStdExtZdinx()) {
143 if (Subtarget.is64Bit())
144 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
145 else
146 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
147 }
148
149 static const MVT::SimpleValueType BoolVecVTs[] = {
150 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
151 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
152 static const MVT::SimpleValueType IntVecVTs[] = {
153 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
154 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
155 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
156 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
157 MVT::nxv4i64, MVT::nxv8i64};
158 static const MVT::SimpleValueType F16VecVTs[] = {
159 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
160 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
161 static const MVT::SimpleValueType BF16VecVTs[] = {
162 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
163 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
164 static const MVT::SimpleValueType F32VecVTs[] = {
165 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
166 static const MVT::SimpleValueType F64VecVTs[] = {
167 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
168 static const MVT::SimpleValueType VecTupleVTs[] = {
169 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
170 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
171 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
172 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
173 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
174 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
175 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
176 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
177 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
178 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
179 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
180
181 if (Subtarget.hasVInstructions()) {
182 auto addRegClassForRVV = [this](MVT VT) {
183 // Disable the smallest fractional LMUL types if ELEN is less than
184 // RVVBitsPerBlock.
185 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
186 if (VT.getVectorMinNumElements() < MinElts)
187 return;
188
189 unsigned Size = VT.getSizeInBits().getKnownMinValue();
190 const TargetRegisterClass *RC;
191 if (Size <= RISCV::RVVBitsPerBlock)
192 RC = &RISCV::VRRegClass;
193 else if (Size == 2 * RISCV::RVVBitsPerBlock)
194 RC = &RISCV::VRM2RegClass;
195 else if (Size == 4 * RISCV::RVVBitsPerBlock)
196 RC = &RISCV::VRM4RegClass;
197 else if (Size == 8 * RISCV::RVVBitsPerBlock)
198 RC = &RISCV::VRM8RegClass;
199 else
200 llvm_unreachable("Unexpected size");
201
202 addRegisterClass(VT, RC);
203 };
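// Worked example of the mapping above, assuming RVVBitsPerBlock == 64: with
// ELEN == 64, nxv1i8 has a known minimum size of 8 bits and is assigned to VR,
// while nxv16i32 (512 bits, i.e. 8 * RVVBitsPerBlock) is assigned to VRM8. With
// ELEN == 32, MinElts becomes 2, so the fractional type nxv1i8 is skipped and
// never gets a register class.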
204
205 for (MVT VT : BoolVecVTs)
206 addRegClassForRVV(VT);
207 for (MVT VT : IntVecVTs) {
208 if (VT.getVectorElementType() == MVT::i64 &&
209 !Subtarget.hasVInstructionsI64())
210 continue;
211 addRegClassForRVV(VT);
212 }
213
214 if (Subtarget.hasVInstructionsF16Minimal() ||
215 Subtarget.hasVendorXAndesVPackFPH())
216 for (MVT VT : F16VecVTs)
217 addRegClassForRVV(VT);
218
219 if (Subtarget.hasVInstructionsBF16Minimal() ||
220 Subtarget.hasVendorXAndesVBFHCvt())
221 for (MVT VT : BF16VecVTs)
222 addRegClassForRVV(VT);
223
224 if (Subtarget.hasVInstructionsF32())
225 for (MVT VT : F32VecVTs)
226 addRegClassForRVV(VT);
227
228 if (Subtarget.hasVInstructionsF64())
229 for (MVT VT : F64VecVTs)
230 addRegClassForRVV(VT);
231
232 if (Subtarget.useRVVForFixedLengthVectors()) {
233 auto addRegClassForFixedVectors = [this](MVT VT) {
234 MVT ContainerVT = getContainerForFixedLengthVector(VT);
235 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
236 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
237 addRegisterClass(VT, TRI.getRegClass(RCID));
238 };
239 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
240 if (useRVVForFixedLengthVectorVT(VT))
241 addRegClassForFixedVectors(VT);
242
243 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
244 if (useRVVForFixedLengthVectorVT(VT))
245 addRegClassForFixedVectors(VT);
246 }
247
248 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
249 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
250 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
251 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
252 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
253 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
254 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
255 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
256 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
257 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
258 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
259 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
260 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
261 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
262 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
263 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
264 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
265 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
266 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
267 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
268 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
269 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
270 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
271 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
272 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
273 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
274 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
275 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
276 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
277 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
278 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
279 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
280 }
281
282 // Compute derived properties from the register classes.
284
286
288 MVT::i1, Promote);
289 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
291 MVT::i1, Promote);
292
293 // TODO: add all necessary setOperationAction calls.
294 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Custom);
295
296 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
297 setOperationAction(ISD::BR_CC, XLenVT, Expand);
298 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
300
305 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
308 }
309
310 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
311
312 setOperationAction(ISD::VASTART, MVT::Other, Custom);
313 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
314
315 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
316 !Subtarget.hasVendorXAndesPerf())
318
320
321 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
322 !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
323 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
324 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
325
326 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
327 setOperationAction(ISD::LOAD, MVT::i64, Custom);
328 setOperationAction(ISD::STORE, MVT::i64, Custom);
329 }
330
331 if (Subtarget.is64Bit()) {
333
334 setOperationAction(ISD::LOAD, MVT::i32, Custom);
336 MVT::i32, Custom);
338 if (!Subtarget.hasStdExtZbb())
341 Custom);
343 }
344 if (!Subtarget.hasStdExtZmmul()) {
346 } else if (Subtarget.is64Bit()) {
349 } else {
351 }
352
353 if (!Subtarget.hasStdExtM()) {
355 Expand);
356 } else if (Subtarget.is64Bit()) {
358 {MVT::i8, MVT::i16, MVT::i32}, Custom);
359 }
360
363 Expand);
364
366 Custom);
367
368 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
369 if (Subtarget.is64Bit())
371 } else if (Subtarget.hasVendorXTHeadBb()) {
372 if (Subtarget.is64Bit())
375 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
377 } else {
379 }
380
381 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
382 // pattern match it directly in isel.
384 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
385 Subtarget.hasVendorXTHeadBb())
386 ? Legal
387 : Expand);
388
389 if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
390 !Subtarget.is64Bit()) {
392 } else {
393 // Zbkb can use rev8+brev8 to implement bitreverse.
395 Subtarget.hasStdExtZbkb() ? Custom : Expand);
396 if (Subtarget.hasStdExtZbkb())
398 }
399
400 if (Subtarget.hasStdExtZbb() ||
401 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
403 Legal);
404 }
405
406 if (Subtarget.hasCTZLike()) {
407 if (Subtarget.is64Bit())
409 } else {
411 }
412
413 if (!Subtarget.hasCPOPLike()) {
414 // TODO: These should be set to LibCall, but this currently breaks
415 // the Linux kernel build. See #101786. Lacks i128 tests, too.
416 if (Subtarget.is64Bit())
418 else
421 }
422
423 if (Subtarget.hasCLZLike()) {
424 // We need the custom lowering to make sure that the resulting sequence
425 // for the 32bit case is efficient on 64bit targets.
426 // Use default promotion for i32 without Zbb.
427 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbb())
429 } else {
431 }
432
433 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
435 } else if (Subtarget.hasShortForwardBranchOpt()) {
436 // We can use PseudoCCSUB to implement ABS.
438 } else if (Subtarget.is64Bit()) {
440 }
441
442 if (!Subtarget.useCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov() &&
443 !Subtarget.hasVendorXqcicm() && !Subtarget.hasVendorXqcics())
445
446 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
453 }
454
455 static const unsigned FPLegalNodeTypes[] = {
456 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM,
457 ISD::FMAXIMUMNUM, ISD::LRINT, ISD::LLRINT,
458 ISD::LROUND, ISD::LLROUND, ISD::STRICT_LRINT,
463
464 static const ISD::CondCode FPCCToExpand[] = {
468
469 static const unsigned FPOpToExpand[] = {
470 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
471 ISD::FREM};
472
473 static const unsigned FPRndMode[] = {
474 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
475 ISD::FROUNDEVEN};
476
477 static const unsigned ZfhminZfbfminPromoteOps[] = {
478 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM,
479 ISD::FMINIMUMNUM, ISD::FADD, ISD::FSUB,
484 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
485 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
486 ISD::FROUNDEVEN, ISD::FCANONICALIZE};
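// Illustrative sketch of what Promote means for these ops (standard legalizer
// behaviour, not specific to this file): an f16 operation is widened to f32,
// performed there, and truncated back, roughly:
//   %a32 = fpext half %a to float
//   %b32 = fpext half %b to float
//   %r32 = fadd float %a32, %b32
//   %r = fptrunc float %r32 to half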
487
488 if (Subtarget.hasStdExtZfbfmin()) {
489 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
493 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
494 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
496 setOperationAction(ISD::FABS, MVT::bf16, Custom);
497 setOperationAction(ISD::FNEG, MVT::bf16, Custom);
501 }
502
503 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
504 if (Subtarget.hasStdExtZfhOrZhinx()) {
505 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
506 setOperationAction(FPRndMode, MVT::f16,
507 Subtarget.hasStdExtZfa() ? Legal : Custom);
509 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
510 Subtarget.hasStdExtZfa() ? Legal : Custom);
511 if (Subtarget.hasStdExtZfa())
513 } else {
514 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
515 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Promote);
516 for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
519 setOperationAction(Op, MVT::f16, Custom);
520 setOperationAction(ISD::FABS, MVT::f16, Custom);
521 setOperationAction(ISD::FNEG, MVT::f16, Custom);
525 }
526
527 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
528
531 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
534 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
535
537 ISD::FNEARBYINT, MVT::f16,
538 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
539 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
540 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
541 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
542 ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP},
543 MVT::f16, Promote);
544
545 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
546 // complete support for all operations in LegalizeDAG.
551 MVT::f16, Promote);
552
553 // We need to custom promote this.
554 if (Subtarget.is64Bit())
555 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
556 }
557
558 if (Subtarget.hasStdExtFOrZfinx()) {
559 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
560 setOperationAction(FPRndMode, MVT::f32,
561 Subtarget.hasStdExtZfa() ? Legal : Custom);
562 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
565 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
566 setOperationAction(FPOpToExpand, MVT::f32, Expand);
567 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
568 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
569 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
570 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
572 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
573 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
574 Subtarget.isSoftFPABI() ? LibCall : Custom);
575 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
576 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
577 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
578 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);
579
580 if (Subtarget.hasStdExtZfa()) {
582 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
583 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
584 } else {
585 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
586 }
587 }
588
589 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
590 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
591
592 if (Subtarget.hasStdExtDOrZdinx()) {
593 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
594
595 if (!Subtarget.is64Bit())
596 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
597
598 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
599 !Subtarget.is64Bit()) {
600 setOperationAction(ISD::LOAD, MVT::f64, Custom);
601 setOperationAction(ISD::STORE, MVT::f64, Custom);
602 }
603
604 if (Subtarget.hasStdExtZfa()) {
606 setOperationAction(FPRndMode, MVT::f64, Legal);
607 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
608 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
609 } else {
610 if (Subtarget.is64Bit())
611 setOperationAction(FPRndMode, MVT::f64, Custom);
612
613 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
614 }
615
618 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
621 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
622 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
623 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
624 setOperationAction(FPOpToExpand, MVT::f64, Expand);
625 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
626 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
627 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
628 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
630 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
631 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
632 Subtarget.isSoftFPABI() ? LibCall : Custom);
633 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
634 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
635 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Custom);
636 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
637 }
638
639 if (Subtarget.is64Bit()) {
642 MVT::i32, Custom);
643 setOperationAction(ISD::LROUND, MVT::i32, Custom);
644 }
645
646 if (Subtarget.hasStdExtFOrZfinx()) {
648 Custom);
649
650 // f16/bf16 require custom handling.
652 Custom);
654 Custom);
655
657 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
658 setOperationAction(ISD::GET_FPENV, XLenVT, Custom);
659 setOperationAction(ISD::SET_FPENV, XLenVT, Custom);
660 setOperationAction(ISD::RESET_FPENV, MVT::Other, Custom);
661 setOperationAction(ISD::GET_FPMODE, XLenVT, Custom);
662 setOperationAction(ISD::SET_FPMODE, XLenVT, Custom);
663 setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
664 }
665
668 XLenVT, Custom);
669
671
672 if (Subtarget.is64Bit())
674
675 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
676 // Unfortunately this can't be determined just from the ISA naming string.
677 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
678 Subtarget.is64Bit() ? Legal : Custom);
679 setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
680 Subtarget.is64Bit() ? Legal : Custom);
681
682 if (Subtarget.is64Bit()) {
683 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
684 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
685 }
686
687 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
689 if (Subtarget.is64Bit())
691
692 if (Subtarget.hasVendorXMIPSCBOP())
693 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
694 else if (Subtarget.hasStdExtZicbop())
695 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
696
697 if (Subtarget.hasStdExtA()) {
698 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
699 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
701 else
703 } else if (Subtarget.hasForcedAtomics()) {
704 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
705 } else {
707 }
708
709 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
710
712
713 if (getTargetMachine().getTargetTriple().isOSLinux()) {
714 // Custom lowering of llvm.clear_cache.
716 }
717
718 if (Subtarget.hasVInstructions()) {
720
721 setOperationAction(ISD::VSCALE, XLenVT, Custom);
722
723 // RVV intrinsics may have illegal operands.
724 // We also need to custom legalize vmv.x.s.
727 {MVT::i8, MVT::i16}, Custom);
728 if (Subtarget.is64Bit())
730 MVT::i32, Custom);
731 else
733 MVT::i64, Custom);
734
736 MVT::Other, Custom);
737
738 static const unsigned IntegerVPOps[] = {
739 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
740 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
741 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
742 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
743 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
744 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
745 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
746 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
747 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
748 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
749 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
750 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
751 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
752 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
753 ISD::EXPERIMENTAL_VP_SPLAT};
754
755 static const unsigned FloatingPointVPOps[] = {
756 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
757 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
758 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
759 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
760 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
761 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
762 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
763 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
764 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
765 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
766 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
767 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
768 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
769
770 static const unsigned IntegerVecReduceOps[] = {
771 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
772 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
773 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
774
775 static const unsigned FloatingPointVecReduceOps[] = {
776 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
777 ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};
778
779 static const unsigned FloatingPointLibCallOps[] = {
780 ISD::FREM, ISD::FPOW, ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
781 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, ISD::FLOG10};
782
783 if (!Subtarget.is64Bit()) {
784 // We must custom-lower certain vXi64 operations on RV32 due to the vector
785 // element type being illegal.
787 MVT::i64, Custom);
788
789 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
790
791 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
792 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
793 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
794 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
795 MVT::i64, Custom);
796 }
797
798 for (MVT VT : BoolVecVTs) {
799 if (!isTypeLegal(VT))
800 continue;
801
803
804 // Mask VTs are custom-expanded into a series of standard nodes
808 VT, Custom);
809
811 Custom);
812
814 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
815 Expand);
816 setOperationAction(ISD::VP_MERGE, VT, Custom);
817
818 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
819 Custom);
820
821 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
822
824 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
825 Custom);
826
828 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
829 Custom);
830
831 // RVV has native int->float & float->int conversions where the
832 // element type sizes are within one power-of-two of each other. Any
833 // wider distances between type sizes have to be lowered as sequences
834 // which progressively narrow the gap in stages.
839 VT, Custom);
841 Custom);
842
843 // Expand all extending loads to types larger than this, and truncating
844 // stores from types larger than this.
846 setTruncStoreAction(VT, OtherVT, Expand);
848 OtherVT, Expand);
849 }
850
851 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
852 ISD::VP_TRUNCATE, ISD::VP_SETCC},
853 VT, Custom);
854
857
859
860 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
861 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
862 setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
863
866 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
867 }
868
869 for (MVT VT : IntVecVTs) {
870 if (!isTypeLegal(VT))
871 continue;
872
875
876 // Vectors implement MULHS/MULHU.
878
879 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
880 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
882
884 Legal);
885
887
888 // Custom-lower extensions and truncations from/to mask types.
890 VT, Custom);
891
892 // RVV has native int->float & float->int conversions where the
893 // element type sizes are within one power-of-two of each other. Any
894 // wider distances between type sizes have to be lowered as sequences
895 // which progressively narrow the gap in stages.
900 VT, Custom);
902 Custom);
906 VT, Legal);
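// Illustrative example of the staging described above: nxv2i32 -> nxv2f64 is
// only one power-of-two apart in element width and maps to a single widening
// convert, whereas nxv2i8 -> nxv2f64 (8 -> 64 bits) is lowered as a sequence,
// e.g. by first extending nxv2i8 to nxv2i32 and then converting to nxv2f64.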
907
908 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
909 // nodes which truncate by one power of two at a time.
912 Custom);
913
914 // Custom-lower insert/extract operations to simplify patterns.
916 Custom);
917
918 // Custom-lower reduction operations to set up the corresponding custom
919 // nodes' operands.
920 setOperationAction(IntegerVecReduceOps, VT, Custom);
921
922 setOperationAction(IntegerVPOps, VT, Custom);
923
924 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
925
926 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
927 VT, Custom);
928
930 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
931 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
932 VT, Custom);
933 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
934
937 VT, Custom);
938
941
943
945 setTruncStoreAction(VT, OtherVT, Expand);
947 OtherVT, Expand);
948 }
949
952
953 // Splice
955
956 if (Subtarget.hasStdExtZvkb()) {
958 setOperationAction(ISD::VP_BSWAP, VT, Custom);
959 } else {
960 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
962 }
963
964 if (Subtarget.hasStdExtZvbb()) {
966 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
967 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
968 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
969 VT, Custom);
970 } else {
971 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
973 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
974 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
975 VT, Expand);
976
977 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
978 // range of f32.
979 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
980 if (isTypeLegal(FloatVT)) {
982 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
983 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
984 VT, Custom);
985 }
986 }
987
989 }
990
991 for (MVT VT : VecTupleVTs) {
992 if (!isTypeLegal(VT))
993 continue;
994
995 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
996 }
997
998 // Expand various CCs to best match the RVV ISA, which natively supports UNE
999 // but no other unordered comparisons, and supports all ordered comparisons
1000 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
1001 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
1002 // and we pattern-match those back to the "original", swapping operands once
1003 // more. This way we catch both operations and both "vf" and "fv" forms with
1004 // fewer patterns.
1005 static const ISD::CondCode VFPCCToExpand[] = {
1009 };
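// Worked example of the expansion strategy described above: a SETOGT compare
// "setcc ogt %a, %b" is rewritten as "setcc olt %b, %a"; instruction selection
// then swaps the operands back, so one set of vmflt.vv/vmflt.vf patterns covers
// the original form, the swapped form, and both scalar-operand variants.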
1010
1011 // TODO: support more ops.
1012 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1013 ISD::FMINNUM,
1014 ISD::FMAXNUM,
1015 ISD::FMINIMUMNUM,
1016 ISD::FMAXIMUMNUM,
1017 ISD::FADD,
1018 ISD::FSUB,
1019 ISD::FMUL,
1020 ISD::FMA,
1021 ISD::FDIV,
1022 ISD::FSQRT,
1023 ISD::FCEIL,
1024 ISD::FTRUNC,
1025 ISD::FFLOOR,
1026 ISD::FROUND,
1027 ISD::FROUNDEVEN,
1028 ISD::FRINT,
1029 ISD::FNEARBYINT,
1031 ISD::SETCC,
1032 ISD::FMAXIMUM,
1033 ISD::FMINIMUM,
1040 ISD::VECREDUCE_FMIN,
1041 ISD::VECREDUCE_FMAX,
1042 ISD::VECREDUCE_FMINIMUM,
1043 ISD::VECREDUCE_FMAXIMUM};
1044
1045 // TODO: support more vp ops.
1046 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1047 ISD::VP_FADD,
1048 ISD::VP_FSUB,
1049 ISD::VP_FMUL,
1050 ISD::VP_FDIV,
1051 ISD::VP_FMA,
1052 ISD::VP_REDUCE_FMIN,
1053 ISD::VP_REDUCE_FMAX,
1054 ISD::VP_SQRT,
1055 ISD::VP_FMINNUM,
1056 ISD::VP_FMAXNUM,
1057 ISD::VP_FCEIL,
1058 ISD::VP_FFLOOR,
1059 ISD::VP_FROUND,
1060 ISD::VP_FROUNDEVEN,
1061 ISD::VP_FROUNDTOZERO,
1062 ISD::VP_FRINT,
1063 ISD::VP_FNEARBYINT,
1064 ISD::VP_SETCC,
1065 ISD::VP_FMINIMUM,
1066 ISD::VP_FMAXIMUM,
1067 ISD::VP_REDUCE_FMINIMUM,
1068 ISD::VP_REDUCE_FMAXIMUM};
1069
1070 // Sets common operation actions on RVV floating-point vector types.
1071 const auto SetCommonVFPActions = [&](MVT VT) {
1073 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1074 // sizes are within one power-of-two of each other. Therefore conversions
1075 // between vXf16 and vXf64 must be lowered as sequences which convert via
1076 // vXf32.
1077 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1078 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1079 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1080 // Custom-lower insert/extract operations to simplify patterns.
1082 Custom);
1083 // Expand various condition codes (explained above).
1084 setCondCodeAction(VFPCCToExpand, VT, Expand);
1085
1087 {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT,
1088 Legal);
1089 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
1090
1091 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1092 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
1094 VT, Custom);
1095
1096 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1097
1098 // Expand FP operations that need libcalls.
1099 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1100
1102
1103 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1104
1105 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
1106 VT, Custom);
1107
1109 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1110 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1111 VT, Custom);
1112 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1113
1116
1119 VT, Custom);
1120
1123
1125 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1126 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1127
1128 setOperationAction(FloatingPointVPOps, VT, Custom);
1129
1131 Custom);
1134 VT, Legal);
1139 VT, Custom);
1140
1142 };
1143
1144 // Sets common extload/truncstore actions on RVV floating-point vector
1145 // types.
1146 const auto SetCommonVFPExtLoadTruncStoreActions =
1147 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1148 for (auto SmallVT : SmallerVTs) {
1149 setTruncStoreAction(VT, SmallVT, Expand);
1150 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1151 }
1152 };
1153
1154 // Sets common actions for f16 and bf16 for when there's only
1155 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1156 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1157 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1159 Custom);
1160 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1161 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1162 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1163 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1164 Custom);
1166 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1172 VT, Custom);
1173 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1174 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1175 MVT EltVT = VT.getVectorElementType();
1176 if (isTypeLegal(EltVT))
1177 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1179 VT, Custom);
1180 else
1181 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1182 EltVT, Custom);
1183 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1184 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1185 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1186 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1187 ISD::VP_SCATTER},
1188 VT, Custom);
1189 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1190
1191 setOperationAction(ISD::FNEG, VT, Expand);
1192 setOperationAction(ISD::FABS, VT, Expand);
1194
1195 // Expand FP operations that need libcalls.
1196 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1197
1198 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1199 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1200 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1201 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1202 } else {
1203 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1204 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1205 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1206 }
1207 };
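// Sketch of the promotion path configured by the lambda above, assuming a
// target with only Zvfhmin: an "fadd <vscale x 4 x half>" is widened to
// nxv4f32, added there, and narrowed back to nxv4f16, while nxv32f16 (LMUL 8)
// is custom-split first because the matching nxv32f32 type is not legal.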
1208
1209 if (Subtarget.hasVInstructionsF16()) {
1210 for (MVT VT : F16VecVTs) {
1211 if (!isTypeLegal(VT))
1212 continue;
1213 SetCommonVFPActions(VT);
1214 }
1215 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1216 for (MVT VT : F16VecVTs) {
1217 if (!isTypeLegal(VT))
1218 continue;
1219 SetCommonPromoteToF32Actions(VT);
1220 }
1221 }
1222
1223 if (Subtarget.hasVInstructionsBF16Minimal()) {
1224 for (MVT VT : BF16VecVTs) {
1225 if (!isTypeLegal(VT))
1226 continue;
1227 SetCommonPromoteToF32Actions(VT);
1228 }
1229 }
1230
1231 if (Subtarget.hasVInstructionsF32()) {
1232 for (MVT VT : F32VecVTs) {
1233 if (!isTypeLegal(VT))
1234 continue;
1235 SetCommonVFPActions(VT);
1236 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1237 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1238 }
1239 }
1240
1241 if (Subtarget.hasVInstructionsF64()) {
1242 for (MVT VT : F64VecVTs) {
1243 if (!isTypeLegal(VT))
1244 continue;
1245 SetCommonVFPActions(VT);
1246 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1247 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1248 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1249 }
1250 }
1251
1252 if (Subtarget.useRVVForFixedLengthVectors()) {
1253 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1254 if (!useRVVForFixedLengthVectorVT(VT))
1255 continue;
1256
1257 // By default everything must be expanded.
1258 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1261 setTruncStoreAction(VT, OtherVT, Expand);
1263 OtherVT, Expand);
1264 }
1265
1266 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1267 // expansion to a build_vector of 0s.
1269
1270 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1272 Custom);
1273
1276 Custom);
1277
1279 VT, Custom);
1280
1282 VT, Custom);
1283
1285
1286 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1287
1289
1291
1294 Custom);
1295
1296 setOperationAction(ISD::BITCAST, VT, Custom);
1297
1299 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1300 Custom);
1301
1303 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1304 Custom);
1305
1307 {
1316 },
1317 VT, Custom);
1319 Custom);
1320
1322
1323 // Operations below are different for between masks and other vectors.
1324 if (VT.getVectorElementType() == MVT::i1) {
1325 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1326 ISD::OR, ISD::XOR},
1327 VT, Custom);
1328
1329 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1330 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1331 VT, Custom);
1332
1333 setOperationAction(ISD::VP_MERGE, VT, Custom);
1334
1335 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1336 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1337 continue;
1338 }
1339
1340 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1341 // it before type legalization for i64 vectors on RV32. It will then be
1342 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1343 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1344 // improvements first.
1345 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1348
1349 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1351 }
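// Illustrative flow for the RV32 i64 case above: a build_vector splat of an i64
// value is converted to SPLAT_VECTOR by DAGCombine, type legalization of the
// illegal i64 element then produces SPLAT_VECTOR_PARTS(lo, hi), and the custom
// handler lowers that directly instead of expanding to a per-element
// build_vector.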
1352
1354 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1355
1356 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1357 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1358 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1359 ISD::VP_SCATTER},
1360 VT, Custom);
1361 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1362
1366 VT, Custom);
1367
1370
1372
1373 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1374 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1376
1380 VT, Custom);
1381
1383
1386
1387 // Custom-lower reduction operations to set up the corresponding custom
1388 // nodes' operands.
1389 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1390 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1391 ISD::VECREDUCE_UMIN},
1392 VT, Custom);
1393
1394 setOperationAction(IntegerVPOps, VT, Custom);
1395
1396 if (Subtarget.hasStdExtZvkb())
1398
1399 if (Subtarget.hasStdExtZvbb()) {
1402 VT, Custom);
1403 } else {
1404 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1405 // range of f32.
1406 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1407 if (isTypeLegal(FloatVT))
1410 Custom);
1411 }
1412
1414 }
1415
1416 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1417 // There are no extending loads or truncating stores.
1418 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1419 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1420 setTruncStoreAction(VT, InnerVT, Expand);
1421 }
1422
1423 if (!useRVVForFixedLengthVectorVT(VT))
1424 continue;
1425
1426 // By default everything must be expanded.
1427 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1429
1430 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1431 // expansion to a build_vector of 0s.
1433
1438 VT, Custom);
1439 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1440 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1441
1443 VT, Custom);
1444
1445 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1446 ISD::MGATHER, ISD::MSCATTER},
1447 VT, Custom);
1448 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1449 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1450 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1451 VT, Custom);
1452 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1453
1454 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1456 Custom);
1457
1458 if (VT.getVectorElementType() == MVT::f16 &&
1459 !Subtarget.hasVInstructionsF16()) {
1460 setOperationAction(ISD::BITCAST, VT, Custom);
1461 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1463 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1464 Custom);
1465 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1466 Custom);
1467 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1468 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1469 if (Subtarget.hasStdExtZfhmin()) {
1471 } else {
1472 // We need to custom legalize f16 build vectors if Zfhmin isn't
1473 // available.
1475 }
1476 setOperationAction(ISD::FNEG, VT, Expand);
1477 setOperationAction(ISD::FABS, VT, Expand);
1479 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1480 // Don't promote f16 vector operations to f32 if f32 vector type is
1481 // not legal.
1482 // TODO: could split the f16 vector into two vectors and do promotion.
1483 if (!isTypeLegal(F32VecVT))
1484 continue;
1485 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1486 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1487 continue;
1488 }
1489
1490 if (VT.getVectorElementType() == MVT::bf16) {
1491 setOperationAction(ISD::BITCAST, VT, Custom);
1492 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1493 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1494 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1495 if (Subtarget.hasStdExtZfbfmin()) {
1497 } else {
1498 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1499 // available.
1501 }
1503 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1504 Custom);
1505 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1506 // Don't promote f16 vector operations to f32 if f32 vector type is
1507 // not legal.
1508 // TODO: could split the f16 vector into two vectors and do promotion.
1509 if (!isTypeLegal(F32VecVT))
1510 continue;
1511 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1512 // TODO: Promote VP ops to fp32.
1513 continue;
1514 }
1515
1517 Custom);
1518
1520 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1521 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1522 ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::IS_FPCLASS,
1523 ISD::FMAXIMUM, ISD::FMINIMUM},
1524 VT, Custom);
1525
1526 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1527 ISD::FROUNDEVEN, ISD::FRINT, ISD::LRINT,
1528 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
1529 ISD::FNEARBYINT},
1530 VT, Custom);
1531
1532 setCondCodeAction(VFPCCToExpand, VT, Expand);
1533
1536
1537 setOperationAction(ISD::BITCAST, VT, Custom);
1538
1539 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1540
1541 setOperationAction(FloatingPointVPOps, VT, Custom);
1542
1549 VT, Custom);
1550 }
1551
1552 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1553 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1554 if (Subtarget.is64Bit())
1555 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1556 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1557 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1558 if (Subtarget.hasStdExtZfbfmin())
1559 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
1560 if (Subtarget.hasStdExtFOrZfinx())
1561 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1562 if (Subtarget.hasStdExtDOrZdinx())
1563 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1564 }
1565 }
1566
1567 if (Subtarget.hasStdExtA())
1568 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1569
1570 if (Subtarget.hasForcedAtomics()) {
1571 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1573 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1574 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1575 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1576 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1577 XLenVT, LibCall);
1578 }
1579
1580 if (Subtarget.hasVendorXTHeadMemIdx()) {
1581 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1582 setIndexedLoadAction(im, MVT::i8, Legal);
1583 setIndexedStoreAction(im, MVT::i8, Legal);
1584 setIndexedLoadAction(im, MVT::i16, Legal);
1585 setIndexedStoreAction(im, MVT::i16, Legal);
1586 setIndexedLoadAction(im, MVT::i32, Legal);
1587 setIndexedStoreAction(im, MVT::i32, Legal);
1588
1589 if (Subtarget.is64Bit()) {
1590 setIndexedLoadAction(im, MVT::i64, Legal);
1591 setIndexedStoreAction(im, MVT::i64, Legal);
1592 }
1593 }
1594 }
1595
1596 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1600
1604 }
1605
1606 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1607 if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1608 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1609 ISD::PARTIAL_REDUCE_UMLA,
1610 ISD::PARTIAL_REDUCE_SUMLA};
1611 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1612 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1613 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1614 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1615 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
1616
1617 if (Subtarget.useRVVForFixedLengthVectors()) {
1619 if (VT.getVectorElementType() != MVT::i32 ||
1620 !useRVVForFixedLengthVectorVT(VT))
1621 continue;
1622 ElementCount EC = VT.getVectorElementCount();
1623 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1624 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1625 }
1626 }
1627 }
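// Example of the accumulator/input pairing above, assuming the generic
// PARTIAL_REDUCE_*MLA semantics: the (nxv4i32, nxv16i8) entry accumulates a
// 16-element i8 product vector into a 4-element i32 accumulator, i.e. each i32
// lane sums four adjacent i8*i8 products, matching the 4:1 dot-product shape of
// the Zvqdotq instructions.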
1628
1629 // Customize load and store operation for bf16 if zfh isn't enabled.
1630 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1631 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1632 setOperationAction(ISD::STORE, MVT::bf16, Custom);
1633 }
1634
1635 // Function alignments.
1636 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1637 setMinFunctionAlignment(FunctionAlignment);
1638 // Set preferred alignments.
1639 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1640 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1641
1647
1648 if (Subtarget.hasStdExtFOrZfinx())
1649 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM, ISD::FMUL});
1650
1651 if (Subtarget.hasStdExtZbb())
1653
1654 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1655 Subtarget.hasVInstructions())
1657
1658 if (Subtarget.hasStdExtZbkb())
1660
1661 if (Subtarget.hasStdExtFOrZfinx())
1664 if (Subtarget.hasVInstructions())
1666 {ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1667 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1668 ISD::SRL, ISD::SHL, ISD::STORE,
1670 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1674 ISD::VSELECT, ISD::VECREDUCE_ADD});
1675
1676 if (Subtarget.hasVendorXTHeadMemPair())
1677 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1678 if (Subtarget.useRVVForFixedLengthVectors())
1679 setTargetDAGCombine(ISD::BITCAST);
1680
1681 // Disable strict node mutation.
1682 IsStrictFPEnabled = true;
1683 EnableExtLdPromotion = true;
1684
1685 // Let the subtarget decide if a predictable select is more expensive than the
1686 // corresponding branch. This information is used in CGP/SelectOpt to decide
1687 // when to convert selects into branches.
1688 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1689
1690 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1691 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1692
1693 MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1694 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1695 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1696
1698 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1699 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1700
1701 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1702 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1703}
1704
1705EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1706 LLVMContext &Context,
1707 EVT VT) const {
1708 if (!VT.isVector())
1709 return getPointerTy(DL);
1710 if (Subtarget.hasVInstructions() &&
1711 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1712 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1713 return VT.changeVectorElementTypeToInteger();
1714}
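// Usage sketch for the rule above: with V enabled, a setcc over nxv4i32
// produces the mask type nxv4i1, while a scalar compare on riscv64 falls
// through to the pointer-sized i64 result type.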
1715
1717 return Subtarget.getXLenVT();
1718}
1719
1720// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1721bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1722 unsigned VF,
1723 bool IsScalable) const {
1724 if (!Subtarget.hasVInstructions())
1725 return true;
1726
1727 if (!IsScalable)
1728 return true;
1729
1730 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1731 return true;
1732
1733 // Don't allow VF=1 if those types aren't legal.
1734 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1735 return true;
1736
1737 // VLEN=32 support is incomplete.
1738 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1739 return true;
1740
1741 // The maximum VF is for the smallest element width with LMUL=8.
1742 // VF must be a power of 2.
1743 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
1744 return VF > MaxVF || !isPowerOf2_32(VF);
1745}
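// Illustrative example, assuming RVVBytesPerBlock == 8 so MaxVF == 64: a call
// such as llvm.experimental.get.vector.length.i32(%tc, i32 4, i1 true) on a V
// target with a real minimum VLEN of at least 64 makes this hook return false,
// so it is lowered to a vsetvli, while a non-power-of-2 VF or a VF above 64
// falls back to the generic expansion.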
1746
1748 return !Subtarget.hasVInstructions() ||
1749 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1750}
1751
1753 const CallInst &I,
1754 MachineFunction &MF,
1755 unsigned Intrinsic) const {
1756 auto &DL = I.getDataLayout();
1757
1758 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1759 bool IsUnitStrided, bool UsePtrVal = false) {
1760 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1761 // We can't use ptrVal if the intrinsic can access memory before the
1762 // pointer. This means we can't use it for strided or indexed intrinsics.
1763 if (UsePtrVal)
1764 Info.ptrVal = I.getArgOperand(PtrOp);
1765 else
1766 Info.fallbackAddressSpace =
1767 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1768 Type *MemTy;
1769 if (IsStore) {
1770 // Store value is the first operand.
1771 MemTy = I.getArgOperand(0)->getType();
1772 } else {
1773 // Use return type. If it's segment load, return type is a struct.
1774 MemTy = I.getType();
1775 if (MemTy->isStructTy())
1776 MemTy = MemTy->getStructElementType(0);
1777 }
1778 if (!IsUnitStrided)
1779 MemTy = MemTy->getScalarType();
1780
1781 Info.memVT = getValueType(DL, MemTy);
1782 if (MemTy->isTargetExtTy()) {
1783 // RISC-V vector tuple type's alignment type should be its element type.
1784 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1785 MemTy = Type::getIntNTy(
1786 MemTy->getContext(),
1787 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1788 ->getZExtValue());
1789 Info.align = DL.getABITypeAlign(MemTy);
1790 } else {
1791 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
1792 }
1793 Info.size = MemoryLocation::UnknownSize;
1794 Info.flags |=
1796 return true;
1797 };
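// Example of how the helper above is used by the switch below: riscv_vle has
// its pointer at operand 1, is unit-strided, and can safely use ptrVal, so it
// calls
//   SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
//                       /*IsUnitStrided*/ true, /*UsePtrVal*/ true);
// while strided and indexed forms such as riscv_vlse leave UsePtrVal false and
// only record the address space, since they may access memory before the
// pointer.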
1798
1799 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1801
1803 switch (Intrinsic) {
1804 default:
1805 return false;
1806 case Intrinsic::riscv_masked_atomicrmw_xchg:
1807 case Intrinsic::riscv_masked_atomicrmw_add:
1808 case Intrinsic::riscv_masked_atomicrmw_sub:
1809 case Intrinsic::riscv_masked_atomicrmw_nand:
1810 case Intrinsic::riscv_masked_atomicrmw_max:
1811 case Intrinsic::riscv_masked_atomicrmw_min:
1812 case Intrinsic::riscv_masked_atomicrmw_umax:
1813 case Intrinsic::riscv_masked_atomicrmw_umin:
1814 case Intrinsic::riscv_masked_cmpxchg:
1815 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
1816 // narrow atomic operation. These will be expanded to an LR/SC loop that
1817 // reads/writes to/from an aligned 4 byte location. And, or, shift, etc.
1818 // will be used to modify the appropriate part of the 4 byte data and
1819 // preserve the rest.
1820 Info.opc = ISD::INTRINSIC_W_CHAIN;
1821 Info.memVT = MVT::i32;
1822 Info.ptrVal = I.getArgOperand(0);
1823 Info.offset = 0;
1824 Info.align = Align(4);
1827 return true;
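// Sketch of the expansion described in the comment above (standard AtomicExpand
// behaviour): an i8 atomicrmw that reaches one of these masked intrinsics is
// serviced by an LR.W/SC.W loop on the aligned 4-byte word containing the byte;
// the mask and shift operands select the byte within that word, and the other
// three bytes are written back unchanged.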
1828 case Intrinsic::riscv_seg2_load_mask:
1829 case Intrinsic::riscv_seg3_load_mask:
1830 case Intrinsic::riscv_seg4_load_mask:
1831 case Intrinsic::riscv_seg5_load_mask:
1832 case Intrinsic::riscv_seg6_load_mask:
1833 case Intrinsic::riscv_seg7_load_mask:
1834 case Intrinsic::riscv_seg8_load_mask:
1835 case Intrinsic::riscv_sseg2_load_mask:
1836 case Intrinsic::riscv_sseg3_load_mask:
1837 case Intrinsic::riscv_sseg4_load_mask:
1838 case Intrinsic::riscv_sseg5_load_mask:
1839 case Intrinsic::riscv_sseg6_load_mask:
1840 case Intrinsic::riscv_sseg7_load_mask:
1841 case Intrinsic::riscv_sseg8_load_mask:
1842 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1843 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1844 case Intrinsic::riscv_seg2_store_mask:
1845 case Intrinsic::riscv_seg3_store_mask:
1846 case Intrinsic::riscv_seg4_store_mask:
1847 case Intrinsic::riscv_seg5_store_mask:
1848 case Intrinsic::riscv_seg6_store_mask:
1849 case Intrinsic::riscv_seg7_store_mask:
1850 case Intrinsic::riscv_seg8_store_mask:
1851 // Operands are (vec, ..., vec, ptr, mask, vl)
1852 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1853 /*IsStore*/ true,
1854 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1855 case Intrinsic::riscv_sseg2_store_mask:
1856 case Intrinsic::riscv_sseg3_store_mask:
1857 case Intrinsic::riscv_sseg4_store_mask:
1858 case Intrinsic::riscv_sseg5_store_mask:
1859 case Intrinsic::riscv_sseg6_store_mask:
1860 case Intrinsic::riscv_sseg7_store_mask:
1861 case Intrinsic::riscv_sseg8_store_mask:
1862 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
1863 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1864 /*IsStore*/ true,
1865 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1866 case Intrinsic::riscv_vlm:
1867 return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
1868 /*IsStore*/ false,
1869 /*IsUnitStrided*/ true,
1870 /*UsePtrVal*/ true);
1871 case Intrinsic::riscv_vle:
1872 case Intrinsic::riscv_vle_mask:
1873 case Intrinsic::riscv_vleff:
1874 case Intrinsic::riscv_vleff_mask:
1875 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1876 /*IsStore*/ false,
1877 /*IsUnitStrided*/ true,
1878 /*UsePtrVal*/ true);
1879 case Intrinsic::riscv_vsm:
1880 case Intrinsic::riscv_vse:
1881 case Intrinsic::riscv_vse_mask:
1882 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1883 /*IsStore*/ true,
1884 /*IsUnitStrided*/ true,
1885 /*UsePtrVal*/ true);
1886 case Intrinsic::riscv_vlse:
1887 case Intrinsic::riscv_vlse_mask:
1888 case Intrinsic::riscv_vloxei:
1889 case Intrinsic::riscv_vloxei_mask:
1890 case Intrinsic::riscv_vluxei:
1891 case Intrinsic::riscv_vluxei_mask:
1892 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1893 /*IsStore*/ false,
1894 /*IsUnitStrided*/ false);
1895 case Intrinsic::riscv_vsse:
1896 case Intrinsic::riscv_vsse_mask:
1897 case Intrinsic::riscv_vsoxei:
1898 case Intrinsic::riscv_vsoxei_mask:
1899 case Intrinsic::riscv_vsuxei:
1900 case Intrinsic::riscv_vsuxei_mask:
1901 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1902 /*IsStore*/ true,
1903 /*IsUnitStrided*/ false);
1904 case Intrinsic::riscv_vlseg2:
1905 case Intrinsic::riscv_vlseg3:
1906 case Intrinsic::riscv_vlseg4:
1907 case Intrinsic::riscv_vlseg5:
1908 case Intrinsic::riscv_vlseg6:
1909 case Intrinsic::riscv_vlseg7:
1910 case Intrinsic::riscv_vlseg8:
1911 case Intrinsic::riscv_vlseg2ff:
1912 case Intrinsic::riscv_vlseg3ff:
1913 case Intrinsic::riscv_vlseg4ff:
1914 case Intrinsic::riscv_vlseg5ff:
1915 case Intrinsic::riscv_vlseg6ff:
1916 case Intrinsic::riscv_vlseg7ff:
1917 case Intrinsic::riscv_vlseg8ff:
1918 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1919 /*IsStore*/ false,
1920 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1921 case Intrinsic::riscv_vlseg2_mask:
1922 case Intrinsic::riscv_vlseg3_mask:
1923 case Intrinsic::riscv_vlseg4_mask:
1924 case Intrinsic::riscv_vlseg5_mask:
1925 case Intrinsic::riscv_vlseg6_mask:
1926 case Intrinsic::riscv_vlseg7_mask:
1927 case Intrinsic::riscv_vlseg8_mask:
1928 case Intrinsic::riscv_vlseg2ff_mask:
1929 case Intrinsic::riscv_vlseg3ff_mask:
1930 case Intrinsic::riscv_vlseg4ff_mask:
1931 case Intrinsic::riscv_vlseg5ff_mask:
1932 case Intrinsic::riscv_vlseg6ff_mask:
1933 case Intrinsic::riscv_vlseg7ff_mask:
1934 case Intrinsic::riscv_vlseg8ff_mask:
1935 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1936 /*IsStore*/ false,
1937 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1938 case Intrinsic::riscv_vlsseg2:
1939 case Intrinsic::riscv_vlsseg3:
1940 case Intrinsic::riscv_vlsseg4:
1941 case Intrinsic::riscv_vlsseg5:
1942 case Intrinsic::riscv_vlsseg6:
1943 case Intrinsic::riscv_vlsseg7:
1944 case Intrinsic::riscv_vlsseg8:
1945 case Intrinsic::riscv_vloxseg2:
1946 case Intrinsic::riscv_vloxseg3:
1947 case Intrinsic::riscv_vloxseg4:
1948 case Intrinsic::riscv_vloxseg5:
1949 case Intrinsic::riscv_vloxseg6:
1950 case Intrinsic::riscv_vloxseg7:
1951 case Intrinsic::riscv_vloxseg8:
1952 case Intrinsic::riscv_vluxseg2:
1953 case Intrinsic::riscv_vluxseg3:
1954 case Intrinsic::riscv_vluxseg4:
1955 case Intrinsic::riscv_vluxseg5:
1956 case Intrinsic::riscv_vluxseg6:
1957 case Intrinsic::riscv_vluxseg7:
1958 case Intrinsic::riscv_vluxseg8:
1959 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1960 /*IsStore*/ false,
1961 /*IsUnitStrided*/ false);
1962 case Intrinsic::riscv_vlsseg2_mask:
1963 case Intrinsic::riscv_vlsseg3_mask:
1964 case Intrinsic::riscv_vlsseg4_mask:
1965 case Intrinsic::riscv_vlsseg5_mask:
1966 case Intrinsic::riscv_vlsseg6_mask:
1967 case Intrinsic::riscv_vlsseg7_mask:
1968 case Intrinsic::riscv_vlsseg8_mask:
1969 case Intrinsic::riscv_vloxseg2_mask:
1970 case Intrinsic::riscv_vloxseg3_mask:
1971 case Intrinsic::riscv_vloxseg4_mask:
1972 case Intrinsic::riscv_vloxseg5_mask:
1973 case Intrinsic::riscv_vloxseg6_mask:
1974 case Intrinsic::riscv_vloxseg7_mask:
1975 case Intrinsic::riscv_vloxseg8_mask:
1976 case Intrinsic::riscv_vluxseg2_mask:
1977 case Intrinsic::riscv_vluxseg3_mask:
1978 case Intrinsic::riscv_vluxseg4_mask:
1979 case Intrinsic::riscv_vluxseg5_mask:
1980 case Intrinsic::riscv_vluxseg6_mask:
1981 case Intrinsic::riscv_vluxseg7_mask:
1982 case Intrinsic::riscv_vluxseg8_mask:
1983 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1984 /*IsStore*/ false,
1985 /*IsUnitStrided*/ false);
1986 case Intrinsic::riscv_vsseg2:
1987 case Intrinsic::riscv_vsseg3:
1988 case Intrinsic::riscv_vsseg4:
1989 case Intrinsic::riscv_vsseg5:
1990 case Intrinsic::riscv_vsseg6:
1991 case Intrinsic::riscv_vsseg7:
1992 case Intrinsic::riscv_vsseg8:
1993 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1994 /*IsStore*/ true,
1995 /*IsUnitStrided*/ false);
1996 case Intrinsic::riscv_vsseg2_mask:
1997 case Intrinsic::riscv_vsseg3_mask:
1998 case Intrinsic::riscv_vsseg4_mask:
1999 case Intrinsic::riscv_vsseg5_mask:
2000 case Intrinsic::riscv_vsseg6_mask:
2001 case Intrinsic::riscv_vsseg7_mask:
2002 case Intrinsic::riscv_vsseg8_mask:
2003 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2004 /*IsStore*/ true,
2005 /*IsUnitStrided*/ false);
2006 case Intrinsic::riscv_vssseg2:
2007 case Intrinsic::riscv_vssseg3:
2008 case Intrinsic::riscv_vssseg4:
2009 case Intrinsic::riscv_vssseg5:
2010 case Intrinsic::riscv_vssseg6:
2011 case Intrinsic::riscv_vssseg7:
2012 case Intrinsic::riscv_vssseg8:
2013 case Intrinsic::riscv_vsoxseg2:
2014 case Intrinsic::riscv_vsoxseg3:
2015 case Intrinsic::riscv_vsoxseg4:
2016 case Intrinsic::riscv_vsoxseg5:
2017 case Intrinsic::riscv_vsoxseg6:
2018 case Intrinsic::riscv_vsoxseg7:
2019 case Intrinsic::riscv_vsoxseg8:
2020 case Intrinsic::riscv_vsuxseg2:
2021 case Intrinsic::riscv_vsuxseg3:
2022 case Intrinsic::riscv_vsuxseg4:
2023 case Intrinsic::riscv_vsuxseg5:
2024 case Intrinsic::riscv_vsuxseg6:
2025 case Intrinsic::riscv_vsuxseg7:
2026 case Intrinsic::riscv_vsuxseg8:
2027 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2028 /*IsStore*/ true,
2029 /*IsUnitStrided*/ false);
2030 case Intrinsic::riscv_vssseg2_mask:
2031 case Intrinsic::riscv_vssseg3_mask:
2032 case Intrinsic::riscv_vssseg4_mask:
2033 case Intrinsic::riscv_vssseg5_mask:
2034 case Intrinsic::riscv_vssseg6_mask:
2035 case Intrinsic::riscv_vssseg7_mask:
2036 case Intrinsic::riscv_vssseg8_mask:
2037 case Intrinsic::riscv_vsoxseg2_mask:
2038 case Intrinsic::riscv_vsoxseg3_mask:
2039 case Intrinsic::riscv_vsoxseg4_mask:
2040 case Intrinsic::riscv_vsoxseg5_mask:
2041 case Intrinsic::riscv_vsoxseg6_mask:
2042 case Intrinsic::riscv_vsoxseg7_mask:
2043 case Intrinsic::riscv_vsoxseg8_mask:
2044 case Intrinsic::riscv_vsuxseg2_mask:
2045 case Intrinsic::riscv_vsuxseg3_mask:
2046 case Intrinsic::riscv_vsuxseg4_mask:
2047 case Intrinsic::riscv_vsuxseg5_mask:
2048 case Intrinsic::riscv_vsuxseg6_mask:
2049 case Intrinsic::riscv_vsuxseg7_mask:
2050 case Intrinsic::riscv_vsuxseg8_mask:
2051 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2052 /*IsStore*/ true,
2053 /*IsUnitStrided*/ false);
2054 }
2055}
2056
2058 const AddrMode &AM, Type *Ty,
2059 unsigned AS,
2060 Instruction *I) const {
2061 // No global is ever allowed as a base.
2062 if (AM.BaseGV)
2063 return false;
2064
2065 // None of our addressing modes allows a scalable offset
2066 if (AM.ScalableOffset)
2067 return false;
2068
2069 // RVV instructions only support register addressing.
2070 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2071 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2072
2073 // Require a 12-bit signed offset.
2074 if (!isInt<12>(AM.BaseOffs))
2075 return false;
2076
2077 switch (AM.Scale) {
2078 case 0: // "r+i" or just "i", depending on HasBaseReg.
2079 break;
2080 case 1:
2081 if (!AM.HasBaseReg) // allow "r+i".
2082 break;
2083 return false; // disallow "r+r" or "r+r+i".
2084 default:
2085 return false;
2086 }
2087
2088 return true;
2089}
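// Rough worked example of the checks above (illustrative only): assuming a
// scalar integer access,
//   base + 2047   -> legal    (12-bit signed offset, Scale == 0)
//   base + 2048   -> illegal  (offset does not fit isInt<12>)
//   base + index  -> illegal  (Scale == 1 together with a base register)
//   global + 8    -> illegal  (AM.BaseGV is set)
// RVV vector accesses additionally require a bare base register with no
// offset or scale, matching the register-only addressing of vle/vse.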
2090
2092 return isInt<12>(Imm);
2093}
2094
2096 return isInt<12>(Imm);
2097}
2098
2099// On RV32, 64-bit integers are split into their high and low parts and held
2100// in two different registers, so the trunc is free since the low register can
2101// just be used.
2102// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2103// isTruncateFree?
2105 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2106 return false;
2107 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2108 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2109 return (SrcBits == 64 && DestBits == 32);
2110}
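// Rough illustration of the comment above: on RV32 an i64 value occupies a
// (lo, hi) register pair, so for IR like
//   %t = trunc i64 %x to i32
// the result is simply the register that already holds the low half and no
// instruction is needed for the truncate itself.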
2111
2113 // We consider i64->i32 free on RV64 since we have good selection of W
2114 // instructions that make promoting operations back to i64 free in many cases.
2115 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2116 !DstVT.isInteger())
2117 return false;
2118 unsigned SrcBits = SrcVT.getSizeInBits();
2119 unsigned DestBits = DstVT.getSizeInBits();
2120 return (SrcBits == 64 && DestBits == 32);
2121}
2122
2124 EVT SrcVT = Val.getValueType();
2125 // Truncates that can be lowered to vnsrl or vnsra are free.
2126 if (Subtarget.hasVInstructions() &&
2127 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2128 SrcVT.isVector() && VT2.isVector()) {
2129 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2130 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2131 if (SrcBits == DestBits * 2) {
2132 return true;
2133 }
2134 }
2135 return TargetLowering::isTruncateFree(Val, VT2);
2136}
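// Rough illustration, assuming the usual RVV narrowing-shift patterns: a
// truncate of a shift whose source elements are exactly twice the width of
// the destination elements, e.g.
//   trunc (srl nxv4i32 %x, splat 16) to nxv4i16
// can be selected as a single vnsrl.wi (or vnsra for arithmetic shifts),
// which is why such truncates are reported as free here.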
2137
2139 // Zexts are free if they can be combined with a load.
2140 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2141 // poorly with type legalization of compares preferring sext.
2142 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2143 EVT MemVT = LD->getMemoryVT();
2144 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2145 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2146 LD->getExtensionType() == ISD::ZEXTLOAD))
2147 return true;
2148 }
2149
2150 return TargetLowering::isZExtFree(Val, VT2);
2151}
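// Rough illustration: an i8 or i16 load whose result is zero-extended can be
// selected as a zero-extending load (lbu/lhu), e.g.
//   %b = load i8, ptr %p
//   %z = zext i8 %b to i32      ; folds into a single lbu
// so the zext itself costs nothing. As noted above, i32->i64 zext loads on
// RV64 are deliberately not reported as free.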
2152
2154 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2155}
2156
2158 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2159}
2160
2162 return Subtarget.hasCTZLike();
2163}
2164
2166 return Subtarget.hasCLZLike();
2167}
2168
2170 const Instruction &AndI) const {
2171 // We expect to be able to match a bit extraction instruction if the Zbs
2172 // extension is supported and the mask is a power of two. However, we
2173 // conservatively return false if the mask would fit in an ANDI instruction,
2174 // on the basis that it's possible the sinking+duplication of the AND in
2175 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2176 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2177 if (!Subtarget.hasBEXTILike())
2178 return false;
2180 if (!Mask)
2181 return false;
2182 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2183}
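// Rough illustration of the policy above, assuming a BEXTI-like bit-extract
// instruction is available:
//   x & (1 << 20) -> mask is a power of two outside the signed 12-bit ANDI
//                    range, so return true and let a bit-extract test form.
//   x & (1 << 3)  -> mask fits ANDI, so return false; sinking the AND would
//                    only trade ANDI+BNEZ for BEXTI+BNEZ.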
2184
2186 EVT VT = Y.getValueType();
2187
2188 if (VT.isVector())
2189 return false;
2190
2191 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2192 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2193}
2194
2196 EVT VT = Y.getValueType();
2197
2198 if (!VT.isVector())
2199 return hasAndNotCompare(Y);
2200
2201 return Subtarget.hasStdExtZvkb();
2202}
2203
2205 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2206 if (Subtarget.hasStdExtZbs())
2207 return X.getValueType().isScalarInteger();
2208 auto *C = dyn_cast<ConstantSDNode>(Y);
2209 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2210 if (Subtarget.hasVendorXTHeadBs())
2211 return C != nullptr;
2212 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2213 return C && C->getAPIntValue().ule(10);
2214}
2215
2217 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2218 SDValue Y) const {
2219 if (SelectOpcode != ISD::VSELECT)
2220 return false;
2221
2222 // Only enable for rvv.
2223 if (!VT.isVector() || !Subtarget.hasVInstructions())
2224 return false;
2225
2226 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2227 return false;
2228
2229 return true;
2230}
2231
2233 Type *Ty) const {
2234 assert(Ty->isIntegerTy());
2235
2236 unsigned BitSize = Ty->getIntegerBitWidth();
2237 if (BitSize > Subtarget.getXLen())
2238 return false;
2239
2240 // Fast path, assume 32-bit immediates are cheap.
2241 int64_t Val = Imm.getSExtValue();
2242 if (isInt<32>(Val))
2243 return true;
2244
2245 // A constant pool entry may be more aligned than the load we're trying to
2246 // replace. If we don't support unaligned scalar mem, prefer the constant
2247 // pool.
2248 // TODO: Can the caller pass down the alignment?
2249 if (!Subtarget.enableUnalignedScalarMem())
2250 return true;
2251
2252 // Prefer to keep the load if it would require many instructions.
2253 // This uses the same threshold we use for constant pools but doesn't
2254 // check useConstantPoolForLargeInts.
2255 // TODO: Should we keep the load only when we're definitely going to emit a
2256 // constant pool?
2257
2259 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2260}
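// Rough worked example of the fast path above: a constant such as 0x12345678
// fits in 32 bits and can typically be rematerialized with a short lui+addi
// sequence, so folding away the constant-pool load is considered profitable.
// Wider constants are only converted while the materialization sequence stays
// within getMaxBuildIntsCost().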
2261
2265 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2266 SelectionDAG &DAG) const {
2267 // One interesting pattern that we'd want to form is 'bit extract':
2268 // ((1 >> Y) & 1) ==/!= 0
2269 // But we also need to be careful not to try to reverse that fold.
2270
2271 // Is this '((1 >> Y) & 1)'?
2272 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2273 return false; // Keep the 'bit extract' pattern.
2274
2275 // Will this be '((1 >> Y) & 1)' after the transform?
2276 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2277 return true; // Do form the 'bit extract' pattern.
2278
2279 // If 'X' is a constant, and we transform, then we will immediately
2280 // try to undo the fold, thus causing endless combine loop.
2281 // So only do the transform if X is not a constant. This matches the default
2282 // implementation of this function.
2283 return !XC;
2284}
2285
2287 unsigned Opc = VecOp.getOpcode();
2288
2289 // Assume target opcodes can't be scalarized.
2290 // TODO - do we have any exceptions?
2291 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2292 return false;
2293
2294 // If the vector op is not supported, try to convert to scalar.
2295 EVT VecVT = VecOp.getValueType();
2297 return true;
2298
2299 // If the vector op is supported, but the scalar op is not, the transform may
2300 // not be worthwhile.
2301 // Permit a vector binary operation to be converted to a scalar binary
2302 // operation that is custom lowered with an illegal type.
2303 EVT ScalarVT = VecVT.getScalarType();
2304 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2305 isOperationCustom(Opc, ScalarVT);
2306}
2307
2309 const GlobalAddressSDNode *GA) const {
2310 // In order to maximise the opportunity for common subexpression elimination,
2311 // keep a separate ADD node for the global address offset instead of folding
2312 // it in the global address node. Later peephole optimisations may choose to
2313 // fold it back in when profitable.
2314 return false;
2315}
2316
2317// Returns 0-31 if the fli instruction is available for the type and this is a
2318// legal FP immediate for the type. Returns -1 otherwise.
2320 if (!Subtarget.hasStdExtZfa())
2321 return -1;
2322
2323 bool IsSupportedVT = false;
2324 if (VT == MVT::f16) {
2325 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2326 } else if (VT == MVT::f32) {
2327 IsSupportedVT = true;
2328 } else if (VT == MVT::f64) {
2329 assert(Subtarget.hasStdExtD() && "Expect D extension");
2330 IsSupportedVT = true;
2331 }
2332
2333 if (!IsSupportedVT)
2334 return -1;
2335
2336 return RISCVLoadFPImm::getLoadFPImm(Imm);
2337}
2338
2340 bool ForCodeSize) const {
2341 bool IsLegalVT = false;
2342 if (VT == MVT::f16)
2343 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2344 else if (VT == MVT::f32)
2345 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2346 else if (VT == MVT::f64)
2347 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2348 else if (VT == MVT::bf16)
2349 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2350
2351 if (!IsLegalVT)
2352 return false;
2353
2354 if (getLegalZfaFPImm(Imm, VT) >= 0)
2355 return true;
2356
2357 // Some constants can be produced by fli+fneg.
2358 if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
2359 return true;
2360
2361 // Cannot create a 64-bit floating-point immediate value for RV32.
2362 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2363 // td can handle +0.0 or -0.0 already.
2364 // -0.0 can be created by fmv + fneg.
2365 return Imm.isZero();
2366 }
2367
2368 // Special case: fmv + fneg
2369 if (Imm.isNegZero())
2370 return true;
2371
2372 // Building an integer and then converting requires a fmv at the end of
2373 // the integer sequence. The fmv is not required for Zfinx.
2374 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2375 const int Cost =
2376 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2377 Subtarget.getXLen(), Subtarget);
2378 return Cost <= FPImmCost;
2379}
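// Rough illustration: with Zfa, values in the fixed fli table (constants such
// as 1.0, 0.5 or +inf) are legal immediates directly, and their negations
// become legal via fli followed by fneg. Otherwise the decision compares the
// integer-materialization cost (plus an fmv unless Zfinx is in use) against
// the FPImmCost threshold.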
2380
2381// TODO: This is very conservative.
2383 unsigned Index) const {
2385 return false;
2386
2387 // Extracts from index 0 are just subreg extracts.
2388 if (Index == 0)
2389 return true;
2390
2391 // Only support extracting a fixed from a fixed vector for now.
2392 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2393 return false;
2394
2395 EVT EltVT = ResVT.getVectorElementType();
2396 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2397
2398 // The smallest type we can slide is i8.
2399 // TODO: We can extract index 0 from a mask vector without a slide.
2400 if (EltVT == MVT::i1)
2401 return false;
2402
2403 unsigned ResElts = ResVT.getVectorNumElements();
2404 unsigned SrcElts = SrcVT.getVectorNumElements();
2405
2406 unsigned MinVLen = Subtarget.getRealMinVLen();
2407 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2408
2409 // If we're extracting only data from the first VLEN bits of the source
2410 // then we can always do this with an m1 vslidedown.vx. Restricting the
2411 // Index ensures we can use a vslidedown.vi.
2412 // TODO: We can generalize this when the exact VLEN is known.
2413 if (Index + ResElts <= MinVLMAX && Index < 31)
2414 return true;
2415
2416 // Conservatively only handle extracting half of a vector.
2417 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2418 // the upper half of a vector until we have more test coverage.
2419 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2420 // a cheap extract. However, this case is important in practice for
2421 // shuffled extracts of longer vectors. How should we resolve this?
2422 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2423}
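// Rough worked example, assuming a guaranteed minimum VLEN of 128 (so
// MinVLMAX is 4 for i32 elements):
//   extract v4i32 from v8i32 at index 0 -> cheap (plain subregister extract)
//   extract v4i32 from v8i32 at index 4 -> cheap (upper half, one slidedown)
//   extract v4i32 from v8i32 at index 2 -> not considered cheap under the
//                                          conservative rules above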
2424
2426 CallingConv::ID CC,
2427 EVT VT) const {
2428 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2429 // We might still end up using a GPR but that will be decided based on ABI.
2430 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2431 !Subtarget.hasStdExtZfhminOrZhinxmin())
2432 return MVT::f32;
2433
2434 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2435
2436 return PartVT;
2437}
2438
2439unsigned
2441 std::optional<MVT> RegisterVT) const {
2442 // Pair inline assembly operand
2443 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2444 *RegisterVT == MVT::Untyped)
2445 return 1;
2446
2447 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2448}
2449
2451 CallingConv::ID CC,
2452 EVT VT) const {
2453 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2454 // We might still end up using a GPR but that will be decided based on ABI.
2455 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2456 !Subtarget.hasStdExtZfhminOrZhinxmin())
2457 return 1;
2458
2459 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2460}
2461
2463 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2464 unsigned &NumIntermediates, MVT &RegisterVT) const {
2466 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2467
2468 return NumRegs;
2469}
2470
2471// Changes the condition code and swaps operands if necessary, so the SetCC
2472// operation matches one of the comparisons supported directly by branches
2473// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2474// with 1/-1.
2476 ISD::CondCode &CC, SelectionDAG &DAG,
2477 const RISCVSubtarget &Subtarget) {
2478 // If this is a single bit test that can't be handled by ANDI, shift the
2479 // bit to be tested to the MSB and perform a signed compare with 0.
2480 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2481 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2482 isa<ConstantSDNode>(LHS.getOperand(1)) &&
2483 // XAndesPerf supports branch on test bit.
2484 !Subtarget.hasVendorXAndesPerf()) {
2485 uint64_t Mask = LHS.getConstantOperandVal(1);
2486 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2487 unsigned ShAmt = 0;
2488 if (isPowerOf2_64(Mask)) {
2489 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2490 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2491 } else {
2492 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2493 }
2494
2495 LHS = LHS.getOperand(0);
2496 if (ShAmt != 0)
2497 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2498 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2499 return;
2500 }
2501 }
2502
2503 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2504 int64_t C = RHSC->getSExtValue();
2505 switch (CC) {
2506 default: break;
2507 case ISD::SETGT:
2508 // Convert X > -1 to X >= 0.
2509 if (C == -1) {
2510 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2511 CC = ISD::SETGE;
2512 return;
2513 }
2514 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
2515 // We have a branch immediate instruction for SETGE but not SETGT.
2516 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2517 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2518 CC = ISD::SETGE;
2519 return;
2520 }
2521 break;
2522 case ISD::SETLT:
2523 // Convert X < 1 to 0 >= X.
2524 if (C == 1) {
2525 RHS = LHS;
2526 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2527 CC = ISD::SETGE;
2528 return;
2529 }
2530 break;
2531 case ISD::SETUGT:
2532 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
2533 // We have a branch immediate instruction for SETUGE but not SETUGT.
2534 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
2535 // immediate.
2536 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2537 CC = ISD::SETUGE;
2538 return;
2539 }
2540 break;
2541 }
2542 }
2543
2544 switch (CC) {
2545 default:
2546 break;
2547 case ISD::SETGT:
2548 case ISD::SETLE:
2549 case ISD::SETUGT:
2550 case ISD::SETULE:
2552 std::swap(LHS, RHS);
2553 break;
2554 }
2555}
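// Rough worked examples of the rewrites above:
//   (setgt x, -1) -> (setge x, 0), so a plain bgez can be used.
//   (setlt x, 1)  -> (setge 0, x), i.e. branch on x <= 0.
//   (seteq (and x, 0x800), 0), where 0x800 does not fit a 12-bit ANDI and no
//   XAndesPerf-style bit-test branch is available, becomes a left shift that
//   moves bit 11 into the MSB followed by a signed compare against zero.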
2556
2558 if (VT.isRISCVVectorTuple()) {
2559 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2560 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2561 return RISCVVType::LMUL_F8;
2562 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2563 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2564 return RISCVVType::LMUL_F4;
2565 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2566 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2567 return RISCVVType::LMUL_F2;
2568 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2569 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2570 return RISCVVType::LMUL_1;
2571 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2572 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2573 return RISCVVType::LMUL_2;
2574 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2575 return RISCVVType::LMUL_4;
2576 llvm_unreachable("Invalid vector tuple type LMUL.");
2577 }
2578
2579 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2580 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2581 if (VT.getVectorElementType() == MVT::i1)
2582 KnownSize *= 8;
2583
2584 switch (KnownSize) {
2585 default:
2586 llvm_unreachable("Invalid LMUL.");
2587 case 8:
2588 return RISCVVType::LMUL_F8;
2589 case 16:
2590 return RISCVVType::LMUL_F4;
2591 case 32:
2592 return RISCVVType::LMUL_F2;
2593 case 64:
2594 return RISCVVType::LMUL_1;
2595 case 128:
2596 return RISCVVType::LMUL_2;
2597 case 256:
2598 return RISCVVType::LMUL_4;
2599 case 512:
2600 return RISCVVType::LMUL_8;
2601 }
2602}
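// Rough worked examples for the scalable-vector path above:
//   nxv1i32 -> known minimum size  32 bits -> LMUL_F2
//   nxv2i32 -> known minimum size  64 bits -> LMUL_1
//   nxv4i32 -> known minimum size 128 bits -> LMUL_2
//   nxv8i1  -> 8 bits, scaled by 8 for i1  -> LMUL_1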
2603
2605 switch (LMul) {
2606 default:
2607 llvm_unreachable("Invalid LMUL.");
2611 case RISCVVType::LMUL_1:
2612 return RISCV::VRRegClassID;
2613 case RISCVVType::LMUL_2:
2614 return RISCV::VRM2RegClassID;
2615 case RISCVVType::LMUL_4:
2616 return RISCV::VRM4RegClassID;
2617 case RISCVVType::LMUL_8:
2618 return RISCV::VRM8RegClassID;
2619 }
2620}
2621
2622unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2623 RISCVVType::VLMUL LMUL = getLMUL(VT);
2624 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2625 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2626 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2627 "Unexpected subreg numbering");
2628 return RISCV::sub_vrm1_0 + Index;
2629 }
2630 if (LMUL == RISCVVType::LMUL_2) {
2631 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2632 "Unexpected subreg numbering");
2633 return RISCV::sub_vrm2_0 + Index;
2634 }
2635 if (LMUL == RISCVVType::LMUL_4) {
2636 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2637 "Unexpected subreg numbering");
2638 return RISCV::sub_vrm4_0 + Index;
2639 }
2640 llvm_unreachable("Invalid vector type.");
2641}
2642
2644 if (VT.isRISCVVectorTuple()) {
2645 unsigned NF = VT.getRISCVVectorTupleNumFields();
2646 unsigned RegsPerField =
2647 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2648 (NF * RISCV::RVVBitsPerBlock));
2649 switch (RegsPerField) {
2650 case 1:
2651 if (NF == 2)
2652 return RISCV::VRN2M1RegClassID;
2653 if (NF == 3)
2654 return RISCV::VRN3M1RegClassID;
2655 if (NF == 4)
2656 return RISCV::VRN4M1RegClassID;
2657 if (NF == 5)
2658 return RISCV::VRN5M1RegClassID;
2659 if (NF == 6)
2660 return RISCV::VRN6M1RegClassID;
2661 if (NF == 7)
2662 return RISCV::VRN7M1RegClassID;
2663 if (NF == 8)
2664 return RISCV::VRN8M1RegClassID;
2665 break;
2666 case 2:
2667 if (NF == 2)
2668 return RISCV::VRN2M2RegClassID;
2669 if (NF == 3)
2670 return RISCV::VRN3M2RegClassID;
2671 if (NF == 4)
2672 return RISCV::VRN4M2RegClassID;
2673 break;
2674 case 4:
2675 assert(NF == 2);
2676 return RISCV::VRN2M4RegClassID;
2677 default:
2678 break;
2679 }
2680 llvm_unreachable("Invalid vector tuple type RegClass.");
2681 }
2682
2683 if (VT.getVectorElementType() == MVT::i1)
2684 return RISCV::VRRegClassID;
2685 return getRegClassIDForLMUL(getLMUL(VT));
2686}
2687
2688// Attempt to decompose a subvector insert/extract between VecVT and
2689// SubVecVT via subregister indices. Returns the subregister index that
2690// can perform the subvector insert/extract with the given element index, as
2691// well as the index corresponding to any leftover subvectors that must be
2692// further inserted/extracted within the register class for SubVecVT.
2693std::pair<unsigned, unsigned>
2695 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2696 const RISCVRegisterInfo *TRI) {
2697 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2698 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2699 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2700 "Register classes not ordered");
2701 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2702 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2703
2704 // If VecVT is a vector tuple type, either it has the same RegClass as
2705 // SubVecVT, or SubVecVT is actually a subvector of VecVT.
2706 if (VecVT.isRISCVVectorTuple()) {
2707 if (VecRegClassID == SubRegClassID)
2708 return {RISCV::NoSubRegister, 0};
2709
2710 assert(SubVecVT.isScalableVector() &&
2711 "Only allow scalable vector subvector.");
2712 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2713 "Invalid vector tuple insert/extract for vector and subvector with "
2714 "different LMUL.");
2715 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2716 }
2717
2718 // Try to compose a subregister index that takes us from the incoming
2719 // LMUL>1 register class down to the outgoing one. At each step we halve
2720 // the LMUL:
2721 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2722 // Note that this is not guaranteed to find a subregister index, such as
2723 // when we are extracting from one VR type to another.
2724 unsigned SubRegIdx = RISCV::NoSubRegister;
2725 for (const unsigned RCID :
2726 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2727 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2728 VecVT = VecVT.getHalfNumVectorElementsVT();
2729 bool IsHi =
2730 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2731 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2732 getSubregIndexByMVT(VecVT, IsHi));
2733 if (IsHi)
2734 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2735 }
2736 return {SubRegIdx, InsertExtractIdx};
2737}
2738
2739// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2740// stores for those types.
2741bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2742 return !Subtarget.useRVVForFixedLengthVectors() ||
2743 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2744}
2745
2747 if (!ScalarTy.isSimple())
2748 return false;
2749 switch (ScalarTy.getSimpleVT().SimpleTy) {
2750 case MVT::iPTR:
2751 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2752 case MVT::i8:
2753 case MVT::i16:
2754 case MVT::i32:
2755 return true;
2756 case MVT::i64:
2757 return Subtarget.hasVInstructionsI64();
2758 case MVT::f16:
2759 return Subtarget.hasVInstructionsF16Minimal();
2760 case MVT::bf16:
2761 return Subtarget.hasVInstructionsBF16Minimal();
2762 case MVT::f32:
2763 return Subtarget.hasVInstructionsF32();
2764 case MVT::f64:
2765 return Subtarget.hasVInstructionsF64();
2766 default:
2767 return false;
2768 }
2769}
2770
2771
2773 return NumRepeatedDivisors;
2774}
2775
2777 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2778 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2779 "Unexpected opcode");
2780 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2781 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2783 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2784 if (!II)
2785 return SDValue();
2786 return Op.getOperand(II->VLOperand + 1 + HasChain);
2787}
2788
2790 const RISCVSubtarget &Subtarget) {
2791 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2792 if (!Subtarget.useRVVForFixedLengthVectors())
2793 return false;
2794
2795 // We only support a set of vector types with a consistent maximum fixed size
2796 // across all supported vector element types to avoid legalization issues.
2797 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2798 // fixed-length vector type we support is 1024 bytes.
2799 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
2800 return false;
2801
2802 unsigned MinVLen = Subtarget.getRealMinVLen();
2803
2804 MVT EltVT = VT.getVectorElementType();
2805
2806 // Don't use RVV for vectors we cannot scalarize if required.
2807 switch (EltVT.SimpleTy) {
2808 // i1 is supported but has different rules.
2809 default:
2810 return false;
2811 case MVT::i1:
2812 // Masks can only use a single register.
2813 if (VT.getVectorNumElements() > MinVLen)
2814 return false;
2815 MinVLen /= 8;
2816 break;
2817 case MVT::i8:
2818 case MVT::i16:
2819 case MVT::i32:
2820 break;
2821 case MVT::i64:
2822 if (!Subtarget.hasVInstructionsI64())
2823 return false;
2824 break;
2825 case MVT::f16:
2826 if (!Subtarget.hasVInstructionsF16Minimal())
2827 return false;
2828 break;
2829 case MVT::bf16:
2830 if (!Subtarget.hasVInstructionsBF16Minimal())
2831 return false;
2832 break;
2833 case MVT::f32:
2834 if (!Subtarget.hasVInstructionsF32())
2835 return false;
2836 break;
2837 case MVT::f64:
2838 if (!Subtarget.hasVInstructionsF64())
2839 return false;
2840 break;
2841 }
2842
2843 // Reject elements larger than ELEN.
2844 if (EltVT.getSizeInBits() > Subtarget.getELen())
2845 return false;
2846
2847 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2848 // Don't use RVV for types that don't fit.
2849 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2850 return false;
2851
2852 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2853 // the base fixed length RVV support in place.
2854 if (!VT.isPow2VectorType())
2855 return false;
2856
2857 return true;
2858}
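// Rough worked example, assuming a guaranteed minimum VLEN of 128 and the
// default LMUL limit for fixed-length vectors:
//   v8i32 (256 bits) -> LMul = ceil(256/128) = 2 and the element count is a
//                       power of two, so RVV is used.
//   v3i32            -> rejected; the element count is not a power of two.
//   v4i64            -> rejected unless 64-bit vector integers are supported.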
2859
2860bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2861 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2862}
2863
2864// Return the largest legal scalable vector type that matches VT's element type.
2866 const RISCVSubtarget &Subtarget) {
2867 // This may be called before legal types are setup.
2868 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2869 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2870 "Expected legal fixed length vector!");
2871
2872 unsigned MinVLen = Subtarget.getRealMinVLen();
2873 unsigned MaxELen = Subtarget.getELen();
2874
2875 MVT EltVT = VT.getVectorElementType();
2876 switch (EltVT.SimpleTy) {
2877 default:
2878 llvm_unreachable("unexpected element type for RVV container");
2879 case MVT::i1:
2880 case MVT::i8:
2881 case MVT::i16:
2882 case MVT::i32:
2883 case MVT::i64:
2884 case MVT::bf16:
2885 case MVT::f16:
2886 case MVT::f32:
2887 case MVT::f64: {
2888 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2889 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2890 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2891 unsigned NumElts =
2893 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2894 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2895 return MVT::getScalableVectorVT(EltVT, NumElts);
2896 }
2897 }
2898}
2899
2901 const RISCVSubtarget &Subtarget) {
2903 Subtarget);
2904}
2905
2907 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2908}
2909
2910// Grow V to consume an entire RVV register.
2912 const RISCVSubtarget &Subtarget) {
2913 assert(VT.isScalableVector() &&
2914 "Expected to convert into a scalable vector!");
2915 assert(V.getValueType().isFixedLengthVector() &&
2916 "Expected a fixed length vector operand!");
2917 SDLoc DL(V);
2918 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
2919}
2920
2921// Shrink V so it's just big enough to maintain a VT's worth of data.
2923 const RISCVSubtarget &Subtarget) {
2925 "Expected to convert into a fixed length vector!");
2926 assert(V.getValueType().isScalableVector() &&
2927 "Expected a scalable vector operand!");
2928 SDLoc DL(V);
2929 return DAG.getExtractSubvector(DL, VT, V, 0);
2930}
2931
2932/// Return the mask type suitable for masking the provided
2933/// vector type. This is simply an i1 element type vector of the same
2934/// (possibly scalable) length.
2935static MVT getMaskTypeFor(MVT VecVT) {
2936 assert(VecVT.isVector());
2938 return MVT::getVectorVT(MVT::i1, EC);
2939}
2940
2941/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2942/// vector length VL.
2943static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2944 SelectionDAG &DAG) {
2945 MVT MaskVT = getMaskTypeFor(VecVT);
2946 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2947}
2948
2949static std::pair<SDValue, SDValue>
2951 const RISCVSubtarget &Subtarget) {
2952 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2953 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2954 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2955 return {Mask, VL};
2956}
2957
2958static std::pair<SDValue, SDValue>
2959getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2960 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2961 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2962 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2963 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2964 return {Mask, VL};
2965}
2966
2967// Gets the two common "VL" operands: an all-ones mask and the vector length.
2968// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2969// the vector type that the fixed-length vector is contained in. Otherwise if
2970// VecVT is scalable, then ContainerVT should be the same as VecVT.
2971static std::pair<SDValue, SDValue>
2972getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2973 const RISCVSubtarget &Subtarget) {
2974 if (VecVT.isFixedLengthVector())
2975 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2976 Subtarget);
2977 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2978 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2979}
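// Usage sketch: lowering a fixed-length v4i32 operation in its scalable
// container yields VL = constant 4 plus an all-ones mask, while purely
// scalable lowerings get VL = the X0 register, which is used here as a
// "VLMAX" sentinel.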
2980
2982 SelectionDAG &DAG) const {
2983 assert(VecVT.isScalableVector() && "Expected scalable vector");
2984 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2985 VecVT.getVectorElementCount());
2986}
2987
2988std::pair<unsigned, unsigned>
2990 const RISCVSubtarget &Subtarget) {
2991 assert(VecVT.isScalableVector() && "Expected scalable vector");
2992
2993 unsigned EltSize = VecVT.getScalarSizeInBits();
2994 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2995
2996 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2997 unsigned MaxVLMAX =
2998 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2999
3000 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
3001 unsigned MinVLMAX =
3002 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
3003
3004 return std::make_pair(MinVLMAX, MaxVLMAX);
3005}
3006
3007// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
3008// of either is (currently) supported. This can get us into an infinite loop
3009// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
3010// as a ..., etc.
3011// Until either (or both) of these can reliably lower any node, reporting that
3012// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
3013// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
3014// which is not desirable.
3016 EVT VT, unsigned DefinedValues) const {
3017 return false;
3018}
3019
3021 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
3022 // implementation-defined.
3023 if (!VT.isVector())
3025 unsigned DLenFactor = Subtarget.getDLenFactor();
3026 unsigned Cost;
3027 if (VT.isScalableVector()) {
3028 unsigned LMul;
3029 bool Fractional;
3030 std::tie(LMul, Fractional) =
3032 if (Fractional)
3033 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
3034 else
3035 Cost = (LMul * DLenFactor);
3036 } else {
3037 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
3038 }
3039 return Cost;
3040}
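// Rough worked examples of the cost model above, assuming DLenFactor == 2
// (i.e. DLEN == VLEN/2):
//   scalable LMUL=4 type              -> cost 4 * 2 = 8
//   scalable fractional LMUL=1/2 type -> cost 2 / 2 = 1
//   fixed 256-bit type with VLEN=128  -> cost ceil(256 / (128/2)) = 4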
3041
3042
3043/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
3044/// may be quadratic in the number of vregs implied by LMUL, and is assumed to
3045/// be so by default. VRGatherCostModel reflects the available options. Note that
3046/// the operands (index and possibly mask) are handled separately.
3048 auto LMULCost = getLMULCost(VT);
3049 bool Log2CostModel =
3050 Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
3051 if (Log2CostModel && LMULCost.isValid()) {
3052 unsigned Log = Log2_64(LMULCost.getValue());
3053 if (Log > 0)
3054 return LMULCost * Log;
3055 }
3056 return LMULCost * LMULCost;
3057}
3058
3059/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3060/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3061/// or may track the vrgather.vv cost. It is implementation-dependent.
3065
3066/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3067/// for the type VT. (This does not cover the vslide1up or vslide1down
3068/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3069/// or may track the vrgather.vv cost. It is implementation-dependent.
3073
3074/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3075/// for the type VT. (This does not cover the vslide1up or vslide1down
3076/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3077/// or may track the vrgather.vv cost. It is implementation-dependent.
3081
3083 const RISCVSubtarget &Subtarget) {
3084 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3085 // bf16 conversions are always promoted to f32.
3086 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3087 Op.getValueType() == MVT::bf16) {
3088 bool IsStrict = Op->isStrictFPOpcode();
3089
3090 SDLoc DL(Op);
3091 if (IsStrict) {
3092 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
3093 {Op.getOperand(0), Op.getOperand(1)});
3094 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
3095 {Op.getValueType(), MVT::Other},
3096 {Val.getValue(1), Val.getValue(0),
3097 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
3098 }
3099 return DAG.getNode(
3100 ISD::FP_ROUND, DL, Op.getValueType(),
3101 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
3102 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3103 }
3104
3105 // Other operations are legal.
3106 return Op;
3107}
3108
3110 const RISCVSubtarget &Subtarget) {
3111 // RISC-V FP-to-int conversions saturate to the destination register size, but
3112 // don't produce 0 for nan. We can use a conversion instruction and fix the
3113 // nan case with a compare and a select.
3114 SDValue Src = Op.getOperand(0);
3115
3116 MVT DstVT = Op.getSimpleValueType();
3117 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3118
3119 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3120
3121 if (!DstVT.isVector()) {
3122 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3123 // the result.
3124 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3125 Src.getValueType() == MVT::bf16) {
3126 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3127 }
3128
3129 unsigned Opc;
3130 if (SatVT == DstVT)
3131 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3132 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3133 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3134 else
3135 return SDValue();
3136 // FIXME: Support other SatVTs by clamping before or after the conversion.
3137
3138 SDLoc DL(Op);
3139 SDValue FpToInt = DAG.getNode(
3140 Opc, DL, DstVT, Src,
3142
3143 if (Opc == RISCVISD::FCVT_WU_RV64)
3144 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3145
3146 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3147 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3149 }
3150
3151 // Vectors.
3152
3153 MVT DstEltVT = DstVT.getVectorElementType();
3154 MVT SrcVT = Src.getSimpleValueType();
3155 MVT SrcEltVT = SrcVT.getVectorElementType();
3156 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3157 unsigned DstEltSize = DstEltVT.getSizeInBits();
3158
3159 // Only handle saturating to the destination type.
3160 if (SatVT != DstEltVT)
3161 return SDValue();
3162
3163 MVT DstContainerVT = DstVT;
3164 MVT SrcContainerVT = SrcVT;
3165 if (DstVT.isFixedLengthVector()) {
3166 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3167 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3168 assert(DstContainerVT.getVectorElementCount() ==
3169 SrcContainerVT.getVectorElementCount() &&
3170 "Expected same element count");
3171 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3172 }
3173
3174 SDLoc DL(Op);
3175
3176 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3177
3178 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3179 {Src, Src, DAG.getCondCode(ISD::SETNE),
3180 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3181
3182 // Need to widen by more than 1 step, promote the FP type, then do a widening
3183 // convert.
3184 if (DstEltSize > (2 * SrcEltSize)) {
3185 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3186 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3187 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3188 }
3189
3190 MVT CvtContainerVT = DstContainerVT;
3191 MVT CvtEltVT = DstEltVT;
3192 if (SrcEltSize > (2 * DstEltSize)) {
3193 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3194 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3195 }
3196
3197 unsigned RVVOpc =
3198 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3199 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3200
3201 while (CvtContainerVT != DstContainerVT) {
3202 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3203 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3204 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3205 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3206 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3207 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3208 }
3209
3210 SDValue SplatZero = DAG.getNode(
3211 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3212 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3213 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3214 Res, DAG.getUNDEF(DstContainerVT), VL);
3215
3216 if (DstVT.isFixedLengthVector())
3217 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3218
3219 return Res;
3220}
3221
3223 const RISCVSubtarget &Subtarget) {
3224 bool IsStrict = Op->isStrictFPOpcode();
3225 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3226
3227 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3228 // bf16 conversions are always promoted to f32.
3229 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3230 SrcVal.getValueType() == MVT::bf16) {
3231 SDLoc DL(Op);
3232 if (IsStrict) {
3233 SDValue Ext =
3234 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3235 {Op.getOperand(0), SrcVal});
3236 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3237 {Ext.getValue(1), Ext.getValue(0)});
3238 }
3239 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3240 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3241 }
3242
3243 // Other operations are legal.
3244 return Op;
3245}
3246
3248 switch (Opc) {
3249 case ISD::FROUNDEVEN:
3251 case ISD::VP_FROUNDEVEN:
3252 return RISCVFPRndMode::RNE;
3253 case ISD::FTRUNC:
3254 case ISD::STRICT_FTRUNC:
3255 case ISD::VP_FROUNDTOZERO:
3256 return RISCVFPRndMode::RTZ;
3257 case ISD::FFLOOR:
3258 case ISD::STRICT_FFLOOR:
3259 case ISD::VP_FFLOOR:
3260 return RISCVFPRndMode::RDN;
3261 case ISD::FCEIL:
3262 case ISD::STRICT_FCEIL:
3263 case ISD::VP_FCEIL:
3264 return RISCVFPRndMode::RUP;
3265 case ISD::FROUND:
3266 case ISD::LROUND:
3267 case ISD::LLROUND:
3268 case ISD::STRICT_FROUND:
3269 case ISD::STRICT_LROUND:
3271 case ISD::VP_FROUND:
3272 return RISCVFPRndMode::RMM;
3273 case ISD::FRINT:
3274 case ISD::LRINT:
3275 case ISD::LLRINT:
3276 case ISD::STRICT_FRINT:
3277 case ISD::STRICT_LRINT:
3278 case ISD::STRICT_LLRINT:
3279 case ISD::VP_FRINT:
3280 case ISD::VP_LRINT:
3281 case ISD::VP_LLRINT:
3282 return RISCVFPRndMode::DYN;
3283 }
3284
3286}
3287
3288// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
3289// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3290// the integer domain and back. Taking care to avoid converting values that are
3291// nan or already correct.
3292static SDValue
3294 const RISCVSubtarget &Subtarget) {
3295 MVT VT = Op.getSimpleValueType();
3296 assert(VT.isVector() && "Unexpected type");
3297
3298 SDLoc DL(Op);
3299
3300 SDValue Src = Op.getOperand(0);
3301
3302 // Freeze the source since we are increasing the number of uses.
3303 Src = DAG.getFreeze(Src);
3304
3305 MVT ContainerVT = VT;
3306 if (VT.isFixedLengthVector()) {
3307 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3308 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3309 }
3310
3311 SDValue Mask, VL;
3312 if (Op->isVPOpcode()) {
3313 Mask = Op.getOperand(1);
3314 if (VT.isFixedLengthVector())
3315 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3316 Subtarget);
3317 VL = Op.getOperand(2);
3318 } else {
3319 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3320 }
3321
3322 // We do the conversion on the absolute value and fix the sign at the end.
3323 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3324
3325 // Determine the largest integer that can be represented exactly. This and
3326 // values larger than it don't have any fractional bits so don't need to
3327 // be converted.
3328 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3329 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3330 APFloat MaxVal = APFloat(FltSem);
3331 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3332 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3333 SDValue MaxValNode =
3334 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3335 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3336 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3337
3338 // If abs(Src) was larger than MaxVal or nan, keep it.
3339 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3340 Mask =
3341 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3342 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3343 Mask, Mask, VL});
3344
3345 // Truncate to integer and convert back to FP.
3346 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3347 MVT XLenVT = Subtarget.getXLenVT();
3348 SDValue Truncated;
3349
3350 switch (Op.getOpcode()) {
3351 default:
3352 llvm_unreachable("Unexpected opcode");
3353 case ISD::FRINT:
3354 case ISD::VP_FRINT:
3355 case ISD::FCEIL:
3356 case ISD::VP_FCEIL:
3357 case ISD::FFLOOR:
3358 case ISD::VP_FFLOOR:
3359 case ISD::FROUND:
3360 case ISD::FROUNDEVEN:
3361 case ISD::VP_FROUND:
3362 case ISD::VP_FROUNDEVEN:
3363 case ISD::VP_FROUNDTOZERO: {
3366 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3367 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3368 break;
3369 }
3370 case ISD::FTRUNC:
3371 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3372 Mask, VL);
3373 break;
3374 case ISD::FNEARBYINT:
3375 case ISD::VP_FNEARBYINT:
3376 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3377 Mask, VL);
3378 break;
3379 }
3380
3381 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3382 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3383 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3384 Mask, VL);
3385
3386 // Restore the original sign so that -0.0 is preserved.
3387 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3388 Src, Src, Mask, VL);
3389
3390 if (!VT.isFixedLengthVector())
3391 return Truncated;
3392
3393 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3394}
3395
3396// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3397// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
3398// qNan and converting the new source to integer and back to FP.
3399static SDValue
3401 const RISCVSubtarget &Subtarget) {
3402 SDLoc DL(Op);
3403 MVT VT = Op.getSimpleValueType();
3404 SDValue Chain = Op.getOperand(0);
3405 SDValue Src = Op.getOperand(1);
3406
3407 MVT ContainerVT = VT;
3408 if (VT.isFixedLengthVector()) {
3409 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3410 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3411 }
3412
3413 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3414
3415 // Freeze the source since we are increasing the number of uses.
3416 Src = DAG.getFreeze(Src);
3417
3418 // Convert sNan to qNan by executing x + x for all unordered element x in Src.
3419 MVT MaskVT = Mask.getSimpleValueType();
3420 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3421 DAG.getVTList(MaskVT, MVT::Other),
3422 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3423 DAG.getUNDEF(MaskVT), Mask, VL});
3424 Chain = Unorder.getValue(1);
3425 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3426 DAG.getVTList(ContainerVT, MVT::Other),
3427 {Chain, Src, Src, Src, Unorder, VL});
3428 Chain = Src.getValue(1);
3429
3430 // We do the conversion on the absolute value and fix the sign at the end.
3431 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3432
3433 // Determine the largest integer that can be represented exactly. This and
3434 // values larger than it don't have any fractional bits so don't need to
3435 // be converted.
3436 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3437 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3438 APFloat MaxVal = APFloat(FltSem);
3439 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3440 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3441 SDValue MaxValNode =
3442 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3443 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3444 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3445
3446 // If abs(Src) was larger than MaxVal or nan, keep it.
3447 Mask = DAG.getNode(
3448 RISCVISD::SETCC_VL, DL, MaskVT,
3449 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3450
3451 // Truncate to integer and convert back to FP.
3452 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3453 MVT XLenVT = Subtarget.getXLenVT();
3454 SDValue Truncated;
3455
3456 switch (Op.getOpcode()) {
3457 default:
3458 llvm_unreachable("Unexpected opcode");
3459 case ISD::STRICT_FCEIL:
3460 case ISD::STRICT_FFLOOR:
3461 case ISD::STRICT_FROUND:
3465 Truncated = DAG.getNode(
3466 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3467 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3468 break;
3469 }
3470 case ISD::STRICT_FTRUNC:
3471 Truncated =
3472 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3473 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3474 break;
3476 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3477 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3478 Mask, VL);
3479 break;
3480 }
3481 Chain = Truncated.getValue(1);
3482
3483 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3484 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3485 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3486 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3487 Truncated, Mask, VL);
3488 Chain = Truncated.getValue(1);
3489 }
3490
3491 // Restore the original sign so that -0.0 is preserved.
3492 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3493 Src, Src, Mask, VL);
3494
3495 if (VT.isFixedLengthVector())
3496 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3497 return DAG.getMergeValues({Truncated, Chain}, DL);
3498}
3499
3500static SDValue
3502 const RISCVSubtarget &Subtarget) {
3503 MVT VT = Op.getSimpleValueType();
3504 if (VT.isVector())
3505 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3506
3507 if (DAG.shouldOptForSize())
3508 return SDValue();
3509
3510 SDLoc DL(Op);
3511 SDValue Src = Op.getOperand(0);
3512
3513 // Create an integer the size of the mantissa with the MSB set. This and all
3514 // values larger than it don't have any fractional bits so don't need to be
3515 // converted.
3516 const fltSemantics &FltSem = VT.getFltSemantics();
3517 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3518 APFloat MaxVal = APFloat(FltSem);
3519 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3520 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3521 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3522
3524 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3525 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3526}
3527
3528// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3530 const RISCVSubtarget &Subtarget) {
3531 SDLoc DL(Op);
3532 MVT DstVT = Op.getSimpleValueType();
3533 SDValue Src = Op.getOperand(0);
3534 MVT SrcVT = Src.getSimpleValueType();
3535 assert(SrcVT.isVector() && DstVT.isVector() &&
3536 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3537 "Unexpected type");
3538
3539 MVT DstContainerVT = DstVT;
3540 MVT SrcContainerVT = SrcVT;
3541
3542 if (DstVT.isFixedLengthVector()) {
3543 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3544 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3545 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3546 }
3547
3548 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3549
3550 // [b]f16 -> f32
3551 MVT SrcElemType = SrcVT.getVectorElementType();
3552 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3553 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3554 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3555 }
3556
3557 SDValue Res =
3558 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3559 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3560 Subtarget.getXLenVT()),
3561 VL);
3562
3563 if (!DstVT.isFixedLengthVector())
3564 return Res;
3565
3566 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3567}
3568
3569static SDValue
3571 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3572 SDValue Offset, SDValue Mask, SDValue VL,
3574 if (Passthru.isUndef())
3576 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3577 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3578 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3579}
3580
3581static SDValue
3582getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3583 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3584 SDValue VL,
3585 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3586 if (Passthru.isUndef())
3587 Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3588 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3589 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3590 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3591}
3592
3593struct VIDSequence {
3594 int64_t StepNumerator;
3595 unsigned StepDenominator;
3596 int64_t Addend;
3597};
3598
3599static std::optional<APInt> getExactInteger(const APFloat &APF,
3600 uint32_t BitWidth) {
3601 // We will use a SINT_TO_FP to materialize this constant so we should use a
3602 // signed APSInt here.
3603 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3604 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3605 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3606 // the rounding mode changes the output value, then it is not an exact
3607 // integer.
3608 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3609 bool IsExact;
3610 // If it is out of signed integer range, it will return an invalid operation.
3611 // If it is not an exact integer, IsExact is false.
3612 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3613 APFloat::opInvalidOp) ||
3614 !IsExact)
3615 return std::nullopt;
3616 return ValInt.extractBits(BitWidth, 0);
3617}
3618
3619// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3620// to the (non-zero) step S and start value X. This can be then lowered as the
3621// RVV sequence (VID * S) + X, for example.
3622// The step S is represented as an integer numerator divided by a positive
3623// denominator. Note that the implementation currently only identifies
3624// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3625// cannot detect 2/3, for example.
3626// Note that this method will also match potentially unappealing index
3627// sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3628// determine whether this is worth generating code for.
3629//
3630// EltSizeInBits is the size of the type that the sequence will be calculated
3631// in, i.e. SEW for build_vectors or XLEN for address calculations.
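// For example, <i32 1, i32 3, i32 5, i32 7> is matched with StepNumerator=2,
// StepDenominator=1 and Addend=1, while <i32 2, i32 2, i32 3, i32 3> is matched
// with StepNumerator=1, StepDenominator=2 and Addend=2.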
3632static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3633 unsigned EltSizeInBits) {
3634 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3635 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3636 return std::nullopt;
3637 bool IsInteger = Op.getValueType().isInteger();
3638
3639 std::optional<unsigned> SeqStepDenom;
3640 std::optional<APInt> SeqStepNum;
3641 std::optional<APInt> SeqAddend;
3642 std::optional<std::pair<APInt, unsigned>> PrevElt;
3643 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3644
3645 // First extract the ops into a list of constant integer values. This may not
3646 // be possible for floats if they're not all representable as integers.
3647 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3648 const unsigned OpSize = Op.getScalarValueSizeInBits();
3649 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3650 if (Elt.isUndef()) {
3651 Elts[Idx] = std::nullopt;
3652 continue;
3653 }
3654 if (IsInteger) {
3655 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3656 } else {
3657 auto ExactInteger =
3658 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3659 if (!ExactInteger)
3660 return std::nullopt;
3661 Elts[Idx] = *ExactInteger;
3662 }
3663 }
3664
3665 for (auto [Idx, Elt] : enumerate(Elts)) {
3666 // Assume undef elements match the sequence; we just have to be careful
3667 // when interpolating across them.
3668 if (!Elt)
3669 continue;
3670
3671 if (PrevElt) {
3672 // Calculate the step since the last non-undef element, and ensure
3673 // it's consistent across the entire sequence.
3674 unsigned IdxDiff = Idx - PrevElt->second;
3675 APInt ValDiff = *Elt - PrevElt->first;
3676
3677 // A zero value difference means that we're somewhere in the middle
3678 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3679 // step change before evaluating the sequence.
3680 if (ValDiff == 0)
3681 continue;
3682
3683 int64_t Remainder = ValDiff.srem(IdxDiff);
3684 // Normalize the step if it's greater than 1.
3685 if (Remainder != ValDiff.getSExtValue()) {
3686 // The difference must cleanly divide the element span.
3687 if (Remainder != 0)
3688 return std::nullopt;
3689 ValDiff = ValDiff.sdiv(IdxDiff);
3690 IdxDiff = 1;
3691 }
3692
3693 if (!SeqStepNum)
3694 SeqStepNum = ValDiff;
3695 else if (ValDiff != SeqStepNum)
3696 return std::nullopt;
3697
3698 if (!SeqStepDenom)
3699 SeqStepDenom = IdxDiff;
3700 else if (IdxDiff != *SeqStepDenom)
3701 return std::nullopt;
3702 }
3703
3704 // Record this non-undef element for later.
3705 if (!PrevElt || PrevElt->first != *Elt)
3706 PrevElt = std::make_pair(*Elt, Idx);
3707 }
3708
3709 // We need to have logged a step for this to count as a legal index sequence.
3710 if (!SeqStepNum || !SeqStepDenom)
3711 return std::nullopt;
3712
3713 // Loop back through the sequence and validate elements we might have skipped
3714 // while waiting for a valid step. While doing this, log any sequence addend.
3715 for (auto [Idx, Elt] : enumerate(Elts)) {
3716 if (!Elt)
3717 continue;
3718 APInt ExpectedVal =
3719 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3720 *SeqStepNum)
3721 .sdiv(*SeqStepDenom);
3722
3723 APInt Addend = *Elt - ExpectedVal;
3724 if (!SeqAddend)
3725 SeqAddend = Addend;
3726 else if (Addend != SeqAddend)
3727 return std::nullopt;
3728 }
3729
3730 assert(SeqAddend && "Must have an addend if we have a step");
3731
3732 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3733 SeqAddend->getSExtValue()};
3734}
3735
3736// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3737// and lower it as a VRGATHER_VX_VL from the source vector.
3738static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3739 SelectionDAG &DAG,
3740 const RISCVSubtarget &Subtarget) {
3741 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3742 return SDValue();
3743 SDValue Src = SplatVal.getOperand(0);
3744 // Don't perform this optimization for i1 vectors, or if the element types are
3745 // different
3746 // FIXME: Support i1 vectors, maybe by promoting to i8?
3747 MVT EltTy = VT.getVectorElementType();
3748 if (EltTy == MVT::i1 ||
3749 !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
3750 return SDValue();
3751 MVT SrcVT = Src.getSimpleValueType();
3752 if (EltTy != SrcVT.getVectorElementType())
3753 return SDValue();
3754 SDValue Idx = SplatVal.getOperand(1);
3755 // The index must be a legal type.
3756 if (Idx.getValueType() != Subtarget.getXLenVT())
3757 return SDValue();
3758
3759 // Check that we know Idx lies within VT
3760 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3761 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3762 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3763 return SDValue();
3764 }
3765
3766 // Convert fixed length vectors to scalable
3767 MVT ContainerVT = VT;
3768 if (VT.isFixedLengthVector())
3769 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3770
3771 MVT SrcContainerVT = SrcVT;
3772 if (SrcVT.isFixedLengthVector()) {
3773 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3774 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3775 }
3776
3777 // Put Vec in a VT sized vector
3778 if (SrcContainerVT.getVectorMinNumElements() <
3779 ContainerVT.getVectorMinNumElements())
3780 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
3781 else
3782 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
3783
3784 // We checked that Idx fits inside VT earlier
3785 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3786 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3787 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3788 if (VT.isFixedLengthVector())
3789 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3790 return Gather;
3791}
3792
3793static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
3794 const RISCVSubtarget &Subtarget) {
3795 MVT VT = Op.getSimpleValueType();
3796 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3797
3798 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3799
3800 SDLoc DL(Op);
3801 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3802
3803 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3804 int64_t StepNumerator = SimpleVID->StepNumerator;
3805 unsigned StepDenominator = SimpleVID->StepDenominator;
3806 int64_t Addend = SimpleVID->Addend;
3807
3808 assert(StepNumerator != 0 && "Invalid step");
3809 bool Negate = false;
3810 int64_t SplatStepVal = StepNumerator;
3811 unsigned StepOpcode = ISD::MUL;
3812 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3813 // anyway as the shift of 63 won't fit in uimm5.
3814 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3815 isPowerOf2_64(std::abs(StepNumerator))) {
3816 Negate = StepNumerator < 0;
3817 StepOpcode = ISD::SHL;
3818 SplatStepVal = Log2_64(std::abs(StepNumerator));
3819 }
3820
3821 // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
3822 // since it's the immediate value many RVV instructions accept. There is
3823 // no vmul.vi instruction, so ensure the multiply constant can fit in a
3824 // single addi instruction. For the addend, we allow up to 32 bits.
3825 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3826 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3827 isPowerOf2_32(StepDenominator) &&
3828 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
3829 MVT VIDVT =
3830 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3831 MVT VIDContainerVT =
3832 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3833 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3834 // Convert right out of the scalable type so we can use standard ISD
3835 // nodes for the rest of the computation. If we used scalable types with
3836 // these, we'd lose the fixed-length vector info and generate worse
3837 // vsetvli code.
3838 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3839 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3840 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3841 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3842 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3843 }
3844 if (StepDenominator != 1) {
3845 SDValue SplatStep =
3846 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3847 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3848 }
3849 if (Addend != 0 || Negate) {
3850 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3851 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3852 VID);
3853 }
3854 if (VT.isFloatingPoint()) {
3855 // TODO: Use vfwcvt to reduce register pressure.
3856 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3857 }
3858 return VID;
3859 }
3860 }
3861
3862 return SDValue();
3863}
3864
3865/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3866/// which constitute a large proportion of the elements. In such cases we can
3867/// splat a vector with the dominant element and make up the shortfall with
3868/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3869/// Note that this includes vectors of 2 elements by association. The
3870/// upper-most element is the "dominant" one, allowing us to use a splat to
3871/// "insert" the upper element, and an insert of the lower element at position
3872/// 0, which improves codegen.
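/// For example, <i32 5, i32 5, i32 9, i32 5> is lowered as a splat of 5
/// followed by a single INSERT_VECTOR_ELT of 9 at index 2.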
3873static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3874 const RISCVSubtarget &Subtarget) {
3875 MVT VT = Op.getSimpleValueType();
3876 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3877
3878 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3879
3880 SDLoc DL(Op);
3881 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3882
3883 MVT XLenVT = Subtarget.getXLenVT();
3884 unsigned NumElts = Op.getNumOperands();
3885
3886 SDValue DominantValue;
3887 unsigned MostCommonCount = 0;
3888 DenseMap<SDValue, unsigned> ValueCounts;
3889 unsigned NumUndefElts =
3890 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3891
3892 // Track the number of scalar loads we know we'd be inserting, estimated as
3893 // any non-zero floating-point constant. Other kinds of element are either
3894 // already in registers or are materialized on demand. The threshold at which
3895 // a vector load is more desirable than several scalar materializations and
3896 // vector-insertion instructions is not known.
3897 unsigned NumScalarLoads = 0;
3898
3899 for (SDValue V : Op->op_values()) {
3900 if (V.isUndef())
3901 continue;
3902
3903 unsigned &Count = ValueCounts[V];
3904 if (0 == Count)
3905 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3906 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3907
3908 // Is this value dominant? In case of a tie, prefer the highest element as
3909 // it's cheaper to insert near the beginning of a vector than it is at the
3910 // end.
3911 if (++Count >= MostCommonCount) {
3912 DominantValue = V;
3913 MostCommonCount = Count;
3914 }
3915 }
3916
3917 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3918 unsigned NumDefElts = NumElts - NumUndefElts;
3919 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3920
3921 // Don't perform this optimization when optimizing for size, since
3922 // materializing elements and inserting them tends to cause code bloat.
3923 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3924 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3925 ((MostCommonCount > DominantValueCountThreshold) ||
3926 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3927 // Start by splatting the most common element.
3928 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3929
3930 DenseSet<SDValue> Processed{DominantValue};
3931
3932 // We can handle an insert into the last element (of a splat) via
3933 // v(f)slide1down. This is slightly better than the vslideup insert
3934 // lowering as it avoids the need for a vector group temporary. It
3935 // is also better than using vmerge.vx as it avoids the need to
3936 // materialize the mask in a vector register.
3937 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3938 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3939 LastOp != DominantValue) {
3940 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3941 auto OpCode =
3942 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3943 if (!VT.isFloatingPoint())
3944 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3945 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3946 LastOp, Mask, VL);
3947 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3948 Processed.insert(LastOp);
3949 }
3950
3951 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3952 for (const auto &OpIdx : enumerate(Op->ops())) {
3953 const SDValue &V = OpIdx.value();
3954 if (V.isUndef() || !Processed.insert(V).second)
3955 continue;
3956 if (ValueCounts[V] == 1) {
3957 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
3958 } else {
3959 // Blend in all instances of this value using a VSELECT, using a
3960 // mask where each bit signals whether that element is the one
3961 // we're after.
3962 SmallVector<SDValue> Ops;
3963 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3964 return DAG.getConstant(V == V1, DL, XLenVT);
3965 });
3966 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3967 DAG.getBuildVector(SelMaskTy, DL, Ops),
3968 DAG.getSplatBuildVector(VT, DL, V), Vec);
3969 }
3970 }
3971
3972 return Vec;
3973 }
3974
3975 return SDValue();
3976}
3977
3978static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3979 const RISCVSubtarget &Subtarget) {
3980 MVT VT = Op.getSimpleValueType();
3981 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3982
3983 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3984
3985 SDLoc DL(Op);
3986 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3987
3988 MVT XLenVT = Subtarget.getXLenVT();
3989 unsigned NumElts = Op.getNumOperands();
3990
3991 if (VT.getVectorElementType() == MVT::i1) {
3992 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3993 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3994 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3995 }
3996
3997 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3998 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3999 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
4000 }
4001
4002 // Lower constant mask BUILD_VECTORs via an integer vector type, in
4003 // scalar integer chunks whose bit-width depends on the number of mask
4004 // bits and XLEN.
4005 // First, determine the most appropriate scalar integer type to use. This
4006 // is at most XLenVT, but may be shrunk to a smaller vector element type
4007 // according to the size of the final vector - use i8 chunks rather than
4008 // XLenVT if we're producing a v8i1. This results in more consistent
4009 // codegen across RV32 and RV64.
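 // For example, a constant v32i1 mask becomes a v1i32 build_vector whose
 // single element packs the 32 mask bits LSB-first, and the result is then
 // bitcast back to v32i1.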
4010 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4011 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4012 // If we have to use more than one INSERT_VECTOR_ELT then this
4013 // optimization is likely to increase code size; avoid performing it in
4014 // such a case. We can use a load from a constant pool in this case.
4015 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4016 return SDValue();
4017 // Now we can create our integer vector type. Note that it may be larger
4018 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4019 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4020 MVT IntegerViaVecVT =
4021 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4022 IntegerViaVecElts);
4023
4024 uint64_t Bits = 0;
4025 unsigned BitPos = 0, IntegerEltIdx = 0;
4026 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4027
4028 for (unsigned I = 0; I < NumElts;) {
4029 SDValue V = Op.getOperand(I);
4030 bool BitValue = !V.isUndef() && V->getAsZExtVal();
4031 Bits |= ((uint64_t)BitValue << BitPos);
4032 ++BitPos;
4033 ++I;
4034
4035 // Once we accumulate enough bits to fill our scalar type or process the
4036 // last element, insert into our vector and clear our accumulated data.
4037 if (I % NumViaIntegerBits == 0 || I == NumElts) {
4038 if (NumViaIntegerBits <= 32)
4039 Bits = SignExtend64<32>(Bits);
4040 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4041 Elts[IntegerEltIdx] = Elt;
4042 Bits = 0;
4043 BitPos = 0;
4044 IntegerEltIdx++;
4045 }
4046 }
4047
4048 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4049
4050 if (NumElts < NumViaIntegerBits) {
4051 // If we're producing a smaller vector than our minimum legal integer
4052 // type, bitcast to the equivalent (known-legal) mask type, and extract
4053 // our final mask.
4054 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4055 Vec = DAG.getBitcast(MVT::v8i1, Vec);
4056 Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4057 } else {
4058 // Else we must have produced an integer type with the same size as the
4059 // mask type; bitcast for the final result.
4060 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4061 Vec = DAG.getBitcast(VT, Vec);
4062 }
4063
4064 return Vec;
4065 }
4066
4067 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4068 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4069 : RISCVISD::VMV_V_X_VL;
4070 if (!VT.isFloatingPoint())
4071 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4072 Splat =
4073 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4074 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4075 }
4076
4077 // Try and match index sequences, which we can lower to the vid instruction
4078 // with optional modifications. An all-undef vector is matched by
4079 // getSplatValue, above.
4080 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4081 return Res;
4082
4083 // For very small build_vectors, use a single scalar insert of a constant.
4084 // TODO: Base this on constant rematerialization cost, not size.
4085 const unsigned EltBitSize = VT.getScalarSizeInBits();
4086 if (VT.getSizeInBits() <= 32 &&
4087 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
4088 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4089 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4090 "Unexpected sequence type");
4091 // If we can use the original VL with the modified element type, this
4092 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4093 // be moved into InsertVSETVLI?
4094 unsigned ViaVecLen =
4095 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4096 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4097
4098 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4099 uint64_t SplatValue = 0;
4100 // Construct the amalgamated value at this larger vector type.
4101 for (const auto &OpIdx : enumerate(Op->op_values())) {
4102 const auto &SeqV = OpIdx.value();
4103 if (!SeqV.isUndef())
4104 SplatValue |=
4105 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4106 }
4107
4108 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4109 // achieve better constant materialization.
4110 // On RV32, we need to sign-extend to use getSignedConstant.
4111 if (ViaIntVT == MVT::i32)
4112 SplatValue = SignExtend64<32>(SplatValue);
4113
4114 SDValue Vec = DAG.getInsertVectorElt(
4115 DL, DAG.getUNDEF(ViaVecVT),
4116 DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4117 if (ViaVecLen != 1)
4118 Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4119 return DAG.getBitcast(VT, Vec);
4120 }
4121
4122
4123 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4124 // when re-interpreted as a vector with a larger element type. For example,
4125 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4126 // could be instead splat as
4127 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4128 // TODO: This optimization could also work on non-constant splats, but it
4129 // would require bit-manipulation instructions to construct the splat value.
4130 SmallVector<SDValue> Sequence;
4131 const auto *BV = cast<BuildVectorSDNode>(Op);
4132 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4133 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
4134 BV->getRepeatedSequence(Sequence) &&
4135 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4136 unsigned SeqLen = Sequence.size();
4137 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4138 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4139 ViaIntVT == MVT::i64) &&
4140 "Unexpected sequence type");
4141
4142 // If we can use the original VL with the modified element type, this
4143 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4144 // be moved into InsertVSETVLI?
4145 const unsigned RequiredVL = NumElts / SeqLen;
4146 const unsigned ViaVecLen =
4147 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4148 NumElts : RequiredVL;
4149 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4150
4151 unsigned EltIdx = 0;
4152 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4153 uint64_t SplatValue = 0;
4154 // Construct the amalgamated value which can be splatted as this larger
4155 // vector type.
4156 for (const auto &SeqV : Sequence) {
4157 if (!SeqV.isUndef())
4158 SplatValue |=
4159 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4160 EltIdx++;
4161 }
4162
4163 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4164 // achieve better constant materialization.
4165 // On RV32, we need to sign-extend to use getSignedConstant.
4166 if (ViaIntVT == MVT::i32)
4167 SplatValue = SignExtend64<32>(SplatValue);
4168
4169 // Since we can't introduce illegal i64 types at this stage, we can only
4170 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4171 // way we can use RVV instructions to splat.
4172 assert((ViaIntVT.bitsLE(XLenVT) ||
4173 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4174 "Unexpected bitcast sequence");
4175 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4176 SDValue ViaVL =
4177 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4178 MVT ViaContainerVT =
4179 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4180 SDValue Splat =
4181 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4182 DAG.getUNDEF(ViaContainerVT),
4183 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4184 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4185 if (ViaVecLen != RequiredVL)
4186 Splat = DAG.getExtractSubvector(
4187 DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4188 return DAG.getBitcast(VT, Splat);
4189 }
4190 }
4191
4192 // If the number of signbits allows, see if we can lower as a <N x i8>.
4193 // Our main goal here is to reduce LMUL (and thus work) required to
4194 // build the constant, but we will also narrow if the resulting
4195 // narrow vector is known to materialize cheaply.
4196 // TODO: We really should be costing the smaller vector. There are
4197 // profitable cases this misses.
4198 if (EltBitSize > 8 && VT.isInteger() &&
4199 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4200 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4201 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4202 DL, Op->ops());
4203 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4204 Source, DAG, Subtarget);
4205 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4206 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4207 }
4208
4209 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4210 return Res;
4211
4212 // For constant vectors, use generic constant pool lowering. Otherwise,
4213 // we'd have to materialize constants in GPRs just to move them into the
4214 // vector.
4215 return SDValue();
4216}
4217
4218static unsigned getPACKOpcode(unsigned DestBW,
4219 const RISCVSubtarget &Subtarget) {
4220 switch (DestBW) {
4221 default:
4222 llvm_unreachable("Unsupported pack size");
4223 case 16:
4224 return RISCV::PACKH;
4225 case 32:
4226 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4227 case 64:
4228 assert(Subtarget.is64Bit());
4229 return RISCV::PACK;
4230 }
4231}
4232
4233/// Double the element size of the build vector to reduce the number
4234/// of vslide1down in the build vector chain. In the worst case, this
4235/// trades three scalar operations for 1 vector operation. Scalar
4236/// operations are generally lower latency, and for out-of-order cores
4237/// we also benefit from additional parallelism.
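/// For example, a v8i8 build_vector is rebuilt as a v4i16 build_vector in
/// which each element packs a pair of adjacent bytes (low byte first),
/// roughly halving the length of the vslide1down chain.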
4238static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4239 const RISCVSubtarget &Subtarget) {
4240 SDLoc DL(Op);
4241 MVT VT = Op.getSimpleValueType();
4242 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4243 MVT ElemVT = VT.getVectorElementType();
4244 if (!ElemVT.isInteger())
4245 return SDValue();
4246
4247 // TODO: Relax these architectural restrictions, possibly with costing
4248 // of the actual instructions required.
4249 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4250 return SDValue();
4251
4252 unsigned NumElts = VT.getVectorNumElements();
4253 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4254 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4255 NumElts % 2 != 0)
4256 return SDValue();
4257
4258 // Produce [B,A] packed into a type twice as wide. Note that all
4259 // scalars are XLenVT, possibly masked (see below).
4260 MVT XLenVT = Subtarget.getXLenVT();
4261 SDValue Mask = DAG.getConstant(
4262 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4263 auto pack = [&](SDValue A, SDValue B) {
4264 // Bias the scheduling of the inserted operations to near the
4265 // definition of the element - this tends to reduce register
4266 // pressure overall.
4267 SDLoc ElemDL(B);
4268 if (Subtarget.hasStdExtZbkb())
4269 // Note that we're relying on the high bits of the result being
4270 // don't care. For PACKW, the result is *sign* extended.
4271 return SDValue(
4272 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4273 ElemDL, XLenVT, A, B),
4274 0);
4275
4276 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4277 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4278 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4279 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4280 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4281 SDNodeFlags::Disjoint);
4282 };
4283
4284 SmallVector<SDValue> NewOperands;
4285 NewOperands.reserve(NumElts / 2);
4286 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4287 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4288 assert(NumElts == NewOperands.size() * 2);
4289 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4290 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4291 return DAG.getNode(ISD::BITCAST, DL, VT,
4292 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4293}
4294
4295static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4296 const RISCVSubtarget &Subtarget) {
4297 MVT VT = Op.getSimpleValueType();
4298 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4299
4300 MVT EltVT = VT.getVectorElementType();
4301 MVT XLenVT = Subtarget.getXLenVT();
4302
4303 SDLoc DL(Op);
4304
4305 // Proper support for f16 requires Zvfh. bf16 always requires special
4306 // handling. We need to cast the scalar to integer and create an integer
4307 // build_vector.
4308 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4309 MVT IVT = VT.changeVectorElementType(MVT::i16);
4310 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4311 for (const auto &[I, U] : enumerate(Op->ops())) {
4312 SDValue Elem = U.get();
4313 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4314 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4315 // Called by LegalizeDAG, we need to use XLenVT operations since we
4316 // can't create illegal types.
4317 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4318 // Manually constant fold so the integer build_vector can be lowered
4319 // better. Waiting for DAGCombine will be too late.
4320 APInt V =
4321 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4322 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4323 } else {
4324 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4325 }
4326 } else {
4327 // Called by scalar type legalizer, we can use i16.
4328 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4329 }
4330 }
4331 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4332 return DAG.getBitcast(VT, Res);
4333 }
4334
4335 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4336 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4337 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4338
4339 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4340
4341 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4342
4343 if (VT.getVectorElementType() == MVT::i1) {
4344 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4345 // vector type, we have a legal equivalently-sized i8 type, so we can use
4346 // that.
4347 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4348 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4349
4350 SDValue WideVec;
4351 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4352 // For a splat, perform a scalar truncate before creating the wider
4353 // vector.
4354 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4355 DAG.getConstant(1, DL, Splat.getValueType()));
4356 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4357 } else {
4358 SmallVector<SDValue, 8> Ops(Op->op_values());
4359 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4360 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4361 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4362 }
4363
4364 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4365 }
4366
4367 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4368 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4369 return Gather;
4370
4371 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4372 // pressure at high LMUL.
4373 if (all_of(Op->ops().drop_front(),
4374 [](const SDUse &U) { return U.get().isUndef(); })) {
4375 unsigned Opc =
4376 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4377 if (!VT.isFloatingPoint())
4378 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4379 Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4380 Splat, VL);
4381 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4382 }
4383
4384 unsigned Opc =
4385 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4386 if (!VT.isFloatingPoint())
4387 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4388 Splat =
4389 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4390 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4391 }
4392
4393 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4394 return Res;
4395
4396 // If we're compiling for an exact VLEN value, we can split our work per
4397 // register in the register group.
4398 if (const auto VLen = Subtarget.getRealVLen();
4399 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4400 MVT ElemVT = VT.getVectorElementType();
4401 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4402 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4403 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4404 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4405 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4406
4407 // The following semantically builds up a fixed length concat_vector
4408 // of the component build_vectors. We eagerly lower to scalable and
4409 // insert_subvector here to avoid DAG combining it back to a large
4410 // build_vector.
4411 SmallVector<SDValue> BuildVectorOps(Op->ops());
4412 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4413 SDValue Vec = DAG.getUNDEF(ContainerVT);
4414 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4415 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4416 SDValue SubBV =
4417 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4418 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4419 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4420 Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4421 }
4422 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4423 }
4424
4425 // If we're about to resort to vslide1down (or stack usage), pack our
4426 // elements into the widest scalar type we can. This will force a VL/VTYPE
4427 // toggle, but reduces the critical path, the number of vslide1down ops
4428 // required, and possibly enables scalar folds of the values.
4429 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4430 return Res;
4431
4432 // For m1 vectors, if we have non-undef values in both halves of our vector,
4433 // split the vector into low and high halves, build them separately, then
4434 // use a vselect to combine them. For long vectors, this cuts the critical
4435 // path of the vslide1down sequence in half, and gives us an opportunity
4436 // to special case each half independently. Note that we don't change the
4437 // length of the sub-vectors here, so if both fallback to the generic
4438 // vslide1down path, we should be able to fold the vselect into the final
4439 // vslidedown (for the undef tail) for the first half w/ masking.
4440 unsigned NumElts = VT.getVectorNumElements();
4441 unsigned NumUndefElts =
4442 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4443 unsigned NumDefElts = NumElts - NumUndefElts;
4444 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4445 ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4446 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4447 SmallVector<SDValue> MaskVals;
4448 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4449 SubVecAOps.reserve(NumElts);
4450 SubVecBOps.reserve(NumElts);
4451 for (const auto &[Idx, U] : enumerate(Op->ops())) {
4452 SDValue Elem = U.get();
4453 if (Idx < NumElts / 2) {
4454 SubVecAOps.push_back(Elem);
4455 SubVecBOps.push_back(UndefElem);
4456 } else {
4457 SubVecAOps.push_back(UndefElem);
4458 SubVecBOps.push_back(Elem);
4459 }
4460 bool SelectMaskVal = (Idx < NumElts / 2);
4461 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4462 }
4463 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4464 MaskVals.size() == NumElts);
4465
4466 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4467 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4468 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4469 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4470 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4471 }
4472
4473 // Cap the cost at a value linear to the number of elements in the vector.
4474 // The default lowering is to use the stack. The vector store + scalar loads
4475 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4476 // being (at least) linear in LMUL. As a result, using the vslidedown
4477 // lowering for every element ends up being VL*LMUL.
4478 // TODO: Should we be directly costing the stack alternative? Doing so might
4479 // give us a more accurate upper bound.
4480 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4481
4482 // TODO: unify with TTI getSlideCost.
4483 InstructionCost PerSlideCost = 1;
4484 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4485 default: break;
4486 case RISCVVType::LMUL_2:
4487 PerSlideCost = 2;
4488 break;
4489 case RISCVVType::LMUL_4:
4490 PerSlideCost = 4;
4491 break;
4492 case RISCVVType::LMUL_8:
4493 PerSlideCost = 8;
4494 break;
4495 }
4496
4497 // TODO: Should we be using the build instseq then cost + evaluate scheme
4498 // we use for integer constants here?
4499 unsigned UndefCount = 0;
4500 for (const SDValue &V : Op->ops()) {
4501 if (V.isUndef()) {
4502 UndefCount++;
4503 continue;
4504 }
4505 if (UndefCount) {
4506 LinearBudget -= PerSlideCost;
4507 UndefCount = 0;
4508 }
4509 LinearBudget -= PerSlideCost;
4510 }
4511 if (UndefCount) {
4512 LinearBudget -= PerSlideCost;
4513 }
4514
4515 if (LinearBudget < 0)
4516 return SDValue();
4517
4518 assert((!VT.isFloatingPoint() ||
4519 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4520 "Illegal type which will result in reserved encoding");
4521
4522 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4523
4524 // General case: splat the first operand and slide other operands down one
4525 // by one to form a vector. Alternatively, if every operand is an
4526 // extraction from element 0 of a vector, we use that vector from the last
4527 // extraction as the start value and slide up instead of slide down. This way
4528 // (1) we avoid the initial splat, and (2) those vslide1up can later be turned
4529 // into a vslideup by 1, eliminating the vector-to-scalar movement, which is
4530 // something we cannot do with vslide1down/vslidedown.
4531 // Of course, using vslide1up/vslideup might increase the register pressure,
4532 // and that's why we conservatively limit to cases where every operand is an
4533 // extraction from the first element.
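 // For example, for (build_vector (extractelt A, 0), (extractelt B, 0),
 // (extractelt C, 0)) we can start from vector C, whose element 0 is already
 // the last result element, and vslide1up the remaining scalars B[0] and then
 // A[0] instead of splatting first.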
4534 SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
4535 SDValue EVec;
4536 bool SlideUp = false;
4537 auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
4538 SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
4539 if (SlideUp)
4540 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4541 Mask, VL, Policy);
4542 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4543 Mask, VL, Policy);
4544 };
4545
4546 // The reason we don't use all_of here is because we're also capturing EVec
4547 // from the last non-undef operand. If the std::execution_policy of the
4548 // underlying std::all_of is anything but std::sequenced_policy we might
4549 // capture the wrong EVec.
4550 for (SDValue V : Operands) {
4551 using namespace SDPatternMatch;
4552 SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
4553 if (!SlideUp)
4554 break;
4555 }
4556
4557 if (SlideUp) {
4558 MVT EVecContainerVT = EVec.getSimpleValueType();
4559 // Make sure the original vector has scalable vector type.
4560 if (EVecContainerVT.isFixedLengthVector()) {
4561 EVecContainerVT =
4562 getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
4563 EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
4564 }
4565
4566 // Adapt EVec's type into ContainerVT.
4567 if (EVecContainerVT.getVectorMinNumElements() <
4568 ContainerVT.getVectorMinNumElements())
4569 EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
4570 else
4571 EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
4572
4573 // Reverse the elements as we're going to slide up from the last element.
4574 std::reverse(Operands.begin(), Operands.end());
4575 }
4576
4577 SDValue Vec;
4578 UndefCount = 0;
4579 for (SDValue V : Operands) {
4580 if (V.isUndef()) {
4581 UndefCount++;
4582 continue;
4583 }
4584
4585 // Start our sequence with either a TA splat or extract source in the
4586 // hopes that hardware is able to recognize there's no dependency on the
4587 // prior value of our temporary register.
4588 if (!Vec) {
4589 if (SlideUp) {
4590 Vec = EVec;
4591 } else {
4592 Vec = DAG.getSplatVector(VT, DL, V);
4593 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4594 }
4595
4596 UndefCount = 0;
4597 continue;
4598 }
4599
4600 if (UndefCount) {
4601 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4602 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4603 VL);
4604 UndefCount = 0;
4605 }
4606
4607 unsigned Opcode;
4608 if (VT.isFloatingPoint())
4609 Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
4610 else
4611 Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
4612
4613 if (!VT.isFloatingPoint())
4614 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4615 Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4616 V, Mask, VL);
4617 }
4618 if (UndefCount) {
4619 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4620 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4621 VL);
4622 }
4623 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4624}
4625
4626static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4627 SDValue Lo, SDValue Hi, SDValue VL,
4628 SelectionDAG &DAG) {
4629 if (!Passthru)
4630 Passthru = DAG.getUNDEF(VT);
4631 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4632 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4633 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4634 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4635 // node in order to try and match RVV vector/scalar instructions.
4636 if ((LoC >> 31) == HiC)
4637 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4638
4639 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
4640 // VL. This can temporarily increase VL if VL is less than VLMAX.
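 // For example, on RV32 an i64 splat of 0x0000000500000005 (Lo == Hi == 5) is
 // emitted as an i32 splat of 5 over twice as many elements.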
4641 if (LoC == HiC) {
4642 SDValue NewVL;
4643 if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4644 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4645 else
4646 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4647 MVT InterVT =
4648 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4649 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4650 DAG.getUNDEF(InterVT), Lo, NewVL);
4651 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4652 }
4653 }
4654
4655 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
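 // For example, when splatting (sext i32 %x to i64), Hi is (sra %x, 31); since
 // vmv.v.x sign-extends the scalar to SEW, splatting Lo alone is sufficient.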
4656 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4657 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4658 Hi.getConstantOperandVal(1) == 31)
4659 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4660
4661 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4662 // even if it might be sign extended.
4663 if (Hi.isUndef())
4664 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4665
4666 // Fall back to a stack store and stride x0 vector load.
4667 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4668 Hi, VL);
4669}
4670
4671// Called by type legalization to handle splat of i64 on RV32.
4672// FIXME: We can optimize this when the type has sign or zero bits in one
4673// of the halves.
4674static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4675 SDValue Scalar, SDValue VL,
4676 SelectionDAG &DAG) {
4677 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4678 SDValue Lo, Hi;
4679 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4680 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4681}
4682
4683// This function lowers a splat of a scalar operand Splat with the vector
4684// length VL. It ensures the final sequence is type legal, which is useful when
4685// lowering a splat after type legalization.
4686static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4687 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4688 const RISCVSubtarget &Subtarget) {
4689 bool HasPassthru = Passthru && !Passthru.isUndef();
4690 if (!HasPassthru && !Passthru)
4691 Passthru = DAG.getUNDEF(VT);
4692
4693 MVT EltVT = VT.getVectorElementType();
4694 MVT XLenVT = Subtarget.getXLenVT();
4695
4696 if (VT.isFloatingPoint()) {
4697 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4698 EltVT == MVT::bf16) {
4699 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4700 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4701 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4702 else
4703 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4704 MVT IVT = VT.changeVectorElementType(MVT::i16);
4705 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4706 SDValue Splat =
4707 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4708 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4709 }
4710 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4711 }
4712
4713 // Simplest case is that the operand needs to be promoted to XLenVT.
4714 if (Scalar.getValueType().bitsLE(XLenVT)) {
4715 // If the operand is a constant, sign extend to increase our chances
4716 // of being able to use a .vi instruction. ANY_EXTEND would become a
4717 // zero extend and the simm5 check in isel would fail.
4718 // FIXME: Should we ignore the upper bits in isel instead?
4719 unsigned ExtOpc =
4720 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4721 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4722 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4723 }
4724
4725 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4726 "Unexpected scalar for splat lowering!");
4727
4728 if (isOneConstant(VL) && isNullConstant(Scalar))
4729 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4730 DAG.getConstant(0, DL, XLenVT), VL);
4731
4732 // Otherwise use the more complicated splatting algorithm.
4733 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4734}
4735
4736// This function lowers an insert of a scalar operand Scalar into lane
4737// 0 of the vector regardless of the value of VL. The contents of the
4738// remaining lanes of the result vector are unspecified. VL is assumed
4739// to be non-zero.
4740static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4741 const SDLoc &DL, SelectionDAG &DAG,
4742 const RISCVSubtarget &Subtarget) {
4743 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4744
4745 const MVT XLenVT = Subtarget.getXLenVT();
4746 SDValue Passthru = DAG.getUNDEF(VT);
4747
4748 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4749 isNullConstant(Scalar.getOperand(1))) {
4750 SDValue ExtractedVal = Scalar.getOperand(0);
4751 // The element types must be the same.
4752 if (ExtractedVal.getValueType().getVectorElementType() ==
4753 VT.getVectorElementType()) {
4754 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4755 MVT ExtractedContainerVT = ExtractedVT;
4756 if (ExtractedContainerVT.isFixedLengthVector()) {
4757 ExtractedContainerVT = getContainerForFixedLengthVector(
4758 DAG, ExtractedContainerVT, Subtarget);
4759 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4760 ExtractedVal, DAG, Subtarget);
4761 }
4762 if (ExtractedContainerVT.bitsLE(VT))
4763 return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
4764 return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
4765 }
4766 }
4767
4768 if (VT.isFloatingPoint())
4769 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4770 VL);
4771
4772 // Avoid the tricky legalization cases by falling back to using the
4773 // splat code which already handles it gracefully.
4774 if (!Scalar.getValueType().bitsLE(XLenVT))
4775 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4776 DAG.getConstant(1, DL, XLenVT),
4777 VT, DL, DAG, Subtarget);
4778
4779 // If the operand is a constant, sign extend to increase our chances
4780 // of being able to use a .vi instruction. ANY_EXTEND would become a
4781 // zero extend and the simm5 check in isel would fail.
4782 // FIXME: Should we ignore the upper bits in isel instead?
4783 unsigned ExtOpc =
4784 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4785 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4786 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4787 VL);
4788}
4789
4790/// If concat_vector(V1,V2) could be folded away to some existing
4791/// vector source, return it. Note that the source may be larger
4792/// than the requested concat_vector (i.e. an extract_subvector
4793/// might be required.)
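/// For example, concat_vectors (extract_subvector X, 0),
/// (extract_subvector X, N/2), where each half has N/2 of X's N elements,
/// folds to X itself.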
4794static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4795 EVT VT = V1.getValueType();
4796 assert(VT == V2.getValueType() && "argument types must match");
4797 // Both input must be extracts.
4798 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4799 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4800 return SDValue();
4801
4802 // Extracting from the same source.
4803 SDValue Src = V1.getOperand(0);
4804 if (Src != V2.getOperand(0) ||
4805 VT.isScalableVector() != Src.getValueType().isScalableVector())
4806 return SDValue();
4807
4808 // The extracts must extract the two halves of the source.
4809 if (V1.getConstantOperandVal(1) != 0 ||
4810 V2.getConstantOperandVal(1) != VT.getVectorMinNumElements())
4811 return SDValue();
4812
4813 return Src;
4814}
4815
4816// Can this shuffle be performed on exactly one (possibly larger) input?
4818
4819 if (V2.isUndef())
4820 return V1;
4821
4822 unsigned NumElts = VT.getVectorNumElements();
4823 // Src needs to have twice the number of elements.
4824 // TODO: Update shuffle lowering to add the extract subvector
4825 if (SDValue Src = foldConcatVector(V1, V2);
4826 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4827 return Src;
4828
4829 return SDValue();
4830}
4831
4832/// Is this shuffle interleaving contiguous elements from one vector into the
4833/// even elements and contiguous elements from another vector into the odd
4834/// elements. \p EvenSrc will contain the element that should be in the first
4835/// even element. \p OddSrc will contain the element that should be in the first
4836/// odd element. These can be the first element in a source or the element half
4837/// way through the source.
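/// For example, for v8i8 the mask <0,8,1,9,2,10,3,11> interleaves the low half
/// of the first source with the low half of the second, giving EvenSrc = 0 and
/// OddSrc = 8.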
4838static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4839 int &OddSrc, const RISCVSubtarget &Subtarget) {
4840 // We need to be able to widen elements to the next larger integer type or
4841 // use the zip2a instruction at e64.
4842 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
4843 !Subtarget.hasVendorXRivosVizip())
4844 return false;
4845
4846 int Size = Mask.size();
4847 int NumElts = VT.getVectorNumElements();
4848 assert(Size == (int)NumElts && "Unexpected mask size");
4849
4850 SmallVector<unsigned, 2> StartIndexes;
4851 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4852 return false;
4853
4854 EvenSrc = StartIndexes[0];
4855 OddSrc = StartIndexes[1];
4856
4857 // One source should be low half of first vector.
4858 if (EvenSrc != 0 && OddSrc != 0)
4859 return false;
4860
4861 // Subvectors will be extracted from either the start of the two input
4862 // vectors, or the start and middle of the first vector if it's a unary
4863 // interleave.
4864 // In both cases, HalfNumElts will be extracted.
4865 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4866 // we'll create an illegal extract_subvector.
4867 // FIXME: We could support other values using a slidedown first.
4868 int HalfNumElts = NumElts / 2;
4869 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4870}
4871
4872/// Is this mask representing a masked combination of two slides?
4874 std::array<std::pair<int, int>, 2> &SrcInfo) {
4875 if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
4876 return false;
4877
4878 // Avoid matching vselect idioms
4879 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
4880 return false;
4881 // Prefer vslideup as the second instruction, and identity
4882 // only as the initial instruction.
4883 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
4884 SrcInfo[1].second == 0)
4885 std::swap(SrcInfo[0], SrcInfo[1]);
4886 assert(SrcInfo[0].first != -1 && "Must find one slide");
4887 return true;
4888}
4889
4890// Exactly matches the semantics of a previously existing custom matcher
4891// to allow migration to new matcher without changing output.
4892static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
4893 unsigned NumElts) {
4894 if (SrcInfo[1].first == -1)
4895 return true;
4896 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
4897 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
4898}
4899
4900static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
4901 ArrayRef<int> Mask, unsigned Factor,
4902 bool RequiredPolarity) {
4903 int NumElts = Mask.size();
4904 for (const auto &[Idx, M] : enumerate(Mask)) {
4905 if (M < 0)
4906 continue;
4907 int Src = M >= NumElts;
4908 int Diff = (int)Idx - (M % NumElts);
4909 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
4910 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
4911 "Must match exactly one of the two slides");
4912 if (RequiredPolarity != (C == (Idx / Factor) % 2))
4913 return false;
4914 }
4915 return true;
4916}
4917
4918/// Given a shuffle which can be represented as a pair of two slides,
4919/// see if it is a zipeven idiom. Zipeven is:
4920/// vs2: a0 a1 a2 a3
4921/// vs1: b0 b1 b2 b3
4922/// vd: a0 b0 a2 b2
4923static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
4924 ArrayRef<int> Mask, unsigned &Factor) {
4925 Factor = SrcInfo[1].second;
4926 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4927 Mask.size() % Factor == 0 &&
4928 isAlternating(SrcInfo, Mask, Factor, true);
4929}
4930
4931/// Given a shuffle which can be represented as a pair of two slides,
4932/// see if it is a zipodd idiom. Zipodd is:
4933/// vs2: a0 a1 a2 a3
4934/// vs1: b0 b1 b2 b3
4935/// vd: a1 b1 a3 b3
4936/// Note that the operand order is swapped due to the way we canonicalize
4937/// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
4938static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
4939 ArrayRef<int> Mask, unsigned &Factor) {
4940 Factor = -SrcInfo[1].second;
4941 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4942 Mask.size() % Factor == 0 &&
4943 isAlternating(SrcInfo, Mask, Factor, false);
4944}
4945
4946// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4947// 2, 4, 8 and the integer type Factor-times larger than VT's
4948// element type must be a legal element type.
4949// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4950// -> [p, q, r, s] (Factor=2, Index=1)
4952 SDValue Src, unsigned Factor,
4953 unsigned Index, SelectionDAG &DAG) {
4954 unsigned EltBits = VT.getScalarSizeInBits();
4955 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4956 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4957 SrcEC.divideCoefficientBy(Factor));
4958 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4959 SrcEC.divideCoefficientBy(Factor));
4960 Src = DAG.getBitcast(WideSrcVT, Src);
4961
4962 unsigned Shift = Index * EltBits;
4963 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4964 DAG.getConstant(Shift, DL, WideSrcVT));
4965 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4966 MVT CastVT = ResVT.changeVectorElementType(VT.getVectorElementType());
4967 Res = DAG.getBitcast(CastVT, Res);
4968 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
4969}
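// Illustrative example: a v8i8 deinterleave with Factor = 2 and Index = 1
// becomes a bitcast to v4i16, an SRL by 8, and a TRUNCATE back to v4i8,
// which typically selects to a single vnsrl.wi.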
4970
4971/// Match a single source shuffle which is an identity except that some
4972/// particular element is repeated. This can be lowered as a masked
4973/// vrgather.vi/vx. Note that the two source form of this is handled
4974/// by the recursive splitting logic and doesn't need special handling.
4975static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN,
4976 const RISCVSubtarget &Subtarget,
4977 SelectionDAG &DAG) {
4978
4979 SDLoc DL(SVN);
4980 MVT VT = SVN->getSimpleValueType(0);
4981 SDValue V1 = SVN->getOperand(0);
4982 assert(SVN->getOperand(1).isUndef());
4983 ArrayRef<int> Mask = SVN->getMask();
4984 const unsigned NumElts = VT.getVectorNumElements();
4985 MVT XLenVT = Subtarget.getXLenVT();
4986
4987 std::optional<int> SplatIdx;
4988 for (auto [I, M] : enumerate(Mask)) {
4989 if (M == -1 || I == (unsigned)M)
4990 continue;
4991 if (SplatIdx && *SplatIdx != M)
4992 return SDValue();
4993 SplatIdx = M;
4994 }
4995
4996 if (!SplatIdx)
4997 return SDValue();
4998
4999 SmallVector<SDValue> MaskVals;
5000 for (int MaskIndex : Mask) {
5001 bool SelectMaskVal = MaskIndex == *SplatIdx;
5002 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5003 }
5004 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5005 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5006 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5007 SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
5008 SmallVector<int>(NumElts, *SplatIdx));
5009 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
5010}
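// Illustrative example: the v4 mask <0, 3, 2, 3> is an identity except that
// lane 1 repeats element 3, so it becomes a splat of element 3 merged into
// V1 under the select mask <0, 1, 0, 1>.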
5011
5012// Lower the following shuffle to vslidedown.
5013// a)
5014// t49: v8i8 = extract_subvector t13, Constant:i64<0>
5015// t109: v8i8 = extract_subvector t13, Constant:i64<8>
5016// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
5017// b)
5018// t69: v16i16 = extract_subvector t68, Constant:i64<0>
5019// t23: v8i16 = extract_subvector t69, Constant:i64<0>
5020// t29: v4i16 = extract_subvector t23, Constant:i64<4>
5021// t26: v8i16 = extract_subvector t69, Constant:i64<8>
5022// t30: v4i16 = extract_subvector t26, Constant:i64<0>
5023// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
5024static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
5025 SDValue V1, SDValue V2,
5026 ArrayRef<int> Mask,
5027 const RISCVSubtarget &Subtarget,
5028 SelectionDAG &DAG) {
5029 auto findNonEXTRACT_SUBVECTORParent =
5030 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
5031 uint64_t Offset = 0;
5032 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5033 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
5034 // a scalable vector, but we don't want to match that case.
5035 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
5036 Offset += Parent.getConstantOperandVal(1);
5037 Parent = Parent.getOperand(0);
5038 }
5039 return std::make_pair(Parent, Offset);
5040 };
5041
5042 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
5043 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
5044
5045 // Extracting from the same source.
5046 SDValue Src = V1Src;
5047 if (Src != V2Src)
5048 return SDValue();
5049
5050 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
5051 SmallVector<int, 16> NewMask(Mask);
5052 for (size_t i = 0; i != NewMask.size(); ++i) {
5053 if (NewMask[i] == -1)
5054 continue;
5055
5056 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
5057 NewMask[i] = NewMask[i] + V1IndexOffset;
5058 } else {
5059 // Minus NewMask.size() is needed. Otherwise, the b case would be
5060 // <5,6,7,12> instead of <5,6,7,8>.
5061 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
5062 }
5063 }
5064
5065 // First index must be known and non-zero. It will be used as the slidedown
5066 // amount.
5067 if (NewMask[0] <= 0)
5068 return SDValue();
5069
5070 // NewMask must also be contiguous.
5071 for (unsigned i = 1; i != NewMask.size(); ++i)
5072 if (NewMask[i - 1] + 1 != NewMask[i])
5073 return SDValue();
5074
5075 MVT XLenVT = Subtarget.getXLenVT();
5076 MVT SrcVT = Src.getSimpleValueType();
5077 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
5078 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
5079 SDValue Slidedown =
5080 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5081 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
5082 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
5083 return DAG.getExtractSubvector(
5084 DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
5085}
5086
5087// Because vslideup leaves the destination elements at the start intact, we can
5088// use it to perform shuffles that insert subvectors:
5089//
5090// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5091// ->
5092// vsetvli zero, 8, e8, mf2, ta, ma
5093// vslideup.vi v8, v9, 4
5094//
5095// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5096// ->
5097// vsetvli zero, 5, e8, mf2, tu, ma
5098// vslideup.vi v8, v9, 2
5099static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
5100 SDValue V1, SDValue V2,
5101 ArrayRef<int> Mask,
5102 const RISCVSubtarget &Subtarget,
5103 SelectionDAG &DAG) {
5104 unsigned NumElts = VT.getVectorNumElements();
5105 int NumSubElts, Index;
5106 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
5107 Index))
5108 return SDValue();
5109
5110 bool OpsSwapped = Mask[Index] < (int)NumElts;
5111 SDValue InPlace = OpsSwapped ? V2 : V1;
5112 SDValue ToInsert = OpsSwapped ? V1 : V2;
5113
5114 MVT XLenVT = Subtarget.getXLenVT();
5115 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5116 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
5117 // We slide up by the index that the subvector is being inserted at, and set
5118 // VL to the index + the number of elements being inserted.
5119 unsigned Policy =
5120 RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
5121 // If we're adding a suffix to the in-place vector, i.e. inserting right
5122 // up to the very end of it, then we don't actually care about the tail.
5123 if (NumSubElts + Index >= (int)NumElts)
5124 Policy |= RISCVVType::TAIL_AGNOSTIC;
5125
5126 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
5127 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
5128 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
5129
5130 SDValue Res;
5131 // If we're inserting into the lowest elements, use a tail undisturbed
5132 // vmv.v.v.
5133 if (Index == 0)
5134 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
5135 VL);
5136 else
5137 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
5138 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
5139 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5140}
5141
5142/// Match v(f)slide1up/down idioms. These operations involve sliding
5143/// N-1 elements to make room for an inserted scalar at one end.
5144static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
5145 SDValue V1, SDValue V2,
5146 ArrayRef<int> Mask,
5147 const RISCVSubtarget &Subtarget,
5148 SelectionDAG &DAG) {
5149 bool OpsSwapped = false;
5150 if (!isa<BuildVectorSDNode>(V1)) {
5151 if (!isa<BuildVectorSDNode>(V2))
5152 return SDValue();
5153 std::swap(V1, V2);
5154 OpsSwapped = true;
5155 }
5156 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
5157 if (!Splat)
5158 return SDValue();
5159
5160 // Return true if the mask could describe a slide of Mask.size() - 1
5161 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
5162 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5163 const unsigned S = (Offset > 0) ? 0 : -Offset;
5164 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5165 for (unsigned i = S; i != E; ++i)
5166 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5167 return false;
5168 return true;
5169 };
5170
5171 const unsigned NumElts = VT.getVectorNumElements();
5172 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5173 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5174 return SDValue();
5175
5176 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
5177 // Inserted lane must come from the splat; an undef scalar is legal but not profitable.
5178 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5179 return SDValue();
5180
5181 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5182 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5183
5184 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5185 // vslide1{down,up}.vx instead.
5186 if (VT.getVectorElementType() == MVT::bf16 ||
5187 (VT.getVectorElementType() == MVT::f16 &&
5188 !Subtarget.hasVInstructionsF16())) {
5189 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5190 Splat =
5191 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
5192 V2 = DAG.getBitcast(
5193 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
5194 SDValue Vec = DAG.getNode(
5195 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5196 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
5197 Vec = DAG.getBitcast(ContainerVT, Vec);
5198 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5199 }
5200
5201 auto OpCode = IsVSlidedown ?
5202 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5203 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5204 if (!VT.isFloatingPoint())
5205 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
5206 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
5207 DAG.getUNDEF(ContainerVT),
5208 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
5209 Splat, TrueMask, VL);
5210 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5211}
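// Illustrative example: with V1 = build_vector splat(s) and the v4 mask
// <0, 4, 5, 6>, the result <s, b0, b1, b2> is matched as a vslide1up of
// V2 with the scalar s inserted at the front.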
5212
5213/// Match a mask which "spreads" the leading elements of a vector evenly
5214/// across the result. Factor is the spread amount, and Index is the
5215/// offset applied. (on success, Index < Factor) This is the inverse
5216/// of a deinterleave with the same Factor and Index. This is analogous
5217/// to an interleave, except that all but one lane is undef.
5218bool RISCVTargetLowering::isSpreadMask(ArrayRef<int> Mask, unsigned Factor,
5219 unsigned &Index) {
5220 SmallVector<bool> LaneIsUndef(Factor, true);
5221 for (unsigned i = 0; i < Mask.size(); i++)
5222 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5223
5224 bool Found = false;
5225 for (unsigned i = 0; i < Factor; i++) {
5226 if (LaneIsUndef[i])
5227 continue;
5228 if (Found)
5229 return false;
5230 Index = i;
5231 Found = true;
5232 }
5233 if (!Found)
5234 return false;
5235
5236 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5237 unsigned j = i * Factor + Index;
5238 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5239 return false;
5240 }
5241 return true;
5242}
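// Illustrative example: with Factor = 2 and 8 result elements, the mask
// <-1, 0, -1, 1, -1, 2, -1, 3> is a spread with Index = 1: the leading
// source elements land in every second lane and all other lanes are undef.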
5243
5244static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5245 const SDLoc &DL, SelectionDAG &DAG,
5246 const RISCVSubtarget &Subtarget) {
5247 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5248 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5249 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5251
5252 MVT VT = Op0.getSimpleValueType();
5253 MVT IntVT = VT.changeVectorElementTypeToInteger();
5254 Op0 = DAG.getBitcast(IntVT, Op0);
5255 Op1 = DAG.getBitcast(IntVT, Op1);
5256
5257 MVT ContainerVT = IntVT;
5258 if (VT.isFixedLengthVector()) {
5259 ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
5260 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
5261 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5262 }
5263
5264 MVT InnerVT = ContainerVT;
5265 auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
5266 if (Op1.isUndef() &&
5267 ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
5268 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5269 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5270 VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
5271 Subtarget.getXLenVT());
5272 Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
5273 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5274 Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
5275 Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
5276 }
5277
5278 SDValue Passthru = DAG.getUNDEF(InnerVT);
5279 SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
5280 if (InnerVT.bitsLT(ContainerVT))
5281 Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
5282 if (IntVT.isFixedLengthVector())
5283 Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
5284 Res = DAG.getBitcast(VT, Res);
5285 return Res;
5286}
5287
5288// Given a vector a, b, c, d return a vector Factor times longer
5289// with Factor-1 undef's between elements. Ex:
5290// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5291// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
5292static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5293 const SDLoc &DL, SelectionDAG &DAG) {
5294
5295 MVT VT = V.getSimpleValueType();
5296 unsigned EltBits = VT.getScalarSizeInBits();
5297 ElementCount EC = VT.getVectorElementCount();
5298 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
5299
5300 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
5301
5302 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
5303 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5304 // allow the SHL to fold away if Index is 0.
5305 if (Index != 0)
5306 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
5307 DAG.getConstant(EltBits * Index, DL, WideVT));
5308 // Make sure to use original element type
5309 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
5310 EC.multiplyCoefficientBy(Factor));
5311 return DAG.getBitcast(ResultVT, Result);
5312}
5313
5314// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5315// to create an interleaved vector of <[vscale x] n*2 x ty>.
5316// This requires that the size of ty is less than the subtarget's maximum ELEN.
5317static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
5318 const SDLoc &DL, SelectionDAG &DAG,
5319 const RISCVSubtarget &Subtarget) {
5320
5321 // FIXME: Not only does this optimize the code, it fixes some correctness
5322 // issues because MIR does not have freeze.
5323 if (EvenV.isUndef())
5324 return getWideningSpread(OddV, 2, 1, DL, DAG);
5325 if (OddV.isUndef())
5326 return getWideningSpread(EvenV, 2, 0, DL, DAG);
5327
5328 MVT VecVT = EvenV.getSimpleValueType();
5329 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5330 // Convert fixed vectors to scalable if needed
5331 if (VecContainerVT.isFixedLengthVector()) {
5332 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
5333 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
5334 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
5335 }
5336
5337 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5338
5339 // We're working with a vector of the same size as the resulting
5340 // interleaved vector, but with half the number of elements and
5341 // twice the SEW (Hence the restriction on not using the maximum
5342 // ELEN)
5343 MVT WideVT =
5344 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
5345 VecVT.getVectorElementCount());
5346 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5347 if (WideContainerVT.isFixedLengthVector())
5348 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
5349
5350 // Bitcast the input vectors to integers in case they are FP
5351 VecContainerVT = VecContainerVT.changeTypeToInteger();
5352 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
5353 OddV = DAG.getBitcast(VecContainerVT, OddV);
5354
5355 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
5356 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
5357
5358 SDValue Interleaved;
5359 if (Subtarget.hasStdExtZvbb()) {
5360 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5361 SDValue OffsetVec =
5362 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5363 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5364 OffsetVec, Passthru, Mask, VL);
5365 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5366 Interleaved, EvenV, Passthru, Mask, VL);
5367 } else {
5368 // FIXME: We should freeze the odd vector here. We already handled the case
5369 // of provably undef/poison above.
5370
5371 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5372 // vwaddu.vv
5373 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5374 OddV, Passthru, Mask, VL);
5375
5376 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), the all-ones value
5377 SDValue AllOnesVec = DAG.getSplatVector(
5378 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5379 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5380 OddV, AllOnesVec, Passthru, Mask, VL);
5381
5382 // Add the two together so we get
5383 // (OddV * 0xff...ff) + (OddV + EvenV)
5384 // = (OddV * 0x100...00) + EvenV
5385 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5386 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
5387 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5388 Interleaved, OddsMul, Passthru, Mask, VL);
5389 }
5390
5391 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
5392 MVT ResultContainerVT = MVT::getVectorVT(
5393 VecVT.getVectorElementType(), // Make sure to use original type
5394 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5395 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5396
5397 // Convert back to a fixed vector if needed
5398 MVT ResultVT =
5399 MVT::getVectorVT(VecVT.getVectorElementType(),
5400 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5401 if (ResultVT.isFixedLengthVector())
5402 Interleaved =
5403 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5404
5405 return Interleaved;
5406}
5407
5408// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5409// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5410static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5411 SelectionDAG &DAG,
5412 const RISCVSubtarget &Subtarget) {
5413 SDLoc DL(SVN);
5414 MVT VT = SVN->getSimpleValueType(0);
5415 SDValue V = SVN->getOperand(0);
5416 unsigned NumElts = VT.getVectorNumElements();
5417
5418 assert(VT.getVectorElementType() == MVT::i1);
5419
5420 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5421 SVN->getMask().size()) ||
5422 !SVN->getOperand(1).isUndef())
5423 return SDValue();
5424
5425 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5426 EVT ViaVT = EVT::getVectorVT(
5427 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5428 EVT ViaBitVT =
5429 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5430
5431 // If we don't have zvbb or the larger element type is wider than ELEN, the
5432 // operation will be illegal.
5433 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5434 ViaVT) ||
5435 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5436 return SDValue();
5437
5438 // If the bit vector doesn't fit exactly into the larger element type, we need
5439 // to insert it into the larger vector and then shift up the reversed bits
5440 // afterwards to get rid of the gap introduced.
5441 if (ViaEltSize > NumElts)
5442 V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);
5443
5444 SDValue Res =
5445 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5446
5447 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5448 // element type.
5449 if (ViaEltSize > NumElts)
5450 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5451 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5452
5453 Res = DAG.getBitcast(ViaBitVT, Res);
5454
5455 if (ViaEltSize > NumElts)
5456 Res = DAG.getExtractSubvector(DL, VT, Res, 0);
5457 return Res;
5458}
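// Illustrative example: a v4i1 reverse is done in v1i8: the mask vector is
// inserted into a v8i1, bitreversed as an i8 (placing the live bits at the
// top), then shifted right by 4 to remove the gap before extracting v4i1.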
5459
5460static bool isLegalBitRotate(ArrayRef<int> Mask, EVT VT,
5461 const RISCVSubtarget &Subtarget,
5462 MVT &RotateVT, unsigned &RotateAmt) {
5463 unsigned NumElts = VT.getVectorNumElements();
5464 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5465 unsigned NumSubElts;
5466 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
5467 NumElts, NumSubElts, RotateAmt))
5468 return false;
5469 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5470 NumElts / NumSubElts);
5471
5472 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5473 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5474}
5475
5476// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5477// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5478// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5479static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5480 SelectionDAG &DAG,
5481 const RISCVSubtarget &Subtarget) {
5482 SDLoc DL(SVN);
5483
5484 EVT VT = SVN->getValueType(0);
5485 unsigned RotateAmt;
5486 MVT RotateVT;
5487 if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5488 return SDValue();
5489
5490 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5491
5492 SDValue Rotate;
5493 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5494 // so canonicalize to vrev8.
5495 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5496 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5497 else
5498 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5499 DAG.getConstant(RotateAmt, DL, RotateVT));
5500
5501 return DAG.getBitcast(VT, Rotate);
5502}
5503
5504// If compiling with an exactly known VLEN, see if we can split a
5505// shuffle on m2 or larger into a small number of m1 sized shuffles
5506// which write each destination register exactly once.
5507static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5508 SelectionDAG &DAG,
5509 const RISCVSubtarget &Subtarget) {
5510 SDLoc DL(SVN);
5511 MVT VT = SVN->getSimpleValueType(0);
5512 SDValue V1 = SVN->getOperand(0);
5513 SDValue V2 = SVN->getOperand(1);
5514 ArrayRef<int> Mask = SVN->getMask();
5515
5516 // If we don't know exact data layout, not much we can do. If this
5517 // is already m1 or smaller, no point in splitting further.
5518 const auto VLen = Subtarget.getRealVLen();
5519 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5520 return SDValue();
5521
5522 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5523 // expansion for.
5524 unsigned RotateAmt;
5525 MVT RotateVT;
5526 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5527 return SDValue();
5528
5529 MVT ElemVT = VT.getVectorElementType();
5530 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5531
5532 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5533 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5534 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5535 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5536 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5537 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5538 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5539 unsigned NumOfDestRegs = NumElts / NumOpElts;
5540 // The following semantically builds up a fixed length concat_vector
5541 // of the component shuffle_vectors. We eagerly lower to scalable here
5542 // to avoid DAG combining it back to a large shuffle_vector again.
5543 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5544 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5545 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5546 Operands;
5547 processShuffleMasks(
5548 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5549 [&]() { Operands.emplace_back(); },
5550 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5551 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
5552 SmallVector<int>(SrcSubMask));
5553 },
5554 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5555 if (NewReg)
5556 Operands.emplace_back();
5557 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
5558 });
5559 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5560 // Note: check that we do not emit too many shuffles here to prevent code
5561 // size explosion.
5562 // TODO: investigate if it can be improved by extra analysis of the masks to
5563 // check if the code is more profitable.
5564 unsigned NumShuffles = std::accumulate(
5565 Operands.begin(), Operands.end(), 0u,
5566 [&](unsigned N,
5567 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5568 if (Data.empty())
5569 return N;
5570 N += Data.size();
5571 for (const auto &P : Data) {
5572 unsigned Idx2 = std::get<1>(P);
5573 ArrayRef<int> Mask = std::get<2>(P);
5574 if (Idx2 != UINT_MAX)
5575 ++N;
5576 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5577 --N;
5578 }
5579 return N;
5580 });
5581 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5582 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5583 return SDValue();
5584 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5585 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
5586 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5587 return SubVec;
5588 };
5589 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5590 ArrayRef<int> Mask) {
5591 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5592 return SubVec;
5593 };
5594 SDValue Vec = DAG.getUNDEF(ContainerVT);
5595 for (auto [I, Data] : enumerate(Operands)) {
5596 if (Data.empty())
5597 continue;
5598 SmallDenseMap<unsigned, SDValue, 4> Values;
5599 for (unsigned I : seq<unsigned>(Data.size())) {
5600 const auto &[Idx1, Idx2, _] = Data[I];
5601 // If the shuffle contains a permutation of an odd number of elements,
5602 // Idx1 might already be used in the first iteration.
5603 //
5604 // Idx1 = shuffle Idx1, Idx2
5605 // Idx1 = shuffle Idx1, Idx3
5606 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
5607 if (!V)
5608 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5609 (Idx1 % NumOfSrcRegs) * NumOpElts);
5610 if (Idx2 != UINT_MAX) {
5611 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
5612 if (!V)
5613 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5614 (Idx2 % NumOfSrcRegs) * NumOpElts);
5615 }
5616 }
5617 SDValue V;
5618 for (const auto &[Idx1, Idx2, Mask] : Data) {
5619 SDValue V1 = Values.at(Idx1);
5620 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5621 V = PerformShuffle(V1, V2, Mask);
5622 Values[Idx1] = V;
5623 }
5624
5625 unsigned InsertIdx = I * NumOpElts;
5626 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5627 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
5628 }
5629 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5630}
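// Illustrative example: with an exactly known VLEN of 128, a v16i32 shuffle
// occupies four vector registers (m4); if the per-register sub-shuffles stay
// under the limit checked above, it is rebuilt as a few independent m1
// shuffles, one group per destination register.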
5631
5632// Matches a subset of compress masks with a contiguous prefix of output
5633// elements. This could be extended to allow gaps by deciding which
5634// source elements to spuriously demand.
5635static bool isCompressMask(ArrayRef<int> Mask) {
5636 int Last = -1;
5637 bool SawUndef = false;
5638 for (const auto &[Idx, M] : enumerate(Mask)) {
5639 if (M == -1) {
5640 SawUndef = true;
5641 continue;
5642 }
5643 if (SawUndef)
5644 return false;
5645 if (Idx > (unsigned)M)
5646 return false;
5647 if (M <= Last)
5648 return false;
5649 Last = M;
5650 }
5651 return true;
5652}
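// Illustrative example: for an 8 element source, <0, 2, 5, -1, -1, -1, -1, -1>
// is a compress mask: indices are strictly increasing, each index is >= its
// position, and undefs only appear as a suffix. It lowers to a vcompress with
// the element mask <1, 0, 1, 0, 0, 1, 0, 0>.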
5653
5654/// Given a shuffle where the indices are disjoint between the two sources,
5655/// e.g.:
5656///
5657/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5658///
5659/// Merge the two sources into one and do a single source shuffle:
5660///
5661/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5662/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5663///
5664/// A vselect will either be merged into a masked instruction or be lowered as a
5665/// vmerge.vvm, which is cheaper than a vrgather.vv.
5666static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5667 SelectionDAG &DAG,
5668 const RISCVSubtarget &Subtarget) {
5669 MVT VT = SVN->getSimpleValueType(0);
5670 MVT XLenVT = Subtarget.getXLenVT();
5671 SDLoc DL(SVN);
5672
5673 const ArrayRef<int> Mask = SVN->getMask();
5674
5675 // Work out which source each lane will come from.
5676 SmallVector<int, 16> Srcs(Mask.size(), -1);
5677
5678 for (int Idx : Mask) {
5679 if (Idx == -1)
5680 continue;
5681 unsigned SrcIdx = Idx % Mask.size();
5682 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5683 if (Srcs[SrcIdx] == -1)
5684 // Mark this source as using this lane.
5685 Srcs[SrcIdx] = Src;
5686 else if (Srcs[SrcIdx] != Src)
5687 // The other source is using this lane: not disjoint.
5688 return SDValue();
5689 }
5690
5691 SmallVector<SDValue> SelectMaskVals;
5692 for (int Lane : Srcs) {
5693 if (Lane == -1)
5694 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5695 else
5696 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5697 }
5698 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5699 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5700 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5701 SVN->getOperand(0), SVN->getOperand(1));
5702
5703 // Move all indices relative to the first source.
5704 SmallVector<int> NewMask(Mask.size());
5705 for (unsigned I = 0; I < Mask.size(); I++) {
5706 if (Mask[I] == -1)
5707 NewMask[I] = -1;
5708 else
5709 NewMask[I] = Mask[I] % Mask.size();
5710 }
5711
5712 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5713}
5714
5715/// Is this mask local (i.e. elements only move within their local span), and
5716/// repeating (that is, the same rearrangement is being done within each span)?
5717static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5718 // Require a prefix from the original mask until the consumer code
5719 // is adjusted to rewrite the mask instead of just taking a prefix.
5720 for (auto [I, M] : enumerate(Mask)) {
5721 if (M == -1)
5722 continue;
5723 if ((M / Span) != (int)(I / Span))
5724 return false;
5725 int SpanIdx = I % Span;
5726 int Expected = M % Span;
5727 if (Mask[SpanIdx] != Expected)
5728 return false;
5729 }
5730 return true;
5731}
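// Illustrative example: with Span = 4, the mask <1, 0, 3, 2, 5, 4, 7, 6> is
// local and repeating: every element stays within its own 4-element span and
// each span applies the same <1, 0, 3, 2> permutation.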
5732
5733/// Is this mask only using elements from the first span of the input?
5734static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
5735 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
5736}
5737
5738/// Return true for a mask which performs an arbitrary shuffle within the first
5739/// span, and then repeats that same result across all remaining spans. Note
5740/// that this doesn't check if all the inputs come from a single span!
5741static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
5742 // Require a prefix from the original mask until the consumer code
5743 // is adjusted to rewrite the mask instead of just taking a prefix.
5744 for (auto [I, M] : enumerate(Mask)) {
5745 if (M == -1)
5746 continue;
5747 int SpanIdx = I % Span;
5748 if (Mask[SpanIdx] != M)
5749 return false;
5750 }
5751 return true;
5752}
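// Illustrative example: with Span = 4, the mask <3, 3, 0, 1, 3, 3, 0, 1>
// produces the same within-span result <3, 3, 0, 1> for every span, so a
// single m1 vrgather result can be copied into each register of the group.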
5753
5754/// Try to widen element type to get a new mask value for a better permutation
5755/// sequence. This doesn't try to inspect the widened mask for profitability;
5756/// we speculate the widened form is equal or better. This has the effect of
5757/// reducing mask constant sizes - allowing cheaper materialization sequences
5758/// - and index sequence sizes - reducing register pressure and materialization
5759/// cost, at the cost of (possibly) an extra VTYPE toggle.
5760static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5761 SDLoc DL(Op);
5762 MVT VT = Op.getSimpleValueType();
5763 MVT ScalarVT = VT.getVectorElementType();
5764 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5765 SDValue V0 = Op.getOperand(0);
5766 SDValue V1 = Op.getOperand(1);
5767 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5768
5769 // Avoid wasted work leading to isTypeLegal check failing below
5770 if (ElementSize > 32)
5771 return SDValue();
5772
5773 SmallVector<int, 8> NewMask;
5774 if (!widenShuffleMaskElts(Mask, NewMask))
5775 return SDValue();
5776
5777 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5778 : MVT::getIntegerVT(ElementSize * 2);
5779 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5780 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5781 return SDValue();
5782 V0 = DAG.getBitcast(NewVT, V0);
5783 V1 = DAG.getBitcast(NewVT, V1);
5784 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5785}
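// Illustrative example: the v8i8 mask <2, 3, 0, 1, 6, 7, 4, 5> moves whole
// element pairs, so it can be widened to the v4i16 mask <1, 0, 3, 2>,
// halving the size of any index vector that must be materialized.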
5786
5787static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5788 const RISCVSubtarget &Subtarget) {
5789 SDValue V1 = Op.getOperand(0);
5790 SDValue V2 = Op.getOperand(1);
5791 SDLoc DL(Op);
5792 MVT XLenVT = Subtarget.getXLenVT();
5793 MVT VT = Op.getSimpleValueType();
5794 unsigned NumElts = VT.getVectorNumElements();
5795 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5796
5797 if (VT.getVectorElementType() == MVT::i1) {
5798 // Lower to a vror.vi of a larger element type if possible before we promote
5799 // i1s to i8s.
5800 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5801 return V;
5802 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5803 return V;
5804
5805 // Promote i1 shuffle to i8 shuffle.
5806 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5807 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5808 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5809 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5810 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5811 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5812 ISD::SETNE);
5813 }
5814
5815 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5816
5817 // Store the return value in a single variable instead of structured bindings
5818 // so that we can pass it to GetSlide below, which cannot capture structured
5819 // bindings until C++20.
5820 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5821 auto [TrueMask, VL] = TrueMaskVL;
5822
5823 if (SVN->isSplat()) {
5824 const int Lane = SVN->getSplatIndex();
5825 if (Lane >= 0) {
5826 MVT SVT = VT.getVectorElementType();
5827
5828 // Turn splatted vector load into a strided load with an X0 stride.
5829 SDValue V = V1;
5830 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5831 // with undef.
5832 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5833 int Offset = Lane;
5834 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5835 int OpElements =
5836 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5837 V = V.getOperand(Offset / OpElements);
5838 Offset %= OpElements;
5839 }
5840
5841 // We need to ensure the load isn't atomic or volatile.
5842 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5843 auto *Ld = cast<LoadSDNode>(V);
5844 Offset *= SVT.getStoreSize();
5845 SDValue NewAddr = DAG.getMemBasePlusOffset(
5846 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5847
5848 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5849 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5850 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5851 SDValue IntID =
5852 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5853 SDValue Ops[] = {Ld->getChain(),
5854 IntID,
5855 DAG.getUNDEF(ContainerVT),
5856 NewAddr,
5857 DAG.getRegister(RISCV::X0, XLenVT),
5858 VL};
5859 SDValue NewLoad = DAG.getMemIntrinsicNode(
5860 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5861 DAG.getMachineFunction().getMachineMemOperand(
5862 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5863 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5864 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5865 }
5866
5867 MVT SplatVT = ContainerVT;
5868
5869 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5870 if (SVT == MVT::bf16 ||
5871 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5872 SVT = MVT::i16;
5873 SplatVT = ContainerVT.changeVectorElementType(SVT);
5874 }
5875
5876 // Otherwise use a scalar load and splat. This will give the best
5877 // opportunity to fold a splat into the operation. ISel can turn it into
5878 // the x0 strided load if we aren't able to fold away the select.
5879 if (SVT.isFloatingPoint())
5880 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5881 Ld->getPointerInfo().getWithOffset(Offset),
5882 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
5883 else
5884 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5885 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5886 Ld->getBaseAlign(),
5887 Ld->getMemOperand()->getFlags());
5889
5890 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5891 : RISCVISD::VMV_V_X_VL;
5892 SDValue Splat =
5893 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5894 Splat = DAG.getBitcast(ContainerVT, Splat);
5895 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5896 }
5897
5898 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5899 assert(Lane < (int)NumElts && "Unexpected lane!");
5900 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5901 V1, DAG.getConstant(Lane, DL, XLenVT),
5902 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5903 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5904 }
5905 }
5906
5907 // For exact VLEN m2 or greater, try to split to m1 operations if we
5908 // can split cleanly.
5909 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5910 return V;
5911
5912 ArrayRef<int> Mask = SVN->getMask();
5913
5914 if (SDValue V =
5915 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5916 return V;
5917
5918 if (SDValue V =
5919 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5920 return V;
5921
5922 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5923 // available.
5924 if (Subtarget.hasStdExtZvkb())
5925 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5926 return V;
5927
5928 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
5929 NumElts != 2)
5930 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5931
5932 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5933 // use shift and truncate to perform the shuffle.
5934 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5935 // shift-and-trunc reducing total cost for everything except an mf8 result.
5936 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5937 // to do the entire operation.
5938 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5939 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5940 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5941 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5942 unsigned Index = 0;
5943 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5944 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5945 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
5946 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5947 if (1 < count_if(Mask,
5948 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5949 1 < count_if(Mask, [&Mask](int Idx) {
5950 return Idx >= (int)Mask.size();
5951 })) {
5952 // Narrow each source and concatenate them.
5953 // FIXME: For small LMUL it is better to concatenate first.
5954 MVT EltVT = VT.getVectorElementType();
5955 auto EltCnt = VT.getVectorElementCount();
5956 MVT SubVT =
5957 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
5958
5959 SDValue Lo =
5960 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
5961 SDValue Hi =
5962 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
5963
5964 SDValue Concat =
5965 DAG.getNode(ISD::CONCAT_VECTORS, DL,
5966 SubVT.getDoubleNumVectorElementsVT(), Lo, Hi);
5967 if (Factor == 2)
5968 return Concat;
5969
5970 SDValue Vec = DAG.getUNDEF(VT);
5971 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
5972 }
5973 }
5974 }
5975 }
5976
5977 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
5978 // e64 which can't match above.
5979 unsigned Index = 0;
5980 if (Subtarget.hasVendorXRivosVizip() &&
5981 ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
5982 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5983 unsigned Opc =
5984 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
5985 if (V2.isUndef())
5986 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
5987 if (auto VLEN = Subtarget.getRealVLen();
5988 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
5989 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
5990 if (SDValue Src = foldConcatVector(V1, V2)) {
5991 EVT NewVT = VT.getDoubleNumVectorElementsVT();
5992 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
5993 SDValue Res =
5994 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
5995 return DAG.getExtractSubvector(DL, VT, Res, 0);
5996 }
5997 // Deinterleave each source and concatenate them, or concat first, then
5998 // deinterleave.
5999 if (1 < count_if(Mask,
6000 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6001 1 < count_if(Mask,
6002 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
6003
6004 const unsigned EltSize = VT.getScalarSizeInBits();
6005 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
6006 if (NumElts < MinVLMAX) {
6007 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
6008 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
6009 SDValue Res =
6010 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
6011 return DAG.getExtractSubvector(DL, VT, Res, 0);
6012 }
6013
6014 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6015 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6016
6017 MVT SubVT = VT.getHalfNumVectorElementsVT();
6018 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
6019 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
6020 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
6021 }
6022 }
6023
6024 if (SDValue V =
6025 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
6026 return V;
6027
6028 // Detect an interleave shuffle and lower to
6029 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
6030 int EvenSrc, OddSrc;
6031 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
6032 !(NumElts == 2 &&
6033 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
6034 // Extract the halves of the vectors.
6035 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6036
6037 // Recognize if one half is actually undef; the matching above will
6038 // otherwise reuse the even stream for the undef one. This improves
6039 // spread(2) shuffles.
6040 bool LaneIsUndef[2] = { true, true};
6041 for (const auto &[Idx, M] : enumerate(Mask))
6042 LaneIsUndef[Idx % 2] &= (M == -1);
6043
6044 int Size = Mask.size();
6045 SDValue EvenV, OddV;
6046 if (LaneIsUndef[0]) {
6047 EvenV = DAG.getUNDEF(HalfVT);
6048 } else {
6049 assert(EvenSrc >= 0 && "Undef source?");
6050 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
6051 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
6052 }
6053
6054 if (LaneIsUndef[1]) {
6055 OddV = DAG.getUNDEF(HalfVT);
6056 } else {
6057 assert(OddSrc >= 0 && "Undef source?");
6058 OddV = (OddSrc / Size) == 0 ? V1 : V2;
6059 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
6060 }
6061
6062 // Prefer vzip2a if available.
6063 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
6064 if (Subtarget.hasVendorXRivosVizip()) {
6065 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6066 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6067 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6068 }
6069 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6070 }
6071
6072 // Recognize a pattern which can handled via a pair of vslideup/vslidedown
6073 // instructions (in any combination) with masking on the second instruction.
6074 // Also handles masked slides into an identity source, and single slides
6075 // without masking. Avoid matching bit rotates (which are not also element
6076 // rotates) as slide pairs. This is a performance heuristic, not a
6077 // functional check.
6078 std::array<std::pair<int, int>, 2> SrcInfo;
6079 unsigned RotateAmt;
6080 MVT RotateVT;
6081 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6082 (isElementRotate(SrcInfo, NumElts) ||
6083 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6084 SDValue Sources[2];
6085 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6086 int SrcIdx = Info.first;
6087 assert(SrcIdx == 0 || SrcIdx == 1);
6088 SDValue &Src = Sources[SrcIdx];
6089 if (!Src) {
6090 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6091 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6092 }
6093 return Src;
6094 };
6095 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6096 SDValue Passthru) {
6097 auto [TrueMask, VL] = TrueMaskVL;
6098 SDValue SrcV = GetSourceFor(Src);
6099 int SlideAmt = Src.second;
6100 if (SlideAmt == 0) {
6101 // Should never be second operation
6102 assert(Mask == TrueMask);
6103 return SrcV;
6104 }
6105 if (SlideAmt < 0)
6106 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6107 DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6108 RISCVVType::TAIL_AGNOSTIC);
6109 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6110 DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6111 RISCVVType::TAIL_AGNOSTIC);
6112 };
6113
6114 if (SrcInfo[1].first == -1) {
6115 SDValue Res = DAG.getUNDEF(ContainerVT);
6116 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6117 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6118 }
6119
6120 if (Subtarget.hasVendorXRivosVizip()) {
6121 bool TryWiden = false;
6122 unsigned Factor;
6123 if (isZipEven(SrcInfo, Mask, Factor)) {
6124 if (Factor == 1) {
6125 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6126 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6127 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6128 Subtarget);
6129 }
6130 TryWiden = true;
6131 }
6132 if (isZipOdd(SrcInfo, Mask, Factor)) {
6133 if (Factor == 1) {
6134 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6135 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6136 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6137 Subtarget);
6138 }
6139 TryWiden = true;
6140 }
6141 // If we found a widening opportunity which would let us form a
6142 // zipeven or zipodd, use the generic code to widen the shuffle
6143 // and recurse through this logic.
6144 if (TryWiden)
6145 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6146 return V;
6147 }
6148
6149 // Build the mask. Note that vslideup unconditionally preserves elements
6150 // below the slide amount in the destination, and thus those elements are
6151 // undefined in the mask. If the mask ends up all true (or undef), it
6152 // will be folded away by general logic.
6153 SmallVector<SDValue> MaskVals;
6154 for (const auto &[Idx, M] : enumerate(Mask)) {
6155 if (M < 0 ||
6156 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6157 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6158 continue;
6159 }
6160 int Src = M >= (int)NumElts;
6161 int Diff = (int)Idx - (M % NumElts);
6162 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6163 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6164 "Must match exactly one of the two slides");
6165 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6166 }
6167 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6168 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6169 SDValue SelectMask = convertToScalableVector(
6170 ContainerVT.changeVectorElementType(MVT::i1),
6171 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6172
6173 SDValue Res = DAG.getUNDEF(ContainerVT);
6174 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6175 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6176 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6177 }
6178
6179 // Handle any remaining single source shuffles
6180 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6181 if (V2.isUndef()) {
6182 // We might be able to express the shuffle as a bitrotate. But even if we
6183 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6184 // shifts and a vor will have a higher throughput than a vrgather.
6185 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6186 return V;
6187
6188 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6189 return V;
6190
6191 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6192 // is fully covered in interleave(2) above, so it is ignored here.
6193 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6194 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6195 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6196 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6197 unsigned Index;
6198 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6199 MVT NarrowVT =
6200 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6201 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6202 return getWideningSpread(Src, Factor, Index, DL, DAG);
6203 }
6204 }
6205 }
6206
6207 // If only a prefix of the source elements influence a prefix of the
6208 // destination elements, try to see if we can reduce the required LMUL
6209 unsigned MinVLen = Subtarget.getRealMinVLen();
6210 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6211 if (NumElts > MinVLMAX) {
6212 unsigned MaxIdx = 0;
6213 for (auto [I, M] : enumerate(Mask)) {
6214 if (M == -1)
6215 continue;
6216 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6217 }
6218 unsigned NewNumElts =
6219 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6220 if (NewNumElts != NumElts) {
6221 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6222 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6223 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6224 Mask.take_front(NewNumElts));
6225 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6226 }
6227 }
6228
6229 // Before hitting generic lowering fallbacks, try to widen the mask
6230 // to a wider SEW.
6231 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6232 return V;
6233
6234 // Can we generate a vcompress instead of a vrgather? These scale better
6235 // at high LMUL, at the cost of not being able to fold a following select
6236 // into them. The mask constants are also smaller than the index vector
6237 // constants, and thus easier to materialize.
6238 if (isCompressMask(Mask)) {
6239 SmallVector<SDValue> MaskVals(NumElts,
6240 DAG.getConstant(false, DL, XLenVT));
6241 for (auto Idx : Mask) {
6242 if (Idx == -1)
6243 break;
6244 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6245 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6246 }
6247 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6248 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6249 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6250 DAG.getUNDEF(VT));
6251 }
6252
6253 if (VT.getScalarSizeInBits() == 8 &&
6254 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6255 // On such a vector we're unable to use i8 as the index type.
6256 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6257 // may involve vector splitting if we're already at LMUL=8, or our
6258 // user-supplied maximum fixed-length LMUL.
6259 return SDValue();
6260 }
6261
6262 // Base case for the two operand recursion below - handle the worst case
6263 // single source shuffle.
6264 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6265 MVT IndexVT = VT.changeTypeToInteger();
6266 // Since we can't introduce illegal index types at this stage, use i16 and
6267 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6268 // than XLenVT.
6269 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6270 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6271 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6272 }
6273
6274 // If the mask allows, we can do all the index computation in 16 bits. This
6275 // requires less work and less register pressure at high LMUL, and creates
6276 // smaller constants which may be cheaper to materialize.
6277 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6278 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6279 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6280 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6281 }
6282
6283 MVT IndexContainerVT =
6284 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6285
6286 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6287 SmallVector<SDValue> GatherIndicesLHS;
6288 for (int MaskIndex : Mask) {
6289 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6290 GatherIndicesLHS.push_back(IsLHSIndex
6291 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6292 : DAG.getUNDEF(XLenVT));
6293 }
6294 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6295 LHSIndices =
6296 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6297 // At m1 and less, there's no point trying any of the high LMUL splitting
6298 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6299 if (NumElts <= MinVLMAX) {
6300 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6301 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6302 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6303 }
6304
6305 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6306 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6307 auto [InnerTrueMask, InnerVL] =
6308 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6309 int N =
6310 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6311 assert(isPowerOf2_32(N) && N <= 8);
6312
6313 // If we have a locally repeating mask, then we can reuse the first
6314 // register in the index register group for all registers within the
6315 // source register group. TODO: This generalizes to m2, and m4.
6316 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6317 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6318 SDValue Gather = DAG.getUNDEF(ContainerVT);
6319 for (int i = 0; i < N; i++) {
6320 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6321 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6322 SDValue SubVec =
6323 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6324 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6325 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6326 }
6327 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6328 }
6329
6330 // If we have a shuffle which only uses the first register in our source
6331 // register group, and repeats the same index across all spans, we can
6332 // use a single vrgather (and possibly some register moves).
6333 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6334 // which we can do a linear number of shuffles to form an m1 which
6335 // contains all the output elements.
6336 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6337 isSpanSplatShuffle(Mask, MinVLMAX)) {
6338 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6339 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6340 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6341 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6342 SDValue Gather = DAG.getUNDEF(ContainerVT);
6343 for (int i = 0; i < N; i++)
6344 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6345 M1VT.getVectorMinNumElements() * i);
6346 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6347 }
6348
6349 // If we have a shuffle which only uses the first register in our
6350 // source register group, we can do a linear number of m1 vrgathers
6351 // reusing the same source register (but with different indices)
6352 // TODO: This can be generalized for m2 or m4, or for any shuffle
6353 // for which we can do a vslidedown followed by this expansion.
6354 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6355 SDValue SlideAmt =
6356 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6357 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6358 SDValue Gather = DAG.getUNDEF(ContainerVT);
6359 for (int i = 0; i < N; i++) {
6360 if (i != 0)
6361 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6362 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6363 SlideAmt, TrueMask, VL);
6364 SDValue SubIndex =
6365 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6366 SDValue SubVec =
6367 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6368 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6369 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6370 M1VT.getVectorMinNumElements() * i);
6371 }
6372 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6373 }
6374
6375 // Fallback to generic vrgather if we can't find anything better.
6376 // On many machines, this will be O(LMUL^2)
6377 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6378 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6379 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6380 }
6381
6382 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6383 // merged with a second vrgather.
6384 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6385
6386 // Now construct the mask that will be used by the blended vrgather operation.
6387 // Construct the appropriate indices into each vector.
6388 for (int MaskIndex : Mask) {
6389 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6390 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6391 ? MaskIndex : -1);
6392 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6393 }
6394
6395 // If the mask indices are disjoint between the two sources, we can lower it
6396 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6397 // operands may end up being lowered to something cheaper than a vrgather.vv.
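  // For example (illustrative): for <4 x i32>, the mask {0,5,2,7} needs lanes
  // {0,2} of V1 and lanes {1,3} of V2. Because those lane sets don't overlap,
  // a vselect can first merge the sources into W = {V1[0],V2[1],V1[2],V2[3]},
  // after which a single vrgather.vv on W with indices {0,1,2,3} produces the
  // desired result.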
6398 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6399 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6400 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6401 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6402 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6403 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6404 return V;
6405
6406 // Before hitting generic lowering fallbacks, try to widen the mask
6407 // to a wider SEW.
6408 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6409 return V;
6410
6411 // Try to pick a profitable operand order.
6412 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6413 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6414
6415 // Recursively invoke lowering for each operand if we had two
6416 // independent single source shuffles, and then combine the result via a
6417 // vselect. Note that the vselect will likely be folded back into the
6418 // second permute (vrgather, or other) by the post-isel combine.
6419 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6420 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6421
6422 SmallVector<SDValue> MaskVals;
6423 for (int MaskIndex : Mask) {
6424 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6425 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6426 }
6427
6428 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6429 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6430 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6431
6432 if (SwapOps)
6433 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6434 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6435}
6436
6437bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6438  // Only support legal VTs for other shuffles for now.
6439 if (!isTypeLegal(VT))
6440 return false;
6441
6442 // Support splats for any type. These should type legalize well.
6443  if (ShuffleVectorSDNode::isSplatMask(M))
6444    return true;
6445
6446 const unsigned NumElts = M.size();
6447 MVT SVT = VT.getSimpleVT();
6448
6449 // Not for i1 vectors.
6450 if (SVT.getScalarType() == MVT::i1)
6451 return false;
6452
6453 std::array<std::pair<int, int>, 2> SrcInfo;
6454 int Dummy1, Dummy2;
6455 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6456 (::isMaskedSlidePair(M, SrcInfo) &&
6457 isElementRotate(SrcInfo, NumElts)) ||
6458 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6459}
6460
6461// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
6462// the exponent.
6463SDValue
6464RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
6465 SelectionDAG &DAG) const {
6466 MVT VT = Op.getSimpleValueType();
6467 unsigned EltSize = VT.getScalarSizeInBits();
6468 SDValue Src = Op.getOperand(0);
6469 SDLoc DL(Op);
6470 MVT ContainerVT = VT;
6471
6472 SDValue Mask, VL;
6473 if (Op->isVPOpcode()) {
6474 Mask = Op.getOperand(1);
6475 if (VT.isFixedLengthVector())
6476 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6477 Subtarget);
6478 VL = Op.getOperand(2);
6479 }
6480
6481  // Choose an FP type that can represent the value exactly if possible.
6482  // Otherwise, use a round-towards-zero conversion so the exponent of the result is still correct.
6483 // TODO: Use f16 for i8 when possible?
6484 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
6485 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
6486 FloatEltVT = MVT::f32;
6487 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
6488
6489 // Legal types should have been checked in the RISCVTargetLowering
6490 // constructor.
6491 // TODO: Splitting may make sense in some cases.
6492 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
6493 "Expected legal float type!");
6494
6495 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
6496 // The trailing zero count is equal to log2 of this single bit value.
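  // For example (illustrative, 32-bit elements): x = 0b0110'1000 gives
  // x & -x = 0b0000'1000; converting that single-bit value to f32 yields an
  // exponent field of 127 + 3, so after the bias is subtracted below we get
  // cttz(x) = 3.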
6497 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
6498 SDValue Neg = DAG.getNegative(Src, DL, VT);
6499 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
6500 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
6501 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
6502 Src, Mask, VL);
6503 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
6504 }
6505
6506 // We have a legal FP type, convert to it.
6507 SDValue FloatVal;
6508 if (FloatVT.bitsGT(VT)) {
6509 if (Op->isVPOpcode())
6510 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
6511 else
6512 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
6513 } else {
6514 // Use RTZ to avoid rounding influencing exponent of FloatVal.
6515 if (VT.isFixedLengthVector()) {
6516 ContainerVT = getContainerForFixedLengthVector(VT);
6517 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6518 }
6519 if (!Op->isVPOpcode())
6520 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6521 SDValue RTZRM =
6522 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
6523 MVT ContainerFloatVT =
6524 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
6525 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
6526 Src, Mask, RTZRM, VL);
6527 if (VT.isFixedLengthVector())
6528 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
6529 }
6530 // Bitcast to integer and shift the exponent to the LSB.
6531 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
6532 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
6533 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
6534
6535 SDValue Exp;
6536 // Restore back to original type. Truncation after SRL is to generate vnsrl.
6537 if (Op->isVPOpcode()) {
6538 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
6539 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
6540 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
6541 } else {
6542 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
6543 DAG.getConstant(ShiftAmt, DL, IntVT));
6544 if (IntVT.bitsLT(VT))
6545 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
6546 else if (IntVT.bitsGT(VT))
6547 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
6548 }
6549
6550 // The exponent contains log2 of the value in biased form.
6551 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
6552 // For trailing zeros, we just need to subtract the bias.
6553 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
6554 return DAG.getNode(ISD::SUB, DL, VT, Exp,
6555 DAG.getConstant(ExponentBias, DL, VT));
6556 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
6557 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
6558 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
6559
6560 // For leading zeros, we need to remove the bias and convert from log2 to
6561 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
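  // For example (illustrative, i32 elements with f32): Adjust = 127 + 31 = 158.
  // For x = 1 the exponent field is 127, giving 158 - 127 = 31 = ctlz(1); for
  // x = 2^31 the exponent field is 158, giving 0.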
6562 unsigned Adjust = ExponentBias + (EltSize - 1);
6563 SDValue Res;
6564 if (Op->isVPOpcode())
6565 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
6566 Mask, VL);
6567 else
6568 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
6569
6570  // With a zero input, the above result equals Adjust, which is greater than
6571  // EltSize. Hence, we can use min(Res, EltSize) for CTLZ.
6572 if (Op.getOpcode() == ISD::CTLZ)
6573 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
6574 else if (Op.getOpcode() == ISD::VP_CTLZ)
6575 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
6576 DAG.getConstant(EltSize, DL, VT), Mask, VL);
6577 return Res;
6578}
6579
6580SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6581 SelectionDAG &DAG) const {
6582 SDLoc DL(Op);
6583 MVT XLenVT = Subtarget.getXLenVT();
6584 SDValue Source = Op->getOperand(0);
6585 MVT SrcVT = Source.getSimpleValueType();
6586 SDValue Mask = Op->getOperand(1);
6587 SDValue EVL = Op->getOperand(2);
6588
6589 if (SrcVT.isFixedLengthVector()) {
6590 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
6591 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
6592 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6593 Subtarget);
6594 SrcVT = ContainerVT;
6595 }
6596
6597 // Convert to boolean vector.
6598 if (SrcVT.getScalarType() != MVT::i1) {
6599 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
6600 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
6601 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
6602 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
6603 DAG.getUNDEF(SrcVT), Mask, EVL});
6604 }
6605
6606 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
6607 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6608    // In this case, we can interpret poison as -1, so there is nothing further to do.
6609 return Res;
6610
6611 // Convert -1 to VL.
6612 SDValue SetCC =
6613 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
6614 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
6615 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
6616}
6617
6618// While RVV has alignment restrictions, we should always be able to load as a
6619// legal equivalently-sized byte-typed vector instead. This method is
6620// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
6621// the load is already correctly-aligned, it returns SDValue().
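// For example (illustrative): an underaligned load of <vscale x 2 x i32> can
// be re-expressed as a load of <vscale x 8 x i8> from the same address,
// followed by a bitcast of the loaded value back to <vscale x 2 x i32>.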
6622SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6623 SelectionDAG &DAG) const {
6624 auto *Load = cast<LoadSDNode>(Op);
6625 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6626
6627  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6628                                     Load->getMemoryVT(),
6629 *Load->getMemOperand()))
6630 return SDValue();
6631
6632 SDLoc DL(Op);
6633 MVT VT = Op.getSimpleValueType();
6634 unsigned EltSizeBits = VT.getScalarSizeInBits();
6635 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6636 "Unexpected unaligned RVV load type");
6637 MVT NewVT =
6638 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6639 assert(NewVT.isValid() &&
6640 "Expecting equally-sized RVV vector types to be legal");
6641 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6642 Load->getPointerInfo(), Load->getBaseAlign(),
6643 Load->getMemOperand()->getFlags());
6644 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6645}
6646
6647// While RVV has alignment restrictions, we should always be able to store as a
6648// legal equivalently-sized byte-typed vector instead. This method is
6649// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
6650// returns SDValue() if the store is already correctly aligned.
6651SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6652 SelectionDAG &DAG) const {
6653 auto *Store = cast<StoreSDNode>(Op);
6654 assert(Store && Store->getValue().getValueType().isVector() &&
6655 "Expected vector store");
6656
6657  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6658                                     Store->getMemoryVT(),
6659 *Store->getMemOperand()))
6660 return SDValue();
6661
6662 SDLoc DL(Op);
6663 SDValue StoredVal = Store->getValue();
6664 MVT VT = StoredVal.getSimpleValueType();
6665 unsigned EltSizeBits = VT.getScalarSizeInBits();
6666 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6667 "Unexpected unaligned RVV store type");
6668 MVT NewVT =
6669 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6670 assert(NewVT.isValid() &&
6671 "Expecting equally-sized RVV vector types to be legal");
6672 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6673 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
6674 Store->getPointerInfo(), Store->getBaseAlign(),
6675 Store->getMemOperand()->getFlags());
6676}
6677
6678static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
6679                             const RISCVSubtarget &Subtarget) {
6680 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
6681
6682 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
6683
6684 // All simm32 constants should be handled by isel.
6685  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2, making
6686  // this check redundant, but small immediates are common, so checking them
6687  // here first gives better compile time.
6688 if (isInt<32>(Imm))
6689 return Op;
6690
6691 // We only need to cost the immediate, if constant pool lowering is enabled.
6692 if (!Subtarget.useConstantPoolForLargeInts())
6693 return Op;
6694
6696 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6697 return Op;
6698
6699 // Optimizations below are disabled for opt size. If we're optimizing for
6700 // size, use a constant pool.
6701 if (DAG.shouldOptForSize())
6702 return SDValue();
6703
6704  // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
6705  // do that if it avoids a constant pool, though it requires an extra
6706  // temporary register.
6707  // If we have Zba, we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6708  // the low and high 32 bits are the same and bits 31 and 63 are set.
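  // For example (illustrative): Imm = 0x0000ABCD0000ABCD can be materialized
  // as X = 0xABCD followed by (ADD (SLLI X, 32), X). With Zba,
  // Imm = 0xABCD1234ABCD1234 (low half equals high half, bits 31 and 63 set)
  // can be built as X = sext(0xABCD1234) followed by (ADD_UW X, (SLLI X, 32)).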
6709 unsigned ShiftAmt, AddOpc;
6710 RISCVMatInt::InstSeq SeqLo =
6711 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6712 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6713 return Op;
6714
6715 return SDValue();
6716}
6717
6718SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6719 SelectionDAG &DAG) const {
6720 MVT VT = Op.getSimpleValueType();
6721 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6722
6723 // Can this constant be selected by a Zfa FLI instruction?
6724 bool Negate = false;
6725 int Index = getLegalZfaFPImm(Imm, VT);
6726
6727 // If the constant is negative, try negating.
6728 if (Index < 0 && Imm.isNegative()) {
6729 Index = getLegalZfaFPImm(-Imm, VT);
6730 Negate = true;
6731 }
6732
6733 // If we couldn't find a FLI lowering, fall back to generic code.
6734 if (Index < 0)
6735 return SDValue();
6736
6737  // Emit an FLI+FNEG. We use a custom node to hide it from constant folding.
6738 SDLoc DL(Op);
6739 SDValue Const =
6740 DAG.getNode(RISCVISD::FLI, DL, VT,
6741 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6742 if (!Negate)
6743 return Const;
6744
6745 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6746}
6747
6748static SDValue LowerPREFETCH(SDValue Op, const RISCVSubtarget &Subtarget,
6749                             SelectionDAG &DAG) {
6750
6751 unsigned IsData = Op.getConstantOperandVal(4);
6752
6753  // mips-p8700 only supports data prefetches for now; drop instruction
6754  // prefetches.
6754 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
6755 return Op.getOperand(0);
6756 return Op;
6757}
6758
6759static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6760                                 const RISCVSubtarget &Subtarget) {
6761 SDLoc dl(Op);
6762 AtomicOrdering FenceOrdering =
6763 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6764 SyncScope::ID FenceSSID =
6765 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6766
6767 if (Subtarget.hasStdExtZtso()) {
6768 // The only fence that needs an instruction is a sequentially-consistent
6769 // cross-thread fence.
6770 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6771 FenceSSID == SyncScope::System)
6772 return Op;
6773
6774 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6775 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6776 }
6777
6778 // singlethread fences only synchronize with signal handlers on the same
6779 // thread and thus only need to preserve instruction order, not actually
6780 // enforce memory ordering.
6781 if (FenceSSID == SyncScope::SingleThread)
6782 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6783 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6784
6785 return Op;
6786}
6787
6788SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6789 SelectionDAG &DAG) const {
6790 SDLoc DL(Op);
6791 MVT VT = Op.getSimpleValueType();
6792 MVT XLenVT = Subtarget.getXLenVT();
6793 unsigned Check = Op.getConstantOperandVal(1);
6794 unsigned TDCMask = 0;
6795 if (Check & fcSNan)
6796 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6797 if (Check & fcQNan)
6798 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6799  if (Check & fcPosInf)
6800    TDCMask |= RISCV::FPMASK_Positive_Infinity;
6801  if (Check & fcNegInf)
6802    TDCMask |= RISCV::FPMASK_Negative_Infinity;
6803  if (Check & fcPosNormal)
6804    TDCMask |= RISCV::FPMASK_Positive_Normal;
6805  if (Check & fcNegNormal)
6806    TDCMask |= RISCV::FPMASK_Negative_Normal;
6807  if (Check & fcPosSubnormal)
6808    TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6809  if (Check & fcNegSubnormal)
6810    TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6811  if (Check & fcPosZero)
6812 TDCMask |= RISCV::FPMASK_Positive_Zero;
6813 if (Check & fcNegZero)
6814 TDCMask |= RISCV::FPMASK_Negative_Zero;
6815
6816 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6817
6818 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6819
6820 if (VT.isVector()) {
6821 SDValue Op0 = Op.getOperand(0);
6822 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6823
6824 if (VT.isScalableVector()) {
6825 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6826 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6827 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6828 Mask = Op.getOperand(2);
6829 VL = Op.getOperand(3);
6830 }
6831 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6832 VL, Op->getFlags());
6833 if (IsOneBitMask)
6834 return DAG.getSetCC(DL, VT, FPCLASS,
6835 DAG.getConstant(TDCMask, DL, DstVT),
6836                          ISD::SETEQ);
6837      SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6838 DAG.getConstant(TDCMask, DL, DstVT));
6839 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6840 ISD::SETNE);
6841 }
6842
6843 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6844 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6845 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6846 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6847 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6848 Mask = Op.getOperand(2);
6849 MVT MaskContainerVT =
6850 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6851 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6852 VL = Op.getOperand(3);
6853 }
6854 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6855
6856 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6857 Mask, VL, Op->getFlags());
6858
6859 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6860 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6861 if (IsOneBitMask) {
6862 SDValue VMSEQ =
6863 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6864 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6865 DAG.getUNDEF(ContainerVT), Mask, VL});
6866 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6867 }
6868 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6869 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6870
6871 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6872 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6873 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6874
6875 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6876 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6877 DAG.getUNDEF(ContainerVT), Mask, VL});
6878 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6879 }
6880
6881 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6882 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6883 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6884                             ISD::SETNE);
6885  return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6886}
6887
6888// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6889// operations propagate nans.
6890static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6891                                      const RISCVSubtarget &Subtarget) {
6892 SDLoc DL(Op);
6893 MVT VT = Op.getSimpleValueType();
6894
6895 SDValue X = Op.getOperand(0);
6896 SDValue Y = Op.getOperand(1);
6897
6898 if (!VT.isVector()) {
6899 MVT XLenVT = Subtarget.getXLenVT();
6900
6901    // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6902    // ensures that when one input is a nan, the other will also be a nan,
6903    // allowing the nan to propagate. If both inputs are nan, this swaps the
6904    // inputs, which is harmless.
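    // For example (illustrative): fmaximum(X, Y) with X = NaN and Y an
    // ordinary number: X == X is false, so NewY = X = NaN; Y == Y is true, so
    // NewX stays X = NaN, and fmax(NaN, NaN) returns the canonical NaN, which
    // is the required fmaximum result.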
6905
6906 SDValue NewY = Y;
6907 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6908 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6909 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6910 }
6911
6912 SDValue NewX = X;
6913 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6914 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6915 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6916 }
6917
6918 unsigned Opc =
6919 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6920 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6921 }
6922
6923  // Check for NaNs before converting the fixed-length vectors to scalable.
6924 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6925 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6926
6927 MVT ContainerVT = VT;
6928 if (VT.isFixedLengthVector()) {
6929 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6930 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6931 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6932 }
6933
6934 SDValue Mask, VL;
6935 if (Op->isVPOpcode()) {
6936 Mask = Op.getOperand(2);
6937 if (VT.isFixedLengthVector())
6938 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6939 Subtarget);
6940 VL = Op.getOperand(3);
6941 } else {
6942 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6943 }
6944
6945 SDValue NewY = Y;
6946 if (!XIsNeverNan) {
6947 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6948 {X, X, DAG.getCondCode(ISD::SETOEQ),
6949 DAG.getUNDEF(ContainerVT), Mask, VL});
6950 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6951 DAG.getUNDEF(ContainerVT), VL);
6952 }
6953
6954 SDValue NewX = X;
6955 if (!YIsNeverNan) {
6956 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6957 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6958 DAG.getUNDEF(ContainerVT), Mask, VL});
6959 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6960 DAG.getUNDEF(ContainerVT), VL);
6961 }
6962
6963 unsigned Opc =
6964 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6965 ? RISCVISD::VFMAX_VL
6966 : RISCVISD::VFMIN_VL;
6967 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6968 DAG.getUNDEF(ContainerVT), Mask, VL);
6969 if (VT.isFixedLengthVector())
6970 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6971 return Res;
6972}
6973
6974static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6975                               const RISCVSubtarget &Subtarget) {
6976 bool IsFABS = Op.getOpcode() == ISD::FABS;
6977 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6978 "Wrong opcode for lowering FABS or FNEG.");
6979
6980 MVT XLenVT = Subtarget.getXLenVT();
6981 MVT VT = Op.getSimpleValueType();
6982 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6983
6984 SDLoc DL(Op);
6985 SDValue Fmv =
6986 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6987
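  // The 16-bit masks below are sign-extended to XLEN so they are cheap to
  // materialize; any change to the upper bits is harmless because FMV_H_X
  // only consumes the low 16 bits. FABS clears the sign bit (AND with
  // 0x7FFF), while FNEG flips it (XOR with 0x8000).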
6988 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6989 Mask = Mask.sext(Subtarget.getXLen());
6990
6991 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6992 SDValue Logic =
6993 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6994 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6995}
6996
6997static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
6998                              const RISCVSubtarget &Subtarget) {
6999 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
7000
7001 MVT XLenVT = Subtarget.getXLenVT();
7002 MVT VT = Op.getSimpleValueType();
7003 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7004
7005 SDValue Mag = Op.getOperand(0);
7006 SDValue Sign = Op.getOperand(1);
7007
7008 SDLoc DL(Op);
7009
7010 // Get sign bit into an integer value.
7011 unsigned SignSize = Sign.getValueSizeInBits();
7012 SDValue SignAsInt = [&]() {
7013 if (SignSize == Subtarget.getXLen())
7014 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
7015 switch (SignSize) {
7016 case 16:
7017 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
7018 case 32:
7019 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
7020 case 64: {
7021 assert(XLenVT == MVT::i32 && "Unexpected type");
7022 // Copy the upper word to integer.
7023 SignSize = 32;
7024 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
7025 .getValue(1);
7026 }
7027 default:
7028 llvm_unreachable("Unexpected sign size");
7029 }
7030 }();
7031
7032 // Get the signbit at the right position for MagAsInt.
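  // For example (illustrative): copying the sign from an f32 (sign at bit 31)
  // into an f16 magnitude (sign at bit 15) shifts SignAsInt right by
  // 32 - 16 = 16 so the sign bit lines up with bit 15 before masking.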
7033 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
7034 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
7035 SignAsInt,
7036 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
7037
7038 // Mask the sign bit and any bits above it. The extra bits will be dropped
7039 // when we convert back to FP.
7040 SDValue SignMask = DAG.getConstant(
7041 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
7042 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
7043
7044 // Transform Mag value to integer, and clear the sign bit.
7045 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
7046 SDValue ClearSignMask = DAG.getConstant(
7047 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
7048 SDValue ClearedSign =
7049 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
7050
7051  SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
7052                                   SDNodeFlags::Disjoint);
7053
7054 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
7055}
7056
7057/// Get the RISC-V target-specific VL op for a given SDNode.
7058static unsigned getRISCVVLOp(SDValue Op) {
7059#define OP_CASE(NODE) \
7060 case ISD::NODE: \
7061 return RISCVISD::NODE##_VL;
7062#define VP_CASE(NODE) \
7063 case ISD::VP_##NODE: \
7064 return RISCVISD::NODE##_VL;
7065 // clang-format off
7066 switch (Op.getOpcode()) {
7067 default:
7068 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7069 OP_CASE(ADD)
7070 OP_CASE(SUB)
7071 OP_CASE(MUL)
7072 OP_CASE(MULHS)
7073 OP_CASE(MULHU)
7074 OP_CASE(SDIV)
7075 OP_CASE(SREM)
7076 OP_CASE(UDIV)
7077 OP_CASE(UREM)
7078 OP_CASE(SHL)
7079 OP_CASE(SRA)
7080 OP_CASE(SRL)
7081 OP_CASE(ROTL)
7082 OP_CASE(ROTR)
7083 OP_CASE(BSWAP)
7084 OP_CASE(CTTZ)
7085 OP_CASE(CTLZ)
7086 OP_CASE(CTPOP)
7087 OP_CASE(BITREVERSE)
7088 OP_CASE(SADDSAT)
7089 OP_CASE(UADDSAT)
7090 OP_CASE(SSUBSAT)
7091 OP_CASE(USUBSAT)
7092 OP_CASE(AVGFLOORS)
7093 OP_CASE(AVGFLOORU)
7094 OP_CASE(AVGCEILS)
7095 OP_CASE(AVGCEILU)
7096 OP_CASE(FADD)
7097 OP_CASE(FSUB)
7098 OP_CASE(FMUL)
7099 OP_CASE(FDIV)
7100 OP_CASE(FNEG)
7101 OP_CASE(FABS)
7102 OP_CASE(FCOPYSIGN)
7103 OP_CASE(FSQRT)
7104 OP_CASE(SMIN)
7105 OP_CASE(SMAX)
7106 OP_CASE(UMIN)
7107 OP_CASE(UMAX)
7108 OP_CASE(STRICT_FADD)
7109 OP_CASE(STRICT_FSUB)
7110 OP_CASE(STRICT_FMUL)
7111 OP_CASE(STRICT_FDIV)
7112 OP_CASE(STRICT_FSQRT)
7113 VP_CASE(ADD) // VP_ADD
7114 VP_CASE(SUB) // VP_SUB
7115 VP_CASE(MUL) // VP_MUL
7116 VP_CASE(SDIV) // VP_SDIV
7117 VP_CASE(SREM) // VP_SREM
7118 VP_CASE(UDIV) // VP_UDIV
7119 VP_CASE(UREM) // VP_UREM
7120 VP_CASE(SHL) // VP_SHL
7121 VP_CASE(FADD) // VP_FADD
7122 VP_CASE(FSUB) // VP_FSUB
7123 VP_CASE(FMUL) // VP_FMUL
7124 VP_CASE(FDIV) // VP_FDIV
7125 VP_CASE(FNEG) // VP_FNEG
7126 VP_CASE(FABS) // VP_FABS
7127 VP_CASE(SMIN) // VP_SMIN
7128 VP_CASE(SMAX) // VP_SMAX
7129 VP_CASE(UMIN) // VP_UMIN
7130 VP_CASE(UMAX) // VP_UMAX
7131 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7132 VP_CASE(SETCC) // VP_SETCC
7133 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7134 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7135 VP_CASE(BITREVERSE) // VP_BITREVERSE
7136 VP_CASE(SADDSAT) // VP_SADDSAT
7137 VP_CASE(UADDSAT) // VP_UADDSAT
7138 VP_CASE(SSUBSAT) // VP_SSUBSAT
7139 VP_CASE(USUBSAT) // VP_USUBSAT
7140 VP_CASE(BSWAP) // VP_BSWAP
7141 VP_CASE(CTLZ) // VP_CTLZ
7142 VP_CASE(CTTZ) // VP_CTTZ
7143 VP_CASE(CTPOP) // VP_CTPOP
7144  case ISD::CTLZ_ZERO_UNDEF:
7145  case ISD::VP_CTLZ_ZERO_UNDEF:
7146 return RISCVISD::CTLZ_VL;
7147  case ISD::CTTZ_ZERO_UNDEF:
7148  case ISD::VP_CTTZ_ZERO_UNDEF:
7149 return RISCVISD::CTTZ_VL;
7150 case ISD::FMA:
7151 case ISD::VP_FMA:
7152 return RISCVISD::VFMADD_VL;
7153 case ISD::STRICT_FMA:
7154 return RISCVISD::STRICT_VFMADD_VL;
7155 case ISD::AND:
7156 case ISD::VP_AND:
7157 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7158 return RISCVISD::VMAND_VL;
7159 return RISCVISD::AND_VL;
7160 case ISD::OR:
7161 case ISD::VP_OR:
7162 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7163 return RISCVISD::VMOR_VL;
7164 return RISCVISD::OR_VL;
7165 case ISD::XOR:
7166 case ISD::VP_XOR:
7167 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7168 return RISCVISD::VMXOR_VL;
7169 return RISCVISD::XOR_VL;
7170 case ISD::ANY_EXTEND:
7171 case ISD::ZERO_EXTEND:
7172 return RISCVISD::VZEXT_VL;
7173 case ISD::SIGN_EXTEND:
7174 return RISCVISD::VSEXT_VL;
7175 case ISD::SETCC:
7176 return RISCVISD::SETCC_VL;
7177 case ISD::VSELECT:
7178 return RISCVISD::VMERGE_VL;
7179 case ISD::VP_SELECT:
7180 case ISD::VP_MERGE:
7181 return RISCVISD::VMERGE_VL;
7182 case ISD::VP_SRA:
7183 return RISCVISD::SRA_VL;
7184 case ISD::VP_SRL:
7185 return RISCVISD::SRL_VL;
7186 case ISD::VP_SQRT:
7187 return RISCVISD::FSQRT_VL;
7188 case ISD::VP_SIGN_EXTEND:
7189 return RISCVISD::VSEXT_VL;
7190 case ISD::VP_ZERO_EXTEND:
7191 return RISCVISD::VZEXT_VL;
7192 case ISD::VP_FP_TO_SINT:
7193 return RISCVISD::VFCVT_RTZ_X_F_VL;
7194 case ISD::VP_FP_TO_UINT:
7195 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7196 case ISD::FMINNUM:
7197 case ISD::FMINIMUMNUM:
7198 case ISD::VP_FMINNUM:
7199 return RISCVISD::VFMIN_VL;
7200 case ISD::FMAXNUM:
7201 case ISD::FMAXIMUMNUM:
7202 case ISD::VP_FMAXNUM:
7203 return RISCVISD::VFMAX_VL;
7204 case ISD::LRINT:
7205 case ISD::VP_LRINT:
7206 case ISD::LLRINT:
7207 case ISD::VP_LLRINT:
7208 return RISCVISD::VFCVT_RM_X_F_VL;
7209 }
7210 // clang-format on
7211#undef OP_CASE
7212#undef VP_CASE
7213}
7214
7215static bool isPromotedOpNeedingSplit(SDValue Op,
7216                                     const RISCVSubtarget &Subtarget) {
7217 return (Op.getValueType() == MVT::nxv32f16 &&
7218 (Subtarget.hasVInstructionsF16Minimal() &&
7219 !Subtarget.hasVInstructionsF16())) ||
7220 Op.getValueType() == MVT::nxv32bf16;
7221}
7222
7223SDValue RISCVTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
7224  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7225 SDLoc DL(Op);
7226
7227 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7228 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7229
7230 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7231 if (!Op.getOperand(j).getValueType().isVector()) {
7232 LoOperands[j] = Op.getOperand(j);
7233 HiOperands[j] = Op.getOperand(j);
7234 continue;
7235 }
7236 std::tie(LoOperands[j], HiOperands[j]) =
7237 DAG.SplitVector(Op.getOperand(j), DL);
7238 }
7239
7240 SDValue LoRes =
7241 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7242 SDValue HiRes =
7243 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7244
7245 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7246}
7247
7248SDValue RISCVTargetLowering::SplitVPOp(SDValue Op, SelectionDAG &DAG) const {
7249  assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7250 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7251 SDLoc DL(Op);
7252
7253 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7254 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7255
7256 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7257 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7258 std::tie(LoOperands[j], HiOperands[j]) =
7259 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7260 continue;
7261 }
7262 if (!Op.getOperand(j).getValueType().isVector()) {
7263 LoOperands[j] = Op.getOperand(j);
7264 HiOperands[j] = Op.getOperand(j);
7265 continue;
7266 }
7267 std::tie(LoOperands[j], HiOperands[j]) =
7268 DAG.SplitVector(Op.getOperand(j), DL);
7269 }
7270
7271 SDValue LoRes =
7272 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7273 SDValue HiRes =
7274 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7275
7276 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7277}
7278SDValue RISCVTargetLowering::SplitVectorReductionOp(SDValue Op,
7279                                                    SelectionDAG &DAG) const {
7280  SDLoc DL(Op);
7281
7282 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7283 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7284 auto [EVLLo, EVLHi] =
7285 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7286
7287 SDValue ResLo =
7288 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7289 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
7290 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7291 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7292}
7293
7294SDValue RISCVTargetLowering::SplitStrictFPVectorOp(SDValue Op,
7295                                                   SelectionDAG &DAG) const {
7296 assert(Op->isStrictFPOpcode());
7297
7298 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7299
7300 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7301 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7302
7303 SDLoc DL(Op);
7304
7305 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7306 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7307
7308 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7309 if (!Op.getOperand(j).getValueType().isVector()) {
7310 LoOperands[j] = Op.getOperand(j);
7311 HiOperands[j] = Op.getOperand(j);
7312 continue;
7313 }
7314 std::tie(LoOperands[j], HiOperands[j]) =
7315 DAG.SplitVector(Op.getOperand(j), DL);
7316 }
7317
7318 SDValue LoRes =
7319 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
7320 HiOperands[0] = LoRes.getValue(1);
7321 SDValue HiRes =
7322 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7323
7324 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7325 LoRes.getValue(0), HiRes.getValue(0));
7326 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7327}
7328
7329SDValue
7330RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7331 SelectionDAG &DAG) const {
7332 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7333 "Unexpected bfloat16 load lowering");
7334
7335 SDLoc DL(Op);
7336 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7337 EVT MemVT = LD->getMemoryVT();
7338 SDValue Load = DAG.getExtLoad(
7339 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7340 LD->getBasePtr(),
7341      EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
7342      LD->getMemOperand());
7343  // Use a mask to make the bf16 NaN-boxing valid when we don't have the flh
7344  // instruction. -65536 can be materialized with a single lui, so the mask
7345  // is cheap to build.
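  // For example (illustrative): the bf16 bit pattern 0x3F80 (1.0) is loaded
  // zero-extended as 0x...00003F80; OR-ing in the -65536 mask sets every bit
  // above bit 15, producing the NaN-boxed value 0x...FFFF3F80.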
7346 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7347 SDValue OrSixteenOne =
7348 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7349 SDValue ConvertedResult =
7350 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7351 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7352}
7353
7354SDValue
7355RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7356 SelectionDAG &DAG) const {
7357 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7358 "Unexpected bfloat16 store lowering");
7359
7360 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7361 SDLoc DL(Op);
7362 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7363 Subtarget.getXLenVT(), ST->getValue());
7364 return DAG.getTruncStore(
7365 ST->getChain(), DL, FMV, ST->getBasePtr(),
7366 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7367 ST->getMemOperand());
7368}
7369
7370SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
7371                                            SelectionDAG &DAG) const {
7372 switch (Op.getOpcode()) {
7373 default:
7374    reportFatalInternalError(
7375        "Unimplemented RISCVTargetLowering::LowerOperation Case");
7376 case ISD::PREFETCH:
7377 return LowerPREFETCH(Op, Subtarget, DAG);
7378 case ISD::ATOMIC_FENCE:
7379 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7380 case ISD::GlobalAddress:
7381 return lowerGlobalAddress(Op, DAG);
7382 case ISD::BlockAddress:
7383 return lowerBlockAddress(Op, DAG);
7384 case ISD::ConstantPool:
7385 return lowerConstantPool(Op, DAG);
7386 case ISD::JumpTable:
7387 return lowerJumpTable(Op, DAG);
7388  case ISD::GlobalTLSAddress:
7389    return lowerGlobalTLSAddress(Op, DAG);
7390 case ISD::Constant:
7391 return lowerConstant(Op, DAG, Subtarget);
7392 case ISD::ConstantFP:
7393 return lowerConstantFP(Op, DAG);
7394 case ISD::SELECT:
7395 return lowerSELECT(Op, DAG);
7396 case ISD::BRCOND:
7397 return lowerBRCOND(Op, DAG);
7398 case ISD::VASTART:
7399 return lowerVASTART(Op, DAG);
7400 case ISD::FRAMEADDR:
7401 return lowerFRAMEADDR(Op, DAG);
7402 case ISD::RETURNADDR:
7403 return lowerRETURNADDR(Op, DAG);
7404 case ISD::SHL_PARTS:
7405 return lowerShiftLeftParts(Op, DAG);
7406 case ISD::SRA_PARTS:
7407 return lowerShiftRightParts(Op, DAG, true);
7408 case ISD::SRL_PARTS:
7409 return lowerShiftRightParts(Op, DAG, false);
7410 case ISD::ROTL:
7411 case ISD::ROTR:
7412 if (Op.getValueType().isFixedLengthVector()) {
7413 assert(Subtarget.hasStdExtZvkb());
7414 return lowerToScalableOp(Op, DAG);
7415 }
7416 assert(Subtarget.hasVendorXTHeadBb() &&
7417 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7418 "Unexpected custom legalization");
7419 // XTHeadBb only supports rotate by constant.
7420 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7421 return SDValue();
7422 return Op;
7423 case ISD::BITCAST: {
7424 SDLoc DL(Op);
7425 EVT VT = Op.getValueType();
7426 SDValue Op0 = Op.getOperand(0);
7427 EVT Op0VT = Op0.getValueType();
7428 MVT XLenVT = Subtarget.getXLenVT();
7429 if (Op0VT == MVT::i16 &&
7430 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7431 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7432 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7433 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7434 }
7435 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7436 Subtarget.hasStdExtFOrZfinx()) {
7437 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7438 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7439 }
7440 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7441 Subtarget.hasStdExtDOrZdinx()) {
7442 SDValue Lo, Hi;
7443 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7444 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7445 }
7446
7447 // Consider other scalar<->scalar casts as legal if the types are legal.
7448 // Otherwise expand them.
7449 if (!VT.isVector() && !Op0VT.isVector()) {
7450 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7451 return Op;
7452 return SDValue();
7453 }
7454
7455 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7456 "Unexpected types");
7457
7458 if (VT.isFixedLengthVector()) {
7459 // We can handle fixed length vector bitcasts with a simple replacement
7460 // in isel.
7461 if (Op0VT.isFixedLengthVector())
7462 return Op;
7463 // When bitcasting from scalar to fixed-length vector, insert the scalar
7464 // into a one-element vector of the result type, and perform a vector
7465 // bitcast.
7466 if (!Op0VT.isVector()) {
7467 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7468 if (!isTypeLegal(BVT))
7469 return SDValue();
7470 return DAG.getBitcast(
7471 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7472 }
7473 return SDValue();
7474 }
7475 // Custom-legalize bitcasts from fixed-length vector types to scalar types
7476 // thus: bitcast the vector to a one-element vector type whose element type
7477 // is the same as the result type, and extract the first element.
7478 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
7479 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
7480 if (!isTypeLegal(BVT))
7481 return SDValue();
7482 SDValue BVec = DAG.getBitcast(BVT, Op0);
7483 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
7484 }
7485 return SDValue();
7486 }
7487  case ISD::INTRINSIC_WO_CHAIN:
7488    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7489  case ISD::INTRINSIC_W_CHAIN:
7490    return LowerINTRINSIC_W_CHAIN(Op, DAG);
7491  case ISD::INTRINSIC_VOID:
7492    return LowerINTRINSIC_VOID(Op, DAG);
7493 case ISD::IS_FPCLASS:
7494 return LowerIS_FPCLASS(Op, DAG);
7495 case ISD::BITREVERSE: {
7496 MVT VT = Op.getSimpleValueType();
7497 if (VT.isFixedLengthVector()) {
7498 assert(Subtarget.hasStdExtZvbb());
7499 return lowerToScalableOp(Op, DAG);
7500 }
7501 SDLoc DL(Op);
7502 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
7503 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
7504 // Expand bitreverse to a bswap(rev8) followed by brev8.
7505 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
7506 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
7507 }
7508 case ISD::TRUNCATE:
7509  case ISD::TRUNCATE_SSAT_S:
7510  case ISD::TRUNCATE_USAT_U:
7511    // Only custom-lower vector truncates
7512 if (!Op.getSimpleValueType().isVector())
7513 return Op;
7514 return lowerVectorTruncLike(Op, DAG);
7515 case ISD::ANY_EXTEND:
7516 case ISD::ZERO_EXTEND:
7517 if (Op.getOperand(0).getValueType().isVector() &&
7518 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7519 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
7520 if (Op.getValueType().isScalableVector())
7521 return Op;
7522 return lowerToScalableOp(Op, DAG);
7523 case ISD::SIGN_EXTEND:
7524 if (Op.getOperand(0).getValueType().isVector() &&
7525 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7526 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
7527 if (Op.getValueType().isScalableVector())
7528 return Op;
7529 return lowerToScalableOp(Op, DAG);
7530  case ISD::SPLAT_VECTOR_PARTS:
7531    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
7532  case ISD::INSERT_VECTOR_ELT:
7533    return lowerINSERT_VECTOR_ELT(Op, DAG);
7534  case ISD::EXTRACT_VECTOR_ELT:
7535    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7536 case ISD::SCALAR_TO_VECTOR: {
7537 MVT VT = Op.getSimpleValueType();
7538 SDLoc DL(Op);
7539 SDValue Scalar = Op.getOperand(0);
7540 if (VT.getVectorElementType() == MVT::i1) {
7541 MVT WideVT = VT.changeVectorElementType(MVT::i8);
7542 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
7543 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
7544 }
7545 MVT ContainerVT = VT;
7546 if (VT.isFixedLengthVector())
7547 ContainerVT = getContainerForFixedLengthVector(VT);
7548 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7549
7550 SDValue V;
7551 if (VT.isFloatingPoint()) {
7552 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
7553 DAG.getUNDEF(ContainerVT), Scalar, VL);
7554 } else {
7555 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
7556 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
7557 DAG.getUNDEF(ContainerVT), Scalar, VL);
7558 }
7559 if (VT.isFixedLengthVector())
7560 V = convertFromScalableVector(VT, V, DAG, Subtarget);
7561 return V;
7562 }
7563 case ISD::VSCALE: {
7564 MVT XLenVT = Subtarget.getXLenVT();
7565 MVT VT = Op.getSimpleValueType();
7566 SDLoc DL(Op);
7567 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7568 // We define our scalable vector types for lmul=1 to use a 64 bit known
7569 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
7570 // vscale as VLENB / 8.
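    // For example (illustrative): with VLEN = 128, VLENB = 16 and
    // vscale = 16 / 8 = 2; a request for vscale * 4 is then lowered below as
    // VLENB >> 1 rather than (VLENB >> 3) << 2.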
7571 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
7572 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
7573 reportFatalInternalError("Support for VLEN==32 is incomplete.");
7574 // We assume VLENB is a multiple of 8. We manually choose the best shift
7575 // here because SimplifyDemandedBits isn't always able to simplify it.
7576 uint64_t Val = Op.getConstantOperandVal(0);
7577 if (isPowerOf2_64(Val)) {
7578 uint64_t Log2 = Log2_64(Val);
7579 if (Log2 < 3) {
7580 SDNodeFlags Flags;
7581 Flags.setExact(true);
7582 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7583 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
7584 } else if (Log2 > 3) {
7585 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
7586 DAG.getConstant(Log2 - 3, DL, XLenVT));
7587 }
7588 } else if ((Val % 8) == 0) {
7589 // If the multiplier is a multiple of 8, scale it down to avoid needing
7590 // to shift the VLENB value.
7591 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
7592 DAG.getConstant(Val / 8, DL, XLenVT));
7593 } else {
7594 SDNodeFlags Flags;
7595 Flags.setExact(true);
7596 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7597 DAG.getConstant(3, DL, XLenVT), Flags);
7598 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
7599 DAG.getConstant(Val, DL, XLenVT));
7600 }
7601 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7602 }
7603 case ISD::FPOWI: {
7604 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
7605 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
7606 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
7607 Op.getOperand(1).getValueType() == MVT::i32) {
7608 SDLoc DL(Op);
7609 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7610 SDValue Powi =
7611 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
7612 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
7613 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7614 }
7615 return SDValue();
7616 }
7617 case ISD::FMAXIMUM:
7618 case ISD::FMINIMUM:
7619 if (isPromotedOpNeedingSplit(Op, Subtarget))
7620 return SplitVectorOp(Op, DAG);
7621 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7622 case ISD::FP_EXTEND:
7623 case ISD::FP_ROUND:
7624 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7625  case ISD::STRICT_FP_ROUND:
7626  case ISD::STRICT_FP_EXTEND:
7627    return lowerStrictFPExtendOrRoundLike(Op, DAG);
7628 case ISD::SINT_TO_FP:
7629 case ISD::UINT_TO_FP:
7630 if (Op.getValueType().isVector() &&
7631 ((Op.getValueType().getScalarType() == MVT::f16 &&
7632 (Subtarget.hasVInstructionsF16Minimal() &&
7633 !Subtarget.hasVInstructionsF16())) ||
7634 Op.getValueType().getScalarType() == MVT::bf16)) {
7635 if (isPromotedOpNeedingSplit(Op, Subtarget))
7636 return SplitVectorOp(Op, DAG);
7637 // int -> f32
7638 SDLoc DL(Op);
7639 MVT NVT =
7640 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7641 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7642 // f32 -> [b]f16
7643 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7644 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7645 }
7646 [[fallthrough]];
7647 case ISD::FP_TO_SINT:
7648 case ISD::FP_TO_UINT:
7649 if (SDValue Op1 = Op.getOperand(0);
7650 Op1.getValueType().isVector() &&
7651 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7652 (Subtarget.hasVInstructionsF16Minimal() &&
7653 !Subtarget.hasVInstructionsF16())) ||
7654 Op1.getValueType().getScalarType() == MVT::bf16)) {
7655 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7656 return SplitVectorOp(Op, DAG);
7657 // [b]f16 -> f32
7658 SDLoc DL(Op);
7659 MVT NVT = MVT::getVectorVT(MVT::f32,
7660 Op1.getValueType().getVectorElementCount());
7661 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7662 // f32 -> int
7663 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
7664 }
7665 [[fallthrough]];
7666  case ISD::STRICT_FP_TO_SINT:
7667  case ISD::STRICT_FP_TO_UINT:
7668  case ISD::STRICT_SINT_TO_FP:
7669  case ISD::STRICT_UINT_TO_FP: {
7670    // RVV can only do fp<->int conversions to types half or double the size
7671    // of the source. We custom-lower any conversions that do two hops into
7672    // sequences.
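    // For example (illustrative): <vscale x 4 x i8> -> <vscale x 4 x f32> is
    // first widened with a sign/zero extend to <vscale x 4 x i16>, and the
    // remaining i16 -> f32 step is a single widening conversion RVV can do
    // directly.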
7673 MVT VT = Op.getSimpleValueType();
7674 if (VT.isScalarInteger())
7675 return lowerFP_TO_INT(Op, DAG, Subtarget);
7676 bool IsStrict = Op->isStrictFPOpcode();
7677 SDValue Src = Op.getOperand(0 + IsStrict);
7678 MVT SrcVT = Src.getSimpleValueType();
7679 if (SrcVT.isScalarInteger())
7680 return lowerINT_TO_FP(Op, DAG, Subtarget);
7681 if (!VT.isVector())
7682 return Op;
7683 SDLoc DL(Op);
7684 MVT EltVT = VT.getVectorElementType();
7685 MVT SrcEltVT = SrcVT.getVectorElementType();
7686 unsigned EltSize = EltVT.getSizeInBits();
7687 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
7688 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
7689 "Unexpected vector element types");
7690
7691 bool IsInt2FP = SrcEltVT.isInteger();
7692 // Widening conversions
7693 if (EltSize > (2 * SrcEltSize)) {
7694 if (IsInt2FP) {
7695 // Do a regular integer sign/zero extension then convert to float.
7696        MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
7697                                      VT.getVectorElementCount());
7698        unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
7699                              Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
7700                                 ? ISD::ZERO_EXTEND
7701                                 : ISD::SIGN_EXTEND;
7702        SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
7703 if (IsStrict)
7704 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
7705 Op.getOperand(0), Ext);
7706 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
7707 }
7708 // FP2Int
7709 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
7710 // Do one doubling fp_extend then complete the operation by converting
7711 // to int.
7712 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7713 if (IsStrict) {
7714 auto [FExt, Chain] =
7715 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
7716 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
7717 }
7718 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7719 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7720 }
7721
7722 // Narrowing conversions
7723 if (SrcEltSize > (2 * EltSize)) {
7724 if (IsInt2FP) {
7725 // One narrowing int_to_fp, then an fp_round.
7726 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7727 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7728 if (IsStrict) {
7729 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7730 DAG.getVTList(InterimFVT, MVT::Other),
7731 Op.getOperand(0), Src);
7732 SDValue Chain = Int2FP.getValue(1);
7733 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7734 }
7735 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7736 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7737 }
7738 // FP2Int
7739 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7740 // representable by the integer, the result is poison.
7741      MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7742                                    VT.getVectorElementCount());
7743 if (IsStrict) {
7744 SDValue FP2Int =
7745 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7746 Op.getOperand(0), Src);
7747 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7748 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7749 }
7750 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7751 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7752 }
7753
7754 // Scalable vectors can exit here. Patterns will handle equally-sized
7755 // conversions halving/doubling ones.
7756 if (!VT.isFixedLengthVector())
7757 return Op;
7758
7759 // For fixed-length vectors we lower to a custom "VL" node.
7760 unsigned RVVOpc = 0;
7761 switch (Op.getOpcode()) {
7762 default:
7763 llvm_unreachable("Impossible opcode");
7764 case ISD::FP_TO_SINT:
7765 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7766 break;
7767 case ISD::FP_TO_UINT:
7768 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7769 break;
7770 case ISD::SINT_TO_FP:
7771 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7772 break;
7773 case ISD::UINT_TO_FP:
7774 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7775 break;
7776    case ISD::STRICT_FP_TO_SINT:
7777      RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7778      break;
7779    case ISD::STRICT_FP_TO_UINT:
7780      RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7781      break;
7782    case ISD::STRICT_SINT_TO_FP:
7783      RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7784      break;
7785    case ISD::STRICT_UINT_TO_FP:
7786      RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7787      break;
7788 }
7789
7790 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7791 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7792 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7793 "Expected same element count");
7794
7795 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7796
7797 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7798 if (IsStrict) {
7799 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7800 Op.getOperand(0), Src, Mask, VL);
7801 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7802 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7803 }
7804 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7805 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7806 }
7807  case ISD::FP_TO_SINT_SAT:
7808  case ISD::FP_TO_UINT_SAT:
7809    return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7810 case ISD::FP_TO_BF16: {
7811 // Custom lower to ensure the libcall return is passed in an FPR on hard
7812 // float ABIs.
7813 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7814 SDLoc DL(Op);
7815 MakeLibCallOptions CallOptions;
7816 RTLIB::Libcall LC =
7817 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7818 SDValue Res =
7819 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7820 if (Subtarget.is64Bit())
7821 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7822 return DAG.getBitcast(MVT::i32, Res);
7823 }
7824 case ISD::BF16_TO_FP: {
7825 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7826 MVT VT = Op.getSimpleValueType();
7827 SDLoc DL(Op);
7828 Op = DAG.getNode(
7829 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7830 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7831 SDValue Res = Subtarget.is64Bit()
7832 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7833 : DAG.getBitcast(MVT::f32, Op);
7834 // fp_extend if the target VT is bigger than f32.
7835 if (VT != MVT::f32)
7836 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7837 return Res;
7838 }
7839 case ISD::STRICT_FP_TO_FP16:
7840 case ISD::FP_TO_FP16: {
7841 // Custom lower to ensure the libcall return is passed in an FPR on hard
7842 // float ABIs.
7843 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7844 SDLoc DL(Op);
7845 MakeLibCallOptions CallOptions;
7846 bool IsStrict = Op->isStrictFPOpcode();
7847 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7848 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7849 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7850 SDValue Res;
7851 std::tie(Res, Chain) =
7852 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7853 if (Subtarget.is64Bit())
7854 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7855 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7856 if (IsStrict)
7857 return DAG.getMergeValues({Result, Chain}, DL);
7858 return Result;
7859 }
7860 case ISD::STRICT_FP16_TO_FP:
7861 case ISD::FP16_TO_FP: {
7862 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7863 // float ABIs.
7864 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7865 SDLoc DL(Op);
7866 MakeLibCallOptions CallOptions;
7867 bool IsStrict = Op->isStrictFPOpcode();
7868 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7869 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7870 SDValue Arg = Subtarget.is64Bit()
7871 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7872 : DAG.getBitcast(MVT::f32, Op0);
7873 SDValue Res;
7874 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7875 CallOptions, DL, Chain);
7876 if (IsStrict)
7877 return DAG.getMergeValues({Res, Chain}, DL);
7878 return Res;
7879 }
7880 case ISD::FTRUNC:
7881 case ISD::FCEIL:
7882 case ISD::FFLOOR:
7883 case ISD::FNEARBYINT:
7884 case ISD::FRINT:
7885 case ISD::FROUND:
7886 case ISD::FROUNDEVEN:
7887 if (isPromotedOpNeedingSplit(Op, Subtarget))
7888 return SplitVectorOp(Op, DAG);
7889 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7890 case ISD::LRINT:
7891 case ISD::LLRINT:
7892 case ISD::LROUND:
7893 case ISD::LLROUND: {
7894 if (Op.getValueType().isVector())
7895 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
7896 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7897 "Unexpected custom legalisation");
7898 SDLoc DL(Op);
7899 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7900 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7901 }
7902 case ISD::STRICT_LRINT:
7903 case ISD::STRICT_LLRINT:
7904 case ISD::STRICT_LROUND:
7905 case ISD::STRICT_LLROUND: {
7906 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7907 "Unexpected custom legalisation");
7908 SDLoc DL(Op);
7909 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7910 {Op.getOperand(0), Op.getOperand(1)});
7911 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7912 {Ext.getValue(1), Ext.getValue(0)});
7913 }
7914 case ISD::VECREDUCE_ADD:
7915 case ISD::VECREDUCE_UMAX:
7916 case ISD::VECREDUCE_SMAX:
7917 case ISD::VECREDUCE_UMIN:
7918 case ISD::VECREDUCE_SMIN:
7919 return lowerVECREDUCE(Op, DAG);
7920 case ISD::VECREDUCE_AND:
7921 case ISD::VECREDUCE_OR:
7922 case ISD::VECREDUCE_XOR:
7923 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7924 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7925 return lowerVECREDUCE(Op, DAG);
7926 case ISD::VECREDUCE_FADD:
7927 case ISD::VECREDUCE_SEQ_FADD:
7928 case ISD::VECREDUCE_FMIN:
7929 case ISD::VECREDUCE_FMAX:
7930 case ISD::VECREDUCE_FMAXIMUM:
7931 case ISD::VECREDUCE_FMINIMUM:
7932 return lowerFPVECREDUCE(Op, DAG);
7933 case ISD::VP_REDUCE_ADD:
7934 case ISD::VP_REDUCE_UMAX:
7935 case ISD::VP_REDUCE_SMAX:
7936 case ISD::VP_REDUCE_UMIN:
7937 case ISD::VP_REDUCE_SMIN:
7938 case ISD::VP_REDUCE_FADD:
7939 case ISD::VP_REDUCE_SEQ_FADD:
7940 case ISD::VP_REDUCE_FMIN:
7941 case ISD::VP_REDUCE_FMAX:
7942 case ISD::VP_REDUCE_FMINIMUM:
7943 case ISD::VP_REDUCE_FMAXIMUM:
7944 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7945 return SplitVectorReductionOp(Op, DAG);
7946 return lowerVPREDUCE(Op, DAG);
7947 case ISD::VP_REDUCE_AND:
7948 case ISD::VP_REDUCE_OR:
7949 case ISD::VP_REDUCE_XOR:
7950 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7951 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7952 return lowerVPREDUCE(Op, DAG);
7953 case ISD::VP_CTTZ_ELTS:
7954 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7955 return lowerVPCttzElements(Op, DAG);
7956 case ISD::UNDEF: {
7957 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7958 return convertFromScalableVector(Op.getSimpleValueType(),
7959 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7960 }
7961 case ISD::INSERT_SUBVECTOR:
7962 return lowerINSERT_SUBVECTOR(Op, DAG);
7963 case ISD::EXTRACT_SUBVECTOR:
7964 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7965 case ISD::VECTOR_DEINTERLEAVE:
7966 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7967 case ISD::VECTOR_INTERLEAVE:
7968 return lowerVECTOR_INTERLEAVE(Op, DAG);
7969 case ISD::STEP_VECTOR:
7970 return lowerSTEP_VECTOR(Op, DAG);
7971 case ISD::VECTOR_REVERSE:
7972 return lowerVECTOR_REVERSE(Op, DAG);
7973 case ISD::VECTOR_SPLICE:
7974 return lowerVECTOR_SPLICE(Op, DAG);
7975 case ISD::BUILD_VECTOR: {
7976 MVT VT = Op.getSimpleValueType();
7977 MVT EltVT = VT.getVectorElementType();
7978 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
7979 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
7980 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7981 }
7982 case ISD::SPLAT_VECTOR: {
7983 MVT VT = Op.getSimpleValueType();
7984 MVT EltVT = VT.getVectorElementType();
7985 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7986 EltVT == MVT::bf16) {
7987 SDLoc DL(Op);
7988 SDValue Elt;
7989 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7990 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7991 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7992 Op.getOperand(0));
7993 else
7994 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7995 MVT IVT = VT.changeVectorElementType(MVT::i16);
7996 return DAG.getNode(ISD::BITCAST, DL, VT,
7997 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7998 }
7999
8000 if (EltVT == MVT::i1)
8001 return lowerVectorMaskSplat(Op, DAG);
8002 return SDValue();
8003 }
8004 case ISD::VECTOR_SHUFFLE:
8005 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8006 case ISD::CONCAT_VECTORS: {
8007 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
8008 // better than going through the stack, as the default expansion does.
8009 SDLoc DL(Op);
8010 MVT VT = Op.getSimpleValueType();
8011 MVT ContainerVT = VT;
8012 if (VT.isFixedLengthVector())
8013 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
8014
8015 // Recursively split concat_vectors with more than 2 operands:
8016 //
8017 // concat_vector op1, op2, op3, op4
8018 // ->
8019 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
8020 //
8021 // This reduces the length of the chain of vslideups and allows us to
8022 // perform the vslideups at a smaller LMUL, limited to MF2.
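// Illustrative example: an 8-operand concat becomes a balanced tree,
//   concat (concat (concat a, b), (concat c, d)),
//          (concat (concat e, f), (concat g, h))
// so the inner vslideups operate on the smaller intermediate types instead of
// all of them running at the full result width.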
8023 if (Op.getNumOperands() > 2 &&
8024 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
8025 MVT HalfVT = VT.getHalfNumVectorElementsVT();
8026 assert(isPowerOf2_32(Op.getNumOperands()));
8027 size_t HalfNumOps = Op.getNumOperands() / 2;
8028 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8029 Op->ops().take_front(HalfNumOps));
8030 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8031 Op->ops().drop_front(HalfNumOps));
8032 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8033 }
8034
8035 unsigned NumOpElts =
8036 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
8037 SDValue Vec = DAG.getUNDEF(VT);
8038 for (const auto &OpIdx : enumerate(Op->ops())) {
8039 SDValue SubVec = OpIdx.value();
8040 // Don't insert undef subvectors.
8041 if (SubVec.isUndef())
8042 continue;
8043 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
8044 }
8045 return Vec;
8046 }
8047 case ISD::LOAD: {
8048 auto *Load = cast<LoadSDNode>(Op);
8049 EVT VT = Load->getValueType(0);
8050 if (VT == MVT::f64) {
8051 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8052 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8053
8054 // Replace a double precision load with two i32 loads and a BuildPairF64.
8055 SDLoc DL(Op);
8056 SDValue BasePtr = Load->getBasePtr();
8057 SDValue Chain = Load->getChain();
8058
8059 SDValue Lo =
8060 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
8061 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8062 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8063 SDValue Hi = DAG.getLoad(
8064 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
8065 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8066 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8067 Hi.getValue(1));
8068
8069 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8070 return DAG.getMergeValues({Pair, Chain}, DL);
8071 }
8072
8073 if (VT == MVT::bf16)
8074 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8075
8076 // Handle normal vector tuple load.
8077 if (VT.isRISCVVectorTuple()) {
8078 SDLoc DL(Op);
8079 MVT XLenVT = Subtarget.getXLenVT();
8080 unsigned NF = VT.getRISCVVectorTupleNumFields();
8081 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8082 unsigned NumElts = Sz / (NF * 8);
8083 int Log2LMUL = Log2_64(NumElts) - 3;
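// Worked example (illustrative): for a 2-field tuple with a known-minimum
// size of 128 bits, NumElts = 128 / (2 * 8) = 8, so each field is loaded as
// nxv8i8 (Log2LMUL = 0) and the per-field pointer stride below is VLENB << 0,
// i.e. one vector register.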
8084
8085 auto Flag = SDNodeFlags();
8086 Flag.setNoUnsignedWrap(true);
8087 SDValue Ret = DAG.getUNDEF(VT);
8088 SDValue BasePtr = Load->getBasePtr();
8089 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8090 VROffset =
8091 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8092 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8093 SmallVector<SDValue, 8> OutChains;
8094
8095 // Load NF vector registers and combine them to a vector tuple.
8096 for (unsigned i = 0; i < NF; ++i) {
8097 SDValue LoadVal = DAG.getLoad(
8098 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8099 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8100 OutChains.push_back(LoadVal.getValue(1));
8101 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8102 DAG.getTargetConstant(i, DL, MVT::i32));
8103 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8104 }
8105 return DAG.getMergeValues(
8106 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8107 }
8108
8109 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8110 return V;
8111 if (Op.getValueType().isFixedLengthVector())
8112 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8113 return Op;
8114 }
8115 case ISD::STORE: {
8116 auto *Store = cast<StoreSDNode>(Op);
8117 SDValue StoredVal = Store->getValue();
8118 EVT VT = StoredVal.getValueType();
8119 if (VT == MVT::f64) {
8120 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8121 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8122
8123 // Replace a double precision store with a SplitF64 and i32 stores.
8124 SDLoc DL(Op);
8125 SDValue BasePtr = Store->getBasePtr();
8126 SDValue Chain = Store->getChain();
8127 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8128 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8129
8130 SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
8131 Store->getPointerInfo(), Store->getBaseAlign(),
8132 Store->getMemOperand()->getFlags());
8133 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8134 SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
8135 Store->getPointerInfo().getWithOffset(4),
8136 Store->getBaseAlign(),
8137 Store->getMemOperand()->getFlags());
8138 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8139 }
8140 if (VT == MVT::i64) {
8141 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8142 "Unexpected custom legalisation");
8143 if (Store->isTruncatingStore())
8144 return SDValue();
8145
8146 if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
8147 return SDValue();
8148
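// With Zilsd the aligned i64 store becomes a single paired store: the value
// is split into its low/high i32 halves below and fed to the SD_RV32 node,
// while the early returns above leave truncating or under-aligned stores to
// the generic handling.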
8149 SDLoc DL(Op);
8150 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8151 DAG.getTargetConstant(0, DL, MVT::i32));
8152 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8153 DAG.getTargetConstant(1, DL, MVT::i32));
8154
8155 return DAG.getMemIntrinsicNode(
8156 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8157 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8158 Store->getMemOperand());
8159 }
8160
8161 if (VT == MVT::bf16)
8162 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8163
8164 // Handle normal vector tuple store.
8165 if (VT.isRISCVVectorTuple()) {
8166 SDLoc DL(Op);
8167 MVT XLenVT = Subtarget.getXLenVT();
8168 unsigned NF = VT.getRISCVVectorTupleNumFields();
8169 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8170 unsigned NumElts = Sz / (NF * 8);
8171 int Log2LMUL = Log2_64(NumElts) - 3;
8172
8173 auto Flag = SDNodeFlags();
8174 Flag.setNoUnsignedWrap(true);
8175 SDValue Ret;
8176 SDValue Chain = Store->getChain();
8177 SDValue BasePtr = Store->getBasePtr();
8178 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8179 VROffset =
8180 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8181 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8182
8183 // Extract subregisters in a vector tuple and store them individually.
8184 for (unsigned i = 0; i < NF; ++i) {
8185 auto Extract =
8186 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8187 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8188 DAG.getTargetConstant(i, DL, MVT::i32));
8189 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8190 MachinePointerInfo(Store->getAddressSpace()),
8191 Store->getBaseAlign(),
8192 Store->getMemOperand()->getFlags());
8193 Chain = Ret.getValue(0);
8194 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8195 }
8196 return Ret;
8197 }
8198
8199 if (auto V = expandUnalignedRVVStore(Op, DAG))
8200 return V;
8201 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8202 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8203 return Op;
8204 }
8205 case ISD::MLOAD:
8206 case ISD::VP_LOAD:
8207 return lowerMaskedLoad(Op, DAG);
8208 case ISD::VP_LOAD_FF:
8209 return lowerLoadFF(Op, DAG);
8210 case ISD::MSTORE:
8211 case ISD::VP_STORE:
8212 return lowerMaskedStore(Op, DAG);
8213 case ISD::VECTOR_COMPRESS:
8214 return lowerVectorCompress(Op, DAG);
8215 case ISD::SELECT_CC: {
8216 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8217 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8218 // into separate SETCC+SELECT just like LegalizeDAG.
8219 SDValue Tmp1 = Op.getOperand(0);
8220 SDValue Tmp2 = Op.getOperand(1);
8221 SDValue True = Op.getOperand(2);
8222 SDValue False = Op.getOperand(3);
8223 EVT VT = Op.getValueType();
8224 SDValue CC = Op.getOperand(4);
8225 EVT CmpVT = Tmp1.getValueType();
8226 EVT CCVT =
8227 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8228 SDLoc DL(Op);
8229 SDValue Cond =
8230 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8231 return DAG.getSelect(DL, VT, Cond, True, False);
8232 }
8233 case ISD::SETCC: {
8234 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8235 if (OpVT.isScalarInteger()) {
8236 MVT VT = Op.getSimpleValueType();
8237 SDValue LHS = Op.getOperand(0);
8238 SDValue RHS = Op.getOperand(1);
8239 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8240 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8241 "Unexpected CondCode");
8242
8243 SDLoc DL(Op);
8244
8245 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8246 // convert this to the equivalent of (set(u)ge X, C+1) by using
8247 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8248 // in a register.
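// Illustrative example: (setgt X, 5) becomes (xori (slti X, 6), 1) and
// (setugt X, 5) becomes (xori (sltiu X, 6), 1), so the constant folds into
// the compare instead of needing its own materialization.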
8249 if (isa<ConstantSDNode>(RHS)) {
8250 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8251 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8252 // If this is an unsigned compare and the constant is -1, incrementing
8253 // the constant would change behavior. The result should be false.
8254 if (CCVal == ISD::SETUGT && Imm == -1)
8255 return DAG.getConstant(0, DL, VT);
8256 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8257 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8258 SDValue SetCC = DAG.getSetCC(
8259 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8260 return DAG.getLogicalNOT(DL, SetCC, VT);
8261 }
8262 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
8263 if (CCVal == ISD::SETUGT && Imm == 2047) {
8264 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8265 DAG.getShiftAmountConstant(11, OpVT, DL));
8266 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8267 ISD::SETNE);
8268 }
8269 }
8270
8271 // Not a constant we could handle, swap the operands and condition code to
8272 // SETLT/SETULT.
8273 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8274 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8275 }
8276
8277 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8278 return SplitVectorOp(Op, DAG);
8279
8280 return lowerToScalableOp(Op, DAG);
8281 }
8282 case ISD::ADD:
8283 case ISD::SUB:
8284 case ISD::MUL:
8285 case ISD::MULHS:
8286 case ISD::MULHU:
8287 case ISD::AND:
8288 case ISD::OR:
8289 case ISD::XOR:
8290 case ISD::SDIV:
8291 case ISD::SREM:
8292 case ISD::UDIV:
8293 case ISD::UREM:
8294 case ISD::BSWAP:
8295 case ISD::CTPOP:
8296 case ISD::VSELECT:
8297 return lowerToScalableOp(Op, DAG);
8298 case ISD::SHL:
8299 case ISD::SRA:
8300 case ISD::SRL:
8301 if (Op.getSimpleValueType().isFixedLengthVector())
8302 return lowerToScalableOp(Op, DAG);
8303 // This can be called for an i32 shift amount that needs to be promoted.
8304 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8305 "Unexpected custom legalisation");
8306 return SDValue();
8307 case ISD::FABS:
8308 case ISD::FNEG:
8309 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8310 return lowerFABSorFNEG(Op, DAG, Subtarget);
8311 [[fallthrough]];
8312 case ISD::FADD:
8313 case ISD::FSUB:
8314 case ISD::FMUL:
8315 case ISD::FDIV:
8316 case ISD::FSQRT:
8317 case ISD::FMA:
8318 case ISD::FMINNUM:
8319 case ISD::FMAXNUM:
8320 case ISD::FMINIMUMNUM:
8321 case ISD::FMAXIMUMNUM:
8322 if (isPromotedOpNeedingSplit(Op, Subtarget))
8323 return SplitVectorOp(Op, DAG);
8324 [[fallthrough]];
8325 case ISD::AVGFLOORS:
8326 case ISD::AVGFLOORU:
8327 case ISD::AVGCEILS:
8328 case ISD::AVGCEILU:
8329 case ISD::SMIN:
8330 case ISD::SMAX:
8331 case ISD::UMIN:
8332 case ISD::UMAX:
8333 case ISD::UADDSAT:
8334 case ISD::USUBSAT:
8335 case ISD::SADDSAT:
8336 case ISD::SSUBSAT:
8337 return lowerToScalableOp(Op, DAG);
8338 case ISD::ABDS:
8339 case ISD::ABDU: {
8340 SDLoc dl(Op);
8341 EVT VT = Op->getValueType(0);
8342 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8343 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8344 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8345
8346 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8347 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
8348 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8349 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8350 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8351 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8352 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8353 }
8354 case ISD::ABS:
8355 case ISD::VP_ABS:
8356 return lowerABS(Op, DAG);
8357 case ISD::CTLZ:
8358 case ISD::CTLZ_ZERO_UNDEF:
8359 case ISD::CTTZ:
8360 case ISD::CTTZ_ZERO_UNDEF:
8361 if (Subtarget.hasStdExtZvbb())
8362 return lowerToScalableOp(Op, DAG);
8363 assert(Op.getOpcode() != ISD::CTTZ);
8364 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8365 case ISD::FCOPYSIGN:
8366 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8367 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8368 if (isPromotedOpNeedingSplit(Op, Subtarget))
8369 return SplitVectorOp(Op, DAG);
8370 return lowerToScalableOp(Op, DAG);
8371 case ISD::STRICT_FADD:
8372 case ISD::STRICT_FSUB:
8373 case ISD::STRICT_FMUL:
8374 case ISD::STRICT_FDIV:
8375 case ISD::STRICT_FSQRT:
8376 case ISD::STRICT_FMA:
8377 if (isPromotedOpNeedingSplit(Op, Subtarget))
8378 return SplitStrictFPVectorOp(Op, DAG);
8379 return lowerToScalableOp(Op, DAG);
8380 case ISD::STRICT_FSETCC:
8381 case ISD::STRICT_FSETCCS:
8382 return lowerVectorStrictFSetcc(Op, DAG);
8383 case ISD::STRICT_FCEIL:
8384 case ISD::STRICT_FRINT:
8385 case ISD::STRICT_FFLOOR:
8386 case ISD::STRICT_FTRUNC:
8387 case ISD::STRICT_FNEARBYINT:
8388 case ISD::STRICT_FROUND:
8389 case ISD::STRICT_FROUNDEVEN:
8390 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8391 case ISD::MGATHER:
8392 case ISD::VP_GATHER:
8393 return lowerMaskedGather(Op, DAG);
8394 case ISD::MSCATTER:
8395 case ISD::VP_SCATTER:
8396 return lowerMaskedScatter(Op, DAG);
8397 case ISD::GET_ROUNDING:
8398 return lowerGET_ROUNDING(Op, DAG);
8399 case ISD::SET_ROUNDING:
8400 return lowerSET_ROUNDING(Op, DAG);
8401 case ISD::GET_FPENV:
8402 return lowerGET_FPENV(Op, DAG);
8403 case ISD::SET_FPENV:
8404 return lowerSET_FPENV(Op, DAG);
8405 case ISD::RESET_FPENV:
8406 return lowerRESET_FPENV(Op, DAG);
8407 case ISD::GET_FPMODE:
8408 return lowerGET_FPMODE(Op, DAG);
8409 case ISD::SET_FPMODE:
8410 return lowerSET_FPMODE(Op, DAG);
8411 case ISD::RESET_FPMODE:
8412 return lowerRESET_FPMODE(Op, DAG);
8413 case ISD::EH_DWARF_CFA:
8414 return lowerEH_DWARF_CFA(Op, DAG);
8415 case ISD::VP_MERGE:
8416 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
8417 return lowerVPMergeMask(Op, DAG);
8418 [[fallthrough]];
8419 case ISD::VP_SELECT:
8420 case ISD::VP_ADD:
8421 case ISD::VP_SUB:
8422 case ISD::VP_MUL:
8423 case ISD::VP_SDIV:
8424 case ISD::VP_UDIV:
8425 case ISD::VP_SREM:
8426 case ISD::VP_UREM:
8427 case ISD::VP_UADDSAT:
8428 case ISD::VP_USUBSAT:
8429 case ISD::VP_SADDSAT:
8430 case ISD::VP_SSUBSAT:
8431 case ISD::VP_LRINT:
8432 case ISD::VP_LLRINT:
8433 return lowerVPOp(Op, DAG);
8434 case ISD::VP_AND:
8435 case ISD::VP_OR:
8436 case ISD::VP_XOR:
8437 return lowerLogicVPOp(Op, DAG);
8438 case ISD::VP_FADD:
8439 case ISD::VP_FSUB:
8440 case ISD::VP_FMUL:
8441 case ISD::VP_FDIV:
8442 case ISD::VP_FNEG:
8443 case ISD::VP_FABS:
8444 case ISD::VP_SQRT:
8445 case ISD::VP_FMA:
8446 case ISD::VP_FMINNUM:
8447 case ISD::VP_FMAXNUM:
8448 case ISD::VP_FCOPYSIGN:
8449 if (isPromotedOpNeedingSplit(Op, Subtarget))
8450 return SplitVPOp(Op, DAG);
8451 [[fallthrough]];
8452 case ISD::VP_SRA:
8453 case ISD::VP_SRL:
8454 case ISD::VP_SHL:
8455 return lowerVPOp(Op, DAG);
8456 case ISD::VP_IS_FPCLASS:
8457 return LowerIS_FPCLASS(Op, DAG);
8458 case ISD::VP_SIGN_EXTEND:
8459 case ISD::VP_ZERO_EXTEND:
8460 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8461 return lowerVPExtMaskOp(Op, DAG);
8462 return lowerVPOp(Op, DAG);
8463 case ISD::VP_TRUNCATE:
8464 return lowerVectorTruncLike(Op, DAG);
8465 case ISD::VP_FP_EXTEND:
8466 case ISD::VP_FP_ROUND:
8467 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8468 case ISD::VP_SINT_TO_FP:
8469 case ISD::VP_UINT_TO_FP:
8470 if (Op.getValueType().isVector() &&
8471 ((Op.getValueType().getScalarType() == MVT::f16 &&
8472 (Subtarget.hasVInstructionsF16Minimal() &&
8473 !Subtarget.hasVInstructionsF16())) ||
8474 Op.getValueType().getScalarType() == MVT::bf16)) {
8475 if (isPromotedOpNeedingSplit(Op, Subtarget))
8476 return SplitVectorOp(Op, DAG);
8477 // int -> f32
8478 SDLoc DL(Op);
8479 MVT NVT =
8480 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8481 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8482 // f32 -> [b]f16
8483 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8484 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8485 }
8486 [[fallthrough]];
8487 case ISD::VP_FP_TO_SINT:
8488 case ISD::VP_FP_TO_UINT:
8489 if (SDValue Op1 = Op.getOperand(0);
8490 Op1.getValueType().isVector() &&
8491 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8492 (Subtarget.hasVInstructionsF16Minimal() &&
8493 !Subtarget.hasVInstructionsF16())) ||
8494 Op1.getValueType().getScalarType() == MVT::bf16)) {
8495 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8496 return SplitVectorOp(Op, DAG);
8497 // [b]f16 -> f32
8498 SDLoc DL(Op);
8499 MVT NVT = MVT::getVectorVT(MVT::f32,
8500 Op1.getValueType().getVectorElementCount());
8501 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8502 // f32 -> int
8503 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
8504 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
8505 }
8506 return lowerVPFPIntConvOp(Op, DAG);
8507 case ISD::VP_SETCC:
8508 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8509 return SplitVPOp(Op, DAG);
8510 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8511 return lowerVPSetCCMaskOp(Op, DAG);
8512 [[fallthrough]];
8513 case ISD::VP_SMIN:
8514 case ISD::VP_SMAX:
8515 case ISD::VP_UMIN:
8516 case ISD::VP_UMAX:
8517 case ISD::VP_BITREVERSE:
8518 case ISD::VP_BSWAP:
8519 return lowerVPOp(Op, DAG);
8520 case ISD::VP_CTLZ:
8521 case ISD::VP_CTLZ_ZERO_UNDEF:
8522 if (Subtarget.hasStdExtZvbb())
8523 return lowerVPOp(Op, DAG);
8524 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8525 case ISD::VP_CTTZ:
8526 case ISD::VP_CTTZ_ZERO_UNDEF:
8527 if (Subtarget.hasStdExtZvbb())
8528 return lowerVPOp(Op, DAG);
8529 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8530 case ISD::VP_CTPOP:
8531 return lowerVPOp(Op, DAG);
8532 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8533 return lowerVPStridedLoad(Op, DAG);
8534 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8535 return lowerVPStridedStore(Op, DAG);
8536 case ISD::VP_FCEIL:
8537 case ISD::VP_FFLOOR:
8538 case ISD::VP_FRINT:
8539 case ISD::VP_FNEARBYINT:
8540 case ISD::VP_FROUND:
8541 case ISD::VP_FROUNDEVEN:
8542 case ISD::VP_FROUNDTOZERO:
8543 if (isPromotedOpNeedingSplit(Op, Subtarget))
8544 return SplitVPOp(Op, DAG);
8545 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8546 case ISD::VP_FMAXIMUM:
8547 case ISD::VP_FMINIMUM:
8548 if (isPromotedOpNeedingSplit(Op, Subtarget))
8549 return SplitVPOp(Op, DAG);
8550 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8551 case ISD::EXPERIMENTAL_VP_SPLICE:
8552 return lowerVPSpliceExperimental(Op, DAG);
8553 case ISD::EXPERIMENTAL_VP_REVERSE:
8554 return lowerVPReverseExperimental(Op, DAG);
8555 case ISD::EXPERIMENTAL_VP_SPLAT:
8556 return lowerVPSplatExperimental(Op, DAG);
8557 case ISD::CLEAR_CACHE: {
8558 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
8559 "llvm.clear_cache only needs custom lower on Linux targets");
8560 SDLoc DL(Op);
8561 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8562 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
8563 Op.getOperand(2), Flags, DL);
8564 }
8565 case ISD::DYNAMIC_STACKALLOC:
8566 return lowerDYNAMIC_STACKALLOC(Op, DAG);
8567 case ISD::INIT_TRAMPOLINE:
8568 return lowerINIT_TRAMPOLINE(Op, DAG);
8569 case ISD::ADJUST_TRAMPOLINE:
8570 return lowerADJUST_TRAMPOLINE(Op, DAG);
8571 case ISD::PARTIAL_REDUCE_UMLA:
8572 case ISD::PARTIAL_REDUCE_SMLA:
8573 case ISD::PARTIAL_REDUCE_SUMLA:
8574 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
8575 }
8576}
8577
8578SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
8579 SDValue Start, SDValue End,
8580 SDValue Flags, SDLoc DL) const {
8581 MakeLibCallOptions CallOptions;
8582 std::pair<SDValue, SDValue> CallResult =
8583 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
8584 {Start, End, Flags}, CallOptions, DL, InChain);
8585
8586 // This function returns void so only the out chain matters.
8587 return CallResult.second;
8588}
8589
8590SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
8591 SelectionDAG &DAG) const {
8592 if (!Subtarget.is64Bit())
8593 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8594
8595 // Create an MCCodeEmitter to encode instructions.
8596 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
8597 assert(TLO);
8598 MCContext &MCCtx = TLO->getContext();
8599
8600 std::unique_ptr<MCCodeEmitter> CodeEmitter(
8601 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
8602
8603 SDValue Root = Op.getOperand(0);
8604 SDValue Trmp = Op.getOperand(1); // trampoline
8605 SDLoc dl(Op);
8606
8607 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
8608
8609 // We store in the trampoline buffer the following instructions and data.
8610 // Offset:
8611 // 0: auipc t2, 0
8612 // 4: ld t0, 24(t2)
8613 // 8: ld t2, 16(t2)
8614 // 12: jalr t0
8615 // 16: <StaticChainOffset>
8616 // 24: <FunctionAddressOffset>
8617 // 32:
8618 // Offset with branch control flow protection enabled:
8619 // 0: lpad <imm20>
8620 // 4: auipc t3, 0
8621 // 8: ld t2, 28(t3)
8622 // 12: ld t3, 20(t3)
8623 // 16: jalr t2
8624 // 20: <StaticChainOffset>
8625 // 28: <FunctionAddressOffset>
8626 // 36:
8627
8628 const bool HasCFBranch =
8629 Subtarget.hasStdExtZicfilp() &&
8631 "cf-protection-branch");
8632 const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
8633 const unsigned StaticChainOffset = StaticChainIdx * 4;
8634 const unsigned FunctionAddressOffset = StaticChainOffset + 8;
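// For the plain layout above this yields StaticChainOffset = 16 and
// FunctionAddressOffset = 24, matching the 16:/24: slots in the comment;
// with branch CFI enabled the offsets shift to 20 and 28.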
8635
8636 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
8637 assert(STI);
8638 auto GetEncoding = [&](const MCInst &MC) {
8639 SmallVector<char, 32> CB;
8640 SmallVector<MCFixup> Fixups;
8641 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
8642 uint32_t Encoding = support::endian::read32le(CB.data());
8643 return Encoding;
8644 };
8645
8646 SmallVector<SDValue> OutChains;
8647
8648 SmallVector<uint32_t> Encodings;
8649 if (!HasCFBranch) {
8650 Encodings.append(
8651 {// auipc t2, 0
8652 // Loads the current PC into t2.
8653 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
8654 // ld t0, 24(t2)
8655 // Loads the function address into t0. Note that we are using offsets
8656 // pc-relative to the first instruction of the trampoline.
8657 GetEncoding(MCInstBuilder(RISCV::LD)
8658 .addReg(RISCV::X5)
8659 .addReg(RISCV::X7)
8660 .addImm(FunctionAddressOffset)),
8661 // ld t2, 16(t2)
8662 // Load the value of the static chain.
8663 GetEncoding(MCInstBuilder(RISCV::LD)
8664 .addReg(RISCV::X7)
8665 .addReg(RISCV::X7)
8666 .addImm(StaticChainOffset)),
8667 // jalr t0
8668 // Jump to the function.
8669 GetEncoding(MCInstBuilder(RISCV::JALR)
8670 .addReg(RISCV::X0)
8671 .addReg(RISCV::X5)
8672 .addImm(0))});
8673 } else {
8674 Encodings.append(
8675 {// auipc x0, <imm20> (lpad <imm20>)
8676 // Landing pad.
8677 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
8678 // auipc t3, 0
8679 // Loads the current PC into t3.
8680 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
8681 // ld t2, (FunctionAddressOffset - 4)(t3)
8682 // Loads the function address into t2. Note that we are using offsets
8683 // pc-relative to the SECOND instruction of the trampoline.
8684 GetEncoding(MCInstBuilder(RISCV::LD)
8685 .addReg(RISCV::X7)
8686 .addReg(RISCV::X28)
8687 .addImm(FunctionAddressOffset - 4)),
8688 // ld t3, (StaticChainOffset - 4)(t3)
8689 // Load the value of the static chain.
8690 GetEncoding(MCInstBuilder(RISCV::LD)
8691 .addReg(RISCV::X28)
8692 .addReg(RISCV::X28)
8693 .addImm(StaticChainOffset - 4)),
8694 // jalr t2
8695 // Software-guarded jump to the function.
8696 GetEncoding(MCInstBuilder(RISCV::JALR)
8697 .addReg(RISCV::X0)
8698 .addReg(RISCV::X7)
8699 .addImm(0))});
8700 }
8701
8702 // Store encoded instructions.
8703 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
8704 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8705 DAG.getConstant(Idx * 4, dl, MVT::i64))
8706 : Trmp;
8707 OutChains.push_back(DAG.getTruncStore(
8708 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
8709 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
8710 }
8711
8712 // Now store the variable part of the trampoline.
8713 SDValue FunctionAddress = Op.getOperand(2);
8714 SDValue StaticChain = Op.getOperand(3);
8715
8716 // Store the given static chain and function pointer in the trampoline buffer.
8717 struct OffsetValuePair {
8718 const unsigned Offset;
8719 const SDValue Value;
8720 SDValue Addr = SDValue(); // Used to cache the address.
8721 } OffsetValues[] = {
8722 {StaticChainOffset, StaticChain},
8723 {FunctionAddressOffset, FunctionAddress},
8724 };
8725 for (auto &OffsetValue : OffsetValues) {
8726 SDValue Addr =
8727 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8728 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
8729 OffsetValue.Addr = Addr;
8730 OutChains.push_back(
8731 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
8732 MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
8733 }
8734
8735 assert(OutChains.size() == StaticChainIdx + 2 &&
8736 "Size of OutChains mismatch");
8737 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
8738
8739 // The end of the trampoline's instructions is the same as the static chain
8740 // address that we computed earlier.
8741 SDValue EndOfTrmp = OffsetValues[0].Addr;
8742
8743 // Call clear cache on the trampoline instructions.
8744 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
8745 Trmp, EndOfTrmp);
8746
8747 return Chain;
8748}
8749
8750SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
8751 SelectionDAG &DAG) const {
8752 if (!Subtarget.is64Bit())
8753 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8754
8755 return Op.getOperand(0);
8756}
8757
8758SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
8759 SelectionDAG &DAG) const {
8760 // Currently, only the vqdot and vqdotu case (from zvqdotq) should be legal.
8761 // TODO: There are many other sub-cases we could potentially lower; are
8762 // any of them worthwhile? E.g., via vredsum, vwredsum, vwwmaccu, etc.
8763 SDLoc DL(Op);
8764 MVT VT = Op.getSimpleValueType();
8765 SDValue Accum = Op.getOperand(0);
8766 assert(Accum.getSimpleValueType() == VT &&
8767 VT.getVectorElementType() == MVT::i32);
8768 SDValue A = Op.getOperand(1);
8769 SDValue B = Op.getOperand(2);
8770 MVT ArgVT = A.getSimpleValueType();
8771 assert(ArgVT == B.getSimpleValueType() &&
8772 ArgVT.getVectorElementType() == MVT::i8);
8773 (void)ArgVT;
8774
8775 // The zvqdotq pseudos are defined with sources and destination both
8776 // being i32. This cast is needed for correctness to avoid incorrect
8777 // .vx matching of i8 splats.
8778 A = DAG.getBitcast(VT, A);
8779 B = DAG.getBitcast(VT, B);
8780
8781 MVT ContainerVT = VT;
8782 if (VT.isFixedLengthVector()) {
8783 ContainerVT = getContainerForFixedLengthVector(VT);
8784 Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
8785 A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
8786 B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
8787 }
8788
8789 unsigned Opc;
8790 switch (Op.getOpcode()) {
8791 case ISD::PARTIAL_REDUCE_SMLA:
8792 Opc = RISCVISD::VQDOT_VL;
8793 break;
8794 case ISD::PARTIAL_REDUCE_UMLA:
8795 Opc = RISCVISD::VQDOTU_VL;
8796 break;
8797 case ISD::PARTIAL_REDUCE_SUMLA:
8798 Opc = RISCVISD::VQDOTSU_VL;
8799 break;
8800 default:
8801 llvm_unreachable("Unexpected opcode");
8802 }
8803 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8804 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
8805 if (VT.isFixedLengthVector())
8806 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
8807 return Res;
8808}
8809
8810 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
8811 SelectionDAG &DAG, unsigned Flags) {
8812 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
8813}
8814
8815 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
8816 SelectionDAG &DAG, unsigned Flags) {
8817 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
8818 Flags);
8819}
8820
8821 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
8822 SelectionDAG &DAG, unsigned Flags) {
8823 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
8824 N->getOffset(), Flags);
8825}
8826
8827 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
8828 SelectionDAG &DAG, unsigned Flags) {
8829 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
8830}
8831
8832 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, SDLoc DL,
8833 EVT Ty, SelectionDAG &DAG) {
8834 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
8835 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8836 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8837 return DAG.getLoad(
8838 Ty, DL, DAG.getEntryNode(), LC,
8839 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8840 }
8841
8842 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, SDLoc DL,
8843 EVT Ty, SelectionDAG &DAG) {
8844 RISCVConstantPoolValue *CPV =
8845 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
8846 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8847 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8848 return DAG.getLoad(
8849 Ty, DL, DAG.getEntryNode(), LC,
8850 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8851 }
8852
8853template <class NodeTy>
8854SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
8855 bool IsLocal, bool IsExternWeak) const {
8856 SDLoc DL(N);
8857 EVT Ty = getPointerTy(DAG.getDataLayout());
8858
8859 // When HWASAN is used and tagging of global variables is enabled
8860 // they should be accessed via the GOT, since the tagged address of a global
8861 // is incompatible with existing code models. This also applies to non-pic
8862 // mode.
8863 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
8864 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8865 if (IsLocal && !Subtarget.allowTaggedGlobals())
8866 // Use PC-relative addressing to access the symbol. This generates the
8867 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
8868 // %pcrel_lo(auipc)).
8869 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8870
8871 // Use PC-relative addressing to access the GOT for this symbol, then load
8872 // the address from the GOT. This generates the pattern (PseudoLGA sym),
8873 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
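// Roughly, the emitted sequence is:
//   auipc a0, %got_pcrel_hi(sym)
//   ld    a0, %pcrel_lo(label)(a0)   (lw on RV32)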
8874 SDValue Load =
8875 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8876 MachineFunction &MF = DAG.getMachineFunction();
8877 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8878 MachinePointerInfo::getGOT(MF),
8879 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8880 MachineMemOperand::MOInvariant,
8881 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8882 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8883 return Load;
8884 }
8885
8886 switch (getTargetMachine().getCodeModel()) {
8887 default:
8888 reportFatalUsageError("Unsupported code model for lowering");
8889 case CodeModel::Small: {
8890 // Generate a sequence for accessing addresses within the first 2 GiB of
8891 // address space.
8892 if (Subtarget.hasVendorXqcili()) {
8893 // Use QC.E.LI to generate the address, as this is easier to relax than
8894 // LUI/ADDI.
8895 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8896 return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
8897 }
8898
8899 // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
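// Roughly:
//   lui  a0, %hi(sym)
//   addi a0, a0, %lo(sym)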
8900 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
8901 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
8902 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8903 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
8904 }
8905 case CodeModel::Medium: {
8906 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8907 if (IsExternWeak) {
8908 // An extern weak symbol may be undefined, i.e. have value 0, which may
8909 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
8910 // symbol. This generates the pattern (PseudoLGA sym), which expands to
8911 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8912 SDValue Load =
8913 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8914 MachineFunction &MF = DAG.getMachineFunction();
8915 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8916 MachinePointerInfo::getGOT(MF),
8917 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8918 MachineMemOperand::MOInvariant,
8919 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8920 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8921 return Load;
8922 }
8923
8924 // Generate a sequence for accessing addresses within any 2GiB range within
8925 // the address space. This generates the pattern (PseudoLLA sym), which
8926 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
8927 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8928 }
8929 case CodeModel::Large: {
8930 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8931 return getLargeGlobalAddress(G, DL, Ty, DAG);
8932
8933 // Use PC-relative addressing for other node types.
8934 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8935 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8936 }
8937 }
8938}
8939
8940SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8941 SelectionDAG &DAG) const {
8942 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8943 assert(N->getOffset() == 0 && "unexpected offset in global node");
8944 const GlobalValue *GV = N->getGlobal();
8945 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8946}
8947
8948SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8949 SelectionDAG &DAG) const {
8950 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8951
8952 return getAddr(N, DAG);
8953}
8954
8955SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8956 SelectionDAG &DAG) const {
8957 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8958
8959 return getAddr(N, DAG);
8960}
8961
8962SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8963 SelectionDAG &DAG) const {
8964 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8965
8966 return getAddr(N, DAG);
8967}
8968
8969SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8970 SelectionDAG &DAG,
8971 bool UseGOT) const {
8972 SDLoc DL(N);
8973 EVT Ty = getPointerTy(DAG.getDataLayout());
8974 const GlobalValue *GV = N->getGlobal();
8975 MVT XLenVT = Subtarget.getXLenVT();
8976
8977 if (UseGOT) {
8978 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8979 // load the address from the GOT and add the thread pointer. This generates
8980 // the pattern (PseudoLA_TLS_IE sym), which expands to
8981 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
8982 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8983 SDValue Load =
8984 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
8985 MachineFunction &MF = DAG.getMachineFunction();
8986 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8987 MachinePointerInfo::getGOT(MF),
8988 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8989 MachineMemOperand::MOInvariant,
8990 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8991 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8992
8993 // Add the thread pointer.
8994 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8995 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
8996 }
8997
8998 // Generate a sequence for accessing the address relative to the thread
8999 // pointer, with the appropriate adjustment for the thread pointer offset.
9000 // This generates the pattern
9001 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
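// which corresponds roughly to:
//   lui  a0, %tprel_hi(sym)
//   add  a0, a0, tp, %tprel_add(sym)
//   addi a0, a0, %tprel_lo(sym)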
9002 SDValue AddrHi =
9003 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
9004 SDValue AddrAdd =
9005 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
9006 SDValue AddrLo =
9007 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
9008
9009 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9010 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9011 SDValue MNAdd =
9012 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
9013 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
9014}
9015
9016SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
9017 SelectionDAG &DAG) const {
9018 SDLoc DL(N);
9019 EVT Ty = getPointerTy(DAG.getDataLayout());
9020 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
9021 const GlobalValue *GV = N->getGlobal();
9022
9023 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9024 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
9025 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
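// Roughly, the resulting sequence is:
//   auipc a0, %tls_gd_pcrel_hi(sym)
//   addi  a0, a0, %pcrel_lo(label)
//   call  __tls_get_addr@plt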
9026 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9027 SDValue Load =
9028 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
9029
9030 // Prepare argument list to generate call.
9031 ArgListTy Args;
9032 Args.emplace_back(Load, CallTy);
9033
9034 // Setup call to __tls_get_addr.
9035 TargetLowering::CallLoweringInfo CLI(DAG);
9036 CLI.setDebugLoc(DL)
9037 .setChain(DAG.getEntryNode())
9038 .setLibCallee(CallingConv::C, CallTy,
9039 DAG.getExternalSymbol("__tls_get_addr", Ty),
9040 std::move(Args));
9041
9042 return LowerCallTo(CLI).first;
9043}
9044
9045SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
9046 SelectionDAG &DAG) const {
9047 SDLoc DL(N);
9048 EVT Ty = getPointerTy(DAG.getDataLayout());
9049 const GlobalValue *GV = N->getGlobal();
9050
9051 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9052 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
9053 //
9054 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
9055 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
9056 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
9057 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
9058 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9059 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
9060}
9061
9062SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
9063 SelectionDAG &DAG) const {
9064 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9065 assert(N->getOffset() == 0 && "unexpected offset in global node");
9066
9067 if (DAG.getTarget().useEmulatedTLS())
9068 return LowerToTLSEmulatedModel(N, DAG);
9069
9070 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
9071
9072 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
9073 CallingConv::GHC)
9074 reportFatalUsageError("In GHC calling convention TLS is not supported");
9075
9076 SDValue Addr;
9077 switch (Model) {
9078 case TLSModel::LocalExec:
9079 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9080 break;
9081 case TLSModel::InitialExec:
9082 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9083 break;
9084 case TLSModel::LocalDynamic:
9085 case TLSModel::GeneralDynamic:
9086 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9087 : getDynamicTLSAddr(N, DAG);
9088 break;
9089 }
9090
9091 return Addr;
9092}
9093
9094// Return true if Val is equal to (setcc LHS, RHS, CC).
9095// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9096// Otherwise, return std::nullopt.
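// Illustrative example: with Val = (setcc a, b, setlt), matchSetCC(a, b,
// setlt, Val) returns true, matchSetCC(a, b, setge, Val) returns false, and
// unrelated operands or condition codes yield std::nullopt.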
9097static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9098 ISD::CondCode CC, SDValue Val) {
9099 assert(Val->getOpcode() == ISD::SETCC);
9100 SDValue LHS2 = Val.getOperand(0);
9101 SDValue RHS2 = Val.getOperand(1);
9102 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9103
9104 if (LHS == LHS2 && RHS == RHS2) {
9105 if (CC == CC2)
9106 return true;
9107 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9108 return false;
9109 } else if (LHS == RHS2 && RHS == LHS2) {
9111 if (CC == CC2)
9112 return true;
9113 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9114 return false;
9115 }
9116
9117 return std::nullopt;
9118}
9119
9120 static bool isSimm12Constant(SDValue V) {
9121 return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
9122}
9123
9124 static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
9125 const RISCVSubtarget &Subtarget) {
9126 SDValue CondV = N->getOperand(0);
9127 SDValue TrueV = N->getOperand(1);
9128 SDValue FalseV = N->getOperand(2);
9129 MVT VT = N->getSimpleValueType(0);
9130 SDLoc DL(N);
9131
9132 if (!Subtarget.hasConditionalMoveFusion()) {
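// (The select condition is a 0/1 boolean here, so -c is either 0 or
// all-ones and c-1 is either all-ones or 0, which is what makes the
// OR/AND folds in this block correct.)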
9133 // (select c, -1, y) -> -c | y
9134 if (isAllOnesConstant(TrueV)) {
9135 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9136 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9137 }
9138 // (select c, y, -1) -> (c-1) | y
9139 if (isAllOnesConstant(FalseV)) {
9140 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9141 DAG.getAllOnesConstant(DL, VT));
9142 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9143 }
9144
9145 const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
9146
9147 // (select c, 0, y) -> (c-1) & y
9148 if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
9149 SDValue Neg =
9150 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
9151 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9152 }
9153 // (select c, y, 0) -> -c & y
9154 if (isNullConstant(FalseV) && (!HasCZero || isSimm12Constant(TrueV))) {
9155 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9156 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9157 }
9158 }
9159
9160 // select c, ~x, x --> xor -c, x
9161 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9162 const APInt &TrueVal = TrueV->getAsAPIntVal();
9163 const APInt &FalseVal = FalseV->getAsAPIntVal();
9164 if (~TrueVal == FalseVal) {
9165 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9166 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9167 }
9168 }
9169
9170 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9171 // when both truev and falsev are also setcc.
9172 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9173 FalseV.getOpcode() == ISD::SETCC) {
9174 SDValue LHS = CondV.getOperand(0);
9175 SDValue RHS = CondV.getOperand(1);
9176 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9177
9178 // (select x, x, y) -> x | y
9179 // (select !x, x, y) -> x & y
9180 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9181 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9182 DAG.getFreeze(FalseV));
9183 }
9184 // (select x, y, x) -> x & y
9185 // (select !x, y, x) -> x | y
9186 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9187 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9188 DAG.getFreeze(TrueV), FalseV);
9189 }
9190 }
9191
9192 return SDValue();
9193}
9194
9195// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9196// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9197 // For now we only consider the transformation profitable if `binOp(c0, c1)` ends up
9198// being `0` or `-1`. In such cases we can replace `select` with `and`.
9199// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9200// than `c0`?
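// Illustrative example: (and (select cond, x, 0), 1) has binOp(c0, c1) =
// (and 0, 1) = 0, so it becomes (select cond, (and x, 1), 0), which can then
// be lowered with a simple mask instead of a branchy select.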
9201static SDValue
9202 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
9203 const RISCVSubtarget &Subtarget) {
9204 if (Subtarget.hasShortForwardBranchOpt())
9205 return SDValue();
9206
9207 unsigned SelOpNo = 0;
9208 SDValue Sel = BO->getOperand(0);
9209 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9210 SelOpNo = 1;
9211 Sel = BO->getOperand(1);
9212 }
9213
9214 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9215 return SDValue();
9216
9217 unsigned ConstSelOpNo = 1;
9218 unsigned OtherSelOpNo = 2;
9219 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9220 ConstSelOpNo = 2;
9221 OtherSelOpNo = 1;
9222 }
9223 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9224 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9225 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9226 return SDValue();
9227
9228 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9229 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9230 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9231 return SDValue();
9232
9233 SDLoc DL(Sel);
9234 EVT VT = BO->getValueType(0);
9235
9236 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9237 if (SelOpNo == 1)
9238 std::swap(NewConstOps[0], NewConstOps[1]);
9239
9240 SDValue NewConstOp =
9241 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9242 if (!NewConstOp)
9243 return SDValue();
9244
9245 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9246 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9247 return SDValue();
9248
9249 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9250 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9251 if (SelOpNo == 1)
9252 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9253 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9254
9255 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9256 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9257 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9258}
9259
9260SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9261 SDValue CondV = Op.getOperand(0);
9262 SDValue TrueV = Op.getOperand(1);
9263 SDValue FalseV = Op.getOperand(2);
9264 SDLoc DL(Op);
9265 MVT VT = Op.getSimpleValueType();
9266 MVT XLenVT = Subtarget.getXLenVT();
9267
9268 // Lower vector SELECTs to VSELECTs by splatting the condition.
9269 if (VT.isVector()) {
9270 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9271 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9272 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9273 }
9274
9275 // Try some other optimizations before falling back to generic lowering.
9276 if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
9277 return V;
9278
9279 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9280 // nodes to implement the SELECT. Performing the lowering here allows for
9281 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9282 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9283 if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
9284
9285 // (select c, t, 0) -> (czero_eqz t, c)
9286 if (isNullConstant(FalseV))
9287 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9288 // (select c, 0, f) -> (czero_nez f, c)
9289 if (isNullConstant(TrueV))
9290 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9291
9292 // Check whether a given operation is a 'NOT'; if so, return the operand
9293 // being inverted.
9294 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9295 using namespace llvm::SDPatternMatch;
9296 SDValue Xor;
9297 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9298 return Xor;
9299 }
9300 return std::nullopt;
9301 };
9302 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9303 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9304 if (TrueV.getOpcode() == ISD::AND &&
9305 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9306 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9307 ? getNotOperand(TrueV.getOperand(1))
9308 : getNotOperand(TrueV.getOperand(0));
9309 if (NotOperand) {
9310 SDValue CMOV =
9311 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9312 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9313 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
9314 }
9315 return DAG.getNode(
9316 ISD::OR, DL, VT, TrueV,
9317 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
9318 }
9319
9320 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
9321 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
9322 if (FalseV.getOpcode() == ISD::AND &&
9323 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
9324 auto NotOperand = (FalseV.getOperand(0) == TrueV)
9325 ? getNotOperand(FalseV.getOperand(1))
9326 : getNotOperand(FalseV.getOperand(0));
9327 if (NotOperand) {
9328 SDValue CMOV =
9329 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
9330 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9331 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
9332 }
9333 return DAG.getNode(
9334 ISD::OR, DL, VT, FalseV,
9335 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
9336 }
9337
9338 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
9339 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
9340 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9341 const APInt &TrueVal = TrueV->getAsAPIntVal();
9342 const APInt &FalseVal = FalseV->getAsAPIntVal();
9343
9344 // Prefer these over Zicond to avoid materializing an immediate:
9345 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
9346 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
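// Illustrative example: (select (x < 0), 3, 7) becomes
//   (x >> (XLEN - 1)) & (3 - 7) + 7,
// i.e. -4 + 7 = 3 when x is negative and 0 + 7 = 7 otherwise.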
9347 if (CondV.getOpcode() == ISD::SETCC &&
9348 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
9349 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9350 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
9351 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
9352 int64_t TrueImm = TrueVal.getSExtValue();
9353 int64_t FalseImm = FalseVal.getSExtValue();
9354 if (CCVal == ISD::SETGT)
9355 std::swap(TrueImm, FalseImm);
9356 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
9357 isInt<12>(TrueImm - FalseImm)) {
9358 SDValue SRA =
9359 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
9360 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
9361 SDValue AND =
9362 DAG.getNode(ISD::AND, DL, VT, SRA,
9363 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
9364 return DAG.getNode(ISD::ADD, DL, VT, AND,
9365 DAG.getSignedConstant(FalseImm, DL, VT));
9366 }
9367 }
9368 }
9369
9370 // Use SHL/ADDI (and possibly XORI) to avoid having to materialize
9371 // a constant in a register.
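// Illustrative example: (select c, 9, 1) has TrueVal - FalseVal = 8, so it
// lowers to (add (shl c, 3), 1), with the 1 folding into an addi.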
9372 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
9373 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
9374 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9375 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
9376 }
9377 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
9378 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
9379 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
9380 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9381 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
9382 }
9383
9384 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
9385 const int DeltaCost = RISCVMatInt::getIntMatCost(
9386 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9387 // Does the addend fold into an ADDI?
9388 if (Addend.isSignedIntN(12))
9389 return DeltaCost;
9390 const int AddendCost = RISCVMatInt::getIntMatCost(
9391 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9392 return AddendCost + DeltaCost;
9393 };
9394 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
9395 getCost(TrueVal - FalseVal, FalseVal);
9396 SDValue LHSVal = DAG.getConstant(
9397 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
9398 SDValue CMOV =
9399 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9400 DL, VT, LHSVal, CondV);
9401 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
9402 }
9403
9404 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
9405 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
9406 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
9407 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
9408 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
9409 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
9410 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
9411 // Fall back to XORI if Const == -0x800
9412 if (RawConstVal == -0x800) {
9413 SDValue XorOp = DAG.getNode(ISD::XOR, DL, VT, RegV, ConstVal);
9414 SDValue CMOV =
9415 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9416 DL, VT, XorOp, CondV);
9417 return DAG.getNode(ISD::XOR, DL, VT, CMOV, ConstVal);
9418 }
9419 // Efficient only if the constant and its negation fit into an `ADDI`.
9420 // Prefer Add/Sub over Xor since they can be compressed for small immediates.
9421 if (isInt<12>(RawConstVal)) {
9422 SDValue SubOp = DAG.getNode(ISD::SUB, DL, VT, RegV, ConstVal);
9423 SDValue CMOV =
9424 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9425 DL, VT, SubOp, CondV);
9426 return DAG.getNode(ISD::ADD, DL, VT, CMOV, ConstVal);
9427 }
9428 }
9429
9430 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
9431 // Unless we have the short forward branch optimization.
9432 if (!Subtarget.hasConditionalMoveFusion())
9433 return DAG.getNode(
9434 ISD::OR, DL, VT,
9435 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
9436         DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
9437         SDNodeFlags::Disjoint);
9438   }
9439
9440 if (Op.hasOneUse()) {
9441 unsigned UseOpc = Op->user_begin()->getOpcode();
9442 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
9443 SDNode *BinOp = *Op->user_begin();
9444 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
9445 DAG, Subtarget)) {
9446 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
9447 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
9448 // may return a constant node and cause crash in lowerSELECT.
9449 if (NewSel.getOpcode() == ISD::SELECT)
9450 return lowerSELECT(NewSel, DAG);
9451 return NewSel;
9452 }
9453 }
9454 }
9455
9456 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
9457 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
9458 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
9459 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
9460 if (FPTV && FPFV) {
9461 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
9462 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
9463 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
9464 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
9465 DAG.getConstant(1, DL, XLenVT));
9466 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
9467 }
9468 }
9469
9470 // If the condition is not an integer SETCC which operates on XLenVT, we need
9471 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
9472 // (select condv, truev, falsev)
9473 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
9474 if (CondV.getOpcode() != ISD::SETCC ||
9475 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
9476 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9477 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
9478
9479 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
9480
9481 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9482 }
9483
9484 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
9485 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
9486 // advantage of the integer compare+branch instructions. i.e.:
9487 // (select (setcc lhs, rhs, cc), truev, falsev)
9488 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
9489 SDValue LHS = CondV.getOperand(0);
9490 SDValue RHS = CondV.getOperand(1);
9491 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9492
9493 // Special case for a select of 2 constants that have a difference of 1.
9494 // Normally this is done by DAGCombine, but if the select is introduced by
9495 // type legalization or op legalization, we miss it. Restricting to SETLT
9496 // case for now because that is what signed saturating add/sub need.
9497 // FIXME: We don't need the condition to be SETLT or even a SETCC,
9498 // but we would probably want to swap the true/false values if the condition
9499 // is SETGE/SETLE to avoid an XORI.
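  // e.g. (select (setlt a, b), c+1, c) -> (add (setlt a, b), c)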
9500 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
9501 CCVal == ISD::SETLT) {
9502 const APInt &TrueVal = TrueV->getAsAPIntVal();
9503 const APInt &FalseVal = FalseV->getAsAPIntVal();
9504 if (TrueVal - 1 == FalseVal)
9505 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
9506 if (TrueVal + 1 == FalseVal)
9507 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
9508 }
9509
9510 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9511 // 1 < x ? x : 1 -> 0 < x ? x : 1
9512 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
9513 RHS == TrueV && LHS == FalseV) {
9514 LHS = DAG.getConstant(0, DL, VT);
9515 // 0 <u x is the same as x != 0.
9516 if (CCVal == ISD::SETULT) {
9517 std::swap(LHS, RHS);
9518 CCVal = ISD::SETNE;
9519 }
9520 }
9521
9522 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
9523 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
9524 RHS == FalseV) {
9525 RHS = DAG.getConstant(0, DL, VT);
9526 }
9527
9528 SDValue TargetCC = DAG.getCondCode(CCVal);
9529
9530 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
9531 // (select (setcc lhs, rhs, CC), constant, falsev)
9532 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
9533 std::swap(TrueV, FalseV);
9534 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
9535 }
9536
9537 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
9538 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9539}
9540
9541SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
9542 SDValue CondV = Op.getOperand(1);
9543 SDLoc DL(Op);
9544 MVT XLenVT = Subtarget.getXLenVT();
9545
9546 if (CondV.getOpcode() == ISD::SETCC &&
9547 CondV.getOperand(0).getValueType() == XLenVT) {
9548 SDValue LHS = CondV.getOperand(0);
9549 SDValue RHS = CondV.getOperand(1);
9550 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9551
9552 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9553
9554 SDValue TargetCC = DAG.getCondCode(CCVal);
9555 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9556 LHS, RHS, TargetCC, Op.getOperand(2));
9557 }
9558
9559 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9560 CondV, DAG.getConstant(0, DL, XLenVT),
9561 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
9562}
9563
9564SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
9565 MachineFunction &MF = DAG.getMachineFunction();
9566 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
9567
9568 SDLoc DL(Op);
9569 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
9570                                  getPointerTy(MF.getDataLayout()));
9571
9572 // vastart just stores the address of the VarArgsFrameIndex slot into the
9573 // memory location argument.
9574 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
9575 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
9576 MachinePointerInfo(SV));
9577}
9578
9579SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
9580 SelectionDAG &DAG) const {
9581 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9582 MachineFunction &MF = DAG.getMachineFunction();
9583 MachineFrameInfo &MFI = MF.getFrameInfo();
9584 MFI.setFrameAddressIsTaken(true);
9585 Register FrameReg = RI.getFrameRegister(MF);
9586 int XLenInBytes = Subtarget.getXLen() / 8;
9587
9588 EVT VT = Op.getValueType();
9589 SDLoc DL(Op);
9590 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
9591 unsigned Depth = Op.getConstantOperandVal(0);
9592 while (Depth--) {
9593 int Offset = -(XLenInBytes * 2);
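    // In the standard RISC-V frame layout the previous frame pointer is saved
    // at fp - 2*XLenInBytes, just below the saved return address.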
9594 SDValue Ptr = DAG.getNode(
9595 ISD::ADD, DL, VT, FrameAddr,
9596         DAG.getSignedConstant(Offset, DL, VT));
9597     FrameAddr =
9598 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
9599 }
9600 return FrameAddr;
9601}
9602
9603SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
9604 SelectionDAG &DAG) const {
9605 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9606 MachineFunction &MF = DAG.getMachineFunction();
9607 MachineFrameInfo &MFI = MF.getFrameInfo();
9608 MFI.setReturnAddressIsTaken(true);
9609 MVT XLenVT = Subtarget.getXLenVT();
9610 int XLenInBytes = Subtarget.getXLen() / 8;
9611
9612 EVT VT = Op.getValueType();
9613 SDLoc DL(Op);
9614 unsigned Depth = Op.getConstantOperandVal(0);
9615 if (Depth) {
9616 int Off = -XLenInBytes;
9617 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
9618 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
9619 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
9620 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
9621 MachinePointerInfo());
9622 }
9623
9624 // Return the value of the return address register, marking it an implicit
9625 // live-in.
9626 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
9627 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
9628}
9629
9630SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
9631 SelectionDAG &DAG) const {
9632 SDLoc DL(Op);
9633 SDValue Lo = Op.getOperand(0);
9634 SDValue Hi = Op.getOperand(1);
9635 SDValue Shamt = Op.getOperand(2);
9636 EVT VT = Lo.getValueType();
9637
9638 // if Shamt-XLEN < 0: // Shamt < XLEN
9639 // Lo = Lo << Shamt
9640 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
9641 // else:
9642 // Lo = 0
9643 // Hi = Lo << (Shamt-XLEN)
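  // Splitting the shift as (Lo >>u 1) >>u (XLEN-1 - Shamt) keeps both shift
  // amounts in range and avoids an out-of-range shift by XLEN when Shamt is 0.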
9644
9645 SDValue Zero = DAG.getConstant(0, DL, VT);
9646 SDValue One = DAG.getConstant(1, DL, VT);
9647 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9648 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9649 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9650 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9651
9652 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
9653 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
9654 SDValue ShiftRightLo =
9655 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
9656 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
9657 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
9658 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
9659
9660 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9661
9662 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
9663 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9664
9665 SDValue Parts[2] = {Lo, Hi};
9666 return DAG.getMergeValues(Parts, DL);
9667}
9668
9669SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
9670 bool IsSRA) const {
9671 SDLoc DL(Op);
9672 SDValue Lo = Op.getOperand(0);
9673 SDValue Hi = Op.getOperand(1);
9674 SDValue Shamt = Op.getOperand(2);
9675 EVT VT = Lo.getValueType();
9676
9677 // SRA expansion:
9678 // if Shamt-XLEN < 0: // Shamt < XLEN
9679 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9680 // Hi = Hi >>s Shamt
9681 // else:
9682 // Lo = Hi >>s (Shamt-XLEN);
9683 // Hi = Hi >>s (XLEN-1)
9684 //
9685 // SRL expansion:
9686 // if Shamt-XLEN < 0: // Shamt < XLEN
9687 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9688 // Hi = Hi >>u Shamt
9689 // else:
9690 // Lo = Hi >>u (Shamt-XLEN);
9691 // Hi = 0;
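  // As above, (Hi << 1) << (XLEN-1 - Shamt) avoids an out-of-range shift by
  // XLEN when Shamt is 0.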
9692
9693 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
9694
9695 SDValue Zero = DAG.getConstant(0, DL, VT);
9696 SDValue One = DAG.getConstant(1, DL, VT);
9697 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9698 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9699 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9700 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9701
9702 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
9703 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
9704 SDValue ShiftLeftHi =
9705 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
9706 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
9707 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
9708 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
9709 SDValue HiFalse =
9710 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
9711
9712 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9713
9714 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
9715 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9716
9717 SDValue Parts[2] = {Lo, Hi};
9718 return DAG.getMergeValues(Parts, DL);
9719}
9720
9721// Lower splats of i1 types to SETCC. For each mask vector type, we have a
9722// legal equivalently-sized i8 type, so we can use that as a go-between.
9723SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
9724 SelectionDAG &DAG) const {
9725 SDLoc DL(Op);
9726 MVT VT = Op.getSimpleValueType();
9727 SDValue SplatVal = Op.getOperand(0);
9728 // All-zeros or all-ones splats are handled specially.
9729 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
9730 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9731 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
9732 }
9733 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
9734 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9735 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
9736 }
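  // Otherwise splat the scalar: keep only bit 0, broadcast it into the i8
  // go-between type, and compare against zero to form the mask.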
9737 MVT InterVT = VT.changeVectorElementType(MVT::i8);
9738 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
9739 DAG.getConstant(1, DL, SplatVal.getValueType()));
9740 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
9741 SDValue Zero = DAG.getConstant(0, DL, InterVT);
9742 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
9743}
9744
9745// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
9746// illegal (currently only vXi64 RV32).
9747// FIXME: We could also catch non-constant sign-extended i32 values and lower
9748// them to VMV_V_X_VL.
9749SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
9750 SelectionDAG &DAG) const {
9751 SDLoc DL(Op);
9752 MVT VecVT = Op.getSimpleValueType();
9753 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
9754 "Unexpected SPLAT_VECTOR_PARTS lowering");
9755
9756 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
9757 SDValue Lo = Op.getOperand(0);
9758 SDValue Hi = Op.getOperand(1);
9759
9760 MVT ContainerVT = VecVT;
9761 if (VecVT.isFixedLengthVector())
9762 ContainerVT = getContainerForFixedLengthVector(VecVT);
9763
9764 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9765
9766 SDValue Res =
9767 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
9768
9769 if (VecVT.isFixedLengthVector())
9770 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
9771
9772 return Res;
9773}
9774
9775// Custom-lower extensions from mask vectors by using a vselect either with 1
9776// for zero/any-extension or -1 for sign-extension:
9777// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
9778// Note that any-extension is lowered identically to zero-extension.
9779SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
9780 int64_t ExtTrueVal) const {
9781 SDLoc DL(Op);
9782 MVT VecVT = Op.getSimpleValueType();
9783 SDValue Src = Op.getOperand(0);
9784 // Only custom-lower extensions from mask types
9785 assert(Src.getValueType().isVector() &&
9786 Src.getValueType().getVectorElementType() == MVT::i1);
9787
9788 if (VecVT.isScalableVector()) {
9789 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
9790 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
9791 if (Src.getOpcode() == ISD::XOR &&
9792 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
9793 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
9794 SplatTrueVal);
9795 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
9796 }
9797
9798 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
9799 MVT I1ContainerVT =
9800 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
9801
9802 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
9803
9804 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9805
9806 MVT XLenVT = Subtarget.getXLenVT();
9807 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
9808 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
9809
9810 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9811 SDValue Xor = Src.getOperand(0);
9812 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
9813 SDValue ScalableOnes = Xor.getOperand(1);
9814 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
9815 ScalableOnes.getOperand(0).isUndef() &&
9816             ISD::isConstantSplatVectorAllOnes(
9817                 ScalableOnes.getOperand(1).getNode())) {
9818 CC = Xor.getOperand(0);
9819 std::swap(SplatZero, SplatTrueVal);
9820 }
9821 }
9822 }
9823
9824 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9825 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9826 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9827 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
9828 SDValue Select =
9829 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
9830 SplatZero, DAG.getUNDEF(ContainerVT), VL);
9831
9832 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
9833}
9834
9835// Custom-lower truncations from vectors to mask vectors by using a mask and a
9836// setcc operation:
9837// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
9838SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
9839 SelectionDAG &DAG) const {
9840 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
9841 SDLoc DL(Op);
9842 EVT MaskVT = Op.getValueType();
9843 // Only expect to custom-lower truncations to mask types
9844 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
9845 "Unexpected type for vector mask lowering");
9846 SDValue Src = Op.getOperand(0);
9847 MVT VecVT = Src.getSimpleValueType();
9848 SDValue Mask, VL;
9849 if (IsVPTrunc) {
9850 Mask = Op.getOperand(1);
9851 VL = Op.getOperand(2);
9852 }
9853 // If this is a fixed vector, we need to convert it to a scalable vector.
9854 MVT ContainerVT = VecVT;
9855
9856 if (VecVT.isFixedLengthVector()) {
9857 ContainerVT = getContainerForFixedLengthVector(VecVT);
9858 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9859 if (IsVPTrunc) {
9860 MVT MaskContainerVT =
9861 getContainerForFixedLengthVector(Mask.getSimpleValueType());
9862 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
9863 }
9864 }
9865
9866 if (!IsVPTrunc) {
9867 std::tie(Mask, VL) =
9868 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9869 }
9870
9871 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
9872 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
9873
9874 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9875 DAG.getUNDEF(ContainerVT), SplatOne, VL);
9876 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9877 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9878
9879 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
9880 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
9881 DAG.getUNDEF(ContainerVT), Mask, VL);
9882 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
9883 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
9884 DAG.getUNDEF(MaskContainerVT), Mask, VL});
9885 if (MaskVT.isFixedLengthVector())
9886 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
9887 return Trunc;
9888}
9889
9890SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
9891 SelectionDAG &DAG) const {
9892 unsigned Opc = Op.getOpcode();
9893 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
9894 SDLoc DL(Op);
9895
9896 MVT VT = Op.getSimpleValueType();
9897 // Only custom-lower vector truncates
9898 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9899
9900 // Truncates to mask types are handled differently
9901 if (VT.getVectorElementType() == MVT::i1)
9902 return lowerVectorMaskTruncLike(Op, DAG);
9903
9904 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
9905 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
9906 // truncate by one power of two at a time.
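  // e.g. a vXi64 -> vXi8 truncate is emitted as i64->i32->i16->i8 steps.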
9907 MVT DstEltVT = VT.getVectorElementType();
9908
9909 SDValue Src = Op.getOperand(0);
9910 MVT SrcVT = Src.getSimpleValueType();
9911 MVT SrcEltVT = SrcVT.getVectorElementType();
9912
9913 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
9914 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
9915 "Unexpected vector truncate lowering");
9916
9917 MVT ContainerVT = SrcVT;
9918 SDValue Mask, VL;
9919 if (IsVPTrunc) {
9920 Mask = Op.getOperand(1);
9921 VL = Op.getOperand(2);
9922 }
9923 if (SrcVT.isFixedLengthVector()) {
9924 ContainerVT = getContainerForFixedLengthVector(SrcVT);
9925 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9926 if (IsVPTrunc) {
9927 MVT MaskVT = getMaskTypeFor(ContainerVT);
9928 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9929 }
9930 }
9931
9932 SDValue Result = Src;
9933 if (!IsVPTrunc) {
9934 std::tie(Mask, VL) =
9935 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9936 }
9937
9938 unsigned NewOpc;
9939   if (Opc == ISD::TRUNCATE_SSAT_S)
9940     NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
9941 else if (Opc == ISD::TRUNCATE_USAT_U)
9942 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
9943 else
9944 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
9945
9946 do {
9947 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
9948 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
9949 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
9950 } while (SrcEltVT != DstEltVT);
9951
9952 if (SrcVT.isFixedLengthVector())
9953 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9954
9955 return Result;
9956}
9957
9958SDValue
9959RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
9960 SelectionDAG &DAG) const {
9961 SDLoc DL(Op);
9962 SDValue Chain = Op.getOperand(0);
9963 SDValue Src = Op.getOperand(1);
9964 MVT VT = Op.getSimpleValueType();
9965 MVT SrcVT = Src.getSimpleValueType();
9966 MVT ContainerVT = VT;
9967 if (VT.isFixedLengthVector()) {
9968 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9969 ContainerVT =
9970 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9971 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9972 }
9973
9974 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9975
9976   // RVV can only widen/truncate fp to types double/half the size of the source.
9977 if ((VT.getVectorElementType() == MVT::f64 &&
9978 (SrcVT.getVectorElementType() == MVT::f16 ||
9979 SrcVT.getVectorElementType() == MVT::bf16)) ||
9980 ((VT.getVectorElementType() == MVT::f16 ||
9981 VT.getVectorElementType() == MVT::bf16) &&
9982 SrcVT.getVectorElementType() == MVT::f64)) {
9983 // For double rounding, the intermediate rounding should be round-to-odd.
9984 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
9985 ? RISCVISD::STRICT_FP_EXTEND_VL
9986 : RISCVISD::STRICT_VFNCVT_ROD_VL;
9987 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9988 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
9989 Chain, Src, Mask, VL);
9990 Chain = Src.getValue(1);
9991 }
9992
9993 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
9994 ? RISCVISD::STRICT_FP_EXTEND_VL
9995 : RISCVISD::STRICT_FP_ROUND_VL;
9996 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
9997 Chain, Src, Mask, VL);
9998 if (VT.isFixedLengthVector()) {
9999 // StrictFP operations have two result values. Their lowered result should
10000     // have the same result count.
10001 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10002 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10003 }
10004 return Res;
10005}
10006
10007SDValue
10008RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
10009 SelectionDAG &DAG) const {
10010 bool IsVP =
10011 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
10012 bool IsExtend =
10013 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
10014 // RVV can only do truncate fp to types half the size as the source. We
10015 // custom-lower f64->f16 rounds via RVV's round-to-odd float
10016 // conversion instruction.
10017 SDLoc DL(Op);
10018 MVT VT = Op.getSimpleValueType();
10019
10020 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10021
10022 SDValue Src = Op.getOperand(0);
10023 MVT SrcVT = Src.getSimpleValueType();
10024
10025 bool IsDirectExtend =
10026 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
10027 (SrcVT.getVectorElementType() != MVT::f16 &&
10028 SrcVT.getVectorElementType() != MVT::bf16));
10029 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
10030 VT.getVectorElementType() != MVT::bf16) ||
10031 SrcVT.getVectorElementType() != MVT::f64);
10032
10033 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
10034
10035 // We have regular SD node patterns for direct non-VL extends.
10036 if (VT.isScalableVector() && IsDirectConv && !IsVP)
10037 return Op;
10038
10039 // Prepare any fixed-length vector operands.
10040 MVT ContainerVT = VT;
10041 SDValue Mask, VL;
10042 if (IsVP) {
10043 Mask = Op.getOperand(1);
10044 VL = Op.getOperand(2);
10045 }
10046 if (VT.isFixedLengthVector()) {
10047 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
10048 ContainerVT =
10049 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10050 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10051 if (IsVP) {
10052 MVT MaskVT = getMaskTypeFor(ContainerVT);
10053 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10054 }
10055 }
10056
10057 if (!IsVP)
10058 std::tie(Mask, VL) =
10059 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10060
10061 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
10062
10063 if (IsDirectConv) {
10064 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
10065 if (VT.isFixedLengthVector())
10066 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
10067 return Src;
10068 }
10069
10070 unsigned InterConvOpc =
10071 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
10072
10073 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10074 SDValue IntermediateConv =
10075 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
10076 SDValue Result =
10077 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
10078 if (VT.isFixedLengthVector())
10079 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10080 return Result;
10081}
10082
10083// Given a scalable vector type and an index into it, returns the type for the
10084// smallest subvector that the index fits in. This can be used to reduce LMUL
10085// for operations like vslidedown.
10086//
10087// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10088static std::optional<MVT>
10089getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10090 const RISCVSubtarget &Subtarget) {
10091 assert(VecVT.isScalableVector());
10092 const unsigned EltSize = VecVT.getScalarSizeInBits();
10093 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10094 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10095 MVT SmallerVT;
10096 if (MaxIdx < MinVLMAX)
10097 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10098 else if (MaxIdx < MinVLMAX * 2)
10099     SmallerVT =
10100         RISCVTargetLowering::getM1VT(VecVT).getDoubleNumVectorElementsVT();
10101   else if (MaxIdx < MinVLMAX * 4)
10102     SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10103                     .getDoubleNumVectorElementsVT()
10104                     .getDoubleNumVectorElementsVT();
10105 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10106 return std::nullopt;
10107 return SmallerVT;
10108}
10109
10110 static bool isValidVisniInsertExtractIndex(SDValue Idx) {
10111   auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10112 if (!IdxC || isNullConstant(Idx))
10113 return false;
10114 return isUInt<5>(IdxC->getZExtValue());
10115}
10116
10117// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10118// first position of a vector, and that vector is slid up to the insert index.
10119// By limiting the active vector length to index+1 and merging with the
10120// original vector (with an undisturbed tail policy for elements >= VL), we
10121// achieve the desired result of leaving all elements untouched except the one
10122// at VL-1, which is replaced with the desired value.
10123SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10124 SelectionDAG &DAG) const {
10125 SDLoc DL(Op);
10126 MVT VecVT = Op.getSimpleValueType();
10127 MVT XLenVT = Subtarget.getXLenVT();
10128 SDValue Vec = Op.getOperand(0);
10129 SDValue Val = Op.getOperand(1);
10130 MVT ValVT = Val.getSimpleValueType();
10131 SDValue Idx = Op.getOperand(2);
10132
10133 if (VecVT.getVectorElementType() == MVT::i1) {
10134 // FIXME: For now we just promote to an i8 vector and insert into that,
10135 // but this is probably not optimal.
10136 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10137 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10138 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10139 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10140 }
10141
10142 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10143 ValVT == MVT::bf16) {
10144 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10145 MVT IntVT = VecVT.changeTypeToInteger();
10146 SDValue IntInsert = DAG.getNode(
10147 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10148 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10149 return DAG.getBitcast(VecVT, IntInsert);
10150 }
10151
10152 MVT ContainerVT = VecVT;
10153 // If the operand is a fixed-length vector, convert to a scalable one.
10154 if (VecVT.isFixedLengthVector()) {
10155 ContainerVT = getContainerForFixedLengthVector(VecVT);
10156 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10157 }
10158
10159 // If we know the index we're going to insert at, we can shrink Vec so that
10160 // we're performing the scalar inserts and slideup on a smaller LMUL.
10161 SDValue OrigVec = Vec;
10162 std::optional<unsigned> AlignedIdx;
10163 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10164 const unsigned OrigIdx = IdxC->getZExtValue();
10165 // Do we know an upper bound on LMUL?
10166 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10167 DL, DAG, Subtarget)) {
10168 ContainerVT = *ShrunkVT;
10169 AlignedIdx = 0;
10170 }
10171
10172 // If we're compiling for an exact VLEN value, we can always perform
10173 // the insert in m1 as we can determine the register corresponding to
10174 // the index in the register group.
10175 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10176 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10177 EVT ElemVT = VecVT.getVectorElementType();
10178 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10179 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10180 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10181 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10182 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10183 ContainerVT = M1VT;
10184 }
10185
10186 if (AlignedIdx)
10187 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
10188 }
10189
10190 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
10191 // Even i64-element vectors on RV32 can be lowered without scalar
10192 // legalization if the most-significant 32 bits of the value are not affected
10193 // by the sign-extension of the lower 32 bits.
10194 // TODO: We could also catch sign extensions of a 32-bit value.
10195 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
10196 const auto *CVal = cast<ConstantSDNode>(Val);
10197 if (isInt<32>(CVal->getSExtValue())) {
10198 IsLegalInsert = true;
10199 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
10200 }
10201 }
10202
10203 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10204
10205 SDValue ValInVec;
10206
10207 if (IsLegalInsert) {
10208 unsigned Opc =
10209 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
10210 if (isNullConstant(Idx)) {
10211 if (!VecVT.isFloatingPoint())
10212 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10213 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
10214
10215 if (AlignedIdx)
10216 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10217 if (!VecVT.isFixedLengthVector())
10218 return Vec;
10219 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10220 }
10221
10222 // Use ri.vinsert.v.x if available.
10223 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
10224       isValidVisniInsertExtractIndex(Idx)) {
10225     // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
10226 SDValue PolicyOp =
10227         DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
10228     Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
10229 VL, PolicyOp);
10230 if (AlignedIdx)
10231 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10232 if (!VecVT.isFixedLengthVector())
10233 return Vec;
10234 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10235 }
10236
10237 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
10238 } else {
10239 // On RV32, i64-element vectors must be specially handled to place the
10240 // value at element 0, by using two vslide1down instructions in sequence on
10241 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
10242 // this.
10243 SDValue ValLo, ValHi;
10244 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
10245 MVT I32ContainerVT =
10246 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
10247 SDValue I32Mask =
10248 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
10249 // Limit the active VL to two.
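    // (one i64 element is assembled from exactly two i32 elements)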
10250 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
10251 // If the Idx is 0 we can insert directly into the vector.
10252 if (isNullConstant(Idx)) {
10253 // First slide in the lo value, then the hi in above it. We use slide1down
10254 // to avoid the register group overlap constraint of vslide1up.
10255 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10256 Vec, Vec, ValLo, I32Mask, InsertI64VL);
10257 // If the source vector is undef don't pass along the tail elements from
10258 // the previous slide1down.
10259 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
10260 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10261 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
10262 // Bitcast back to the right container type.
10263 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10264
10265 if (AlignedIdx)
10266 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
10267 if (!VecVT.isFixedLengthVector())
10268 return ValInVec;
10269 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
10270 }
10271
10272 // First slide in the lo value, then the hi in above it. We use slide1down
10273 // to avoid the register group overlap constraint of vslide1up.
10274 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10275 DAG.getUNDEF(I32ContainerVT),
10276 DAG.getUNDEF(I32ContainerVT), ValLo,
10277 I32Mask, InsertI64VL);
10278 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10279 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
10280 I32Mask, InsertI64VL);
10281 // Bitcast back to the right container type.
10282 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10283 }
10284
10285 // Now that the value is in a vector, slide it into position.
10286 SDValue InsertVL =
10287 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
10288
10289 // Use tail agnostic policy if Idx is the last index of Vec.
10290   unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10291   if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
10292       Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
10293     Policy = RISCVVType::TAIL_AGNOSTIC;
10294   SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
10295 Idx, Mask, InsertVL, Policy);
10296
10297 if (AlignedIdx)
10298 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
10299 if (!VecVT.isFixedLengthVector())
10300 return Slideup;
10301 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
10302}
10303
10304// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
10305// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
10306// types this is done using VMV_X_S to allow us to glean information about the
10307// sign bits of the result.
10308SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
10309 SelectionDAG &DAG) const {
10310 SDLoc DL(Op);
10311 SDValue Idx = Op.getOperand(1);
10312 SDValue Vec = Op.getOperand(0);
10313 EVT EltVT = Op.getValueType();
10314 MVT VecVT = Vec.getSimpleValueType();
10315 MVT XLenVT = Subtarget.getXLenVT();
10316
10317 if (VecVT.getVectorElementType() == MVT::i1) {
10318 // Use vfirst.m to extract the first bit.
10319 if (isNullConstant(Idx)) {
10320 MVT ContainerVT = VecVT;
10321 if (VecVT.isFixedLengthVector()) {
10322 ContainerVT = getContainerForFixedLengthVector(VecVT);
10323 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10324 }
10325 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10326 SDValue Vfirst =
10327 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
10328 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
10329 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10330 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10331 }
10332 if (VecVT.isFixedLengthVector()) {
10333 unsigned NumElts = VecVT.getVectorNumElements();
10334 if (NumElts >= 8) {
10335 MVT WideEltVT;
10336 unsigned WidenVecLen;
10337 SDValue ExtractElementIdx;
10338 SDValue ExtractBitIdx;
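        // Strategy: bitcast the mask to a vector of wider integer elements,
        // extract the element containing the requested bit into a GPR, then
        // isolate the bit with a shift and mask.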
10339 unsigned MaxEEW = Subtarget.getELen();
10340 MVT LargestEltVT = MVT::getIntegerVT(
10341 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
10342 if (NumElts <= LargestEltVT.getSizeInBits()) {
10343 assert(isPowerOf2_32(NumElts) &&
10344 "the number of elements should be power of 2");
10345 WideEltVT = MVT::getIntegerVT(NumElts);
10346 WidenVecLen = 1;
10347 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
10348 ExtractBitIdx = Idx;
10349 } else {
10350 WideEltVT = LargestEltVT;
10351 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
10352 // extract element index = index / element width
10353 ExtractElementIdx = DAG.getNode(
10354 ISD::SRL, DL, XLenVT, Idx,
10355 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
10356 // mask bit index = index % element width
10357 ExtractBitIdx = DAG.getNode(
10358 ISD::AND, DL, XLenVT, Idx,
10359 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
10360 }
10361 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
10362 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
10363 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
10364 Vec, ExtractElementIdx);
10365 // Extract the bit from GPR.
10366 SDValue ShiftRight =
10367 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
10368 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
10369 DAG.getConstant(1, DL, XLenVT));
10370 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10371 }
10372 }
10373 // Otherwise, promote to an i8 vector and extract from that.
10374 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10375 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10376 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
10377 }
10378
10379 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10380 EltVT == MVT::bf16) {
10381 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
10382 MVT IntVT = VecVT.changeTypeToInteger();
10383 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
10384 SDValue IntExtract =
10385 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
10386 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
10387 }
10388
10389 // If this is a fixed vector, we need to convert it to a scalable vector.
10390 MVT ContainerVT = VecVT;
10391 if (VecVT.isFixedLengthVector()) {
10392 ContainerVT = getContainerForFixedLengthVector(VecVT);
10393 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10394 }
10395
10396 // If we're compiling for an exact VLEN value and we have a known
10397 // constant index, we can always perform the extract in m1 (or
10398 // smaller) as we can determine the register corresponding to
10399 // the index in the register group.
10400 const auto VLen = Subtarget.getRealVLen();
10401 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10402 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
10403 MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10404 unsigned OrigIdx = IdxC->getZExtValue();
10405 EVT ElemVT = VecVT.getVectorElementType();
10406 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
10407 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10408 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10409 unsigned ExtractIdx =
10410 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10411 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
10412 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10413 ContainerVT = M1VT;
10414 }
10415
10416 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
10417 // contains our index.
10418 std::optional<uint64_t> MaxIdx;
10419 if (VecVT.isFixedLengthVector())
10420 MaxIdx = VecVT.getVectorNumElements() - 1;
10421 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
10422 MaxIdx = IdxC->getZExtValue();
10423 if (MaxIdx) {
10424 if (auto SmallerVT =
10425 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
10426 ContainerVT = *SmallerVT;
10427 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
10428 }
10429 }
10430
10431 // Use ri.vextract.x.v if available.
10432 // TODO: Avoid index 0 and just use the vmv.x.s
10433 if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
10434       isValidVisniInsertExtractIndex(Idx)) {
10435     SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
10436 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
10437 }
10438
10439 // If after narrowing, the required slide is still greater than LMUL2,
10440 // fallback to generic expansion and go through the stack. This is done
10441 // for a subtle reason: extracting *all* elements out of a vector is
10442 // widely expected to be linear in vector size, but because vslidedown
10443 // is linear in LMUL, performing N extracts using vslidedown becomes
10444 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
10445 // seems to have the same problem (the store is linear in LMUL), but the
10446 // generic expansion *memoizes* the store, and thus for many extracts of
10447 // the same vector we end up with one store and a bunch of loads.
10448 // TODO: We don't have the same code for insert_vector_elt because we
10449 // have BUILD_VECTOR and handle the degenerate case there. Should we
10450 // consider adding an inverse BUILD_VECTOR node?
10451 MVT LMUL2VT =
10452       RISCVTargetLowering::getM1VT(ContainerVT).getDoubleNumVectorElementsVT();
10453   if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
10454 return SDValue();
10455
10456 // If the index is 0, the vector is already in the right position.
10457 if (!isNullConstant(Idx)) {
10458 // Use a VL of 1 to avoid processing more elements than we need.
10459 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10460 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10461 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10462 }
10463
10464 if (!EltVT.isInteger()) {
10465 // Floating-point extracts are handled in TableGen.
10466 return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
10467 }
10468
10469 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10470 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
10471}
10472
10473// Some RVV intrinsics may claim that they want an integer operand to be
10474// promoted or expanded.
10475 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
10476                                            const RISCVSubtarget &Subtarget) {
10477 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
10478 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
10479 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
10480 "Unexpected opcode");
10481
10482 if (!Subtarget.hasVInstructions())
10483 return SDValue();
10484
10485 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10486 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10487 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10488
10489 SDLoc DL(Op);
10490
10491   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10492       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10493 if (!II || !II->hasScalarOperand())
10494 return SDValue();
10495
10496 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
10497 assert(SplatOp < Op.getNumOperands());
10498
10499   SmallVector<SDValue, 8> Operands(Op->ops());
10500   SDValue &ScalarOp = Operands[SplatOp];
10501 MVT OpVT = ScalarOp.getSimpleValueType();
10502 MVT XLenVT = Subtarget.getXLenVT();
10503
10504 // If this isn't a scalar, or its type is XLenVT we're done.
10505 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10506 return SDValue();
10507
10508 // Simplest case is that the operand needs to be promoted to XLenVT.
10509 if (OpVT.bitsLT(XLenVT)) {
10510 // If the operand is a constant, sign extend to increase our chances
10511 // of being able to use a .vi instruction. ANY_EXTEND would become a
10512     // zero extend and the simm5 check in isel would fail.
10513 // FIXME: Should we ignore the upper bits in isel instead?
10514 unsigned ExtOpc =
10515         isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10516     ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10517 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10518 }
10519
10520 // Use the previous operand to get the vXi64 VT. The result might be a mask
10521 // VT for compares. Using the previous operand assumes that the previous
10522 // operand will never have a smaller element size than a scalar operand and
10523 // that a widening operation never uses SEW=64.
10524 // NOTE: If this fails the below assert, we can probably just find the
10525 // element count from any operand or result and use it to construct the VT.
10526 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
10527 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
10528
10529 // The more complex case is when the scalar is larger than XLenVT.
10530 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
10531 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
10532
10533 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
10534 // instruction to sign-extend since SEW>XLEN.
10535 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
10536 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
10537 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10538 }
10539
10540 switch (IntNo) {
10541 case Intrinsic::riscv_vslide1up:
10542 case Intrinsic::riscv_vslide1down:
10543 case Intrinsic::riscv_vslide1up_mask:
10544 case Intrinsic::riscv_vslide1down_mask: {
10545 // We need to special case these when the scalar is larger than XLen.
10546 unsigned NumOps = Op.getNumOperands();
10547 bool IsMasked = NumOps == 7;
10548
10549 // Convert the vector source to the equivalent nxvXi32 vector.
10550 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
10551 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
10552 SDValue ScalarLo, ScalarHi;
10553 std::tie(ScalarLo, ScalarHi) =
10554 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
10555
10556 // Double the VL since we halved SEW.
10557 SDValue AVL = getVLOperand(Op);
10558 SDValue I32VL;
10559
10560 // Optimize for constant AVL
10561 if (isa<ConstantSDNode>(AVL)) {
10562 const auto [MinVLMAX, MaxVLMAX] =
10563           RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
10564
10565 uint64_t AVLInt = AVL->getAsZExtVal();
10566 if (AVLInt <= MinVLMAX) {
10567 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
10568 } else if (AVLInt >= 2 * MaxVLMAX) {
10569 // Just set vl to VLMAX in this situation
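        // (using X0 as the AVL requests VLMAX from vsetvli)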
10570 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
10571 } else {
10572 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
10573 // is related to the hardware implementation.
10574         // So let the following code handle it.
10575 }
10576 }
10577 if (!I32VL) {
10578       RISCVVType::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
10579       SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
10580 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
10581 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
10582 SDValue SETVL =
10583 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
10584       // Use the vsetvli instruction to get the actually used length, which
10585       // is related to the hardware implementation.
10586 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
10587 SEW, LMUL);
10588 I32VL =
10589 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
10590 }
10591
10592 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
10593
10594 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
10595 // instructions.
10596 SDValue Passthru;
10597 if (IsMasked)
10598 Passthru = DAG.getUNDEF(I32VT);
10599 else
10600 Passthru = DAG.getBitcast(I32VT, Operands[1]);
10601
10602 if (IntNo == Intrinsic::riscv_vslide1up ||
10603 IntNo == Intrinsic::riscv_vslide1up_mask) {
10604 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10605 ScalarHi, I32Mask, I32VL);
10606 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10607 ScalarLo, I32Mask, I32VL);
10608 } else {
10609 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10610 ScalarLo, I32Mask, I32VL);
10611 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10612 ScalarHi, I32Mask, I32VL);
10613 }
10614
10615 // Convert back to nxvXi64.
10616 Vec = DAG.getBitcast(VT, Vec);
10617
10618 if (!IsMasked)
10619 return Vec;
10620 // Apply mask after the operation.
10621 SDValue Mask = Operands[NumOps - 3];
10622 SDValue MaskedOff = Operands[1];
10623 // Assume Policy operand is the last operand.
10624 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
10625 // We don't need to select maskedoff if it's undef.
10626 if (MaskedOff.isUndef())
10627 return Vec;
10628 // TAMU
10629 if (Policy == RISCVVType::TAIL_AGNOSTIC)
10630 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10631 DAG.getUNDEF(VT), AVL);
10632 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
10633     // It's fine because vmerge does not care about mask policy.
10634 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10635 MaskedOff, AVL);
10636 }
10637 }
10638
10639 // We need to convert the scalar to a splat vector.
10640 SDValue VL = getVLOperand(Op);
10641 assert(VL.getValueType() == XLenVT);
10642 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
10643 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10644}
10645
10646// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
10647// scalable vector llvm.get.vector.length for now.
10648//
10649// We need to convert from a scalable VF to a vsetvli with VLMax equal to
10650// (vscale * VF). The vscale and VF are independent of element width. We use
10651// SEW=8 for the vsetvli because it is the only element width that supports all
10652// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
10653// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
10654// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
10655// SEW and LMUL are better for the surrounding vector instructions.
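// For example, with RVVBitsPerBlock=64, VF=2 gives LMUL=1/4; vsetvli with
// SEW=8 and LMUL=1/4 then yields VLMAX = VLEN/32 = vscale * 2.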
10656 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
10657                                     const RISCVSubtarget &Subtarget) {
10658 MVT XLenVT = Subtarget.getXLenVT();
10659
10660 // The smallest LMUL is only valid for the smallest element width.
10661 const unsigned ElementWidth = 8;
10662
10663 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
10664 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
10665 // We don't support VF==1 with ELEN==32.
10666 [[maybe_unused]] unsigned MinVF =
10667 RISCV::RVVBitsPerBlock / Subtarget.getELen();
10668
10669 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
10670 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
10671 "Unexpected VF");
10672
10673 bool Fractional = VF < LMul1VF;
10674 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
10675 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
10676 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
10677
10678 SDLoc DL(N);
10679
10680 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
10681 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
10682
10683 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
10684
10685 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
10686 SDValue Res =
10687 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
10688 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
10689}
10690
10691 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
10692                              const RISCVSubtarget &Subtarget) {
10693 SDValue Op0 = N->getOperand(1);
10694 MVT OpVT = Op0.getSimpleValueType();
10695 MVT ContainerVT = OpVT;
10696 if (OpVT.isFixedLengthVector()) {
10697 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
10698 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
10699 }
10700 MVT XLenVT = Subtarget.getXLenVT();
10701 SDLoc DL(N);
10702 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
10703 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
10704 if (isOneConstant(N->getOperand(2)))
10705 return Res;
10706
10707 // Convert -1 to VL.
10708 SDValue Setcc =
10709 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
10710 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
10711 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
10712}
10713
10714static inline void promoteVCIXScalar(const SDValue &Op,
10715                                       SmallVectorImpl<SDValue> &Operands,
10716                                       SelectionDAG &DAG) {
10717 const RISCVSubtarget &Subtarget =
10718       DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10719
10720 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10721 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10722 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10723 SDLoc DL(Op);
10724
10725   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10726       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10727 if (!II || !II->hasScalarOperand())
10728 return;
10729
10730 unsigned SplatOp = II->ScalarOperand + 1;
10731 assert(SplatOp < Op.getNumOperands());
10732
10733 SDValue &ScalarOp = Operands[SplatOp];
10734 MVT OpVT = ScalarOp.getSimpleValueType();
10735 MVT XLenVT = Subtarget.getXLenVT();
10736
10737 // The code below is partially copied from lowerVectorIntrinsicScalars.
10738 // If this isn't a scalar, or its type is XLenVT we're done.
10739 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10740 return;
10741
10742 // Manually emit promote operation for scalar operation.
10743 if (OpVT.bitsLT(XLenVT)) {
10744 unsigned ExtOpc =
10746 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10747 }
10748}
10749
10750static void processVCIXOperands(SDValue &OrigOp,
10751                                 SmallVectorImpl<SDValue> &Operands,
10752                                 SelectionDAG &DAG) {
10753 promoteVCIXScalar(OrigOp, Operands, DAG);
10754 const RISCVSubtarget &Subtarget =
10755       DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10756   for (SDValue &V : Operands) {
10757 EVT ValType = V.getValueType();
10758 if (ValType.isVector() && ValType.isFloatingPoint()) {
10759 MVT InterimIVT =
10760 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
10761 ValType.getVectorElementCount());
10762 V = DAG.getBitcast(InterimIVT, V);
10763 }
10764 if (ValType.isFixedLengthVector()) {
10765 MVT OpContainerVT = getContainerForFixedLengthVector(
10766 DAG, V.getSimpleValueType(), Subtarget);
10767 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
10768 }
10769 }
10770}
10771
10772// LMUL * VLEN should be greater than or equal to EGS * SEW
10773static inline bool isValidEGW(int EGS, EVT VT,
10774 const RISCVSubtarget &Subtarget) {
10775 return (Subtarget.getRealMinVLen() *
10776           VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
10777          EGS * VT.getScalarSizeInBits();
10778}
10779
10780SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10781 SelectionDAG &DAG) const {
10782 unsigned IntNo = Op.getConstantOperandVal(0);
10783 SDLoc DL(Op);
10784 MVT XLenVT = Subtarget.getXLenVT();
10785
10786 switch (IntNo) {
10787 default:
10788 break; // Don't custom lower most intrinsics.
10789 case Intrinsic::riscv_tuple_insert: {
10790 SDValue Vec = Op.getOperand(1);
10791 SDValue SubVec = Op.getOperand(2);
10792 SDValue Index = Op.getOperand(3);
10793
10794 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
10795 SubVec, Index);
10796 }
10797 case Intrinsic::riscv_tuple_extract: {
10798 SDValue Vec = Op.getOperand(1);
10799 SDValue Index = Op.getOperand(2);
10800
10801 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
10802 Index);
10803 }
10804 case Intrinsic::thread_pointer: {
10805 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10806 return DAG.getRegister(RISCV::X4, PtrVT);
10807 }
10808 case Intrinsic::riscv_orc_b:
10809 case Intrinsic::riscv_brev8:
10810 case Intrinsic::riscv_sha256sig0:
10811 case Intrinsic::riscv_sha256sig1:
10812 case Intrinsic::riscv_sha256sum0:
10813 case Intrinsic::riscv_sha256sum1:
10814 case Intrinsic::riscv_sm3p0:
10815 case Intrinsic::riscv_sm3p1: {
10816 unsigned Opc;
10817 switch (IntNo) {
10818 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
10819 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
10820 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
10821 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
10822 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
10823 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
10824 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
10825 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
10826 }
10827
10828 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10829 }
10830 case Intrinsic::riscv_sm4ks:
10831 case Intrinsic::riscv_sm4ed: {
10832 unsigned Opc =
10833 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
10834
10835 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
10836 Op.getOperand(3));
10837 }
10838 case Intrinsic::riscv_zip:
10839 case Intrinsic::riscv_unzip: {
10840 unsigned Opc =
10841 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
10842 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10843 }
10844 case Intrinsic::riscv_mopr:
10845 return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
10846 Op.getOperand(2));
10847
10848 case Intrinsic::riscv_moprr: {
10849 return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
10850 Op.getOperand(2), Op.getOperand(3));
10851 }
10852 case Intrinsic::riscv_clmul:
10853 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
10854 Op.getOperand(2));
10855 case Intrinsic::riscv_clmulh:
10856 case Intrinsic::riscv_clmulr: {
10857 unsigned Opc =
10858 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
10859 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
10860 }
10861 case Intrinsic::experimental_get_vector_length:
10862 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
10863 case Intrinsic::experimental_cttz_elts:
10864 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
10865 case Intrinsic::riscv_vmv_x_s: {
10866 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
10867 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
10868 }
10869 case Intrinsic::riscv_vfmv_f_s:
10870 return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
10871 case Intrinsic::riscv_vmv_v_x:
10872 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
10873 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
10874 Subtarget);
10875 case Intrinsic::riscv_vfmv_v_f:
10876 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
10877 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10878 case Intrinsic::riscv_vmv_s_x: {
10879 SDValue Scalar = Op.getOperand(2);
10880
10881 if (Scalar.getValueType().bitsLE(XLenVT)) {
10882 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
10883 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
10884 Op.getOperand(1), Scalar, Op.getOperand(3));
10885 }
10886
10887 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
10888
10889 // This is an i64 value that lives in two scalar registers. We have to
10890 // insert this in a convoluted way. First we build a vXi64 splat containing
10891 // the two values that we assemble using some bit math. Next we'll use
10892 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
10893 // to merge element 0 from our splat into the source vector.
10894 // FIXME: This is probably not the best way to do this, but it is
10895 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
10896 // point.
10897 // sw lo, (a0)
10898 // sw hi, 4(a0)
10899 // vlse vX, (a0)
10900 //
10901 // vid.v vVid
10902 // vmseq.vx mMask, vVid, 0
10903 // vmerge.vvm vDest, vSrc, vVal, mMask
10904 MVT VT = Op.getSimpleValueType();
10905 SDValue Vec = Op.getOperand(1);
10906 SDValue VL = getVLOperand(Op);
10907
10908 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
10909 if (Op.getOperand(1).isUndef())
10910 return SplattedVal;
10911 SDValue SplattedIdx =
10912 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10913 DAG.getConstant(0, DL, MVT::i32), VL);
10914
10915 MVT MaskVT = getMaskTypeFor(VT);
10916 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
10917 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10918 SDValue SelectCond =
10919 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10920 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
10921 DAG.getUNDEF(MaskVT), Mask, VL});
10922 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
10923 Vec, DAG.getUNDEF(VT), VL);
10924 }
10925 case Intrinsic::riscv_vfmv_s_f:
10926 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
10927 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10928 // EGS * EEW >= 128 bits
10929 case Intrinsic::riscv_vaesdf_vv:
10930 case Intrinsic::riscv_vaesdf_vs:
10931 case Intrinsic::riscv_vaesdm_vv:
10932 case Intrinsic::riscv_vaesdm_vs:
10933 case Intrinsic::riscv_vaesef_vv:
10934 case Intrinsic::riscv_vaesef_vs:
10935 case Intrinsic::riscv_vaesem_vv:
10936 case Intrinsic::riscv_vaesem_vs:
10937 case Intrinsic::riscv_vaeskf1:
10938 case Intrinsic::riscv_vaeskf2:
10939 case Intrinsic::riscv_vaesz_vs:
10940 case Intrinsic::riscv_vsm4k:
10941 case Intrinsic::riscv_vsm4r_vv:
10942 case Intrinsic::riscv_vsm4r_vs: {
10943 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10944 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10945 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10946 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10947 return Op;
10948 }
10949 // EGS * EEW >= 256 bits
10950 case Intrinsic::riscv_vsm3c:
10951 case Intrinsic::riscv_vsm3me: {
10952 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
10953 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
10954 reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
10955 return Op;
10956 }
10957 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
10958 case Intrinsic::riscv_vsha2ch:
10959 case Intrinsic::riscv_vsha2cl:
10960 case Intrinsic::riscv_vsha2ms: {
10961 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
10962 !Subtarget.hasStdExtZvknhb())
10963 reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
10964 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10965 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10966 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10967 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10968 return Op;
10969 }
10970 case Intrinsic::riscv_sf_vc_v_x:
10971 case Intrinsic::riscv_sf_vc_v_i:
10972 case Intrinsic::riscv_sf_vc_v_xv:
10973 case Intrinsic::riscv_sf_vc_v_iv:
10974 case Intrinsic::riscv_sf_vc_v_vv:
10975 case Intrinsic::riscv_sf_vc_v_fv:
10976 case Intrinsic::riscv_sf_vc_v_xvv:
10977 case Intrinsic::riscv_sf_vc_v_ivv:
10978 case Intrinsic::riscv_sf_vc_v_vvv:
10979 case Intrinsic::riscv_sf_vc_v_fvv:
10980 case Intrinsic::riscv_sf_vc_v_xvw:
10981 case Intrinsic::riscv_sf_vc_v_ivw:
10982 case Intrinsic::riscv_sf_vc_v_vvw:
10983 case Intrinsic::riscv_sf_vc_v_fvw: {
10984 MVT VT = Op.getSimpleValueType();
10985
10986 SmallVector<SDValue> Operands{Op->op_values()};
10987 processVCIXOperands(Op, Operands, DAG);
10988
10989 MVT RetVT = VT;
10990 if (VT.isFixedLengthVector())
10991 RetVT = getContainerForFixedLengthVector(RetVT);
10992 else if (VT.isFloatingPoint())
10993 RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
10994 RetVT.getVectorElementCount());
10995
10996 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
10997
10998 if (VT.isFixedLengthVector())
10999 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
11000 else if (VT.isFloatingPoint())
11001 NewNode = DAG.getBitcast(VT, NewNode);
11002
11003 if (Op == NewNode)
11004 break;
11005
11006 return NewNode;
11007 }
11008 }
11009
11010 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11011}
11012
11013 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
11014 unsigned Type) {
11015 SDLoc DL(Op);
11016 SmallVector<SDValue> Operands{Op->op_values()};
11017 Operands.erase(Operands.begin() + 1);
11018
11019 const RISCVSubtarget &Subtarget =
11020 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
11021 MVT VT = Op.getSimpleValueType();
11022 MVT RetVT = VT;
11023 MVT FloatVT = VT;
11024
11025 if (VT.isFloatingPoint()) {
11026 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11027 VT.getVectorElementCount());
11028 FloatVT = RetVT;
11029 }
11030 if (VT.isFixedLengthVector())
11031 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
11032 Subtarget);
11033
11034 processVCIXOperands(Op, Operands, DAG);
11035
11036 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
11037 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
11038 SDValue Chain = NewNode.getValue(1);
11039
11040 if (VT.isFixedLengthVector())
11041 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
11042 if (VT.isFloatingPoint())
11043 NewNode = DAG.getBitcast(VT, NewNode);
11044
11045 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
11046
11047 return NewNode;
11048}
11049
11050 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
11051 unsigned Type) {
11052 SmallVector<SDValue> Operands{Op->op_values()};
11053 Operands.erase(Operands.begin() + 1);
11054 processVCIXOperands(Op, Operands, DAG);
11055
11056 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
11057}
11058
11059static SDValue
11060 lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op,
11061 const RISCVSubtarget &Subtarget,
11062 SelectionDAG &DAG) {
11063 bool IsStrided;
11064 switch (IntNo) {
11065 case Intrinsic::riscv_seg2_load_mask:
11066 case Intrinsic::riscv_seg3_load_mask:
11067 case Intrinsic::riscv_seg4_load_mask:
11068 case Intrinsic::riscv_seg5_load_mask:
11069 case Intrinsic::riscv_seg6_load_mask:
11070 case Intrinsic::riscv_seg7_load_mask:
11071 case Intrinsic::riscv_seg8_load_mask:
11072 IsStrided = false;
11073 break;
11074 case Intrinsic::riscv_sseg2_load_mask:
11075 case Intrinsic::riscv_sseg3_load_mask:
11076 case Intrinsic::riscv_sseg4_load_mask:
11077 case Intrinsic::riscv_sseg5_load_mask:
11078 case Intrinsic::riscv_sseg6_load_mask:
11079 case Intrinsic::riscv_sseg7_load_mask:
11080 case Intrinsic::riscv_sseg8_load_mask:
11081 IsStrided = true;
11082 break;
11083 default:
11084 llvm_unreachable("unexpected intrinsic ID");
11085 };
11086
11087 static const Intrinsic::ID VlsegInts[7] = {
11088 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11089 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11090 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11091 Intrinsic::riscv_vlseg8_mask};
11092 static const Intrinsic::ID VlssegInts[7] = {
11093 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11094 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11095 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11096 Intrinsic::riscv_vlsseg8_mask};
11097
11098 SDLoc DL(Op);
11099 unsigned NF = Op->getNumValues() - 1;
11100 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11101 MVT XLenVT = Subtarget.getXLenVT();
11102 MVT VT = Op->getSimpleValueType(0);
11103 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11104 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11105 ContainerVT.getScalarSizeInBits();
11106 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11107
11108 // Operands: (chain, int_id, pointer, mask, vl) or
11109 // (chain, int_id, pointer, offset, mask, vl)
11110 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11111 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11112 MVT MaskVT = Mask.getSimpleValueType();
11113 MVT MaskContainerVT =
11114 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11115 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11116
11117 SDValue IntID = DAG.getTargetConstant(
11118 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11119 auto *Load = cast<MemIntrinsicSDNode>(Op);
11120
11121 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11122 SmallVector<SDValue, 9> Ops = {
11123 Load->getChain(),
11124 IntID,
11125 DAG.getUNDEF(VecTupTy),
11126 Op.getOperand(2),
11127 Mask,
11128 VL,
11129 DAG.getConstant(RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC,
11130 DL, XLenVT),
11131 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11132 // Insert the stride operand.
11133 if (IsStrided)
11134 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11135
11136 SDValue Result =
11137 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11138 Load->getMemoryVT(), Load->getMemOperand());
11139 SmallVector<SDValue, 9> Results;
11140 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11141 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11142 Result.getValue(0),
11143 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11144 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11145 }
11146 Results.push_back(Result.getValue(1));
11147 return DAG.getMergeValues(Results, DL);
11148}
11149
11150SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11151 SelectionDAG &DAG) const {
11152 unsigned IntNo = Op.getConstantOperandVal(1);
11153 switch (IntNo) {
11154 default:
11155 break;
11156 case Intrinsic::riscv_seg2_load_mask:
11157 case Intrinsic::riscv_seg3_load_mask:
11158 case Intrinsic::riscv_seg4_load_mask:
11159 case Intrinsic::riscv_seg5_load_mask:
11160 case Intrinsic::riscv_seg6_load_mask:
11161 case Intrinsic::riscv_seg7_load_mask:
11162 case Intrinsic::riscv_seg8_load_mask:
11163 case Intrinsic::riscv_sseg2_load_mask:
11164 case Intrinsic::riscv_sseg3_load_mask:
11165 case Intrinsic::riscv_sseg4_load_mask:
11166 case Intrinsic::riscv_sseg5_load_mask:
11167 case Intrinsic::riscv_sseg6_load_mask:
11168 case Intrinsic::riscv_sseg7_load_mask:
11169 case Intrinsic::riscv_sseg8_load_mask:
11170 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
11171
11172 case Intrinsic::riscv_sf_vc_v_x_se:
11173 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
11174 case Intrinsic::riscv_sf_vc_v_i_se:
11175 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
11176 case Intrinsic::riscv_sf_vc_v_xv_se:
11177 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
11178 case Intrinsic::riscv_sf_vc_v_iv_se:
11179 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
11180 case Intrinsic::riscv_sf_vc_v_vv_se:
11181 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
11182 case Intrinsic::riscv_sf_vc_v_fv_se:
11183 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
11184 case Intrinsic::riscv_sf_vc_v_xvv_se:
11185 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
11186 case Intrinsic::riscv_sf_vc_v_ivv_se:
11187 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
11188 case Intrinsic::riscv_sf_vc_v_vvv_se:
11189 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
11190 case Intrinsic::riscv_sf_vc_v_fvv_se:
11191 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
11192 case Intrinsic::riscv_sf_vc_v_xvw_se:
11193 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
11194 case Intrinsic::riscv_sf_vc_v_ivw_se:
11195 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
11196 case Intrinsic::riscv_sf_vc_v_vvw_se:
11197 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
11198 case Intrinsic::riscv_sf_vc_v_fvw_se:
11199 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
11200 }
11201
11202 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11203}
11204
11205static SDValue
11206 lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
11207 const RISCVSubtarget &Subtarget,
11208 SelectionDAG &DAG) {
11209 bool IsStrided;
11210 switch (IntNo) {
11211 case Intrinsic::riscv_seg2_store_mask:
11212 case Intrinsic::riscv_seg3_store_mask:
11213 case Intrinsic::riscv_seg4_store_mask:
11214 case Intrinsic::riscv_seg5_store_mask:
11215 case Intrinsic::riscv_seg6_store_mask:
11216 case Intrinsic::riscv_seg7_store_mask:
11217 case Intrinsic::riscv_seg8_store_mask:
11218 IsStrided = false;
11219 break;
11220 case Intrinsic::riscv_sseg2_store_mask:
11221 case Intrinsic::riscv_sseg3_store_mask:
11222 case Intrinsic::riscv_sseg4_store_mask:
11223 case Intrinsic::riscv_sseg5_store_mask:
11224 case Intrinsic::riscv_sseg6_store_mask:
11225 case Intrinsic::riscv_sseg7_store_mask:
11226 case Intrinsic::riscv_sseg8_store_mask:
11227 IsStrided = true;
11228 break;
11229 default:
11230 llvm_unreachable("unexpected intrinsic ID");
11231 }
11232
11233 SDLoc DL(Op);
11234 static const Intrinsic::ID VssegInts[] = {
11235 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
11236 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
11237 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
11238 Intrinsic::riscv_vsseg8_mask};
11239 static const Intrinsic::ID VsssegInts[] = {
11240 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
11241 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
11242 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
11243 Intrinsic::riscv_vssseg8_mask};
11244
11245 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
11246 // (chain, int_id, vec*, ptr, stride, mask, vl)
11247 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
11248 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11249 MVT XLenVT = Subtarget.getXLenVT();
11250 MVT VT = Op->getOperand(2).getSimpleValueType();
11251 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11252 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11253 ContainerVT.getScalarSizeInBits();
11254 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11255
11256 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11257 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11258 MVT MaskVT = Mask.getSimpleValueType();
11259 MVT MaskContainerVT =
11260 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11261 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11262
11263 SDValue IntID = DAG.getTargetConstant(
11264 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
11265 SDValue Ptr = Op->getOperand(NF + 2);
11266
11267 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
11268
11269 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
11270 for (unsigned i = 0; i < NF; i++)
11271 StoredVal = DAG.getNode(
11272 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
11273 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
11274 DAG, Subtarget),
11275 DAG.getTargetConstant(i, DL, MVT::i32));
11276
11277 SmallVector<SDValue, 8> Ops = {
11278 FixedIntrinsic->getChain(),
11279 IntID,
11280 StoredVal,
11281 Ptr,
11282 Mask,
11283 VL,
11284 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11285 // Insert the stride operand.
11286 if (IsStrided)
11287 Ops.insert(std::next(Ops.begin(), 4),
11288 Op.getOperand(Op.getNumOperands() - 3));
11289
11290 return DAG.getMemIntrinsicNode(
11291 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
11292 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
11293}
11294
11295SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11296 SelectionDAG &DAG) const {
11297 unsigned IntNo = Op.getConstantOperandVal(1);
11298 switch (IntNo) {
11299 default:
11300 break;
11301 case Intrinsic::riscv_seg2_store_mask:
11302 case Intrinsic::riscv_seg3_store_mask:
11303 case Intrinsic::riscv_seg4_store_mask:
11304 case Intrinsic::riscv_seg5_store_mask:
11305 case Intrinsic::riscv_seg6_store_mask:
11306 case Intrinsic::riscv_seg7_store_mask:
11307 case Intrinsic::riscv_seg8_store_mask:
11308 case Intrinsic::riscv_sseg2_store_mask:
11309 case Intrinsic::riscv_sseg3_store_mask:
11310 case Intrinsic::riscv_sseg4_store_mask:
11311 case Intrinsic::riscv_sseg5_store_mask:
11312 case Intrinsic::riscv_sseg6_store_mask:
11313 case Intrinsic::riscv_sseg7_store_mask:
11314 case Intrinsic::riscv_sseg8_store_mask:
11315 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
11316
11317 case Intrinsic::riscv_sf_vc_xv_se:
11318 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
11319 case Intrinsic::riscv_sf_vc_iv_se:
11320 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
11321 case Intrinsic::riscv_sf_vc_vv_se:
11322 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
11323 case Intrinsic::riscv_sf_vc_fv_se:
11324 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
11325 case Intrinsic::riscv_sf_vc_xvv_se:
11326 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
11327 case Intrinsic::riscv_sf_vc_ivv_se:
11328 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
11329 case Intrinsic::riscv_sf_vc_vvv_se:
11330 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
11331 case Intrinsic::riscv_sf_vc_fvv_se:
11332 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
11333 case Intrinsic::riscv_sf_vc_xvw_se:
11334 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
11335 case Intrinsic::riscv_sf_vc_ivw_se:
11336 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
11337 case Intrinsic::riscv_sf_vc_vvw_se:
11338 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
11339 case Intrinsic::riscv_sf_vc_fvw_se:
11340 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
11341 }
11342
11343 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11344}
11345
11346static unsigned getRVVReductionOp(unsigned ISDOpcode) {
11347 switch (ISDOpcode) {
11348 default:
11349 llvm_unreachable("Unhandled reduction");
11350 case ISD::VP_REDUCE_ADD:
11351 case ISD::VECREDUCE_ADD:
11352 return RISCVISD::VECREDUCE_ADD_VL;
11353 case ISD::VP_REDUCE_UMAX:
11354 case ISD::VECREDUCE_UMAX:
11355 return RISCVISD::VECREDUCE_UMAX_VL;
11356 case ISD::VP_REDUCE_SMAX:
11357 case ISD::VECREDUCE_SMAX:
11358 return RISCVISD::VECREDUCE_SMAX_VL;
11359 case ISD::VP_REDUCE_UMIN:
11360 case ISD::VECREDUCE_UMIN:
11361 return RISCVISD::VECREDUCE_UMIN_VL;
11362 case ISD::VP_REDUCE_SMIN:
11363 case ISD::VECREDUCE_SMIN:
11364 return RISCVISD::VECREDUCE_SMIN_VL;
11365 case ISD::VP_REDUCE_AND:
11366 case ISD::VECREDUCE_AND:
11367 return RISCVISD::VECREDUCE_AND_VL;
11368 case ISD::VP_REDUCE_OR:
11369 case ISD::VECREDUCE_OR:
11370 return RISCVISD::VECREDUCE_OR_VL;
11371 case ISD::VP_REDUCE_XOR:
11372 case ISD::VECREDUCE_XOR:
11373 return RISCVISD::VECREDUCE_XOR_VL;
11374 case ISD::VP_REDUCE_FADD:
11375 return RISCVISD::VECREDUCE_FADD_VL;
11376 case ISD::VP_REDUCE_SEQ_FADD:
11377 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
11378 case ISD::VP_REDUCE_FMAX:
11379 case ISD::VP_REDUCE_FMAXIMUM:
11380 return RISCVISD::VECREDUCE_FMAX_VL;
11381 case ISD::VP_REDUCE_FMIN:
11382 case ISD::VP_REDUCE_FMINIMUM:
11383 return RISCVISD::VECREDUCE_FMIN_VL;
11384 }
11385
11386}
11387
11388SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
11389 SelectionDAG &DAG,
11390 bool IsVP) const {
11391 SDLoc DL(Op);
11392 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
11393 MVT VecVT = Vec.getSimpleValueType();
11394 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
11395 Op.getOpcode() == ISD::VECREDUCE_OR ||
11396 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11397 Op.getOpcode() == ISD::VP_REDUCE_AND ||
11398 Op.getOpcode() == ISD::VP_REDUCE_OR ||
11399 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
11400 "Unexpected reduction lowering");
11401
11402 MVT XLenVT = Subtarget.getXLenVT();
11403
11404 MVT ContainerVT = VecVT;
11405 if (VecVT.isFixedLengthVector()) {
11406 ContainerVT = getContainerForFixedLengthVector(VecVT);
11407 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11408 }
11409
11410 SDValue Mask, VL;
11411 if (IsVP) {
11412 Mask = Op.getOperand(2);
11413 VL = Op.getOperand(3);
11414 } else {
11415 std::tie(Mask, VL) =
11416 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11417 }
11418
11419 ISD::CondCode CC;
11420 switch (Op.getOpcode()) {
11421 default:
11422 llvm_unreachable("Unhandled reduction");
11423 case ISD::VECREDUCE_AND:
11424 case ISD::VP_REDUCE_AND: {
11425 // vcpop ~x == 0
11426 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11427 if (IsVP || VecVT.isFixedLengthVector())
11428 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
11429 else
11430 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
11431 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11432 CC = ISD::SETEQ;
11433 break;
11434 }
11435 case ISD::VECREDUCE_OR:
11436 case ISD::VP_REDUCE_OR:
11437 // vcpop x != 0
11438 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11439 CC = ISD::SETNE;
11440 break;
11441 case ISD::VECREDUCE_XOR:
11442 case ISD::VP_REDUCE_XOR: {
11443 // ((vcpop x) & 1) != 0
11444 SDValue One = DAG.getConstant(1, DL, XLenVT);
11445 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11446 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
11447 CC = ISD::SETNE;
11448 break;
11449 }
11450 }
11451
11452 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11453 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
11454 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
11455
11456 if (!IsVP)
11457 return SetCC;
11458
11459 // Now include the start value in the operation.
11460 // Note that we must return the start value when no elements are operated
11461 // upon. The vcpop instructions we've emitted in each case above will return
11462 // 0 for an inactive vector, and so we've already received the neutral value:
11463 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
11464 // can simply include the start value.
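// E.g. for VP_REDUCE_AND the final value is (vcpop.m(~x) == 0) & start.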
11465 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11466 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
11467}
11468
11469static bool isNonZeroAVL(SDValue AVL) {
11470 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
11471 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
11472 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
11473 (ImmAVL && ImmAVL->getZExtValue() >= 1);
11474}
11475
11476/// Helper to lower a reduction sequence of the form:
11477/// scalar = reduce_op vec, scalar_start
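/// e.g. an unordered FADD reduction becomes vfredusum.vs vd, vs2, vs1 with
/// vs1[0] holding scalar_start, and the result is read back from vd[0].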
11478static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
11479 SDValue StartValue, SDValue Vec, SDValue Mask,
11480 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
11481 const RISCVSubtarget &Subtarget) {
11482 const MVT VecVT = Vec.getSimpleValueType();
11483 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
11484 const MVT XLenVT = Subtarget.getXLenVT();
11485 const bool NonZeroAVL = isNonZeroAVL(VL);
11486
11487 // The reduction needs an LMUL1 input; do the splat at either LMUL1
11488 // or the original VT if fractional.
11489 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
11490 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
11491 // prove it is non-zero. For the AVL=0 case, we need the scalar to
11492 // be the result of the reduction operation.
11493 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
11494 SDValue InitialValue =
11495 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
11496 if (M1VT != InnerVT)
11497 InitialValue =
11498 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
11499 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
11500 SDValue Policy = DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
11501 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
11502 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
11503 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
11504}
11505
11506SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
11507 SelectionDAG &DAG) const {
11508 SDLoc DL(Op);
11509 SDValue Vec = Op.getOperand(0);
11510 EVT VecEVT = Vec.getValueType();
11511
11512 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11513
11514 // Due to ordering in legalize types we may have a vector type that needs to
11515 // be split. Do that manually so we can get down to a legal type.
11516 while (getTypeAction(*DAG.getContext(), VecEVT) ==
11517 TargetLowering::TypeSplitVector) {
11518 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11519 VecEVT = Lo.getValueType();
11520 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
11521 }
11522
11523 // TODO: The type may need to be widened rather than split. Or widened before
11524 // it can be split.
11525 if (!isTypeLegal(VecEVT))
11526 return SDValue();
11527
11528 MVT VecVT = VecEVT.getSimpleVT();
11529 MVT VecEltVT = VecVT.getVectorElementType();
11530 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
11531
11532 MVT ContainerVT = VecVT;
11533 if (VecVT.isFixedLengthVector()) {
11534 ContainerVT = getContainerForFixedLengthVector(VecVT);
11535 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11536 }
11537
11538 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11539
11540 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
11541 switch (BaseOpc) {
11542 case ISD::AND:
11543 case ISD::OR:
11544 case ISD::UMAX:
11545 case ISD::UMIN:
11546 case ISD::SMAX:
11547 case ISD::SMIN:
11548 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
11549 }
11550 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
11551 Mask, VL, DL, DAG, Subtarget);
11552}
11553
11554// Given a reduction op, this function returns the matching reduction opcode,
11555// the vector SDValue and the scalar SDValue required to lower this to a
11556// RISCVISD node.
11557static std::tuple<unsigned, SDValue, SDValue>
11559 const RISCVSubtarget &Subtarget) {
11560 SDLoc DL(Op);
11561 auto Flags = Op->getFlags();
11562 unsigned Opcode = Op.getOpcode();
11563 switch (Opcode) {
11564 default:
11565 llvm_unreachable("Unhandled reduction");
11566 case ISD::VECREDUCE_FADD: {
11567 // Use positive zero if we can. It is cheaper to materialize.
11568 SDValue Zero =
11569 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
11570 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
11571 }
11572 case ISD::VECREDUCE_SEQ_FADD:
11573 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
11574 Op.getOperand(0));
11575 case ISD::VECREDUCE_FMINIMUM:
11576 case ISD::VECREDUCE_FMAXIMUM:
11577 case ISD::VECREDUCE_FMIN:
11578 case ISD::VECREDUCE_FMAX: {
11579 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
11580 unsigned RVVOpc =
11581 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
11582 ? RISCVISD::VECREDUCE_FMIN_VL
11583 : RISCVISD::VECREDUCE_FMAX_VL;
11584 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
11585 }
11586 }
11587}
11588
11589SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
11590 SelectionDAG &DAG) const {
11591 SDLoc DL(Op);
11592 MVT VecEltVT = Op.getSimpleValueType();
11593
11594 unsigned RVVOpcode;
11595 SDValue VectorVal, ScalarVal;
11596 std::tie(RVVOpcode, VectorVal, ScalarVal) =
11597 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
11598 MVT VecVT = VectorVal.getSimpleValueType();
11599
11600 MVT ContainerVT = VecVT;
11601 if (VecVT.isFixedLengthVector()) {
11602 ContainerVT = getContainerForFixedLengthVector(VecVT);
11603 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
11604 }
11605
11606 MVT ResVT = Op.getSimpleValueType();
11607 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11608 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
11609 VL, DL, DAG, Subtarget);
11610 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
11611 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
11612 return Res;
11613
11614 if (Op->getFlags().hasNoNaNs())
11615 return Res;
11616
11617 // Force output to NaN if any element is Nan.
11618 SDValue IsNan =
11619 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
11620 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
11621 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
11622 MVT XLenVT = Subtarget.getXLenVT();
11623 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
11624 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
11625 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11626 return DAG.getSelect(
11627 DL, ResVT, NoNaNs, Res,
11628 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11629}
11630
11631SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
11632 SelectionDAG &DAG) const {
11633 SDLoc DL(Op);
11634 unsigned Opc = Op.getOpcode();
11635 SDValue Start = Op.getOperand(0);
11636 SDValue Vec = Op.getOperand(1);
11637 EVT VecEVT = Vec.getValueType();
11638 MVT XLenVT = Subtarget.getXLenVT();
11639
11640 // TODO: The type may need to be widened rather than split. Or widened before
11641 // it can be split.
11642 if (!isTypeLegal(VecEVT))
11643 return SDValue();
11644
11645 MVT VecVT = VecEVT.getSimpleVT();
11646 unsigned RVVOpcode = getRVVReductionOp(Opc);
11647
11648 if (VecVT.isFixedLengthVector()) {
11649 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
11650 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11651 }
11652
11653 SDValue VL = Op.getOperand(3);
11654 SDValue Mask = Op.getOperand(2);
11655 SDValue Res =
11656 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
11657 Vec, Mask, VL, DL, DAG, Subtarget);
11658 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
11659 Op->getFlags().hasNoNaNs())
11660 return Res;
11661
11662 // Propagate NaNs.
11663 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
11664 // Check if any of the elements in Vec is NaN.
11665 SDValue IsNaN = DAG.getNode(
11666 RISCVISD::SETCC_VL, DL, PredVT,
11667 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
11668 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
11669 // Check if the start value is NaN.
11670 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
11671 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
11672 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
11673 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11674 MVT ResVT = Res.getSimpleValueType();
11675 return DAG.getSelect(
11676 DL, ResVT, NoNaNs, Res,
11677 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11678}
11679
11680SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
11681 SelectionDAG &DAG) const {
11682 SDValue Vec = Op.getOperand(0);
11683 SDValue SubVec = Op.getOperand(1);
11684 MVT VecVT = Vec.getSimpleValueType();
11685 MVT SubVecVT = SubVec.getSimpleValueType();
11686
11687 SDLoc DL(Op);
11688 MVT XLenVT = Subtarget.getXLenVT();
11689 unsigned OrigIdx = Op.getConstantOperandVal(2);
11690 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11691
11692 if (OrigIdx == 0 && Vec.isUndef())
11693 return Op;
11694
11695 // We don't have the ability to slide mask vectors up indexed by their i1
11696 // elements; the smallest we can do is i8. Often we are able to bitcast to
11697 // equivalent i8 vectors. Note that when inserting a fixed-length vector
11698 // into a scalable one, we might not necessarily have enough scalable
11699 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
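// For example nxv8i1 can be reinterpreted as nxv1i8 so whole bytes of the
// mask are moved at once.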
11700 if (SubVecVT.getVectorElementType() == MVT::i1) {
11701 if (VecVT.getVectorMinNumElements() >= 8 &&
11702 SubVecVT.getVectorMinNumElements() >= 8) {
11703 assert(OrigIdx % 8 == 0 && "Invalid index");
11704 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11705 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11706 "Unexpected mask vector lowering");
11707 OrigIdx /= 8;
11708 SubVecVT =
11709 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11710 SubVecVT.isScalableVector());
11711 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11712 VecVT.isScalableVector());
11713 Vec = DAG.getBitcast(VecVT, Vec);
11714 SubVec = DAG.getBitcast(SubVecVT, SubVec);
11715 } else {
11716 // We can't slide this mask vector up indexed by its i1 elements.
11717 // This poses a problem when we wish to insert a scalable vector which
11718 // can't be re-expressed as a larger type. Just choose the slow path and
11719 // extend to a larger type, then truncate back down.
11720 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11721 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11722 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11723 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
11724 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
11725 Op.getOperand(2));
11726 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
11727 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
11728 }
11729 }
11730
11731 // If the subvector is a fixed-length type and we don't know VLEN
11732 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11733 // don't know which register of a LMUL group contains the specific subvector
11734 // as we only know the minimum register size. Therefore we must slide the
11735 // vector group up the full amount.
11736 const auto VLen = Subtarget.getRealVLen();
11737 if (SubVecVT.isFixedLengthVector() && !VLen) {
11738 MVT ContainerVT = VecVT;
11739 if (VecVT.isFixedLengthVector()) {
11740 ContainerVT = getContainerForFixedLengthVector(VecVT);
11741 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11742 }
11743
11744 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
11745
11746 SDValue Mask =
11747 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11748 // Set the vector length to only the number of elements we care about. Note
11749 // that for slideup this includes the offset.
11750 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
11751 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
11752
11753 // Use tail agnostic policy if we're inserting over Vec's tail.
11754 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11755 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
11756 Policy = RISCVVType::TAIL_AGNOSTIC;
11757
11758 // If we're inserting into the lowest elements, use a tail undisturbed
11759 // vmv.v.v.
11760 if (OrigIdx == 0) {
11761 SubVec =
11762 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
11763 } else {
11764 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11765 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
11766 SlideupAmt, Mask, VL, Policy);
11767 }
11768
11769 if (VecVT.isFixedLengthVector())
11770 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11771 return DAG.getBitcast(Op.getValueType(), SubVec);
11772 }
11773
11774 MVT ContainerVecVT = VecVT;
11775 if (VecVT.isFixedLengthVector()) {
11776 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
11777 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
11778 }
11779
11780 MVT ContainerSubVecVT = SubVecVT;
11781 if (SubVecVT.isFixedLengthVector()) {
11782 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
11783 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
11784 }
11785
11786 unsigned SubRegIdx;
11787 ElementCount RemIdx;
11788 // insert_subvector scales the index by vscale if the subvector is scalable,
11789 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11790 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
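// E.g. with VLEN=128 (vscale=2), a fixed-length subvector inserted at index 8
// is decomposed using the scalable index 8/2 = 4.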
11791 if (SubVecVT.isFixedLengthVector()) {
11792 assert(VLen);
11793 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11794 auto Decompose =
11795 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11796 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
11797 SubRegIdx = Decompose.first;
11798 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
11799 (OrigIdx % Vscale));
11800 } else {
11801 auto Decompose =
11802 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11803 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
11804 SubRegIdx = Decompose.first;
11805 RemIdx = ElementCount::getScalable(Decompose.second);
11806 }
11807
11808 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
11809 assert(isPowerOf2_64(
11810 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
11811 bool ExactlyVecRegSized =
11812 Subtarget.expandVScale(SubVecVT.getSizeInBits())
11813 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
11814
11815 // 1. If the Idx has been completely eliminated and this subvector's size is
11816 // a vector register or a multiple thereof, or the surrounding elements are
11817 // undef, then this is a subvector insert which naturally aligns to a vector
11818 // register. These can easily be handled using subregister manipulation.
11819 // 2. If the subvector isn't an exact multiple of a valid register group size,
11820 // then the insertion must preserve the undisturbed elements of the register.
11821 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
11822 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
11823 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
11824 // of that LMUL=1 type back into the larger vector (resolving to another
11825 // subregister operation). See below for how our VSLIDEUP works. We go via a
11826 // LMUL=1 type to avoid allocating a large register group to hold our
11827 // subvector.
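// E.g. inserting an LMUL=1-sized subvector at a register-aligned offset of an
// LMUL=4 register group falls into case 1 and becomes a plain subregister
// insert.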
11828 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
11829 if (SubVecVT.isFixedLengthVector()) {
11830 // We may get NoSubRegister if inserting at index 0 and the subvec
11831 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
11832 if (SubRegIdx == RISCV::NoSubRegister) {
11833 assert(OrigIdx == 0);
11834 return Op;
11835 }
11836
11837 // Use an insert_subvector that will resolve to an insert subreg.
11838 assert(VLen);
11839 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11840 SDValue Insert =
11841 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
11842 if (VecVT.isFixedLengthVector())
11843 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
11844 return Insert;
11845 }
11846 return Op;
11847 }
11848
11849 // VSLIDEUP works by leaving elements 0 <= i < OFFSET undisturbed, elements
11850 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
11851 // (in our case undisturbed). This means we can set up a subvector insertion
11852 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
11853 // size of the subvector.
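// E.g. inserting a 2-element subvector at offset 4 uses a slideup amount of 4
// and VL=6: elements 0..3 and everything past element 5 stay undisturbed.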
11854 MVT InterSubVT = ContainerVecVT;
11855 SDValue AlignedExtract = Vec;
11856 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
11857 if (SubVecVT.isFixedLengthVector()) {
11858 assert(VLen);
11859 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
11860 }
11861 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
11862 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
11863 // Extract a subvector equal to the nearest full vector register type. This
11864 // should resolve to a EXTRACT_SUBREG instruction.
11865 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
11866 }
11867
11868 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
11869
11870 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
11871
11872 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
11873 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
11874
11875 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
11876 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11877 if (Subtarget.expandVScale(EndIndex) ==
11878 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
11879 Policy = RISCVVType::TAIL_AGNOSTIC;
11880
11881 // If we're inserting into the lowest elements, use a tail undisturbed
11882 // vmv.v.v.
11883 if (RemIdx.isZero()) {
11884 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
11885 SubVec, VL);
11886 } else {
11887 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
11888
11889 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
11890 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
11891
11892 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
11893 SlideupAmt, Mask, VL, Policy);
11894 }
11895
11896 // If required, insert this subvector back into the correct vector register.
11897 // This should resolve to an INSERT_SUBREG instruction.
11898 if (ContainerVecVT.bitsGT(InterSubVT))
11899 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
11900
11901 if (VecVT.isFixedLengthVector())
11902 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11903
11904 // We might have bitcast from a mask type: cast back to the original type if
11905 // required.
11906 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
11907}
11908
11909SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
11910 SelectionDAG &DAG) const {
11911 SDValue Vec = Op.getOperand(0);
11912 MVT SubVecVT = Op.getSimpleValueType();
11913 MVT VecVT = Vec.getSimpleValueType();
11914
11915 SDLoc DL(Op);
11916 MVT XLenVT = Subtarget.getXLenVT();
11917 unsigned OrigIdx = Op.getConstantOperandVal(1);
11918 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11919
11920 // With an index of 0 this is a cast-like subvector, which can be performed
11921 // with subregister operations.
11922 if (OrigIdx == 0)
11923 return Op;
11924
11925 // We don't have the ability to slide mask vectors down indexed by their i1
11926 // elements; the smallest we can do is i8. Often we are able to bitcast to
11927 // equivalent i8 vectors. Note that when extracting a fixed-length vector
11928 // from a scalable one, we might not necessarily have enough scalable
11929 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
11930 if (SubVecVT.getVectorElementType() == MVT::i1) {
11931 if (VecVT.getVectorMinNumElements() >= 8 &&
11932 SubVecVT.getVectorMinNumElements() >= 8) {
11933 assert(OrigIdx % 8 == 0 && "Invalid index");
11934 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11935 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11936 "Unexpected mask vector lowering");
11937 OrigIdx /= 8;
11938 SubVecVT =
11939 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11940 SubVecVT.isScalableVector());
11941 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11942 VecVT.isScalableVector());
11943 Vec = DAG.getBitcast(VecVT, Vec);
11944 } else {
11945 // We can't slide this mask vector down, indexed by its i1 elements.
11946 // This poses a problem when we wish to extract a scalable vector which
11947 // can't be re-expressed as a larger type. Just choose the slow path and
11948 // extend to a larger type, then truncate back down.
11949 // TODO: We could probably improve this when extracting certain fixed
11950 // from fixed, where we can extract as i8 and shift the correct element
11951 // right to reach the desired subvector?
11952 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11953 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11954 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11955 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
11956 Op.getOperand(1));
11957 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
11958 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
11959 }
11960 }
11961
11962 const auto VLen = Subtarget.getRealVLen();
11963
11964 // If the subvector is a fixed-length type and we don't know VLEN
11965 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11966 // don't know which register of a LMUL group contains the specific subvector
11967 // as we only know the minimum register size. Therefore we must slide the
11968 // vector group down the full amount.
11969 if (SubVecVT.isFixedLengthVector() && !VLen) {
11970 MVT ContainerVT = VecVT;
11971 if (VecVT.isFixedLengthVector()) {
11972 ContainerVT = getContainerForFixedLengthVector(VecVT);
11973 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11974 }
11975
11976 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
11977 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
11978 if (auto ShrunkVT =
11979 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
11980 ContainerVT = *ShrunkVT;
11981 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
11982 }
11983
11984 SDValue Mask =
11985 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11986 // Set the vector length to only the number of elements we care about. This
11987 // avoids sliding down elements we're going to discard straight away.
11988 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
11989 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11990 SDValue Slidedown =
11991 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11992 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
11993 // Now we can use a cast-like subvector extract to get the result.
11994 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
11995 return DAG.getBitcast(Op.getValueType(), Slidedown);
11996 }
11997
11998 if (VecVT.isFixedLengthVector()) {
11999 VecVT = getContainerForFixedLengthVector(VecVT);
12000 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
12001 }
12002
12003 MVT ContainerSubVecVT = SubVecVT;
12004 if (SubVecVT.isFixedLengthVector())
12005 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12006
12007 unsigned SubRegIdx;
12008 ElementCount RemIdx;
12009 // extract_subvector scales the index by vscale if the subvector is scalable,
12010 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12011 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12012 if (SubVecVT.isFixedLengthVector()) {
12013 assert(VLen);
12014 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12015 auto Decompose =
12016 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12017 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12018 SubRegIdx = Decompose.first;
12019 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12020 (OrigIdx % Vscale));
12021 } else {
12022 auto Decompose =
12023 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12024 VecVT, ContainerSubVecVT, OrigIdx, TRI);
12025 SubRegIdx = Decompose.first;
12026 RemIdx = ElementCount::getScalable(Decompose.second);
12027 }
12028
12029 // If the Idx has been completely eliminated then this is a subvector extract
12030 // which naturally aligns to a vector register. These can easily be handled
12031 // using subregister manipulation. We use an extract_subvector that will
12032 // resolve to an extract subreg.
12033 if (RemIdx.isZero()) {
12034 if (SubVecVT.isFixedLengthVector()) {
12035 assert(VLen);
12036 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12037 Vec =
12038 DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
12039 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
12040 }
12041 return Op;
12042 }
12043
12044 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
12045 // was > M1 then the index would need to be a multiple of VLMAX, and so would
12046 // divide exactly.
12047 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
12048 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
12049
12050 // If the vector type is an LMUL-group type, extract a subvector equal to the
12051 // nearest full vector register type.
12052 MVT InterSubVT = VecVT;
12053 if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
12054 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
12055 // we should have successfully decomposed the extract into a subregister.
12056 // We use an extract_subvector that will resolve to a subreg extract.
12057 assert(SubRegIdx != RISCV::NoSubRegister);
12058 (void)SubRegIdx;
12059 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
12060 if (SubVecVT.isFixedLengthVector()) {
12061 assert(VLen);
12062 Idx /= *VLen / RISCV::RVVBitsPerBlock;
12063 }
12064 InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
12065 Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
12066 }
12067
12068 // Slide this vector register down by the desired number of elements in order
12069 // to place the desired subvector starting at element 0.
12070 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12071 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
12072 if (SubVecVT.isFixedLengthVector())
12073 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12074 SDValue Slidedown =
12075 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
12076 Vec, SlidedownAmt, Mask, VL);
12077
12078 // Now the vector is in the right position, extract our final subvector. This
12079 // should resolve to a COPY.
12080 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12081
12082 // We might have bitcast from a mask type: cast back to the original type if
12083 // required.
12084 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12085}
12086
12087// Widen a vector's operands to i8, then truncate its results back to the
12088// original type, typically i1. All operand and result types must be the same.
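// E.g. an i1 vector deinterleave is widened to i8, performed there, and each
// result is compared against zero to recover the i1 values.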
12089 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
12090 SelectionDAG &DAG) {
12091 MVT VT = N.getSimpleValueType();
12092 MVT WideVT = VT.changeVectorElementType(MVT::i8);
12093 SmallVector<SDValue, 4> WideOps;
12094 for (SDValue Op : N->ops()) {
12095 assert(Op.getSimpleValueType() == VT &&
12096 "Operands and result must be same type");
12097 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12098 }
12099
12100 unsigned NumVals = N->getNumValues();
12101
12102 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
12103 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
12104 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12105 SmallVector<SDValue, 4> TruncVals;
12106 for (unsigned I = 0; I < NumVals; I++) {
12107 TruncVals.push_back(
12108 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12109 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12110 }
12111
12112 if (TruncVals.size() > 1)
12113 return DAG.getMergeValues(TruncVals, DL);
12114 return TruncVals.front();
12115}
12116
12117SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12118 SelectionDAG &DAG) const {
12119 SDLoc DL(Op);
12120 MVT VecVT = Op.getSimpleValueType();
12121
12122 const unsigned Factor = Op->getNumValues();
12123 assert(Factor <= 8);
12124
12125 // 1 bit element vectors need to be widened to e8
12126 if (VecVT.getVectorElementType() == MVT::i1)
12127 return widenVectorOpsToi8(Op, DL, DAG);
12128
12129 // Convert to scalable vectors first.
12130 if (VecVT.isFixedLengthVector()) {
12131 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12132 SmallVector<SDValue, 8> Ops(Factor);
12133 for (unsigned i = 0U; i < Factor; ++i)
12134 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12135 Subtarget);
12136
12137 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12138 SDValue NewDeinterleave =
12139 DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs), Ops);
12140
12141 SmallVector<SDValue, 8> Res(Factor);
12142 for (unsigned i = 0U; i < Factor; ++i)
12143 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12144 DAG, Subtarget);
12145 return DAG.getMergeValues(Res, DL);
12146 }
12147
12148 // If concatenating would exceed LMUL=8, we need to split.
12149 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12150 (8 * RISCV::RVVBitsPerBlock)) {
12151 SmallVector<SDValue, 8> Ops(Factor * 2);
12152 for (unsigned i = 0; i != Factor; ++i) {
12153 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12154 Ops[i * 2] = OpLo;
12155 Ops[i * 2 + 1] = OpHi;
12156 }
12157
12158 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12159
12160 SDValue Lo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs),
12161 ArrayRef(Ops).slice(0, Factor));
12162 SDValue Hi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs),
12163 ArrayRef(Ops).slice(Factor, Factor));
12164
12165 SmallVector<SDValue, 8> Res(Factor);
12166 for (unsigned i = 0; i != Factor; ++i)
12167 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
12168 Hi.getValue(i));
12169
12170 return DAG.getMergeValues(Res, DL);
12171 }
12172
12173 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
12174 MVT VT = Op->getSimpleValueType(0);
12175 SDValue V1 = Op->getOperand(0);
12176 SDValue V2 = Op->getOperand(1);
12177
12178 // For fractional LMUL, check if we can use a higher LMUL
12179 // instruction to avoid a vslidedown.
12180 if (SDValue Src = foldConcatVector(V1, V2);
12181 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
12182 EVT NewVT = VT.getDoubleNumVectorElementsVT();
12183 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
12184 // Freeze the source so we can increase its use count.
12185 Src = DAG.getFreeze(Src);
12186 SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
12187 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12188 SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
12189 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12190 Even = DAG.getExtractSubvector(DL, VT, Even, 0);
12191 Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
12192 return DAG.getMergeValues({Even, Odd}, DL);
12193 }
12194
12195 // Freeze the sources so we can increase their use count.
12196 V1 = DAG.getFreeze(V1);
12197 V2 = DAG.getFreeze(V2);
12198 SDValue Even =
12199 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12200 SDValue Odd =
12201 lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12202 return DAG.getMergeValues({Even, Odd}, DL);
12203 }
12204
12205 SmallVector<SDValue, 8> Ops(Op->op_values());
12206
12207 // Concatenate the vectors as one vector to deinterleave
12208 MVT ConcatVT =
12209 MVT::getVectorVT(VecVT.getVectorElementType(),
12210 VecVT.getVectorElementCount().multiplyCoefficientBy(
12211 PowerOf2Ceil(Factor)));
12212 if (Ops.size() < PowerOf2Ceil(Factor))
12213 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
12214 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
12215
12216 if (Factor == 2) {
12217 // We can deinterleave through vnsrl.wi if the element type is smaller than
12218 // ELEN
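    // For example, at SEW=8 the concatenation can be viewed as an e16 vector:
    // a narrowing shift (vnsrl.wi) by 0 keeps the low (even) bytes, while a
    // shift by 8 keeps the high (odd) bytes.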
12219 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12220 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
12221 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
12222 return DAG.getMergeValues({Even, Odd}, DL);
12223 }
12224
12225 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
12226 // possible mask vector, then extract the required subvector. Doing this
12227 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
12228 // creation to be rematerialized during register allocation to reduce
12229 // register pressure if needed.
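    // For example, splatting the i8 value 0b01010101 and reinterpreting it as
    // a mask yields the bit pattern 1,0,1,0,..., i.e. a mask of the even
    // element positions; 0b10101010 likewise yields the odd positions.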
12230
12231 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
12232
12233 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
12234 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
12235 SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
12236
12237 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
12238 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
12239 SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
12240
12241 // vcompress the even and odd elements into two separate vectors
12242 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12243 EvenMask, DAG.getUNDEF(ConcatVT));
12244 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12245 OddMask, DAG.getUNDEF(ConcatVT));
12246
12247 // Extract the result half of the gather for even and odd
12248 SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
12249 SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
12250
12251 return DAG.getMergeValues({Even, Odd}, DL);
12252 }
12253
12254 // Store with a unit-stride store and load it back with a segmented load.
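  // Illustrative sketch for Factor=3: the concatenation x0,x1,x2,x3,... is
  // spilled contiguously, and a vlseg3 load reads it back as the three
  // vectors {x0,x3,x6,...}, {x1,x4,x7,...}, {x2,x5,x8,...}, i.e. the
  // segmented load itself performs the deinterleave.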
12255 MVT XLenVT = Subtarget.getXLenVT();
12256 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12257 SDValue Passthru = DAG.getUNDEF(ConcatVT);
12258
12259 // Allocate a stack slot.
12260 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12261 SDValue StackPtr =
12262 DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
12263 auto &MF = DAG.getMachineFunction();
12264 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12265 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12266
12267 SDValue StoreOps[] = {DAG.getEntryNode(),
12268 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
12269 Concat, StackPtr, VL};
12270
12271 SDValue Chain = DAG.getMemIntrinsicNode(
12272 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
12273 ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12274 MachineMemOperand::MOStore, MemoryLocation::UnknownSize);
12275
12276 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
12277 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
12278 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
12279 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
12280 Intrinsic::riscv_vlseg8_mask};
12281
12282 SDValue LoadOps[] = {
12283 Chain,
12284 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
12285 Passthru,
12286 StackPtr,
12287 Mask,
12288 VL,
12289 DAG.getTargetConstant(
12290 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
12291 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
12292
12293 unsigned Sz =
12294 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12295 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12296
12297 SDValue Load = DAG.getMemIntrinsicNode(
12298 ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
12299 LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12300 MachineMemOperand::MOLoad, MemoryLocation::UnknownSize);
12301
12302 SmallVector<SDValue, 8> Res(Factor);
12303
12304 for (unsigned i = 0U; i < Factor; ++i)
12305 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
12306 DAG.getTargetConstant(i, DL, MVT::i32));
12307
12308 return DAG.getMergeValues(Res, DL);
12309}
12310
12311SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
12312 SelectionDAG &DAG) const {
12313 SDLoc DL(Op);
12314 MVT VecVT = Op.getSimpleValueType();
12315
12316 const unsigned Factor = Op.getNumOperands();
12317 assert(Factor <= 8);
12318
12319 // i1 vectors need to be widened to i8
12320 if (VecVT.getVectorElementType() == MVT::i1)
12321 return widenVectorOpsToi8(Op, DL, DAG);
12322
12323 // Convert to scalable vectors first.
12324 if (VecVT.isFixedLengthVector()) {
12325 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12326 SmallVector<SDValue, 8> Ops(Factor);
12327 for (unsigned i = 0U; i < Factor; ++i)
12328 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12329 Subtarget);
12330
12331 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12332 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
12333
12334 SmallVector<SDValue, 8> Res(Factor);
12335 for (unsigned i = 0U; i < Factor; ++i)
12336 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
12337 Subtarget);
12338 return DAG.getMergeValues(Res, DL);
12339 }
12340
12341 MVT XLenVT = Subtarget.getXLenVT();
12342 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12343
12344 // If the VT is larger than LMUL=8, we need to split and reassemble.
12345 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12346 (8 * RISCV::RVVBitsPerBlock)) {
12347 SmallVector<SDValue, 8> Ops(Factor * 2);
12348 for (unsigned i = 0; i != Factor; ++i) {
12349 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12350 Ops[i] = OpLo;
12351 Ops[i + Factor] = OpHi;
12352 }
12353
12354 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12355
12356 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12357 ArrayRef(Ops).take_front(Factor)),
12358 DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12359 ArrayRef(Ops).drop_front(Factor))};
12360
12361 SmallVector<SDValue, 8> Concats(Factor);
12362 for (unsigned i = 0; i != Factor; ++i) {
12363 unsigned IdxLo = 2 * i;
12364 unsigned IdxHi = 2 * i + 1;
12365 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
12366 Res[IdxLo / Factor].getValue(IdxLo % Factor),
12367 Res[IdxHi / Factor].getValue(IdxHi % Factor));
12368 }
12369
12370 return DAG.getMergeValues(Concats, DL);
12371 }
12372
12373 SDValue Interleaved;
12374
12375 // Spill to the stack using a segment store for simplicity.
12376 if (Factor != 2) {
12377 EVT MemVT =
12378 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(),
12379 VecVT.getVectorElementCount() * Factor);
12380
12381 // Allocate a stack slot.
12382 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12383 SDValue StackPtr =
12384 DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12385 EVT PtrVT = StackPtr.getValueType();
12386 auto &MF = DAG.getMachineFunction();
12387 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12388 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12389
12390 static const Intrinsic::ID IntrIds[] = {
12391 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12392 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12393 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12394 Intrinsic::riscv_vsseg8_mask,
12395 };
12396
12397 unsigned Sz =
12398 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12399 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12400
12401 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12402 for (unsigned i = 0; i < Factor; i++)
12403 StoredVal =
12404 DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12405 Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
12406
12407 SDValue Ops[] = {DAG.getEntryNode(),
12408 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
12409 StoredVal,
12410 StackPtr,
12411 Mask,
12412 VL,
12413 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()),
12414 DL, XLenVT)};
12415
12416 SDValue Chain = DAG.getMemIntrinsicNode(
12417 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12418 VecVT.getVectorElementType(), PtrInfo, Alignment,
12419 MachineMemOperand::MOStore, MemoryLocation::UnknownSize);
12420
12421 SmallVector<SDValue, 8> Loads(Factor);
12422
12423 SDValue Increment =
12424 DAG.getVScale(DL, PtrVT,
12425 APInt(PtrVT.getFixedSizeInBits(),
12426 VecVT.getStoreSize().getKnownMinValue()));
12427 for (unsigned i = 0; i != Factor; ++i) {
12428 if (i != 0)
12429 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
12430
12431 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12432 }
12433
12434 return DAG.getMergeValues(Loads, DL);
12435 }
12436
12437 // Use ri.vzip2{a,b} if available
12438 // TODO: Figure out the best lowering for the spread variants
12439 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
12440 !Op.getOperand(1).isUndef()) {
12441 // Freeze the sources so we can increase their use count.
12442 SDValue V1 = DAG.getFreeze(Op->getOperand(0));
12443 SDValue V2 = DAG.getFreeze(Op->getOperand(1));
12444 SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12445 SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12446 return DAG.getMergeValues({Lo, Hi}, DL);
12447 }
12448
12449 // If the element type is smaller than ELEN, then we can interleave with
12450 // vwaddu.vv and vwmaccu.vx
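  // Roughly, at 2*SEW the value zext(a) + zext(b) * 2^SEW holds a[i] in the
  // low half and b[i] in the high half of each element, which reinterpreted
  // at SEW is a[0],b[0],a[1],b[1],...; vwaddu.vv supplies zext(a) + zext(b)
  // and vwmaccu.vx with the all-ones scalar (2^SEW - 1) adds the remaining
  // b * (2^SEW - 1).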
12451 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12452 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
12453 DAG, Subtarget);
12454 } else {
12455 // Otherwise, fall back to using vrgatherei16.vv
12456 MVT ConcatVT =
12457 MVT::getVectorVT(VecVT.getVectorElementType(),
12458 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
12459 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
12460 Op.getOperand(0), Op.getOperand(1));
12461
12462 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
12463
12464 // 0 1 2 3 4 5 6 7 ...
12465 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
12466
12467 // 1 1 1 1 1 1 1 1 ...
12468 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
12469
12470 // 1 0 1 0 1 0 1 0 ...
12471 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
12472 OddMask = DAG.getSetCC(
12473 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
12474 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
12475 ISD::SETNE);
12476
12477 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
12478
12479 // Build up the index vector for interleaving the concatenated vector
12480 // 0 0 1 1 2 2 3 3 ...
12481 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
12482 // 0 n 1 n+1 2 n+2 3 n+3 ...
12483 Idx =
12484 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
12485
12486 // Then perform the interleave
12487 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
12488 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
12489 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
12490 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
12491 }
12492
12493 // Extract the two halves from the interleaved result
12494 SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
12495 SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
12496 VecVT.getVectorMinNumElements());
12497
12498 return DAG.getMergeValues({Lo, Hi}, DL);
12499}
12500
12501// Lower step_vector to the vid instruction. Any non-identity step value must
12502// be accounted for by manual expansion.
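// For example, step_vector with a constant step of 4 lowers to vid.v followed
// by a shift left by 2, while a non-power-of-two step such as 3 lowers to
// vid.v followed by a multiply by the splatted step value.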
12503SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
12504 SelectionDAG &DAG) const {
12505 SDLoc DL(Op);
12506 MVT VT = Op.getSimpleValueType();
12507 assert(VT.isScalableVector() && "Expected scalable vector");
12508 MVT XLenVT = Subtarget.getXLenVT();
12509 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
12510 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
12511 uint64_t StepValImm = Op.getConstantOperandVal(0);
12512 if (StepValImm != 1) {
12513 if (isPowerOf2_64(StepValImm)) {
12514 SDValue StepVal =
12515 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
12516 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
12517 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
12518 } else {
12519 SDValue StepVal = lowerScalarSplat(
12520 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
12521 VL, VT, DL, DAG, Subtarget);
12522 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
12523 }
12524 }
12525 return StepVec;
12526}
12527
12528// Implement vector_reverse using vrgather.vv with indices determined by
12529// subtracting the id of each element from (VLMAX-1). This will convert
12530// the indices like so:
12531// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
12532// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
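// For example, with VLMAX = 8 the generated index vector is 7,6,5,4,3,2,1,0,
// so element i of the result reads element VLMAX-1-i of the source.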
12533SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
12534 SelectionDAG &DAG) const {
12535 SDLoc DL(Op);
12536 MVT VecVT = Op.getSimpleValueType();
12537 if (VecVT.getVectorElementType() == MVT::i1) {
12538 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
12539 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
12540 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
12541 return DAG.getSetCC(DL, VecVT, Op2,
12542 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
12543 }
12544
12545 MVT ContainerVT = VecVT;
12546 SDValue Vec = Op.getOperand(0);
12547 if (VecVT.isFixedLengthVector()) {
12548 ContainerVT = getContainerForFixedLengthVector(VecVT);
12549 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12550 }
12551
12552 MVT XLenVT = Subtarget.getXLenVT();
12553 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12554
12555 // On some uarchs vrgather.vv will read from every input register for each
12556 // output register, regardless of the indices. However to reverse a vector
12557 // each output register only needs to read from one register. So decompose it
12558 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
12559 // O(LMUL^2).
12560 //
12561 // vsetvli a1, zero, e64, m4, ta, ma
12562 // vrgatherei16.vv v12, v8, v16
12563 // ->
12564 // vsetvli a1, zero, e64, m1, ta, ma
12565 // vrgather.vv v15, v8, v16
12566 // vrgather.vv v14, v9, v16
12567 // vrgather.vv v13, v10, v16
12568 // vrgather.vv v12, v11, v16
12569 if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
12570 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
12571 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
12572 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
12573 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
12574 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
12575
12576 // Fixed length vectors might not fit exactly into their container, and so
12577 // leave a gap in the front of the vector after being reversed. Slide this
12578 // away.
12579 //
12580 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
12581 // 0 1 2 3 x x x x <- reverse
12582 // x x x x 0 1 2 3 <- vslidedown.vx
12583 if (VecVT.isFixedLengthVector()) {
12584 SDValue Offset = DAG.getNode(
12585 ISD::SUB, DL, XLenVT,
12586 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
12587 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
12588 Concat =
12589 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12590 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
12591 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
12592 }
12593 return Concat;
12594 }
12595
12596 unsigned EltSize = ContainerVT.getScalarSizeInBits();
12597 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
12598 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12599 unsigned MaxVLMAX =
12600 VecVT.isFixedLengthVector()
12601 ? VecVT.getVectorNumElements()
12602 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12603
12604 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12605 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
12606
12607 // If this is SEW=8 and VLMAX is potentially more than 256, we need
12608 // to use vrgatherei16.vv.
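  // (vrgather.vv with SEW=8 indices can only address source elements 0..255,
  // so once VLMAX may exceed 256 the indices themselves must be widened to
  // i16.)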
12609 if (MaxVLMAX > 256 && EltSize == 8) {
12610 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
12611 // Reverse each half, then reassemble them in reverse order.
12612 // NOTE: It's also possible that, after splitting, VLMAX no longer
12613 // requires vrgatherei16.vv.
12614 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12615 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
12616 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
12617 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12618 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12619 // Reassemble the low and high pieces reversed.
12620 // FIXME: This is a CONCAT_VECTORS.
12621 SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0);
12622 return DAG.getInsertSubvector(DL, Res, Lo,
12623 LoVT.getVectorMinNumElements());
12624 }
12625
12626 // Just promote the int type to i16 which will double the LMUL.
12627 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
12628 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12629 }
12630
12631 // At LMUL > 1, do the index computation in 16 bits to reduce register
12632 // pressure.
12633 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
12634 IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
12635 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
12636 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12637 IntVT = IntVT.changeVectorElementType(MVT::i16);
12638 }
12639
12640 // Calculate VLMAX-1 for the desired SEW.
12641 SDValue VLMinus1 = DAG.getNode(
12642 ISD::SUB, DL, XLenVT,
12643 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
12644 DAG.getConstant(1, DL, XLenVT));
12645
12646 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
12647 bool IsRV32E64 =
12648 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
12649 SDValue SplatVL;
12650 if (!IsRV32E64)
12651 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
12652 else
12653 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
12654 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
12655
12656 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
12657 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
12658 DAG.getUNDEF(IntVT), Mask, VL);
12659
12660 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
12661 DAG.getUNDEF(ContainerVT), Mask, VL);
12662 if (VecVT.isFixedLengthVector())
12663 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
12664 return Gather;
12665}
12666
12667SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
12668 SelectionDAG &DAG) const {
12669 SDLoc DL(Op);
12670 SDValue V1 = Op.getOperand(0);
12671 SDValue V2 = Op.getOperand(1);
12672 MVT XLenVT = Subtarget.getXLenVT();
12673 MVT VecVT = Op.getSimpleValueType();
12674
12675 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
12676
12677 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
12678 SDValue DownOffset, UpOffset;
12679 if (ImmValue >= 0) {
12680 // The operand is a TargetConstant, we need to rebuild it as a regular
12681 // constant.
12682 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12683 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
12684 } else {
12685 // The operand is a TargetConstant, we need to rebuild it as a regular
12686 // constant rather than negating the original operand.
12687 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12688 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
12689 }
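  // Sketch: for a non-negative splice index K the result is V1[K..VLMAX-1]
  // followed by V2[0..K-1], formed by sliding V1 down by K and then sliding
  // V2 up by VLMAX-K on top of it; a negative index -K instead keeps the last
  // K elements of V1, hence the swapped offset computation above.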
12690
12691 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
12692
12693 SDValue SlideDown = getVSlidedown(
12694 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
12695 Subtarget.hasVLDependentLatency() ? UpOffset
12696 : DAG.getRegister(RISCV::X0, XLenVT));
12697 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
12698 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
12699 RISCVVType::TAIL_AGNOSTIC);
12700}
12701
12702SDValue
12703RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
12704 SelectionDAG &DAG) const {
12705 SDLoc DL(Op);
12706 auto *Load = cast<LoadSDNode>(Op);
12707
12709 Load->getMemoryVT(),
12710 *Load->getMemOperand()) &&
12711 "Expecting a correctly-aligned load");
12712
12713 MVT VT = Op.getSimpleValueType();
12714 MVT XLenVT = Subtarget.getXLenVT();
12715 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12716
12717 // If we know the exact VLEN and our fixed length vector completely fills
12718 // the container, use a whole register load instead.
12719 const auto [MinVLMAX, MaxVLMAX] =
12720 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12721 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12722 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12723 MachineMemOperand *MMO = Load->getMemOperand();
12724 SDValue NewLoad =
12725 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
12726 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
12727 MMO->getAAInfo(), MMO->getRanges());
12728 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12729 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12730 }
12731
12732 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12733
12734 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12735 SDValue IntID = DAG.getTargetConstant(
12736 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
12737 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
12738 if (!IsMaskOp)
12739 Ops.push_back(DAG.getUNDEF(ContainerVT));
12740 Ops.push_back(Load->getBasePtr());
12741 Ops.push_back(VL);
12742 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12743 SDValue NewLoad =
12744 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12745 Load->getMemoryVT(), Load->getMemOperand());
12746
12747 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12748 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12749}
12750
12751SDValue
12752RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
12753 SelectionDAG &DAG) const {
12754 SDLoc DL(Op);
12755 auto *Store = cast<StoreSDNode>(Op);
12756
12757 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12758 Store->getMemoryVT(),
12759 *Store->getMemOperand()) &&
12760 "Expecting a correctly-aligned store");
12761
12762 SDValue StoreVal = Store->getValue();
12763 MVT VT = StoreVal.getSimpleValueType();
12764 MVT XLenVT = Subtarget.getXLenVT();
12765
12766 // If the size is less than a byte, we need to pad with zeros to make a byte.
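  // For example, a v4i1 store value is inserted into a zeroed v8i1 vector so
  // the mask store below always writes a full byte with the unused upper bits
  // cleared.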
12767 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
12768 VT = MVT::v8i1;
12769 StoreVal =
12770 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
12771 }
12772
12773 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12774
12775 SDValue NewValue =
12776 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12777
12778 // If we know the exact VLEN and our fixed length vector completely fills
12779 // the container, use a whole register store instead.
12780 const auto [MinVLMAX, MaxVLMAX] =
12781 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12782 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12783 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12784 MachineMemOperand *MMO = Store->getMemOperand();
12785 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
12786 MMO->getPointerInfo(), MMO->getBaseAlign(),
12787 MMO->getFlags(), MMO->getAAInfo());
12788 }
12789
12790 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12791
12792 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12793 SDValue IntID = DAG.getTargetConstant(
12794 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
12795 return DAG.getMemIntrinsicNode(
12796 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
12797 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
12798 Store->getMemoryVT(), Store->getMemOperand());
12799}
12800
12801SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
12802 SelectionDAG &DAG) const {
12803 SDLoc DL(Op);
12804 MVT VT = Op.getSimpleValueType();
12805
12806 const auto *MemSD = cast<MemSDNode>(Op);
12807 EVT MemVT = MemSD->getMemoryVT();
12808 MachineMemOperand *MMO = MemSD->getMemOperand();
12809 SDValue Chain = MemSD->getChain();
12810 SDValue BasePtr = MemSD->getBasePtr();
12811
12812 SDValue Mask, PassThru, VL;
12813 bool IsExpandingLoad = false;
12814 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
12815 Mask = VPLoad->getMask();
12816 PassThru = DAG.getUNDEF(VT);
12817 VL = VPLoad->getVectorLength();
12818 } else {
12819 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
12820 Mask = MLoad->getMask();
12821 PassThru = MLoad->getPassThru();
12822 IsExpandingLoad = MLoad->isExpandingLoad();
12823 }
12824
12825 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12826
12827 MVT XLenVT = Subtarget.getXLenVT();
12828
12829 MVT ContainerVT = VT;
12830 if (VT.isFixedLengthVector()) {
12831 ContainerVT = getContainerForFixedLengthVector(VT);
12832 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12833 if (!IsUnmasked) {
12834 MVT MaskVT = getMaskTypeFor(ContainerVT);
12835 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12836 }
12837 }
12838
12839 if (!VL)
12840 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12841
12842 SDValue ExpandingVL;
12843 if (!IsUnmasked && IsExpandingLoad) {
12844 ExpandingVL = VL;
12845 VL =
12846 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12847 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12848 }
12849
12850 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
12851 : Intrinsic::riscv_vle_mask;
12852 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12853 if (IntID == Intrinsic::riscv_vle)
12854 Ops.push_back(DAG.getUNDEF(ContainerVT));
12855 else
12856 Ops.push_back(PassThru);
12857 Ops.push_back(BasePtr);
12858 if (IntID == Intrinsic::riscv_vle_mask)
12859 Ops.push_back(Mask);
12860 Ops.push_back(VL);
12861 if (IntID == Intrinsic::riscv_vle_mask)
12862 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
12863
12864 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12865
12866 SDValue Result =
12867 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12868 Chain = Result.getValue(1);
12869 if (ExpandingVL) {
12870 MVT IndexVT = ContainerVT;
12871 if (ContainerVT.isFloatingPoint())
12872 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
12873
12874 MVT IndexEltVT = IndexVT.getVectorElementType();
12875 bool UseVRGATHEREI16 = false;
12876 // If the index vector is an i8 vector and the element count exceeds 256, we
12877 // should change the element type of the index vector to i16 to avoid
12878 // overflow.
12879 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
12880 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
12881 assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
12882 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
12883 UseVRGATHEREI16 = true;
12884 }
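    // Sketch of the expansion: viota.m numbers the active mask lanes
    // 0,1,2,..., so gathering with those indices spreads the densely loaded
    // elements back out to their original (expanded) positions, while
    // inactive lanes take the passthru value.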
12885
12886 SDValue Iota =
12887 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
12888 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
12889 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
12890 Result =
12891 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
12892 : RISCVISD::VRGATHER_VV_VL,
12893 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
12894 }
12895
12896 if (VT.isFixedLengthVector())
12897 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12898
12899 return DAG.getMergeValues({Result, Chain}, DL);
12900}
12901
12902SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
12903 SDLoc DL(Op);
12904 MVT VT = Op->getSimpleValueType(0);
12905
12906 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
12907 EVT MemVT = VPLoadFF->getMemoryVT();
12908 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
12909 SDValue Chain = VPLoadFF->getChain();
12910 SDValue BasePtr = VPLoadFF->getBasePtr();
12911
12912 SDValue Mask = VPLoadFF->getMask();
12913 SDValue VL = VPLoadFF->getVectorLength();
12914
12915 MVT XLenVT = Subtarget.getXLenVT();
12916
12917 MVT ContainerVT = VT;
12918 if (VT.isFixedLengthVector()) {
12919 ContainerVT = getContainerForFixedLengthVector(VT);
12920 MVT MaskVT = getMaskTypeFor(ContainerVT);
12921 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12922 }
12923
12924 unsigned IntID = Intrinsic::riscv_vleff_mask;
12925 SDValue Ops[] = {
12926 Chain,
12927 DAG.getTargetConstant(IntID, DL, XLenVT),
12928 DAG.getUNDEF(ContainerVT),
12929 BasePtr,
12930 Mask,
12931 VL,
12932 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)};
12933
12934 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
12935
12936 SDValue Result =
12937 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12938 SDValue OutVL = Result.getValue(1);
12939 Chain = Result.getValue(2);
12940
12941 if (VT.isFixedLengthVector())
12942 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12943
12944 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
12945}
12946
12947SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
12948 SelectionDAG &DAG) const {
12949 SDLoc DL(Op);
12950
12951 const auto *MemSD = cast<MemSDNode>(Op);
12952 EVT MemVT = MemSD->getMemoryVT();
12953 MachineMemOperand *MMO = MemSD->getMemOperand();
12954 SDValue Chain = MemSD->getChain();
12955 SDValue BasePtr = MemSD->getBasePtr();
12956 SDValue Val, Mask, VL;
12957
12958 bool IsCompressingStore = false;
12959 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
12960 Val = VPStore->getValue();
12961 Mask = VPStore->getMask();
12962 VL = VPStore->getVectorLength();
12963 } else {
12964 const auto *MStore = cast<MaskedStoreSDNode>(Op);
12965 Val = MStore->getValue();
12966 Mask = MStore->getMask();
12967 IsCompressingStore = MStore->isCompressingStore();
12968 }
12969
12970 bool IsUnmasked =
12971 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
12972
12973 MVT VT = Val.getSimpleValueType();
12974 MVT XLenVT = Subtarget.getXLenVT();
12975
12976 MVT ContainerVT = VT;
12977 if (VT.isFixedLengthVector()) {
12978 ContainerVT = getContainerForFixedLengthVector(VT);
12979
12980 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12981 if (!IsUnmasked || IsCompressingStore) {
12982 MVT MaskVT = getMaskTypeFor(ContainerVT);
12983 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12984 }
12985 }
12986
12987 if (!VL)
12988 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12989
12990 if (IsCompressingStore) {
12991 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
12992 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
12993 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
12994 VL =
12995 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12996 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12997 }
12998
12999 unsigned IntID =
13000 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
13001 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13002 Ops.push_back(Val);
13003 Ops.push_back(BasePtr);
13004 if (!IsUnmasked)
13005 Ops.push_back(Mask);
13006 Ops.push_back(VL);
13007
13008 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
13009 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
13010}
13011
13012SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
13013 SelectionDAG &DAG) const {
13014 SDLoc DL(Op);
13015 SDValue Val = Op.getOperand(0);
13016 SDValue Mask = Op.getOperand(1);
13017 SDValue Passthru = Op.getOperand(2);
13018
13019 MVT VT = Val.getSimpleValueType();
13020 MVT XLenVT = Subtarget.getXLenVT();
13021 MVT ContainerVT = VT;
13022 if (VT.isFixedLengthVector()) {
13023 ContainerVT = getContainerForFixedLengthVector(VT);
13024 MVT MaskVT = getMaskTypeFor(ContainerVT);
13025 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13026 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13027 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
13028 }
13029
13030 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13031 SDValue Res =
13032 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13033 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13034 Passthru, Val, Mask, VL);
13035
13036 if (VT.isFixedLengthVector())
13037 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
13038
13039 return Res;
13040}
13041
13042SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
13043 SelectionDAG &DAG) const {
13044 unsigned Opc = Op.getOpcode();
13045 SDLoc DL(Op);
13046 SDValue Chain = Op.getOperand(0);
13047 SDValue Op1 = Op.getOperand(1);
13048 SDValue Op2 = Op.getOperand(2);
13049 SDValue CC = Op.getOperand(3);
13050 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
13051 MVT VT = Op.getSimpleValueType();
13052 MVT InVT = Op1.getSimpleValueType();
13053
13054 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
13055 // condition code.
13056 if (Opc == ISD::STRICT_FSETCCS) {
13057 // Expand strict_fsetccs(x, oeq) to
13058 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
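    // (The lowering below is equivalent: it emits two OLE compares with the
    // operands swapped, i.e. x <= y and y <= x, whose conjunction is OEQ and
    // which still signal on qNaN inputs.)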
13059 SDVTList VTList = Op->getVTList();
13060 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
13061 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
13062 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13063 Op2, OLECCVal);
13064 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
13065 Op1, OLECCVal);
13066 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13067 Tmp1.getValue(1), Tmp2.getValue(1));
13068 // Tmp1 and Tmp2 might be the same node.
13069 if (Tmp1 != Tmp2)
13070 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
13071 return DAG.getMergeValues({Tmp1, OutChain}, DL);
13072 }
13073
13074 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
13075 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
13076 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
13077 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13078 Op2, OEQCCVal);
13079 SDValue Res = DAG.getNOT(DL, OEQ, VT);
13080 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
13081 }
13082 }
13083
13084 MVT ContainerInVT = InVT;
13085 if (InVT.isFixedLengthVector()) {
13086 ContainerInVT = getContainerForFixedLengthVector(InVT);
13087 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
13088 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
13089 }
13090 MVT MaskVT = getMaskTypeFor(ContainerInVT);
13091
13092 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
13093
13094 SDValue Res;
13095 if (Opc == ISD::STRICT_FSETCC &&
13096 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
13097 CCVal == ISD::SETOLE)) {
13098 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
13099 // is only active when both input elements are ordered.
13100 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
13101 SDValue OrderMask1 = DAG.getNode(
13102 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13103 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13104 True, VL});
13105 SDValue OrderMask2 = DAG.getNode(
13106 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13107 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13108 True, VL});
13109 Mask =
13110 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
13111 // Use Mask as the passthru operand to let the result be 0 if either of the
13112 // inputs is unordered.
13113 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
13114 DAG.getVTList(MaskVT, MVT::Other),
13115 {Chain, Op1, Op2, CC, Mask, Mask, VL});
13116 } else {
13117 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
13118 : RISCVISD::STRICT_FSETCCS_VL;
13119 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
13120 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
13121 }
13122
13123 if (VT.isFixedLengthVector()) {
13124 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
13125 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
13126 }
13127 return Res;
13128}
13129
13130// Lower vector ABS to smax(X, sub(0, X)).
13131SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13132 SDLoc DL(Op);
13133 MVT VT = Op.getSimpleValueType();
13134 SDValue X = Op.getOperand(0);
13135
13136 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13137 "Unexpected type for ISD::ABS");
13138
13139 MVT ContainerVT = VT;
13140 if (VT.isFixedLengthVector()) {
13141 ContainerVT = getContainerForFixedLengthVector(VT);
13142 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13143 }
13144
13145 SDValue Mask, VL;
13146 if (Op->getOpcode() == ISD::VP_ABS) {
13147 Mask = Op->getOperand(1);
13148 if (VT.isFixedLengthVector())
13149 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13150 Subtarget);
13151 VL = Op->getOperand(2);
13152 } else
13153 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13154
13155 SDValue SplatZero = DAG.getNode(
13156 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13157 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13158 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13159 DAG.getUNDEF(ContainerVT), Mask, VL);
13160 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13161 DAG.getUNDEF(ContainerVT), Mask, VL);
13162
13163 if (VT.isFixedLengthVector())
13164 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
13165 return Max;
13166}
13167
13168SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
13169 SelectionDAG &DAG) const {
13170 const auto &TSInfo =
13171 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13172
13173 unsigned NewOpc = getRISCVVLOp(Op);
13174 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
13175 bool HasMask = TSInfo.hasMaskOp(NewOpc);
13176
13177 MVT VT = Op.getSimpleValueType();
13178 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13179
13180 // Create list of operands by converting existing ones to scalable types.
13181 SmallVector<SDValue, 6> Ops;
13182 for (const SDValue &V : Op->op_values()) {
13183 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13184
13185 // Pass through non-vector operands.
13186 if (!V.getValueType().isVector()) {
13187 Ops.push_back(V);
13188 continue;
13189 }
13190
13191 // "cast" fixed length vector to a scalable vector.
13192 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
13193 "Only fixed length vectors are supported!");
13194 MVT VContainerVT = ContainerVT.changeVectorElementType(
13195 V.getSimpleValueType().getVectorElementType());
13196 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
13197 }
13198
13199 SDLoc DL(Op);
13200 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13201 if (HasPassthruOp)
13202 Ops.push_back(DAG.getUNDEF(ContainerVT));
13203 if (HasMask)
13204 Ops.push_back(Mask);
13205 Ops.push_back(VL);
13206
13207 // StrictFP operations have two result values. Their lowered result should
13208 // have the same result count.
13209 if (Op->isStrictFPOpcode()) {
13210 SDValue ScalableRes =
13211 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
13212 Op->getFlags());
13213 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13214 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
13215 }
13216
13217 SDValue ScalableRes =
13218 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
13219 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13220}
13221
13222// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
13223// * Operands of each node are assumed to be in the same order.
13224// * The EVL operand is promoted from i32 to i64 on RV64.
13225// * Fixed-length vectors are converted to their scalable-vector container
13226// types.
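// For example, a fixed-length ISD::VP_ADD becomes RISCVISD::ADD_VL on the
// corresponding scalable container type, with an undef passthru inserted
// before the mask, the mask and EVL passed through, and the result converted
// back to the fixed-length type.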
13227SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
13228 const auto &TSInfo =
13229 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13230
13231 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13232 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
13233
13234 SDLoc DL(Op);
13235 MVT VT = Op.getSimpleValueType();
13236 SmallVector<SDValue, 16> Ops;
13237
13238 MVT ContainerVT = VT;
13239 if (VT.isFixedLengthVector())
13240 ContainerVT = getContainerForFixedLengthVector(VT);
13241
13242 for (const auto &OpIdx : enumerate(Op->ops())) {
13243 SDValue V = OpIdx.value();
13244 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13245 // Add dummy passthru value before the mask. Or if there isn't a mask,
13246 // before EVL.
13247 if (HasPassthruOp) {
13248 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
13249 if (MaskIdx) {
13250 if (*MaskIdx == OpIdx.index())
13251 Ops.push_back(DAG.getUNDEF(ContainerVT));
13252 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
13253 OpIdx.index()) {
13254 if (Op.getOpcode() == ISD::VP_MERGE) {
13255 // For VP_MERGE, copy the false operand instead of an undef value.
13256 Ops.push_back(Ops.back());
13257 } else {
13258 assert(Op.getOpcode() == ISD::VP_SELECT);
13259 // For VP_SELECT, add an undef value.
13260 Ops.push_back(DAG.getUNDEF(ContainerVT));
13261 }
13262 }
13263 }
13264 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
13265 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
13266 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
13267 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
13268 Subtarget.getXLenVT()));
13269 // Pass through operands which aren't fixed-length vectors.
13270 if (!V.getValueType().isFixedLengthVector()) {
13271 Ops.push_back(V);
13272 continue;
13273 }
13274 // "cast" fixed length vector to a scalable vector.
13275 MVT OpVT = V.getSimpleValueType();
13276 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
13277 assert(useRVVForFixedLengthVectorVT(OpVT) &&
13278 "Only fixed length vectors are supported!");
13279 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
13280 }
13281
13282 if (!VT.isFixedLengthVector())
13283 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
13284
13285 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
13286
13287 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
13288}
13289
13290SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
13291 SelectionDAG &DAG) const {
13292 SDLoc DL(Op);
13293 MVT VT = Op.getSimpleValueType();
13294
13295 SDValue Src = Op.getOperand(0);
13296 // NOTE: Mask is dropped.
13297 SDValue VL = Op.getOperand(2);
13298
13299 MVT ContainerVT = VT;
13300 if (VT.isFixedLengthVector()) {
13301 ContainerVT = getContainerForFixedLengthVector(VT);
13302 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
13303 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13304 }
13305
13306 MVT XLenVT = Subtarget.getXLenVT();
13307 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13308 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13309 DAG.getUNDEF(ContainerVT), Zero, VL);
13310
13311 SDValue SplatValue = DAG.getSignedConstant(
13312 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
13313 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13314 DAG.getUNDEF(ContainerVT), SplatValue, VL);
13315
13316 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
13317 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
13318 if (!VT.isFixedLengthVector())
13319 return Result;
13320 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13321}
13322
13323SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
13324 SelectionDAG &DAG) const {
13325 SDLoc DL(Op);
13326 MVT VT = Op.getSimpleValueType();
13327
13328 SDValue Op1 = Op.getOperand(0);
13329 SDValue Op2 = Op.getOperand(1);
13330 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13331 // NOTE: Mask is dropped.
13332 SDValue VL = Op.getOperand(4);
13333
13334 MVT ContainerVT = VT;
13335 if (VT.isFixedLengthVector()) {
13336 ContainerVT = getContainerForFixedLengthVector(VT);
13337 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13338 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13339 }
13340
13342 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13343
13344 switch (Condition) {
13345 default:
13346 break;
13347 // X != Y --> (X^Y)
13348 case ISD::SETNE:
13349 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13350 break;
13351 // X == Y --> ~(X^Y)
13352 case ISD::SETEQ: {
13353 SDValue Temp =
13354 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13355 Result =
13356 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
13357 break;
13358 }
13359 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13360 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13361 case ISD::SETGT:
13362 case ISD::SETULT: {
13363 SDValue Temp =
13364 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13365 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
13366 break;
13367 }
13368 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13369 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13370 case ISD::SETLT:
13371 case ISD::SETUGT: {
13372 SDValue Temp =
13373 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13374 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
13375 break;
13376 }
13377 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13378 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13379 case ISD::SETGE:
13380 case ISD::SETULE: {
13381 SDValue Temp =
13382 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13383 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
13384 break;
13385 }
13386 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13387 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13388 case ISD::SETLE:
13389 case ISD::SETUGE: {
13390 SDValue Temp =
13391 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13392 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
13393 break;
13394 }
13395 }
13396
13397 if (!VT.isFixedLengthVector())
13398 return Result;
13399 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13400}
13401
13402// Lower Floating-Point/Integer Type-Convert VP SDNodes
13403SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
13404 SelectionDAG &DAG) const {
13405 SDLoc DL(Op);
13406
13407 SDValue Src = Op.getOperand(0);
13408 SDValue Mask = Op.getOperand(1);
13409 SDValue VL = Op.getOperand(2);
13410 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13411
13412 MVT DstVT = Op.getSimpleValueType();
13413 MVT SrcVT = Src.getSimpleValueType();
13414 if (DstVT.isFixedLengthVector()) {
13415 DstVT = getContainerForFixedLengthVector(DstVT);
13416 SrcVT = getContainerForFixedLengthVector(SrcVT);
13417 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13418 MVT MaskVT = getMaskTypeFor(DstVT);
13419 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13420 }
13421
13422 unsigned DstEltSize = DstVT.getScalarSizeInBits();
13423 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13424
13426 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
13427 if (SrcVT.isInteger()) {
13428 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13429
13430 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
13431 ? RISCVISD::VSEXT_VL
13432 : RISCVISD::VZEXT_VL;
13433
13434 // Do we need to do any pre-widening before converting?
13435 if (SrcEltSize == 1) {
13436 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
13437 MVT XLenVT = Subtarget.getXLenVT();
13438 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13439 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13440 DAG.getUNDEF(IntVT), Zero, VL);
13441 SDValue One = DAG.getSignedConstant(
13442 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
13443 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13444 DAG.getUNDEF(IntVT), One, VL);
13445 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
13446 ZeroSplat, DAG.getUNDEF(IntVT), VL);
13447 } else if (DstEltSize > (2 * SrcEltSize)) {
13448 // Widen before converting.
13449 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
13450 DstVT.getVectorElementCount());
13451 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
13452 }
13453
13454 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13455 } else {
13456 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13457 "Wrong input/output vector types");
13458
13459 // Convert f16 to f32 then convert f32 to i64.
13460 if (DstEltSize > (2 * SrcEltSize)) {
13461 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13462 MVT InterimFVT =
13463 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13464 Src =
13465 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
13466 }
13467
13468 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13469 }
13470 } else { // Narrowing + Conversion
13471 if (SrcVT.isInteger()) {
13472 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13473 // First do a narrowing conversion to an FP type half the size, then round
13474 // to a smaller FP type if needed.
13475
13476 MVT InterimFVT = DstVT;
13477 if (SrcEltSize > (2 * DstEltSize)) {
13478 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
13479 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13480 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13481 }
13482
13483 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
13484
13485 if (InterimFVT != DstVT) {
13486 Src = Result;
13487 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
13488 }
13489 } else {
13490 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13491 "Wrong input/output vector types");
13492 // First do a narrowing conversion to an integer half the size, then
13493 // truncate if needed.
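      // For example, an f64 -> i8 conversion first narrows to i32 with the
      // converting node, then truncates i32 -> i16 -> i8 with successive
      // TRUNCATE_VECTOR_VL nodes.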
13494
13495 if (DstEltSize == 1) {
13496 // First convert to the same size integer, then convert to mask using
13497 // setcc.
13498 assert(SrcEltSize >= 16 && "Unexpected FP type!");
13499 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
13500 DstVT.getVectorElementCount());
13501 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13502
13503 // Compare the integer result to 0. The integer should be 0 or 1/-1,
13504 // otherwise the conversion was undefined.
13505 MVT XLenVT = Subtarget.getXLenVT();
13506 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
13507 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
13508 DAG.getUNDEF(InterimIVT), SplatZero, VL);
13509 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
13510 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
13511 DAG.getUNDEF(DstVT), Mask, VL});
13512 } else {
13513 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13514 DstVT.getVectorElementCount());
13515
13516 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13517
13518 while (InterimIVT != DstVT) {
13519 SrcEltSize /= 2;
13520 Src = Result;
13521 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13522 DstVT.getVectorElementCount());
13523 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
13524 Src, Mask, VL);
13525 }
13526 }
13527 }
13528 }
13529
13530 MVT VT = Op.getSimpleValueType();
13531 if (!VT.isFixedLengthVector())
13532 return Result;
13533 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13534}
13535
13536SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
13537 SelectionDAG &DAG) const {
13538 SDLoc DL(Op);
13539 MVT VT = Op.getSimpleValueType();
13540 MVT XLenVT = Subtarget.getXLenVT();
13541
13542 SDValue Mask = Op.getOperand(0);
13543 SDValue TrueVal = Op.getOperand(1);
13544 SDValue FalseVal = Op.getOperand(2);
13545 SDValue VL = Op.getOperand(3);
13546
13547 // Use default legalization if a vector of EVL type would be legal.
13548 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
13549 VT.getVectorElementCount());
13550 if (isTypeLegal(EVLVecVT))
13551 return SDValue();
13552
13553 MVT ContainerVT = VT;
13554 if (VT.isFixedLengthVector()) {
13555 ContainerVT = getContainerForFixedLengthVector(VT);
13556 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
13557 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
13558 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
13559 }
13560
13561 // Promote to a vector of i8.
13562 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
13563
13564 // Promote TrueVal and FalseVal using VLMax.
13565 // FIXME: Is there a better way to do this?
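  // Roughly: the booleans are widened to i8 0/1 values so the vmerge can
  // honour the EVL-limited merge semantics, and the final compare against
  // zero converts the i8 result back into a mask.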
13566 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
13567 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13568 DAG.getUNDEF(PromotedVT),
13569 DAG.getConstant(1, DL, XLenVT), VLMax);
13570 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13571 DAG.getUNDEF(PromotedVT),
13572 DAG.getConstant(0, DL, XLenVT), VLMax);
13573 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
13574 SplatZero, DAG.getUNDEF(PromotedVT), VL);
13575 // Any element past VL uses FalseVal, so use VLMax
13576 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
13577 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
13578
13579 // VP_MERGE the two promoted values.
13580 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
13581 TrueVal, FalseVal, FalseVal, VL);
13582
13583 // Convert back to mask.
13584 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13585 SDValue Result = DAG.getNode(
13586 RISCVISD::SETCC_VL, DL, ContainerVT,
13587 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
13588 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
13589
13590 if (VT.isFixedLengthVector())
13591 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13592 return Result;
13593}
13594
13595SDValue
13596RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
13597 SelectionDAG &DAG) const {
13598 using namespace SDPatternMatch;
13599
13600 SDLoc DL(Op);
13601
13602 SDValue Op1 = Op.getOperand(0);
13603 SDValue Op2 = Op.getOperand(1);
13604 SDValue Offset = Op.getOperand(2);
13605 SDValue Mask = Op.getOperand(3);
13606 SDValue EVL1 = Op.getOperand(4);
13607 SDValue EVL2 = Op.getOperand(5);
13608
13609 const MVT XLenVT = Subtarget.getXLenVT();
13610 MVT VT = Op.getSimpleValueType();
13611 MVT ContainerVT = VT;
13612 if (VT.isFixedLengthVector()) {
13613 ContainerVT = getContainerForFixedLengthVector(VT);
13614 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13615 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13616 MVT MaskVT = getMaskTypeFor(ContainerVT);
13617 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13618 }
13619
13620 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
13621 if (IsMaskVector) {
13622 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
13623
13624 // Expand input operands
13625 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13626 DAG.getUNDEF(ContainerVT),
13627 DAG.getConstant(1, DL, XLenVT), EVL1);
13628 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13629 DAG.getUNDEF(ContainerVT),
13630 DAG.getConstant(0, DL, XLenVT), EVL1);
13631 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
13632 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
13633
13634 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13635 DAG.getUNDEF(ContainerVT),
13636 DAG.getConstant(1, DL, XLenVT), EVL2);
13637 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13638 DAG.getUNDEF(ContainerVT),
13639 DAG.getConstant(0, DL, XLenVT), EVL2);
13640 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
13641 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
13642 }
13643
13644 auto getVectorFirstEle = [](SDValue Vec) {
13645 SDValue FirstEle;
13646 if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
13647 return FirstEle;
13648
13649 if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
13650 Vec.getOpcode() == ISD::BUILD_VECTOR)
13651 return Vec.getOperand(0);
13652
13653 return SDValue();
13654 };
13655
13656 if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
13657 if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
13658 MVT EltVT = ContainerVT.getVectorElementType();
13660 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
13661 EltVT == MVT::bf16) {
13662 EltVT = EltVT.changeTypeToInteger();
13663 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
13664 Op2 = DAG.getBitcast(ContainerVT, Op2);
13665 FirstEle =
13666 DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
13667 }
13668 Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13669 : RISCVISD::VSLIDE1UP_VL,
13670 DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
13671 FirstEle, Mask, EVL2);
13672 Result = DAG.getBitcast(
13674 Result);
13675 return VT.isFixedLengthVector()
13676 ? convertFromScalableVector(VT, Result, DAG, Subtarget)
13677 : Result;
13678 }
13679
13680 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
13681 SDValue DownOffset, UpOffset;
13682 if (ImmValue >= 0) {
13683 // The operand is a TargetConstant, we need to rebuild it as a regular
13684 // constant.
13685 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
13686 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
13687 } else {
13688 // The operand is a TargetConstant, we need to rebuild it as a regular
13689 // constant rather than negating the original operand.
13690 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
13691 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
13692 }
13693
13694 if (ImmValue != 0)
13695 Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13696 DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
13697 Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
13698 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
13699 UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
13700
13701 if (IsMaskVector) {
13702 // Truncate Result back to a mask vector (Result has same EVL as Op2)
13703 Result = DAG.getNode(
13704 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
13705 {Result, DAG.getConstant(0, DL, ContainerVT),
13706 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
13707 Mask, EVL2});
13708 }
13709
13710 if (!VT.isFixedLengthVector())
13711 return Result;
13712 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13713}
13714
13715SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
13716 SelectionDAG &DAG) const {
13717 SDLoc DL(Op);
13718 SDValue Val = Op.getOperand(0);
13719 SDValue Mask = Op.getOperand(1);
13720 SDValue VL = Op.getOperand(2);
13721 MVT VT = Op.getSimpleValueType();
13722
13723 MVT ContainerVT = VT;
13724 if (VT.isFixedLengthVector()) {
13725 ContainerVT = getContainerForFixedLengthVector(VT);
13726 MVT MaskVT = getMaskTypeFor(ContainerVT);
13727 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13728 }
13729
13730 SDValue Result;
13731 if (VT.getScalarType() == MVT::i1) {
13732 if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
13733 Result =
13734 DAG.getNode(C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
13735 ContainerVT, VL);
13736 } else {
13737 MVT WidenVT = ContainerVT.changeVectorElementType(MVT::i8);
13738 SDValue LHS =
13739 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, WidenVT, DAG.getUNDEF(WidenVT),
13740 DAG.getZExtOrTrunc(Val, DL, Subtarget.getXLenVT()), VL);
13741 SDValue RHS = DAG.getConstant(0, DL, WidenVT);
13742 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13743 {LHS, RHS, DAG.getCondCode(ISD::SETNE),
13744 DAG.getUNDEF(ContainerVT), Mask, VL});
13745 }
13746 } else {
13747 Result =
13748 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
13749 }
13750
13751 if (!VT.isFixedLengthVector())
13752 return Result;
13753 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13754}
13755
13756SDValue
13757RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
13758 SelectionDAG &DAG) const {
13759 SDLoc DL(Op);
13760 MVT VT = Op.getSimpleValueType();
13761 MVT XLenVT = Subtarget.getXLenVT();
13762
13763 SDValue Op1 = Op.getOperand(0);
13764 SDValue Mask = Op.getOperand(1);
13765 SDValue EVL = Op.getOperand(2);
13766
13767 MVT ContainerVT = VT;
13768 if (VT.isFixedLengthVector()) {
13769 ContainerVT = getContainerForFixedLengthVector(VT);
13770 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13771 MVT MaskVT = getMaskTypeFor(ContainerVT);
13772 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13773 }
13774
13775 MVT GatherVT = ContainerVT;
13776 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
13777 // Check if we are working with mask vectors
13778 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
13779 if (IsMaskVector) {
13780 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
13781
13782 // Expand input operand
13783 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13784 DAG.getUNDEF(IndicesVT),
13785 DAG.getConstant(1, DL, XLenVT), EVL);
13786 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13787 DAG.getUNDEF(IndicesVT),
13788 DAG.getConstant(0, DL, XLenVT), EVL);
13789 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
13790 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
13791 }
13792
13793 unsigned EltSize = GatherVT.getScalarSizeInBits();
13794 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
13795 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
13796 unsigned MaxVLMAX =
13797 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
13798
13799 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
13800 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
13801 // to use vrgatherei16.vv.
13802 // TODO: It's also possible to use vrgatherei16.vv for other types to
13803 // decrease register width for the index calculation.
13804 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
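// (With SEW=8 an index element can only represent values 0..255, so once
// VLMAX may exceed 256 an i8 index vector cannot address every element.)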
13805 if (MaxVLMAX > 256 && EltSize == 8) {
13806 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
13807 // Split the vector in half and reverse each half using a full register
13808 // reverse.
13809 // Swap the halves and concatenate them.
13810 // Slide the concatenated result by (VLMax - VL).
13811 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
13812 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
13813 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
13814
13815 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
13816 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
13817
13818 // Reassemble the low and high pieces reversed.
13819 // NOTE: this Result is unmasked (because we do not need masks for
13820 // shuffles). If in the future this has to change, we can use a SELECT_VL
13821 // between Result and UNDEF using the mask originally passed to VP_REVERSE
13822 SDValue Result =
13823 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
13824
13825 // Slide off any elements from past EVL that were reversed into the low
13826 // elements.
13827 unsigned MinElts = GatherVT.getVectorMinNumElements();
13828 SDValue VLMax =
13829 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
13830 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
13831
13832 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
13833 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
13834
13835 if (IsMaskVector) {
13836 // Truncate Result back to a mask vector
13837 Result =
13838 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13839 {Result, DAG.getConstant(0, DL, GatherVT),
13840 DAG.getCondCode(ISD::SETNE),
13841 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13842 }
13843
13844 if (!VT.isFixedLengthVector())
13845 return Result;
13846 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13847 }
13848
13849 // Just promote the int type to i16 which will double the LMUL.
13850 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
13851 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13852 }
13853
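// General case: build the index vector (EVL-1, EVL-2, ..., 0) so that result
// element i gathers source element (EVL-1)-i.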
13854 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
13855 SDValue VecLen =
13856 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
13857 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13858 DAG.getUNDEF(IndicesVT), VecLen, EVL);
13859 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
13860 DAG.getUNDEF(IndicesVT), Mask, EVL);
13861 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
13862 DAG.getUNDEF(GatherVT), Mask, EVL);
13863
13864 if (IsMaskVector) {
13865 // Truncate Result back to a mask vector
13866 Result = DAG.getNode(
13867 RISCVISD::SETCC_VL, DL, ContainerVT,
13868 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
13869 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13870 }
13871
13872 if (!VT.isFixedLengthVector())
13873 return Result;
13874 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13875}
13876
13877SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
13878 SelectionDAG &DAG) const {
13879 MVT VT = Op.getSimpleValueType();
13880 if (VT.getVectorElementType() != MVT::i1)
13881 return lowerVPOp(Op, DAG);
13882
13883 // It is safe to drop mask parameter as masked-off elements are undef.
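// (VP semantics leave masked-off result lanes unspecified, so the unmasked
// mask-register logical instructions, e.g. vmand.mm, can be used directly.)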
13884 SDValue Op1 = Op->getOperand(0);
13885 SDValue Op2 = Op->getOperand(1);
13886 SDValue VL = Op->getOperand(3);
13887
13888 MVT ContainerVT = VT;
13889 const bool IsFixed = VT.isFixedLengthVector();
13890 if (IsFixed) {
13891 ContainerVT = getContainerForFixedLengthVector(VT);
13892 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13893 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13894 }
13895
13896 SDLoc DL(Op);
13897 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
13898 if (!IsFixed)
13899 return Val;
13900 return convertFromScalableVector(VT, Val, DAG, Subtarget);
13901}
13902
13903SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
13904 SelectionDAG &DAG) const {
13905 SDLoc DL(Op);
13906 MVT XLenVT = Subtarget.getXLenVT();
13907 MVT VT = Op.getSimpleValueType();
13908 MVT ContainerVT = VT;
13909 if (VT.isFixedLengthVector())
13910 ContainerVT = getContainerForFixedLengthVector(VT);
13911
13912 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13913
13914 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
13915 // Check if the mask is known to be all ones
13916 SDValue Mask = VPNode->getMask();
13917 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13918
13919 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
13920 : Intrinsic::riscv_vlse_mask,
13921 DL, XLenVT);
13922 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
13923 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
13924 VPNode->getStride()};
13925 if (!IsUnmasked) {
13926 if (VT.isFixedLengthVector()) {
13927 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13928 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13929 }
13930 Ops.push_back(Mask);
13931 }
13932 Ops.push_back(VPNode->getVectorLength());
13933 if (!IsUnmasked) {
13934 SDValue Policy =
13935 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
13936 Ops.push_back(Policy);
13937 }
13938
13939 SDValue Result =
13940 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
13941 VPNode->getMemoryVT(), VPNode->getMemOperand());
13942 SDValue Chain = Result.getValue(1);
13943
13944 if (VT.isFixedLengthVector())
13945 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13946
13947 return DAG.getMergeValues({Result, Chain}, DL);
13948}
13949
13950SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
13951 SelectionDAG &DAG) const {
13952 SDLoc DL(Op);
13953 MVT XLenVT = Subtarget.getXLenVT();
13954
13955 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
13956 SDValue StoreVal = VPNode->getValue();
13957 MVT VT = StoreVal.getSimpleValueType();
13958 MVT ContainerVT = VT;
13959 if (VT.isFixedLengthVector()) {
13960 ContainerVT = getContainerForFixedLengthVector(VT);
13961 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
13962 }
13963
13964 // Check if the mask is known to be all ones
13965 SDValue Mask = VPNode->getMask();
13966 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13967
13968 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
13969 : Intrinsic::riscv_vsse_mask,
13970 DL, XLenVT);
13971 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
13972 VPNode->getBasePtr(), VPNode->getStride()};
13973 if (!IsUnmasked) {
13974 if (VT.isFixedLengthVector()) {
13975 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13976 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13977 }
13978 Ops.push_back(Mask);
13979 }
13980 Ops.push_back(VPNode->getVectorLength());
13981
13982 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
13983 Ops, VPNode->getMemoryVT(),
13984 VPNode->getMemOperand());
13985}
13986
13987// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
13988// matched to a RVV indexed load. The RVV indexed load instructions only
13989// support the "unsigned unscaled" addressing mode; indices are implicitly
13990// zero-extended or truncated to XLEN and are treated as byte offsets. Any
13991// signed or scaled indexing is extended to the XLEN value type and scaled
13992// accordingly.
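// For example, an index value of 4 addresses BasePtr + 4 bytes regardless of
// the width of the element being loaded.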
13993SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
13994 SelectionDAG &DAG) const {
13995 SDLoc DL(Op);
13996 MVT VT = Op.getSimpleValueType();
13997
13998 const auto *MemSD = cast<MemSDNode>(Op.getNode());
13999 EVT MemVT = MemSD->getMemoryVT();
14000 MachineMemOperand *MMO = MemSD->getMemOperand();
14001 SDValue Chain = MemSD->getChain();
14002 SDValue BasePtr = MemSD->getBasePtr();
14003
14004 [[maybe_unused]] ISD::LoadExtType LoadExtType;
14005 SDValue Index, Mask, PassThru, VL;
14006
14007 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
14008 Index = VPGN->getIndex();
14009 Mask = VPGN->getMask();
14010 PassThru = DAG.getUNDEF(VT);
14011 VL = VPGN->getVectorLength();
14012 // VP doesn't support extending loads.
14013 LoadExtType = ISD::NON_EXTLOAD;
14014 } else {
14015 // Else it must be a MGATHER.
14016 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
14017 Index = MGN->getIndex();
14018 Mask = MGN->getMask();
14019 PassThru = MGN->getPassThru();
14020 LoadExtType = MGN->getExtensionType();
14021 }
14022
14023 MVT IndexVT = Index.getSimpleValueType();
14024 MVT XLenVT = Subtarget.getXLenVT();
14025
14027 "Unexpected VTs!");
14028 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14029 // Targets have to explicitly opt-in for extending vector loads.
14030 assert(LoadExtType == ISD::NON_EXTLOAD &&
14031 "Unexpected extending MGATHER/VP_GATHER");
14032
14033 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14034 // the selection of the masked intrinsics doesn't do this for us.
14035 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14036
14037 MVT ContainerVT = VT;
14038 if (VT.isFixedLengthVector()) {
14039 ContainerVT = getContainerForFixedLengthVector(VT);
14040 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14041 ContainerVT.getVectorElementCount());
14042
14043 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14044
14045 if (!IsUnmasked) {
14046 MVT MaskVT = getMaskTypeFor(ContainerVT);
14047 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14048 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
14049 }
14050 }
14051
14052 if (!VL)
14053 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14054
14055 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14056 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14057 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14058 }
14059
14060 unsigned IntID =
14061 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
14062 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14063 if (IsUnmasked)
14064 Ops.push_back(DAG.getUNDEF(ContainerVT));
14065 else
14066 Ops.push_back(PassThru);
14067 Ops.push_back(BasePtr);
14068 Ops.push_back(Index);
14069 if (!IsUnmasked)
14070 Ops.push_back(Mask);
14071 Ops.push_back(VL);
14072 if (!IsUnmasked)
14073 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
14074
14075 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14076 SDValue Result =
14077 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14078 Chain = Result.getValue(1);
14079
14080 if (VT.isFixedLengthVector())
14081 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14082
14083 return DAG.getMergeValues({Result, Chain}, DL);
14084}
14085
14086// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14087// matched to a RVV indexed store. The RVV indexed store instructions only
14088// support the "unsigned unscaled" addressing mode; indices are implicitly
14089// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14090// signed or scaled indexing is extended to the XLEN value type and scaled
14091// accordingly.
14092SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14093 SelectionDAG &DAG) const {
14094 SDLoc DL(Op);
14095 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14096 EVT MemVT = MemSD->getMemoryVT();
14097 MachineMemOperand *MMO = MemSD->getMemOperand();
14098 SDValue Chain = MemSD->getChain();
14099 SDValue BasePtr = MemSD->getBasePtr();
14100
14101 [[maybe_unused]] bool IsTruncatingStore = false;
14102 SDValue Index, Mask, Val, VL;
14103
14104 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14105 Index = VPSN->getIndex();
14106 Mask = VPSN->getMask();
14107 Val = VPSN->getValue();
14108 VL = VPSN->getVectorLength();
14109 // VP doesn't support truncating stores.
14110 IsTruncatingStore = false;
14111 } else {
14112 // Else it must be a MSCATTER.
14113 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14114 Index = MSN->getIndex();
14115 Mask = MSN->getMask();
14116 Val = MSN->getValue();
14117 IsTruncatingStore = MSN->isTruncatingStore();
14118 }
14119
14120 MVT VT = Val.getSimpleValueType();
14121 MVT IndexVT = Index.getSimpleValueType();
14122 MVT XLenVT = Subtarget.getXLenVT();
14123
14125 "Unexpected VTs!");
14126 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14127 // Targets have to explicitly opt-in for extending vector loads and
14128 // truncating vector stores.
14129 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14130
14131 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14132 // the selection of the masked intrinsics doesn't do this for us.
14133 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14134
14135 MVT ContainerVT = VT;
14136 if (VT.isFixedLengthVector()) {
14137 ContainerVT = getContainerForFixedLengthVector(VT);
14138 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14139 ContainerVT.getVectorElementCount());
14140
14141 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14142 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14143
14144 if (!IsUnmasked) {
14145 MVT MaskVT = getMaskTypeFor(ContainerVT);
14146 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14147 }
14148 }
14149
14150 if (!VL)
14151 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14152
14153 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14154 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14155 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14156 }
14157
14158 unsigned IntID =
14159 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14160 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14161 Ops.push_back(Val);
14162 Ops.push_back(BasePtr);
14163 Ops.push_back(Index);
14164 if (!IsUnmasked)
14165 Ops.push_back(Mask);
14166 Ops.push_back(VL);
14167
14168 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
14169 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14170}
14171
14172SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14173 SelectionDAG &DAG) const {
14174 const MVT XLenVT = Subtarget.getXLenVT();
14175 SDLoc DL(Op);
14176 SDValue Chain = Op->getOperand(0);
14177 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14178 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14179 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14180
14181 // Encoding used for rounding mode in RISC-V differs from that used in
14182 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
14183 // table, which consists of a sequence of 4-bit fields, each representing
14184 // the corresponding FLT_ROUNDS mode.
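// For example, reading frm == 1 (RTZ) shifts the table right by 4 and masks
// the low 3 bits, yielding 0, the FLT_ROUNDS encoding of "toward zero".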
14185 static const int Table =
14186 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
14187 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
14188 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
14189 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
14190 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
14191
14192 SDValue Shift =
14193 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14194 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14195 DAG.getConstant(Table, DL, XLenVT), Shift);
14196 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14197 DAG.getConstant(7, DL, XLenVT));
14198
14199 return DAG.getMergeValues({Masked, Chain}, DL);
14200}
14201
14202SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14203 SelectionDAG &DAG) const {
14204 const MVT XLenVT = Subtarget.getXLenVT();
14205 SDLoc DL(Op);
14206 SDValue Chain = Op->getOperand(0);
14207 SDValue RMValue = Op->getOperand(1);
14208 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14209
14210 // Encoding used for rounding mode in RISC-V differs from that used in
14211 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
14212 // a table, which consists of a sequence of 4-bit fields, each representing
14213 // the corresponding RISC-V mode.
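// For example, an incoming FLT_ROUNDS value of 1 (nearest-even) selects the
// 4-bit field holding RISCVFPRndMode::RNE (0), which is then written to frm.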
14214 static const unsigned Table =
14215 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
14216 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
14217 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
14218 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
14219 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
14220
14221 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
14222
14223 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
14224 DAG.getConstant(2, DL, XLenVT));
14225 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14226 DAG.getConstant(Table, DL, XLenVT), Shift);
14227 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14228 DAG.getConstant(0x7, DL, XLenVT));
14229 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14230 RMValue);
14231}
14232
14233SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
14234 SelectionDAG &DAG) const {
14235 const MVT XLenVT = Subtarget.getXLenVT();
14236 SDLoc DL(Op);
14237 SDValue Chain = Op->getOperand(0);
14238 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14239 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14240 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14241}
14242
14243SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
14244 SelectionDAG &DAG) const {
14245 const MVT XLenVT = Subtarget.getXLenVT();
14246 SDLoc DL(Op);
14247 SDValue Chain = Op->getOperand(0);
14248 SDValue EnvValue = Op->getOperand(1);
14249 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14250
14251 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14252 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14253 EnvValue);
14254}
14255
14256SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
14257 SelectionDAG &DAG) const {
14258 const MVT XLenVT = Subtarget.getXLenVT();
14259 SDLoc DL(Op);
14260 SDValue Chain = Op->getOperand(0);
14261 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
14262 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14263
14264 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14265 EnvValue);
14266}
14267
14270
14271SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
14272 SelectionDAG &DAG) const {
14273 const MVT XLenVT = Subtarget.getXLenVT();
14274 SDLoc DL(Op);
14275 SDValue Chain = Op->getOperand(0);
14276 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14277 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14278 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14279 Chain = Result.getValue(1);
14280 return DAG.getMergeValues({Result, Chain}, DL);
14281}
14282
14283SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
14284 SelectionDAG &DAG) const {
14285 const MVT XLenVT = Subtarget.getXLenVT();
14286 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14287 SDLoc DL(Op);
14288 SDValue Chain = Op->getOperand(0);
14289 SDValue EnvValue = Op->getOperand(1);
14290 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14291 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14292
14293 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14294 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
14295 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14296 ModeMask);
14297 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
14298 EnvValue);
14299}
14300
14301SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
14302 SelectionDAG &DAG) const {
14303 const MVT XLenVT = Subtarget.getXLenVT();
14304 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14305 SDLoc DL(Op);
14306 SDValue Chain = Op->getOperand(0);
14307 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14308 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14309
14310 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14311 ModeMask);
14312}
14313
14314SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
14315 SelectionDAG &DAG) const {
14316 MachineFunction &MF = DAG.getMachineFunction();
14317
14318 bool isRISCV64 = Subtarget.is64Bit();
14319 EVT PtrVT = getPointerTy(DAG.getDataLayout());
14320
14321 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
14322 return DAG.getFrameIndex(FI, PtrVT);
14323}
14324
14325// Returns the opcode of the target-specific SDNode that implements the 32-bit
14326// form of the given Opcode.
14327static unsigned getRISCVWOpcode(unsigned Opcode) {
14328 switch (Opcode) {
14329 default:
14330 llvm_unreachable("Unexpected opcode");
14331 case ISD::SHL:
14332 return RISCVISD::SLLW;
14333 case ISD::SRA:
14334 return RISCVISD::SRAW;
14335 case ISD::SRL:
14336 return RISCVISD::SRLW;
14337 case ISD::SDIV:
14338 return RISCVISD::DIVW;
14339 case ISD::UDIV:
14340 return RISCVISD::DIVUW;
14341 case ISD::UREM:
14342 return RISCVISD::REMUW;
14343 case ISD::ROTL:
14344 return RISCVISD::ROLW;
14345 case ISD::ROTR:
14346 return RISCVISD::RORW;
14347 }
14348}
14349
14350// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
14351// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
14352// otherwise be promoted to i64, making it difficult to select the
14353 // SLLW/DIVUW/.../*W later on because the fact that the operation was originally of
14354// type i8/i16/i32 is lost.
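// For example, (i32 (srl X, Y)) becomes
// (i32 (trunc (RISCVISD::SRLW (any_ext X), (any_ext Y)))).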
14355 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
14356 unsigned ExtOpc = ISD::ANY_EXTEND) {
14357 SDLoc DL(N);
14358 unsigned WOpcode = getRISCVWOpcode(N->getOpcode());
14359 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
14360 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
14361 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
14362 // ReplaceNodeResults requires we maintain the same type for the return value.
14363 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
14364}
14365
14366// Converts the given 32-bit operation to a i64 operation with signed extension
14367// semantic to reduce the signed extension instructions.
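// For example, (i32 (add X, Y)) becomes
// (i32 (trunc (sext_inreg (add (any_ext X), (any_ext Y)), i32))).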
14368 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
14369 SDLoc DL(N);
14370 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14371 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14372 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
14373 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14374 DAG.getValueType(MVT::i32));
14375 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
14376}
14377
14378 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
14379 SmallVectorImpl<SDValue> &Results,
14380 SelectionDAG &DAG) const {
14381 SDLoc DL(N);
14382 switch (N->getOpcode()) {
14383 default:
14384 llvm_unreachable("Don't know how to custom type legalize this operation!");
14385 case ISD::STRICT_FP_TO_SINT:
14386 case ISD::STRICT_FP_TO_UINT:
14387 case ISD::FP_TO_SINT:
14388 case ISD::FP_TO_UINT: {
14389 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14390 "Unexpected custom legalisation");
14391 bool IsStrict = N->isStrictFPOpcode();
14392 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
14393 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
14394 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
14395 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14396 TargetLowering::TypeSoftenFloat) {
14397 if (!isTypeLegal(Op0.getValueType()))
14398 return;
14399 if (IsStrict) {
14400 SDValue Chain = N->getOperand(0);
14401 // In absence of Zfh, promote f16 to f32, then convert.
14402 if (Op0.getValueType() == MVT::f16 &&
14403 !Subtarget.hasStdExtZfhOrZhinx()) {
14404 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
14405 {Chain, Op0});
14406 Chain = Op0.getValue(1);
14407 }
14408 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
14409 : RISCVISD::STRICT_FCVT_WU_RV64;
14410 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14411 SDValue Res = DAG.getNode(
14412 Opc, DL, VTs, Chain, Op0,
14413 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14414 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14415 Results.push_back(Res.getValue(1));
14416 return;
14417 }
14418 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
14419 // convert.
14420 if ((Op0.getValueType() == MVT::f16 &&
14421 !Subtarget.hasStdExtZfhOrZhinx()) ||
14422 Op0.getValueType() == MVT::bf16)
14423 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14424
14425 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14426 SDValue Res =
14427 DAG.getNode(Opc, DL, MVT::i64, Op0,
14428 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14429 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14430 return;
14431 }
14432 // If the FP type needs to be softened, emit a library call using the 'si'
14433 // version. If we left it to default legalization we'd end up with 'di'. If
14434 // the FP type doesn't need to be softened just let generic type
14435 // legalization promote the result type.
14436 RTLIB::Libcall LC;
14437 if (IsSigned)
14438 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
14439 else
14440 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
14441 MakeLibCallOptions CallOptions;
14442 EVT OpVT = Op0.getValueType();
14443 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
14444 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
14445 SDValue Result;
14446 std::tie(Result, Chain) =
14447 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
14448 Results.push_back(Result);
14449 if (IsStrict)
14450 Results.push_back(Chain);
14451 break;
14452 }
14453 case ISD::LROUND: {
14454 SDValue Op0 = N->getOperand(0);
14455 EVT Op0VT = Op0.getValueType();
14456 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14457 TargetLowering::TypeSoftenFloat) {
14458 if (!isTypeLegal(Op0VT))
14459 return;
14460
14461 // In absence of Zfh, promote f16 to f32, then convert.
14462 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
14463 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14464
14465 SDValue Res =
14466 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
14467 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
14468 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14469 return;
14470 }
14471 // If the FP type needs to be softened, emit a library call to lround. We'll
14472 // need to truncate the result. We assume any value that doesn't fit in i32
14473 // is allowed to return an unspecified value.
14474 RTLIB::Libcall LC =
14475 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
14476 MakeLibCallOptions CallOptions;
14477 EVT OpVT = Op0.getValueType();
14478 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
14479 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
14480 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
14481 Results.push_back(Result);
14482 break;
14483 }
14484 case ISD::READCYCLECOUNTER:
14485 case ISD::READSTEADYCOUNTER: {
14486 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
14487 "has custom type legalization on riscv32");
14488
14489 SDValue LoCounter, HiCounter;
14490 MVT XLenVT = Subtarget.getXLenVT();
14491 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
14492 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
14493 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
14494 } else {
14495 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
14496 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
14497 }
14498 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
14499 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
14500 N->getOperand(0), LoCounter, HiCounter);
14501
14502 Results.push_back(
14503 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
14504 Results.push_back(RCW.getValue(2));
14505 break;
14506 }
14507 case ISD::LOAD: {
14508 if (!ISD::isNON_EXTLoad(N))
14509 return;
14510
14511 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
14512 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
14513 LoadSDNode *Ld = cast<LoadSDNode>(N);
14514
14515 if (N->getValueType(0) == MVT::i64) {
14516 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
14517 "Unexpected custom legalisation");
14518
14519 if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
14520 return;
14521
14522 SDLoc DL(N);
14523 SDValue Result = DAG.getMemIntrinsicNode(
14524 RISCVISD::LD_RV32, DL,
14525 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
14526 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
14527 SDValue Lo = Result.getValue(0);
14528 SDValue Hi = Result.getValue(1);
14529 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
14530 Results.append({Pair, Result.getValue(2)});
14531 return;
14532 }
14533
14534 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14535 "Unexpected custom legalisation");
14536
14537 SDLoc dl(N);
14538 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
14539 Ld->getBasePtr(), Ld->getMemoryVT(),
14540 Ld->getMemOperand());
14541 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
14542 Results.push_back(Res.getValue(1));
14543 return;
14544 }
14545 case ISD::MUL: {
14546 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
14547 unsigned XLen = Subtarget.getXLen();
14548 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
14549 if (Size > XLen) {
14550 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
14551 SDValue LHS = N->getOperand(0);
14552 SDValue RHS = N->getOperand(1);
14553 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
14554
14555 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
14556 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
14557 // We need exactly one side to be unsigned.
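// (The RISC-V mulhsu instruction multiplies a signed rs1 by an unsigned rs2
// and returns the upper XLEN bits of the product.)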
14558 if (LHSIsU == RHSIsU)
14559 return;
14560
14561 auto MakeMULPair = [&](SDValue S, SDValue U) {
14562 MVT XLenVT = Subtarget.getXLenVT();
14563 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
14564 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
14565 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
14566 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
14567 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
14568 };
14569
14570 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
14571 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
14572
14573 // The other operand should be signed, but still prefer MULH when
14574 // possible.
14575 if (RHSIsU && LHSIsS && !RHSIsS)
14576 Results.push_back(MakeMULPair(LHS, RHS));
14577 else if (LHSIsU && RHSIsS && !LHSIsS)
14578 Results.push_back(MakeMULPair(RHS, LHS));
14579
14580 return;
14581 }
14582 [[fallthrough]];
14583 }
14584 case ISD::ADD:
14585 case ISD::SUB:
14586 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14587 "Unexpected custom legalisation");
14588 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
14589 break;
14590 case ISD::SHL:
14591 case ISD::SRA:
14592 case ISD::SRL:
14593 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14594 "Unexpected custom legalisation");
14595 if (N->getOperand(1).getOpcode() != ISD::Constant) {
14596 // If we can use a BSET instruction, allow default promotion to apply.
14597 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
14598 isOneConstant(N->getOperand(0)))
14599 break;
14600 Results.push_back(customLegalizeToWOp(N, DAG));
14601 break;
14602 }
14603
14604 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
14605 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
14606 // shift amount.
14607 if (N->getOpcode() == ISD::SHL) {
14608 SDLoc DL(N);
14609 SDValue NewOp0 =
14610 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14611 SDValue NewOp1 =
14612 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
14613 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
14614 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14615 DAG.getValueType(MVT::i32));
14616 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14617 }
14618
14619 break;
14620 case ISD::ROTL:
14621 case ISD::ROTR:
14622 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14623 "Unexpected custom legalisation");
14624 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
14625 Subtarget.hasVendorXTHeadBb()) &&
14626 "Unexpected custom legalization");
14627 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
14628 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
14629 return;
14630 Results.push_back(customLegalizeToWOp(N, DAG));
14631 break;
14632 case ISD::CTTZ:
14633 case ISD::CTTZ_ZERO_UNDEF:
14634 case ISD::CTLZ:
14635 case ISD::CTLZ_ZERO_UNDEF: {
14636 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14637 "Unexpected custom legalisation");
14638
14639 SDValue NewOp0 =
14640 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14641 bool IsCTZ =
14642 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
14643 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
14644 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
14645 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14646 return;
14647 }
14648 case ISD::SDIV:
14649 case ISD::UDIV:
14650 case ISD::UREM: {
14651 MVT VT = N->getSimpleValueType(0);
14652 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
14653 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
14654 "Unexpected custom legalisation");
14655 // Don't promote division/remainder by constant since we should expand those
14656 // to multiply by magic constant.
14657 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
14658 if (N->getOperand(1).getOpcode() == ISD::Constant &&
14659 !isIntDivCheap(N->getValueType(0), Attr))
14660 return;
14661
14662 // If the input is i32, use ANY_EXTEND since the W instructions don't read
14663 // the upper 32 bits. For other types we need to sign or zero extend
14664 // based on the opcode.
14665 unsigned ExtOpc = ISD::ANY_EXTEND;
14666 if (VT != MVT::i32)
14667 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
14668 : ISD::ZERO_EXTEND;
14669
14670 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
14671 break;
14672 }
14673 case ISD::SADDO: {
14674 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14675 "Unexpected custom legalisation");
14676
14677 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
14678 // use the default legalization.
14679 if (!isa<ConstantSDNode>(N->getOperand(1)))
14680 return;
14681
14682 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14683 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
14684 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
14685 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14686 DAG.getValueType(MVT::i32));
14687
14688 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14689
14690 // For an addition, the result should be less than one of the operands (LHS)
14691 // if and only if the other operand (RHS) is negative, otherwise there will
14692 // be overflow.
14693 // For a subtraction, the result should be less than one of the operands
14694 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
14695 // otherwise there will be overflow.
14696 EVT OType = N->getValueType(1);
14697 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
14698 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
14699
14700 SDValue Overflow =
14701 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
14702 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14703 Results.push_back(Overflow);
14704 return;
14705 }
14706 case ISD::UADDO:
14707 case ISD::USUBO: {
14708 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14709 "Unexpected custom legalisation");
14710 bool IsAdd = N->getOpcode() == ISD::UADDO;
14711 // Create an ADDW or SUBW.
14712 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14713 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14714 SDValue Res =
14715 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
14716 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14717 DAG.getValueType(MVT::i32));
14718
14719 SDValue Overflow;
14720 if (IsAdd && isOneConstant(RHS)) {
14721 // Special case uaddo X, 1 overflowed if the addition result is 0.
14722 // The general case (X + C) < C is not necessarily beneficial. Although we
14723 // reduce the live range of X, we may introduce the materialization of
14724 // constant C, especially when the setcc result is used by branch. We have
14725 // no compare with constant and branch instructions.
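// (With this form, uaddo X, 1 typically selects to an addiw of the operand
// followed by a seqz on the sum.)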
14726 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
14727 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
14728 } else if (IsAdd && isAllOnesConstant(RHS)) {
14729 // Special case uaddo X, -1 overflowed if X != 0.
14730 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
14731 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
14732 } else {
14733 // Sign extend the LHS and perform an unsigned compare with the ADDW
14734 // result. Since the inputs are sign extended from i32, this is equivalent
14735 // to comparing the lower 32 bits.
14736 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14737 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
14738 IsAdd ? ISD::SETULT : ISD::SETUGT);
14739 }
14740
14741 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14742 Results.push_back(Overflow);
14743 return;
14744 }
14745 case ISD::UADDSAT:
14746 case ISD::USUBSAT: {
14747 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14748 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
14749 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
14750 // promotion for UADDO/USUBO.
14751 Results.push_back(expandAddSubSat(N, DAG));
14752 return;
14753 }
14754 case ISD::SADDSAT:
14755 case ISD::SSUBSAT: {
14756 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14757 "Unexpected custom legalisation");
14758 Results.push_back(expandAddSubSat(N, DAG));
14759 return;
14760 }
14761 case ISD::ABS: {
14762 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14763 "Unexpected custom legalisation");
14764
14765 if (Subtarget.hasStdExtZbb()) {
14766 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
14767 // This allows us to remember that the result is sign extended. Expanding
14768 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
14769 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
14770 N->getOperand(0));
14771 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
14772 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
14773 return;
14774 }
14775
14776 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
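// For example, abs(-5): Y = 0xffffffff, xor(-5, Y) = 4, and 4 - Y = 5.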
14777 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14778
14779 // Freeze the source so we can increase its use count.
14780 Src = DAG.getFreeze(Src);
14781
14782 // Copy sign bit to all bits using the sraiw pattern.
14783 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
14784 DAG.getValueType(MVT::i32));
14785 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
14786 DAG.getConstant(31, DL, MVT::i64));
14787
14788 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
14789 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
14790
14791 // NOTE: The result is only required to be anyextended, but sext is
14792 // consistent with type legalization of sub.
14793 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
14794 DAG.getValueType(MVT::i32));
14795 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14796 return;
14797 }
14798 case ISD::BITCAST: {
14799 EVT VT = N->getValueType(0);
14800 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
14801 SDValue Op0 = N->getOperand(0);
14802 EVT Op0VT = Op0.getValueType();
14803 MVT XLenVT = Subtarget.getXLenVT();
14804 if (VT == MVT::i16 &&
14805 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
14806 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
14807 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
14808 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
14809 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
14810 Subtarget.hasStdExtFOrZfinx()) {
14811 SDValue FPConv =
14812 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
14813 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
14814 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
14815 Subtarget.hasStdExtDOrZdinx()) {
14816 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
14817 DAG.getVTList(MVT::i32, MVT::i32), Op0);
14818 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
14819 NewReg.getValue(0), NewReg.getValue(1));
14820 Results.push_back(RetReg);
14821 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
14822 isTypeLegal(Op0VT)) {
14823 // Custom-legalize bitcasts from fixed-length vector types to illegal
14824 // scalar types in order to improve codegen. Bitcast the vector to a
14825 // one-element vector type whose element type is the same as the result
14826 // type, and extract the first element.
14827 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
14828 if (isTypeLegal(BVT)) {
14829 SDValue BVec = DAG.getBitcast(BVT, Op0);
14830 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
14831 }
14832 }
14833 break;
14834 }
14835 case ISD::BITREVERSE: {
14836 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
14837 "Unexpected custom legalisation");
14838 MVT XLenVT = Subtarget.getXLenVT();
14839 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14840 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
14841 // ReplaceNodeResults requires we maintain the same type for the return
14842 // value.
14843 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
14844 break;
14845 }
14846 case RISCVISD::BREV8:
14847 case RISCVISD::ORC_B: {
14848 MVT VT = N->getSimpleValueType(0);
14849 MVT XLenVT = Subtarget.getXLenVT();
14850 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
14851 "Unexpected custom legalisation");
14852 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
14853 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
14854 "Unexpected extension");
14855 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14856 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
14857 // ReplaceNodeResults requires we maintain the same type for the return
14858 // value.
14859 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
14860 break;
14861 }
14862 case ISD::EXTRACT_VECTOR_ELT: {
14863 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
14864 // type is illegal (currently only vXi64 RV32).
14865 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
14866 // transferred to the destination register. We issue two of these from the
14867 // upper- and lower- halves of the SEW-bit vector element, slid down to the
14868 // first element.
14869 SDValue Vec = N->getOperand(0);
14870 SDValue Idx = N->getOperand(1);
14871
14872 // The vector type hasn't been legalized yet so we can't issue target
14873 // specific nodes if it needs legalization.
14874 // FIXME: We would manually legalize if it's important.
14875 if (!isTypeLegal(Vec.getValueType()))
14876 return;
14877
14878 MVT VecVT = Vec.getSimpleValueType();
14879
14880 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
14881 VecVT.getVectorElementType() == MVT::i64 &&
14882 "Unexpected EXTRACT_VECTOR_ELT legalization");
14883
14884 // If this is a fixed vector, we need to convert it to a scalable vector.
14885 MVT ContainerVT = VecVT;
14886 if (VecVT.isFixedLengthVector()) {
14887 ContainerVT = getContainerForFixedLengthVector(VecVT);
14888 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
14889 }
14890
14891 MVT XLenVT = Subtarget.getXLenVT();
14892
14893 // Use a VL of 1 to avoid processing more elements than we need.
14894 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
14895
14896 // Unless the index is known to be 0, we must slide the vector down to get
14897 // the desired element into index 0.
14898 if (!isNullConstant(Idx)) {
14899 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
14900 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
14901 }
14902
14903 // Extract the lower XLEN bits of the correct vector element.
14904 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
14905
14906 // To extract the upper XLEN bits of the vector element, shift the first
14907 // element right by 32 bits and re-extract the lower XLEN bits.
14908 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14909 DAG.getUNDEF(ContainerVT),
14910 DAG.getConstant(32, DL, XLenVT), VL);
14911 SDValue LShr32 =
14912 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
14913 DAG.getUNDEF(ContainerVT), Mask, VL);
14914
14915 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
14916
14917 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
14918 break;
14919 }
14920 case ISD::INTRINSIC_WO_CHAIN: {
14921 unsigned IntNo = N->getConstantOperandVal(0);
14922 switch (IntNo) {
14923 default:
14925 "Don't know how to custom type legalize this intrinsic!");
14926 case Intrinsic::experimental_get_vector_length: {
14927 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
14928 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14929 return;
14930 }
14931 case Intrinsic::experimental_cttz_elts: {
14932 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
14933 Results.push_back(
14934 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
14935 return;
14936 }
14937 case Intrinsic::riscv_orc_b:
14938 case Intrinsic::riscv_brev8:
14939 case Intrinsic::riscv_sha256sig0:
14940 case Intrinsic::riscv_sha256sig1:
14941 case Intrinsic::riscv_sha256sum0:
14942 case Intrinsic::riscv_sha256sum1:
14943 case Intrinsic::riscv_sm3p0:
14944 case Intrinsic::riscv_sm3p1: {
14945 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14946 return;
14947 unsigned Opc;
14948 switch (IntNo) {
14949 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
14950 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
14951 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
14952 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
14953 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
14954 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
14955 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
14956 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
14957 }
14958
14959 SDValue NewOp =
14960 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14961 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
14962 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14963 return;
14964 }
14965 case Intrinsic::riscv_sm4ks:
14966 case Intrinsic::riscv_sm4ed: {
14967 unsigned Opc =
14968 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
14969 SDValue NewOp0 =
14970 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14971 SDValue NewOp1 =
14972 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
14973 SDValue Res =
14974 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
14975 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14976 return;
14977 }
14978 case Intrinsic::riscv_mopr: {
14979 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14980 return;
14981 SDValue NewOp =
14982 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14983 SDValue Res = DAG.getNode(
14984 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
14985 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
14986 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14987 return;
14988 }
14989 case Intrinsic::riscv_moprr: {
14990 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14991 return;
14992 SDValue NewOp0 =
14993 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14994 SDValue NewOp1 =
14995 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
14996 SDValue Res = DAG.getNode(
14997 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
14998 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
14999 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15000 return;
15001 }
15002 case Intrinsic::riscv_clmul: {
15003 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15004 return;
15005
15006 SDValue NewOp0 =
15007 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15008 SDValue NewOp1 =
15009 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15010 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
15011 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15012 return;
15013 }
15014 case Intrinsic::riscv_clmulh:
15015 case Intrinsic::riscv_clmulr: {
15016 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15017 return;
15018
15019 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
15020 // to the full 128-bit clmul result of multiplying two xlen values.
15021 // Perform clmulr or clmulh on the shifted values. Finally, extract the
15022 // upper 32 bits.
15023 //
15024 // The alternative is to mask the inputs to 32 bits and use clmul, but
15025 // that requires two shifts to mask each input without zext.w.
15026 // FIXME: If the inputs are known zero extended or could be freely
15027 // zero extended, the mask form would be better.
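// For example (CLMULH case): two 32-bit inputs have a carry-less product of
// at most 63 significant bits, so after the <<32 shifts that product lands
// entirely in the upper 64 bits returned by clmulh; the final >>32 then
// extracts its upper 32 bits.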
15028 SDValue NewOp0 =
15029 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15030 SDValue NewOp1 =
15031 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15032 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
15033 DAG.getConstant(32, DL, MVT::i64));
15034 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
15035 DAG.getConstant(32, DL, MVT::i64));
15036 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
15037 : RISCVISD::CLMULR;
15038 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
15039 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
15040 DAG.getConstant(32, DL, MVT::i64));
15041 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15042 return;
15043 }
15044 case Intrinsic::riscv_vmv_x_s: {
15045 EVT VT = N->getValueType(0);
15046 MVT XLenVT = Subtarget.getXLenVT();
15047 if (VT.bitsLT(XLenVT)) {
15048 // Simple case just extract using vmv.x.s and truncate.
15049 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
15050 Subtarget.getXLenVT(), N->getOperand(1));
15051 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
15052 return;
15053 }
15054
15055 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
15056 "Unexpected custom legalization");
15057
15058 // We need to do the move in two steps.
15059 SDValue Vec = N->getOperand(1);
15060 MVT VecVT = Vec.getSimpleValueType();
15061
15062 // First extract the lower XLEN bits of the element.
15063 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15064
15065 // To extract the upper XLEN bits of the vector element, shift the first
15066 // element right by 32 bits and re-extract the lower XLEN bits.
15067 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
15068
15069 SDValue ThirtyTwoV =
15070 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
15071 DAG.getConstant(32, DL, XLenVT), VL);
15072 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
15073 DAG.getUNDEF(VecVT), Mask, VL);
15074 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15075
15076 Results.push_back(
15077 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15078 break;
15079 }
15080 }
15081 break;
15082 }
15083 case ISD::VECREDUCE_ADD:
15084 case ISD::VECREDUCE_AND:
15085 case ISD::VECREDUCE_OR:
15086 case ISD::VECREDUCE_XOR:
15087 case ISD::VECREDUCE_SMAX:
15088 case ISD::VECREDUCE_UMAX:
15089 case ISD::VECREDUCE_SMIN:
15090 case ISD::VECREDUCE_UMIN:
15091 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
15092 Results.push_back(V);
15093 break;
15094 case ISD::VP_REDUCE_ADD:
15095 case ISD::VP_REDUCE_AND:
15096 case ISD::VP_REDUCE_OR:
15097 case ISD::VP_REDUCE_XOR:
15098 case ISD::VP_REDUCE_SMAX:
15099 case ISD::VP_REDUCE_UMAX:
15100 case ISD::VP_REDUCE_SMIN:
15101 case ISD::VP_REDUCE_UMIN:
15102 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
15103 Results.push_back(V);
15104 break;
15105 case ISD::GET_ROUNDING: {
15106 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
15107 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
15108 Results.push_back(Res.getValue(0));
15109 Results.push_back(Res.getValue(1));
15110 break;
15111 }
15112 }
15113}
15114
15115/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
15116/// which corresponds to it.
15117static unsigned getVecReduceOpcode(unsigned Opc) {
15118 switch (Opc) {
15119 default:
15120 llvm_unreachable("Unhandled binary to transform reduction");
15121 case ISD::ADD:
15122 return ISD::VECREDUCE_ADD;
15123 case ISD::UMAX:
15124 return ISD::VECREDUCE_UMAX;
15125 case ISD::SMAX:
15126 return ISD::VECREDUCE_SMAX;
15127 case ISD::UMIN:
15128 return ISD::VECREDUCE_UMIN;
15129 case ISD::SMIN:
15130 return ISD::VECREDUCE_SMIN;
15131 case ISD::AND:
15132 return ISD::VECREDUCE_AND;
15133 case ISD::OR:
15134 return ISD::VECREDUCE_OR;
15135 case ISD::XOR:
15136 return ISD::VECREDUCE_XOR;
15137 case ISD::FADD:
15138 // Note: This is the associative form of the generic reduction opcode.
15139 return ISD::VECREDUCE_FADD;
15140 }
15141}
15142
15143/// Perform two related transforms whose purpose is to incrementally recognize
15144/// an explode_vector followed by scalar reduction as a vector reduction node.
15145/// This exists to recover from a deficiency in SLP which can't handle
15146/// forests with multiple roots sharing common nodes. In some cases, one
15147/// of the trees will be vectorized, and the other will remain (unprofitably)
15148/// scalarized.
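/// For example, (add (add (extractelt V, 0), (extractelt V, 1)), (extractelt V, 2))
/// is recognized incrementally: the inner add becomes
/// (vecreduce_add (extract_subvector V, elements 0..1)), and the outer add then
/// grows it into (vecreduce_add (extract_subvector V, elements 0..2)).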
15149static SDValue
15150 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
15151 const RISCVSubtarget &Subtarget) {
15152
15153 // This transform needs to run before all integer types have been legalized
15154 // to i64 (so that the vector element type matches the add type), and while
15155 // it's safe to introduce odd sized vector types.
15156 if (DAG.NewNodesMustHaveLegalTypes)
15157 return SDValue();
15158
15159 // Without V, this transform isn't useful. We could form the (illegal)
15160 // operations and let them be scalarized again, but there's really no point.
15161 if (!Subtarget.hasVInstructions())
15162 return SDValue();
15163
15164 const SDLoc DL(N);
15165 const EVT VT = N->getValueType(0);
15166 const unsigned Opc = N->getOpcode();
15167
15168 // For FADD, we only handle the case with reassociation allowed. We
15169 // could handle strict reduction order, but at the moment, there's no
15170 // known reason to, and the complexity isn't worth it.
15171 // TODO: Handle fminnum and fmaxnum here
15172 if (!VT.isInteger() &&
15173 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
15174 return SDValue();
15175
15176 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
15177 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
15178 "Inconsistent mappings");
15179 SDValue LHS = N->getOperand(0);
15180 SDValue RHS = N->getOperand(1);
15181
15182 if (!LHS.hasOneUse() || !RHS.hasOneUse())
15183 return SDValue();
15184
15185 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15186 std::swap(LHS, RHS);
15187
15188 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15189 !isa<ConstantSDNode>(RHS.getOperand(1)))
15190 return SDValue();
15191
15192 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
15193 SDValue SrcVec = RHS.getOperand(0);
15194 EVT SrcVecVT = SrcVec.getValueType();
15195 assert(SrcVecVT.getVectorElementType() == VT);
15196 if (SrcVecVT.isScalableVector())
15197 return SDValue();
15198
15199 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
15200 return SDValue();
15201
15202 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
15203 // reduce_op (extract_subvector [2 x VT] from V). This will form the
15204 // root of our reduction tree. TODO: We could extend this to any two
15205 // adjacent aligned constant indices if desired.
15206 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15207 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
15208 uint64_t LHSIdx =
15209 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
15210 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
15211 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
15212 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15213 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
15214 }
15215 }
15216
15217 // Match (binop (reduce (extract_subvector V, 0),
15218 // (extract_vector_elt V, sizeof(SubVec))))
15219 // into a reduction of one more element from the original vector V.
15220 if (LHS.getOpcode() != ReduceOpc)
15221 return SDValue();
15222
15223 SDValue ReduceVec = LHS.getOperand(0);
15224 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15225 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
15226 isNullConstant(ReduceVec.getOperand(1)) &&
15227 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
15228 // For illegal types (e.g. 3xi32), most will be combined again into a
15229 // wider (hopefully legal) type. If this is a terminal state, we are
15230 // relying on type legalization here to produce something reasonable
15231 // and this lowering quality could probably be improved. (TODO)
15232 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
15233 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15234 return DAG.getNode(ReduceOpc, DL, VT, Vec,
15235 ReduceVec->getFlags() & N->getFlags());
15236 }
15237
15238 return SDValue();
15239}
15240
15241
15242// Try to fold (<bop> x, (reduction.<bop> vec, start))
15243 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
15244 const RISCVSubtarget &Subtarget) {
15245 auto BinOpToRVVReduce = [](unsigned Opc) {
15246 switch (Opc) {
15247 default:
15248 llvm_unreachable("Unhandled binary to transform reduction");
15249 case ISD::ADD:
15250 return RISCVISD::VECREDUCE_ADD_VL;
15251 case ISD::UMAX:
15252 return RISCVISD::VECREDUCE_UMAX_VL;
15253 case ISD::SMAX:
15254 return RISCVISD::VECREDUCE_SMAX_VL;
15255 case ISD::UMIN:
15256 return RISCVISD::VECREDUCE_UMIN_VL;
15257 case ISD::SMIN:
15258 return RISCVISD::VECREDUCE_SMIN_VL;
15259 case ISD::AND:
15260 return RISCVISD::VECREDUCE_AND_VL;
15261 case ISD::OR:
15262 return RISCVISD::VECREDUCE_OR_VL;
15263 case ISD::XOR:
15264 return RISCVISD::VECREDUCE_XOR_VL;
15265 case ISD::FADD:
15266 return RISCVISD::VECREDUCE_FADD_VL;
15267 case ISD::FMAXNUM:
15268 return RISCVISD::VECREDUCE_FMAX_VL;
15269 case ISD::FMINNUM:
15270 return RISCVISD::VECREDUCE_FMIN_VL;
15271 }
15272 };
15273
15274 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
15275 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15276 isNullConstant(V.getOperand(1)) &&
15277 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
15278 };
15279
15280 unsigned Opc = N->getOpcode();
15281 unsigned ReduceIdx;
15282 if (IsReduction(N->getOperand(0), Opc))
15283 ReduceIdx = 0;
15284 else if (IsReduction(N->getOperand(1), Opc))
15285 ReduceIdx = 1;
15286 else
15287 return SDValue();
15288
15289 // Skip if FADD disallows reassociation but the combiner needs it.
15290 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
15291 return SDValue();
15292
15293 SDValue Extract = N->getOperand(ReduceIdx);
15294 SDValue Reduce = Extract.getOperand(0);
15295 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
15296 return SDValue();
15297
15298 SDValue ScalarV = Reduce.getOperand(2);
15299 EVT ScalarVT = ScalarV.getValueType();
15300 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
15301 ScalarV.getOperand(0)->isUndef() &&
15302 isNullConstant(ScalarV.getOperand(2)))
15303 ScalarV = ScalarV.getOperand(1);
15304
15305 // Make sure that ScalarV is a splat with VL=1.
15306 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
15307 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
15308 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
15309 return SDValue();
15310
15311 if (!isNonZeroAVL(ScalarV.getOperand(2)))
15312 return SDValue();
15313
15314 // Check that the scalar of ScalarV is the neutral element.
15315 // TODO: Deal with values other than the neutral element.
15316 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
15317 0))
15318 return SDValue();
15319
15320 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
15321 // FIXME: We might be able to improve this if operand 0 is undef.
15322 if (!isNonZeroAVL(Reduce.getOperand(5)))
15323 return SDValue();
15324
15325 SDValue NewStart = N->getOperand(1 - ReduceIdx);
15326
15327 SDLoc DL(N);
15328 SDValue NewScalarV =
15329 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
15330 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
15331
15332 // If we looked through an INSERT_SUBVECTOR we need to restore it.
15333 if (ScalarVT != ScalarV.getValueType())
15334 NewScalarV =
15335 DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);
15336
15337 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
15338 NewScalarV, Reduce.getOperand(3),
15339 Reduce.getOperand(4), Reduce.getOperand(5)};
15340 SDValue NewReduce =
15341 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
15342 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
15343 Extract.getOperand(1));
15344}
15345
15346// Optimize (add (shl x, c0), (shl y, c1)) ->
15347// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
15348// or
15349// (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <=31.
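// For example, with c0 = 1 and c1 = 3:
//   (add (shl x, 1), (shl y, 3)) -> (shl (sh2add y, x), 1)
// since ((y << 2) + x) << 1 == (x << 1) + (y << 3).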
15350 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
15351 const RISCVSubtarget &Subtarget) {
15352 // Perform this optimization only in the zba/xandesperf/xqciac/xtheadba
15353 // extension.
15354 if (!Subtarget.hasShlAdd(3))
15355 return SDValue();
15356
15357 // Skip for vector types and larger types.
15358 EVT VT = N->getValueType(0);
15359 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15360 return SDValue();
15361
15362 // The two operand nodes must be SHL and have no other use.
15363 SDValue N0 = N->getOperand(0);
15364 SDValue N1 = N->getOperand(1);
15365 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
15366 !N0->hasOneUse() || !N1->hasOneUse())
15367 return SDValue();
15368
15369 // Check c0 and c1.
15370 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15371 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
15372 if (!N0C || !N1C)
15373 return SDValue();
15374 int64_t C0 = N0C->getSExtValue();
15375 int64_t C1 = N1C->getSExtValue();
15376 if (C0 <= 0 || C1 <= 0)
15377 return SDValue();
15378
15379 int64_t Diff = std::abs(C0 - C1);
15380 if (!Subtarget.hasShlAdd(Diff))
15381 return SDValue();
15382
15383 // Build nodes.
15384 SDLoc DL(N);
15385 int64_t Bits = std::min(C0, C1);
15386 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
15387 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
15388 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
15389 DAG.getConstant(Diff, DL, VT), NS);
15390 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
15391}
15392
15393// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
15394// or 3.
15395 static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
15396 SelectionDAG &DAG) {
15397 using namespace llvm::SDPatternMatch;
15398
15399 // Looking for a reg-reg add and not an addi.
15400 if (isa<ConstantSDNode>(N->getOperand(1)))
15401 return SDValue();
15402
15403 // Based on testing it seems that performance degrades if the ADDI has
15404 // more than 2 uses.
15405 if (AddI->use_size() > 2)
15406 return SDValue();
15407
15408 APInt AddVal;
15409 SDValue SHLVal;
15410 if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
15411 return SDValue();
15412
15413 APInt VShift;
15414 if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
15415 return SDValue();
15416
15417 if (VShift.slt(1) || VShift.sgt(3))
15418 return SDValue();
15419
15420 SDLoc DL(N);
15421 EVT VT = N->getValueType(0);
15422 // The shift must be positive but the add can be signed.
15423 uint64_t ShlConst = VShift.getZExtValue();
15424 int64_t AddConst = AddVal.getSExtValue();
15425
15426 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
15427 DAG.getConstant(ShlConst, DL, VT), Other);
15428 return DAG.getNode(ISD::ADD, DL, VT, SHADD,
15429 DAG.getSignedConstant(AddConst, DL, VT));
15430}
15431
15432// Optimize (add (add (shl x, c0), c1), y) ->
15433// (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3].
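// For example, with c0 = 2 and c1 = 100:
//   (add (add (shl x, 2), 100), y) -> (add (sh2add x, y), 100)
// where sh2add computes (x << 2) + y and the constant folds into a trailing addi.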
15434 static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
15435 const RISCVSubtarget &Subtarget) {
15436 // Perform this optimization only in the zba extension.
15437 if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3))
15438 return SDValue();
15439
15440 // Skip for vector types and larger types.
15441 EVT VT = N->getValueType(0);
15442 if (VT != Subtarget.getXLenVT())
15443 return SDValue();
15444
15445 SDValue AddI = N->getOperand(0);
15446 SDValue Other = N->getOperand(1);
15447 if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
15448 return V;
15449 if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
15450 return V;
15451 return SDValue();
15452}
15453
15454// Combine a constant select operand into its use:
15455//
15456// (and (select cond, -1, c), x)
15457// -> (select cond, x, (and x, c)) [AllOnes=1]
15458// (or (select cond, 0, c), x)
15459// -> (select cond, x, (or x, c)) [AllOnes=0]
15460// (xor (select cond, 0, c), x)
15461// -> (select cond, x, (xor x, c)) [AllOnes=0]
15462// (add (select cond, 0, c), x)
15463// -> (select cond, x, (add x, c)) [AllOnes=0]
15464// (sub x, (select cond, 0, c))
15465// -> (select cond, x, (sub x, c)) [AllOnes=0]
15466 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
15467 SelectionDAG &DAG, bool AllOnes,
15468 const RISCVSubtarget &Subtarget) {
15469 EVT VT = N->getValueType(0);
15470
15471 // Skip vectors.
15472 if (VT.isVector())
15473 return SDValue();
15474
15475 if (!Subtarget.hasConditionalMoveFusion()) {
15476 // (select cond, x, (and x, c)) has custom lowering with Zicond.
15477 if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
15478 return SDValue();
15479
15480 // Maybe harmful when the condition code has multiple uses.
15481 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
15482 return SDValue();
15483
15484 // Maybe harmful when VT is wider than XLen.
15485 if (VT.getSizeInBits() > Subtarget.getXLen())
15486 return SDValue();
15487 }
15488
15489 if ((Slct.getOpcode() != ISD::SELECT &&
15490 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
15491 !Slct.hasOneUse())
15492 return SDValue();
15493
15494 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
15495 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
15496 };
15497
15498 bool SwapSelectOps;
15499 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
15500 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
15501 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
15502 SDValue NonConstantVal;
15503 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
15504 SwapSelectOps = false;
15505 NonConstantVal = FalseVal;
15506 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
15507 SwapSelectOps = true;
15508 NonConstantVal = TrueVal;
15509 } else
15510 return SDValue();
15511
15512 // Slct is now known to be the desired identity constant when CC is true.
15513 TrueVal = OtherOp;
15514 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
15515 // Unless SwapSelectOps says the condition should be false.
15516 if (SwapSelectOps)
15517 std::swap(TrueVal, FalseVal);
15518
15519 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
15520 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
15521 {Slct.getOperand(0), Slct.getOperand(1),
15522 Slct.getOperand(2), TrueVal, FalseVal});
15523
15524 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
15525 {Slct.getOperand(0), TrueVal, FalseVal});
15526}
15527
15528// Attempt combineSelectAndUse on each operand of a commutative operator N.
15529 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
15530 bool AllOnes,
15531 const RISCVSubtarget &Subtarget) {
15532 SDValue N0 = N->getOperand(0);
15533 SDValue N1 = N->getOperand(1);
15534 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
15535 return Result;
15536 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
15537 return Result;
15538 return SDValue();
15539}
15540
15541// Transform (add (mul x, c0), c1) ->
15542// (add (mul (add x, c1/c0), c0), c1%c0).
15543// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
15544// that should be excluded is when c0*(c1/c0) is simm12, which will lead
15545// to an infinite loop in DAGCombine if transformed.
15546// Or transform (add (mul x, c0), c1) ->
15547// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
15548// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
15549// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
15550// lead to an infinite loop in DAGCombine if transformed.
15551// Or transform (add (mul x, c0), c1) ->
15552// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
15553// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
15554// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
15555// lead to an infinite loop in DAGCombine if transformed.
15556// Or transform (add (mul x, c0), c1) ->
15557// (mul (add x, c1/c0), c0).
15558// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
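// For example, with c0 = 5 and c1 = 10007 (not a simm12): c1/c0 = 2001 and
// c1%c0 = 2 are both simm12 while c0*(c1/c0) = 10005 is not, so
//   (add (mul x, 5), 10007) -> (add (mul (add x, 2001), 5), 2).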
15559 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
15560 const RISCVSubtarget &Subtarget) {
15561 // Skip for vector types and larger types.
15562 EVT VT = N->getValueType(0);
15563 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15564 return SDValue();
15565 // The first operand node must be a MUL and has no other use.
15566 SDValue N0 = N->getOperand(0);
15567 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
15568 return SDValue();
15569 // Check if c0 and c1 match above conditions.
15570 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15571 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15572 if (!N0C || !N1C)
15573 return SDValue();
15574 // If N0C has multiple uses it's possible one of the cases in
15575 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
15576 // in an infinite loop.
15577 if (!N0C->hasOneUse())
15578 return SDValue();
15579 int64_t C0 = N0C->getSExtValue();
15580 int64_t C1 = N1C->getSExtValue();
15581 int64_t CA, CB;
15582 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
15583 return SDValue();
15584 // Search for proper CA (non-zero) and CB that both are simm12.
15585 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
15586 !isInt<12>(C0 * (C1 / C0))) {
15587 CA = C1 / C0;
15588 CB = C1 % C0;
15589 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
15590 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
15591 CA = C1 / C0 + 1;
15592 CB = C1 % C0 - C0;
15593 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
15594 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
15595 CA = C1 / C0 - 1;
15596 CB = C1 % C0 + C0;
15597 } else
15598 return SDValue();
15599 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
15600 SDLoc DL(N);
15601 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
15602 DAG.getSignedConstant(CA, DL, VT));
15603 SDValue New1 =
15604 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
15605 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
15606}
15607
15608// add (zext, zext) -> zext (add (zext, zext))
15609// sub (zext, zext) -> sext (sub (zext, zext))
15610// mul (zext, zext) -> zext (mul (zext, zext))
15611// sdiv (zext, zext) -> zext (sdiv (zext, zext))
15612// udiv (zext, zext) -> zext (udiv (zext, zext))
15613// srem (zext, zext) -> zext (srem (zext, zext))
15614// urem (zext, zext) -> zext (urem (zext, zext))
15615//
15616 // where the sum of the extend widths match, and the range of the bin op
15617// fits inside the width of the narrower bin op. (For profitability on rvv, we
15618// use a power of two for both inner and outer extend.)
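// For example, (mul (zext nxv2i8 a to nxv2i64), (zext nxv2i8 b to nxv2i64))
// becomes (zext (mul (zext a to nxv2i32), (zext b to nxv2i32)) to nxv2i64),
// since an i8*i8 product needs at most 16 bits and fits in the narrower op.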
15619 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
15620
15621 EVT VT = N->getValueType(0);
15622 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
15623 return SDValue();
15624
15625 SDValue N0 = N->getOperand(0);
15626 SDValue N1 = N->getOperand(1);
15627 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
15628 return SDValue();
15629 if (!N0.hasOneUse() || !N1.hasOneUse())
15630 return SDValue();
15631
15632 SDValue Src0 = N0.getOperand(0);
15633 SDValue Src1 = N1.getOperand(0);
15634 EVT SrcVT = Src0.getValueType();
15635 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
15636 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
15637 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
15638 return SDValue();
15639
15640 LLVMContext &C = *DAG.getContext();
15641 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
15642 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
15643
15644 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
15645 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
15646
15647 // Src0 and Src1 are zero extended, so they're always positive if signed.
15648 //
15649 // sub can produce a negative from two positive operands, so it needs sign
15650 // extended. Other nodes produce a positive from two positive operands, so
15651 // zero extend instead.
15652 unsigned OuterExtend =
15653 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
15654
15655 return DAG.getNode(
15656 OuterExtend, SDLoc(N), VT,
15657 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
15658}
15659
15660// Try to turn (add (xor bool, 1) -1) into (neg bool).
15661 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
15662 SDValue N0 = N->getOperand(0);
15663 SDValue N1 = N->getOperand(1);
15664 EVT VT = N->getValueType(0);
15665 SDLoc DL(N);
15666
15667 // RHS should be -1.
15668 if (!isAllOnesConstant(N1))
15669 return SDValue();
15670
15671 // Look for (xor X, 1).
15672 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
15673 return SDValue();
15674
15675 // First xor input should be 0 or 1.
15676 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15677 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
15678 return SDValue();
15679
15680 // Emit a negate of the setcc.
15681 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
15682 N0.getOperand(0));
15683}
15684
15685 static SDValue performADDCombine(SDNode *N,
15686 TargetLowering::DAGCombinerInfo &DCI,
15687 const RISCVSubtarget &Subtarget) {
15688 SelectionDAG &DAG = DCI.DAG;
15689 if (SDValue V = combineAddOfBooleanXor(N, DAG))
15690 return V;
15691 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
15692 return V;
15693 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
15694 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
15695 return V;
15696 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
15697 return V;
15698 }
15699 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15700 return V;
15701 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15702 return V;
15703 if (SDValue V = combineBinOpOfZExt(N, DAG))
15704 return V;
15705
15706 // fold (add (select lhs, rhs, cc, 0, y), x) ->
15707 // (select lhs, rhs, cc, x, (add x, y))
15708 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
15709}
15710
15711// Try to turn a sub boolean RHS and constant LHS into an addi.
15713 SDValue N0 = N->getOperand(0);
15714 SDValue N1 = N->getOperand(1);
15715 EVT VT = N->getValueType(0);
15716 SDLoc DL(N);
15717
15718 // Require a constant LHS.
15719 auto *N0C = dyn_cast<ConstantSDNode>(N0);
15720 if (!N0C)
15721 return SDValue();
15722
15723 // All our optimizations involve subtracting 1 from the immediate and forming
15724 // an ADDI. Make sure the new immediate is valid for an ADDI.
15725 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
15726 if (!ImmValMinus1.isSignedIntN(12))
15727 return SDValue();
15728
15729 SDValue NewLHS;
15730 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
15731 // (sub constant, (setcc x, y, eq/neq)) ->
15732 // (add (setcc x, y, neq/eq), constant - 1)
15733 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15734 EVT SetCCOpVT = N1.getOperand(0).getValueType();
15735 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
15736 return SDValue();
15737 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15738 NewLHS =
15739 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
15740 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
15741 N1.getOperand(0).getOpcode() == ISD::SETCC) {
15742 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
15743 // Since setcc returns a bool the xor is equivalent to 1-setcc.
15744 NewLHS = N1.getOperand(0);
15745 } else
15746 return SDValue();
15747
15748 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
15749 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
15750}
15751
15752// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
15753// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
15754// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
15755// valid with Y=3, while 0b0000_1000_0000_0100 is not.
15756 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
15757 const RISCVSubtarget &Subtarget) {
15758 if (!Subtarget.hasStdExtZbb())
15759 return SDValue();
15760
15761 EVT VT = N->getValueType(0);
15762
15763 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
15764 return SDValue();
15765
15766 SDValue N0 = N->getOperand(0);
15767 SDValue N1 = N->getOperand(1);
15768
15769 if (N0->getOpcode() != ISD::SHL)
15770 return SDValue();
15771
15772 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
15773 if (!ShAmtCLeft)
15774 return SDValue();
15775 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
15776
15777 if (ShiftedAmount >= 8)
15778 return SDValue();
15779
15780 SDValue LeftShiftOperand = N0->getOperand(0);
15781 SDValue RightShiftOperand = N1;
15782
15783 if (ShiftedAmount != 0) { // Right operand must be a right shift.
15784 if (N1->getOpcode() != ISD::SRL)
15785 return SDValue();
15786 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
15787 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
15788 return SDValue();
15789 RightShiftOperand = N1.getOperand(0);
15790 }
15791
15792 // At least one shift should have a single use.
15793 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
15794 return SDValue();
15795
15796 if (LeftShiftOperand != RightShiftOperand)
15797 return SDValue();
15798
15799 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
15800 Mask <<= ShiftedAmount;
15801 // Check that X has indeed the right shape (only the Y-th bit can be set in
15802 // every byte).
15803 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
15804 return SDValue();
15805
15806 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
15807}
15808
15809 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
15810 const RISCVSubtarget &Subtarget) {
15811 if (SDValue V = combineSubOfBoolean(N, DAG))
15812 return V;
15813
15814 EVT VT = N->getValueType(0);
15815 SDValue N0 = N->getOperand(0);
15816 SDValue N1 = N->getOperand(1);
15817 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
15818 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
15819 isNullConstant(N1.getOperand(1)) &&
15820 N1.getValueType() == N1.getOperand(0).getValueType()) {
15821 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15822 if (CCVal == ISD::SETLT) {
15823 SDLoc DL(N);
15824 unsigned ShAmt = N0.getValueSizeInBits() - 1;
15825 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
15826 DAG.getConstant(ShAmt, DL, VT));
15827 }
15828 }
15829
15830 if (SDValue V = combineBinOpOfZExt(N, DAG))
15831 return V;
15832 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
15833 return V;
15834
15835 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
15836 // (select lhs, rhs, cc, x, (sub x, y))
15837 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
15838}
15839
15840// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
15841// Legalizing setcc can introduce xors like this. Doing this transform reduces
15842// the number of xors and may allow the xor to fold into a branch condition.
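// For example, when X and Y are known to be 0/1:
//   (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1)
// which replaces two xors with one.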
15843 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
15844 SDValue N0 = N->getOperand(0);
15845 SDValue N1 = N->getOperand(1);
15846 bool IsAnd = N->getOpcode() == ISD::AND;
15847
15848 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
15849 return SDValue();
15850
15851 if (!N0.hasOneUse() || !N1.hasOneUse())
15852 return SDValue();
15853
15854 SDValue N01 = N0.getOperand(1);
15855 SDValue N11 = N1.getOperand(1);
15856
15857 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
15858 // (xor X, -1) based on the upper bits of the other operand being 0. If the
15859 // operation is And, allow one of the Xors to use -1.
15860 if (isOneConstant(N01)) {
15861 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
15862 return SDValue();
15863 } else if (isOneConstant(N11)) {
15864 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
15865 if (!(IsAnd && isAllOnesConstant(N01)))
15866 return SDValue();
15867 } else
15868 return SDValue();
15869
15870 EVT VT = N->getValueType(0);
15871
15872 SDValue N00 = N0.getOperand(0);
15873 SDValue N10 = N1.getOperand(0);
15874
15875 // The LHS of the xors needs to be 0/1.
15876 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15877 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
15878 return SDValue();
15879
15880 // Invert the opcode and insert a new xor.
15881 SDLoc DL(N);
15882 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15883 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
15884 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
15885}
15886
15887// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
15888// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
15889// value to an unsigned value. This will be lowered to vmax and series of
15890// vnclipu instructions later. This can be extended to other truncated types
15891// other than i8 by replacing 256 and 255 with the equivalent constants for the
15892// type.
15893 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
15894 EVT VT = N->getValueType(0);
15895 SDValue N0 = N->getOperand(0);
15896 EVT SrcVT = N0.getValueType();
15897
15898 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15899 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
15900 return SDValue();
15901
15902 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
15903 return SDValue();
15904
15905 SDValue Cond = N0.getOperand(0);
15906 SDValue True = N0.getOperand(1);
15907 SDValue False = N0.getOperand(2);
15908
15909 if (Cond.getOpcode() != ISD::SETCC)
15910 return SDValue();
15911
15912 // FIXME: Support the version of this pattern with the select operands
15913 // swapped.
15914 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15915 if (CCVal != ISD::SETULT)
15916 return SDValue();
15917
15918 SDValue CondLHS = Cond.getOperand(0);
15919 SDValue CondRHS = Cond.getOperand(1);
15920
15921 if (CondLHS != True)
15922 return SDValue();
15923
15924 unsigned ScalarBits = VT.getScalarSizeInBits();
15925
15926 // FIXME: Support other constants.
15927 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
15928 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
15929 return SDValue();
15930
15931 if (False.getOpcode() != ISD::SIGN_EXTEND)
15932 return SDValue();
15933
15934 False = False.getOperand(0);
15935
15936 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
15937 return SDValue();
15938
15939 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
15940 if (!FalseRHSC || !FalseRHSC->isZero())
15941 return SDValue();
15942
15943 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
15944 if (CCVal2 != ISD::SETGT)
15945 return SDValue();
15946
15947 // Emit the signed to unsigned saturation pattern.
15948 SDLoc DL(N);
15949 SDValue Max =
15950 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
15951 SDValue Min =
15952 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
15953 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
15954 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
15955}
15956
15957 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
15958 const RISCVSubtarget &Subtarget) {
15959 SDValue N0 = N->getOperand(0);
15960 EVT VT = N->getValueType(0);
15961
15962 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
15963 // extending X. This is safe since we only need the LSB after the shift and
15964 // shift amounts larger than 31 would produce poison. If we wait until
15965 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
15966 // to use a BEXT instruction.
15967 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
15968 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
15969 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
15970 SDLoc DL(N0);
15971 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
15972 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
15973 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
15974 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
15975 }
15976
15977 return combineTruncSelectToSMaxUSat(N, DAG);
15978}
15979
15980// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
15981// truncation. But RVV doesn't have truncation instructions for more than twice
15982// the bitwidth.
15983//
15984// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
15985//
15986// vsetvli a0, zero, e32, m2, ta, ma
15987// vnsrl.wi v12, v8, 0
15988// vsetvli zero, zero, e16, m1, ta, ma
15989// vnsrl.wi v8, v12, 0
15990// vsetvli zero, zero, e8, mf2, ta, ma
15991// vnsrl.wi v8, v8, 0
15992//
15993 // So reverse the combine so we generate a vmseq/vmsne again:
15994//
15995// and (lshr (trunc X), ShAmt), 1
15996// -->
15997// zext (icmp ne (and X, (1 << ShAmt)), 0)
15998//
15999// and (lshr (not (trunc X)), ShAmt), 1
16000// -->
16001// zext (icmp eq (and X, (1 << ShAmt)), 0)
16002 static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
16003 const RISCVSubtarget &Subtarget) {
16004 using namespace SDPatternMatch;
16005 SDLoc DL(N);
16006
16007 if (!Subtarget.hasVInstructions())
16008 return SDValue();
16009
16010 EVT VT = N->getValueType(0);
16011 if (!VT.isVector())
16012 return SDValue();
16013
16014 APInt ShAmt;
16015 SDValue Inner;
16016 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
16017 m_One())))
16018 return SDValue();
16019
16020 SDValue X;
16021 bool IsNot;
16022 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
16023 IsNot = true;
16024 else if (sd_match(Inner, m_Trunc(m_Value(X))))
16025 IsNot = false;
16026 else
16027 return SDValue();
16028
16029 EVT WideVT = X.getValueType();
16030 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
16031 return SDValue();
16032
16033 SDValue Res =
16034 DAG.getNode(ISD::AND, DL, WideVT, X,
16035 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
16036 Res = DAG.getSetCC(DL,
16037 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16038 WideVT.getVectorElementCount()),
16039 Res, DAG.getConstant(0, DL, WideVT),
16040 IsNot ? ISD::SETEQ : ISD::SETNE);
16041 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
16042}
16043
16044 static SDValue reduceANDOfAtomicLoad(SDNode *N,
16045 TargetLowering::DAGCombinerInfo &DCI) {
16046 SelectionDAG &DAG = DCI.DAG;
16047 if (N->getOpcode() != ISD::AND)
16048 return SDValue();
16049
16050 SDValue N0 = N->getOperand(0);
16051 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
16052 return SDValue();
16053 if (!N0.hasOneUse())
16054 return SDValue();
16055
16058 return SDValue();
16059
16060 EVT LoadedVT = ALoad->getMemoryVT();
16061 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
16062 if (!MaskConst)
16063 return SDValue();
16064 uint64_t Mask = MaskConst->getZExtValue();
16065 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
16066 if (Mask != ExpectedMask)
16067 return SDValue();
16068
16069 SDValue ZextLoad = DAG.getAtomicLoad(
16070 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
16071 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
16072 DCI.CombineTo(N, ZextLoad);
16073 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
16075 return SDValue(N, 0);
16076}
16077
16078 // Combines two comparison operations and a logic operation into one selection
16079 // operation (min, max) and a logic operation. Returns the newly constructed node
16080 // if the conditions for the optimization are satisfied.
16081 static SDValue performANDCombine(SDNode *N,
16082 TargetLowering::DAGCombinerInfo &DCI,
16083 const RISCVSubtarget &Subtarget) {
16084 SelectionDAG &DAG = DCI.DAG;
16085
16086 SDValue N0 = N->getOperand(0);
16087 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
16088 // extending X. This is safe since we only need the LSB after the shift and
16089 // shift amounts larger than 31 would produce poison. If we wait until
16090 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16091 // to use a BEXT instruction.
16092 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16093 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
16094 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
16095 N0.hasOneUse()) {
16096 SDLoc DL(N);
16097 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16098 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16099 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16100 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
16101 DAG.getConstant(1, DL, MVT::i64));
16102 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16103 }
16104
16105 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
16106 return V;
16107
16108 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16109 return V;
16110 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16111 return V;
16112 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
16113 return V;
16114
16115 if (DCI.isAfterLegalizeDAG())
16116 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16117 return V;
16118
16119 // fold (and (select lhs, rhs, cc, -1, y), x) ->
16120 // (select lhs, rhs, cc, x, (and x, y))
16121 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
16122}
16123
16124// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
16125// FIXME: Generalize to other binary operators with same operand.
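// For example:
//   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
//   -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)
// so a single xor is applied to the select result instead of to both arms.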
16126 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
16127 SelectionDAG &DAG) {
16128 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
16129
16130 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
16131 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
16132 !N0.hasOneUse() || !N1.hasOneUse())
16133 return SDValue();
16134
16135 // Should have the same condition.
16136 SDValue Cond = N0.getOperand(1);
16137 if (Cond != N1.getOperand(1))
16138 return SDValue();
16139
16140 SDValue TrueV = N0.getOperand(0);
16141 SDValue FalseV = N1.getOperand(0);
16142
16143 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
16144 TrueV.getOperand(1) != FalseV.getOperand(1) ||
16145 !isOneConstant(TrueV.getOperand(1)) ||
16146 !TrueV.hasOneUse() || !FalseV.hasOneUse())
16147 return SDValue();
16148
16149 EVT VT = N->getValueType(0);
16150 SDLoc DL(N);
16151
16152 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
16153 Cond);
16154 SDValue NewN1 =
16155 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
16156 SDValue NewOr =
16157 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
16158 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
16159}
16160
16161// (xor X, (xor (and X, C2), Y))
16162// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
16163// where C2 is a shifted mask with width = Width and shift = ShAmt
16164// qc_insb might become qc.insb or qc.insbi depending on the operands.
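// For example, with C2 = 0xff0 (Width = 8, ShAmt = 4), the xor chain replaces
// bits [11:4] of X with bits [11:4] of Y, which is exactly a bitfield insert.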
16165 static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16166 const RISCVSubtarget &Subtarget) {
16167 if (!Subtarget.hasVendorXqcibm())
16168 return SDValue();
16169
16170 using namespace SDPatternMatch;
16171
16172 SDValue Base, Inserted;
16173 APInt CMask;
16174 if (!sd_match(N, m_Xor(m_Value(Base),
16176 m_ConstInt(CMask))),
16177 m_Value(Inserted))))))
16178 return SDValue();
16179
16180 if (N->getValueType(0) != MVT::i32)
16181 return SDValue();
16182
16183 unsigned Width, ShAmt;
16184 if (!CMask.isShiftedMask(ShAmt, Width))
16185 return SDValue();
16186
16187 // Check if all zero bits in CMask are also zero in Inserted
16188 if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
16189 return SDValue();
16190
16191 SDLoc DL(N);
16192
16193 // `Inserted` needs to be right shifted before it is put into the
16194 // instruction.
16195 Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
16196 DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
16197
16198 SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
16199 DAG.getConstant(ShAmt, DL, MVT::i32)};
16200 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16201}
16202
16203 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16204 const RISCVSubtarget &Subtarget) {
16205 SelectionDAG &DAG = DCI.DAG;
16206
16207 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16208 return V;
16209 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16210 return V;
16211
16212 if (DCI.isAfterLegalizeDAG())
16213 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16214 return V;
16215
16216 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
16217 // We may be able to pull a common operation out of the true and false value.
16218 SDValue N0 = N->getOperand(0);
16219 SDValue N1 = N->getOperand(1);
16220 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
16221 return V;
16222 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
16223 return V;
16224
16225 // fold (or (select cond, 0, y), x) ->
16226 // (select cond, x, (or x, y))
16227 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16228}
16229
16230 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
16231 const RISCVSubtarget &Subtarget) {
16232 SDValue N0 = N->getOperand(0);
16233 SDValue N1 = N->getOperand(1);
16234
16235 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
16236 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
16237 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
16238 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16239 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
16240 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
16241 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16242 SDLoc DL(N);
16243 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16244 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16245 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
16246 SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
16247 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
16248 }
16249
16250 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
16251 // NOTE: Assumes ROL being legal means ROLW is legal.
16252 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16253 if (N0.getOpcode() == RISCVISD::SLLW &&
16254 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
16255 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
16256 SDLoc DL(N);
16257 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
16258 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
16259 }
16260
16261 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
16262 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
16263 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
16264 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
16265 if (ConstN00 && CC == ISD::SETLT) {
16266 EVT VT = N0.getValueType();
16267 SDLoc DL(N0);
16268 const APInt &Imm = ConstN00->getAPIntValue();
16269 if ((Imm + 1).isSignedIntN(12))
16270 return DAG.getSetCC(DL, VT, N0.getOperand(1),
16271 DAG.getConstant(Imm + 1, DL, VT), CC);
16272 }
16273 }
16274
16275 if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
16276 return V;
16277
16278 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16279 return V;
16280 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16281 return V;
16282
16283 // fold (xor (select cond, 0, y), x) ->
16284 // (select cond, x, (xor x, y))
16285 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16286}
16287
16288 // Try to expand a multiply to a sequence of shifts and adds/subs,
16289 // for a machine without a native mul instruction.
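// For example, MulAmt == 7 has the non-adjacent form 8 - 1, so the emitted
// sequence computes (X << 3) - X.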
16290 static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
16291 uint64_t MulAmt) {
16292 SDLoc DL(N);
16293 EVT VT = N->getValueType(0);
16295
16296 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
16297 SDValue N0 = N->getOperand(0);
16298
16299 // Find the Non-adjacent form of the multiplier.
16300 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
16301 if (E & 1) {
16302 bool IsAdd = (E & 3) == 1;
16303 E -= IsAdd ? 1 : -1;
16304 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
16305 DAG.getShiftAmountConstant(I, VT, DL));
16306 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
16307 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
16308 }
16309 }
16310
16311 return Result;
16312}
16313
16314// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
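// For example, X * 20 -> (add (shl X, 4), (shl X, 2)) and
// X * 24 -> (sub (shl X, 5), (shl X, 3)).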
16315 static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
16316 uint64_t MulAmt) {
16317 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
16318 ISD::NodeType Op;
16319 uint64_t ShiftAmt1;
16320 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
16321 Op = ISD::SUB;
16322 ShiftAmt1 = MulAmt + MulAmtLowBit;
16323 } else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
16324 Op = ISD::ADD;
16325 ShiftAmt1 = MulAmt - MulAmtLowBit;
16326 } else {
16327 return SDValue();
16328 }
16329 EVT VT = N->getValueType(0);
16330 SDLoc DL(N);
16331 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16332 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
16333 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16334 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
16335 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
16336}
16337
16338// Try to expand a scalar multiply to a faster sequence.
16339 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
16340 TargetLowering::DAGCombinerInfo &DCI,
16341 const RISCVSubtarget &Subtarget) {
16342
16343 EVT VT = N->getValueType(0);
16344
16345 // LI + MUL is usually smaller than the alternative sequence.
16346 if (DAG.getMachineFunction().getFunction().hasMinSize())
16347 return SDValue();
16348
16349 if (VT != Subtarget.getXLenVT())
16350 return SDValue();
16351
16352 bool ShouldExpandMul =
16353 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
16354 !Subtarget.hasStdExtZmmul();
16355 if (!ShouldExpandMul)
16356 return SDValue();
16357
16358 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
16359 if (!CNode)
16360 return SDValue();
16361 uint64_t MulAmt = CNode->getZExtValue();
16362
16363 // Don't do this if the Xqciac extension is enabled and the MulAmt is a simm12.
16364 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
16365 return SDValue();
16366
16367 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
16368 // We're adding additional uses of X here, and in principle, we should be freezing
16369 // X before doing so. However, adding freeze here causes real regressions, and no
16370 // other target properly freezes X in these cases either.
16371 SDValue X = N->getOperand(0);
16372
16373 if (Subtarget.hasShlAdd(3)) {
16374 for (uint64_t Divisor : {3, 5, 9}) {
16375 if (MulAmt % Divisor != 0)
16376 continue;
16377 uint64_t MulAmt2 = MulAmt / Divisor;
16378 // 3/5/9 * 2^N -> shl (shXadd X, X), N
16379 if (isPowerOf2_64(MulAmt2)) {
16380 SDLoc DL(N);
16381 SDValue X = N->getOperand(0);
16382 // Put the shift first if we can fold a zext into the
16383 // shift forming a slli.uw.
16384 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
16385 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
16386 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
16387 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16388 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
16389 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
16390 Shl);
16391 }
16392 // Otherwise, put the shl second so that it can fold with following
16393 // instructions (e.g. sext or add).
16394 SDValue Mul359 =
16395 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16396 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16397 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
16398 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16399 }
16400
16401 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
16402 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
16403 SDLoc DL(N);
16404 SDValue Mul359 =
16405 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16406 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16407 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16408 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
16409 Mul359);
16410 }
16411 }
16412
16413 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
16414 // shXadd. First check if this is a sum of two powers of 2 because that's
16415 // easy. Then count the trailing zeros up to the first set bit.
16416 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
16417 unsigned ScaleShift = llvm::countr_zero(MulAmt);
16418 if (ScaleShift >= 1 && ScaleShift < 4) {
16419 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
16420 SDLoc DL(N);
16421 SDValue Shift1 =
16422 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16423 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16424 DAG.getConstant(ScaleShift, DL, VT), Shift1);
16425 }
16426 }
16427
16428 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
16429 // This is the two-instruction form; there are also three-instruction
16430 // variants we could implement, e.g.
16431 // (2^(1,2,3) * 3,5,9 + 1) << C2
16432 // 2^(C1>3) * 3,5,9 +/- 1
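// For example, MulAmt == 11 == (5 << 1) + 1 becomes
// (sh1add (sh2add X, X), X), i.e. ((X * 5) << 1) + X.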
16433 for (uint64_t Divisor : {3, 5, 9}) {
16434 uint64_t C = MulAmt - 1;
16435 if (C <= Divisor)
16436 continue;
16437 unsigned TZ = llvm::countr_zero(C);
16438 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
16439 SDLoc DL(N);
16440 SDValue Mul359 =
16441 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16442 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16443 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16444 DAG.getConstant(TZ, DL, VT), X);
16445 }
16446 }
16447
16448 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
16449 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
16450 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
16451 if (ScaleShift >= 1 && ScaleShift < 4) {
16452 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
16453 SDLoc DL(N);
16454 SDValue Shift1 =
16455 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16456 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
16457 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16458 DAG.getConstant(ScaleShift, DL, VT), X));
16459 }
16460 }
16461
16462 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
16463 for (uint64_t Offset : {3, 5, 9}) {
16464 if (isPowerOf2_64(MulAmt + Offset)) {
16465 unsigned ShAmt = Log2_64(MulAmt + Offset);
16466 if (ShAmt >= VT.getSizeInBits())
16467 continue;
16468 SDLoc DL(N);
16469 SDValue Shift1 =
16470 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
16471 SDValue Mul359 =
16472 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16473 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
16474 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
16475 }
16476 }
16477
16478 for (uint64_t Divisor : {3, 5, 9}) {
16479 if (MulAmt % Divisor != 0)
16480 continue;
16481 uint64_t MulAmt2 = MulAmt / Divisor;
16482 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
16483 // of 25 which happen to be quite common.
16484 for (uint64_t Divisor2 : {3, 5, 9}) {
16485 if (MulAmt2 % Divisor2 != 0)
16486 continue;
16487 uint64_t MulAmt3 = MulAmt2 / Divisor2;
16488 if (isPowerOf2_64(MulAmt3)) {
16489 SDLoc DL(N);
16490 SDValue Mul359A =
16491 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16492 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16493 SDValue Mul359B = DAG.getNode(
16494 RISCVISD::SHL_ADD, DL, VT, Mul359A,
16495 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
16496 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
16497 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
16498 }
16499 }
16500 }
16501 }
16502
16503 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
16504 return V;
16505
16506 if (!Subtarget.hasStdExtZmmul())
16507 return expandMulToNAFSequence(N, DAG, MulAmt);
16508
16509 return SDValue();
16510}
16511
16512// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
16513// (bitcast (sra (v2Xi16 (bitcast X)), 15))
16514// Same for other equivalent types with other equivalent constants.
16516 EVT VT = N->getValueType(0);
16517 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16518
16519 // Do this for legal vectors unless they are i1 or i8 vectors.
16520 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
16521 return SDValue();
16522
16523 if (N->getOperand(0).getOpcode() != ISD::AND ||
16524 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
16525 return SDValue();
16526
16527 SDValue And = N->getOperand(0);
16528 SDValue Srl = And.getOperand(0);
16529
16530 APInt V1, V2, V3;
16531 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
16532 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
16533 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
16534 return SDValue();
16535
16536 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
16537 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
16538 V3 != (HalfSize - 1))
16539 return SDValue();
16540
16541 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
16542 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
16543 VT.getVectorElementCount() * 2);
16544 SDLoc DL(N);
16545 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
16546 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
16547 DAG.getConstant(HalfSize - 1, DL, HalfVT));
16548 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
16549}
16550
16551 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
16552 TargetLowering::DAGCombinerInfo &DCI,
16553 const RISCVSubtarget &Subtarget) {
16554 EVT VT = N->getValueType(0);
16555 if (!VT.isVector())
16556 return expandMul(N, DAG, DCI, Subtarget);
16557
16558 SDLoc DL(N);
16559 SDValue N0 = N->getOperand(0);
16560 SDValue N1 = N->getOperand(1);
16561 SDValue MulOper;
16562 unsigned AddSubOpc;
16563
16564 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
16565 // (mul x, add (y, 1)) -> (add x, (mul x, y))
16566 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
16567 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
16568 auto IsAddSubWith1 = [&](SDValue V) -> bool {
16569 AddSubOpc = V->getOpcode();
16570 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
16571 SDValue Opnd = V->getOperand(1);
16572 MulOper = V->getOperand(0);
16573 if (AddSubOpc == ISD::SUB)
16574 std::swap(Opnd, MulOper);
16575 if (isOneOrOneSplat(Opnd))
16576 return true;
16577 }
16578 return false;
16579 };
16580
16581 if (IsAddSubWith1(N0)) {
16582 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
16583 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
16584 }
16585
16586 if (IsAddSubWith1(N1)) {
16587 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
16588 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
16589 }
16590
16591 if (SDValue V = combineBinOpOfZExt(N, DAG))
16592 return V;
16593
16594 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
16595 return V;
16596
16597 return SDValue();
16598}
16599
16600/// According to the property that indexed load/store instructions zero-extend
16601/// their indices, try to narrow the type of index operand.
16602static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
16603 if (isIndexTypeSigned(IndexType))
16604 return false;
16605
16606 if (!N->hasOneUse())
16607 return false;
16608
16609 EVT VT = N.getValueType();
16610 SDLoc DL(N);
16611
16612 // In general, what we're doing here is seeing if we can sink a truncate to
16613 // a smaller element type into the expression tree building our index.
16614 // TODO: We can generalize this and handle a bunch more cases if useful.
16615
16616 // Narrow a buildvector to the narrowest element type. This requires less
16617 // work and less register pressure at high LMUL, and creates smaller constants
16618 // which may be cheaper to materialize.
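// For example, a constant i64 index vector <0, 8, 16, 24> has at most 5
// active bits, so it can be rebuilt with i8 elements; the indexed access
// zero-extends it back, and the index operand shrinks by a factor of 8.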
16619 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
16620 KnownBits Known = DAG.computeKnownBits(N);
16621 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
16622 LLVMContext &C = *DAG.getContext();
16623 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
16624 if (ResultVT.bitsLT(VT.getVectorElementType())) {
16625 N = DAG.getNode(ISD::TRUNCATE, DL,
16626 VT.changeVectorElementType(ResultVT), N);
16627 return true;
16628 }
16629 }
16630
16631 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
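// Example: (shl (zext X:i8 to i64), 2) never needs more than 8 + 2 = 10 bits,
// so the same index can be built as (shl (zext X:i8 to i16), 2); the upper
// bits are supplied by the zero-extending indexed load/store.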
16632 if (N.getOpcode() != ISD::SHL)
16633 return false;
16634
16635 SDValue N0 = N.getOperand(0);
16636 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
16637 N0.getOpcode() != RISCVISD::VZEXT_VL)
16638 return false;
16639 if (!N0->hasOneUse())
16640 return false;
16641
16642 APInt ShAmt;
16643 SDValue N1 = N.getOperand(1);
16644 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
16645 return false;
16646
16647 SDValue Src = N0.getOperand(0);
16648 EVT SrcVT = Src.getValueType();
16649 unsigned SrcElen = SrcVT.getScalarSizeInBits();
16650 unsigned ShAmtV = ShAmt.getZExtValue();
16651 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
16652 NewElen = std::max(NewElen, 8U);
16653
16654 // Skip if NewElen is not narrower than the original extended type.
16655 if (NewElen >= N0.getValueType().getScalarSizeInBits())
16656 return false;
16657
16658 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
16659 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
16660
16661 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
16662 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
16663 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
16664 return true;
16665}
16666
16667/// Try to map an integer comparison with size > XLEN to vector instructions
16668/// before type legalization splits it up into chunks.
16669static SDValue
16670combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
16671 const SDLoc &DL, SelectionDAG &DAG,
16672 const RISCVSubtarget &Subtarget) {
16673 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
16674
16675 if (!Subtarget.hasVInstructions())
16676 return SDValue();
16677
16678 MVT XLenVT = Subtarget.getXLenVT();
16679 EVT OpVT = X.getValueType();
16680 // We're looking for an oversized integer equality comparison.
16681 if (!OpVT.isScalarInteger())
16682 return SDValue();
16683
16684 unsigned OpSize = OpVT.getSizeInBits();
16685 // The size should be larger than XLen and smaller than the maximum vector
16686 // size.
16687 if (OpSize <= Subtarget.getXLen() ||
16688 OpSize > Subtarget.getRealMinVLen() *
16689 Subtarget.getMaxLMULForFixedLengthVectors())
16690 return SDValue();
16691
16692 // Don't perform this combine if constructing the vector will be expensive.
16693 auto IsVectorBitCastCheap = [](SDValue X) {
16694 X = peekThroughBitcasts(X);
16695 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
16696 X.getOpcode() == ISD::LOAD;
16697 };
16698 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
16699 return SDValue();
16700
16701 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
16702 Attribute::NoImplicitFloat))
16703 return SDValue();
16704
16705 // Bail out for non-byte-sized types.
16706 if (!OpVT.isByteSized())
16707 return SDValue();
16708
16709 unsigned VecSize = OpSize / 8;
16710 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
16711 EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
16712
16713 SDValue VecX = DAG.getBitcast(VecVT, X);
16714 SDValue VecY = DAG.getBitcast(VecVT, Y);
16715 SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
16716 SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
16717
16718 SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
16719 DAG.getCondCode(ISD::SETNE), Mask, VL);
16720 return DAG.getSetCC(DL, VT,
16721 DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
16722 DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
16723 VL),
16724 DAG.getConstant(0, DL, XLenVT), CC);
16725}
16726
16727static SDValue performSETCCCombine(SDNode *N,
16728 TargetLowering::DAGCombinerInfo &DCI,
16729 const RISCVSubtarget &Subtarget) {
16730 SelectionDAG &DAG = DCI.DAG;
16731 SDLoc dl(N);
16732 SDValue N0 = N->getOperand(0);
16733 SDValue N1 = N->getOperand(1);
16734 EVT VT = N->getValueType(0);
16735 EVT OpVT = N0.getValueType();
16736
16737 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
16738 // Looking for an equality compare.
16739 if (!isIntEqualitySetCC(Cond))
16740 return SDValue();
16741
16742 if (SDValue V =
16743 combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
16744 return V;
16745
16746 if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
16747 N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
16748 isa<ConstantSDNode>(N0.getOperand(1))) {
16749 const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
16750 // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
16751 if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
16752 AndRHSC.isNegatedPowerOf2()) {
16753 unsigned ShiftBits = AndRHSC.countr_zero();
16754 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
16755 DAG.getConstant(ShiftBits, dl, OpVT));
16756 return DAG.getSetCC(dl, VT, Shift, N1, Cond);
16757 }
16758
16759 // Similar to above but handling the lower 32 bits by using sraiw. Allow
16760 // comparing with constants other than 0 if the constant can be folded into
16761 // addi or xori after shifting.
16762 uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
16763 uint64_t AndRHSInt = AndRHSC.getZExtValue();
16764 if (OpVT == MVT::i64 && AndRHSInt <= 0xffffffff &&
16765 isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
16766 unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
16767 int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
16768 if (NewC >= -2048 && NewC <= 2048) {
16769 SDValue SExt =
16770 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
16771 DAG.getValueType(MVT::i32));
16772 SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
16773 DAG.getConstant(ShiftBits, dl, OpVT));
16774 return DAG.getSetCC(dl, VT, Shift,
16775 DAG.getSignedConstant(NewC, dl, OpVT), Cond);
16776 }
16777 }
16778 }
16779
16780 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
16781 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
16782 // bit 31. Same for setne. C1' may be cheaper to materialize and the
16783 // sext_inreg can become a sext.w instead of a shift pair.
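// For example, (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000): the sext_inreg selects to
// a single sext.w, and the sign-extended constant is a single LUI, whereas
// the zero-extended 0x80000000 needs an extra shift to materialize.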
16784 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
16785 return SDValue();
16786
16787 // RHS needs to be a constant.
16788 auto *N1C = dyn_cast<ConstantSDNode>(N1);
16789 if (!N1C)
16790 return SDValue();
16791
16792 // LHS needs to be (and X, 0xffffffff).
16793 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
16794 !isa<ConstantSDNode>(N0.getOperand(1)) ||
16795 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
16796 return SDValue();
16797
16798 // Don't do this if the sign bit is provably zero, it will be turned back into
16799 // an AND.
16800 APInt SignMask = APInt::getOneBitSet(64, 31);
16801 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
16802 return SDValue();
16803
16804 const APInt &C1 = N1C->getAPIntValue();
16805
16806 // If the constant is larger than 2^32 - 1 it is impossible for both sides
16807 // to be equal.
16808 if (C1.getActiveBits() > 32)
16809 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
16810
16811 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
16812 N0.getOperand(0), DAG.getValueType(MVT::i32));
16813 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
16814 dl, OpVT), Cond);
16815}
16816
16817static SDValue
16818performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16819 const RISCVSubtarget &Subtarget) {
16820 SelectionDAG &DAG = DCI.DAG;
16821 SDValue Src = N->getOperand(0);
16822 EVT VT = N->getValueType(0);
16823 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16824 unsigned Opc = Src.getOpcode();
16825 SDLoc DL(N);
16826
16827 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
16828 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
16829 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
16830 Subtarget.hasStdExtZfhmin())
16831 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
16832
16833 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
16834 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
16835 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
16836 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
16837 return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
16838 Src.getOperand(1));
16839
16840 // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
16841 if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
16842 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
16843
16844 // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
16845 if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
16846 isAllOnesConstant(Src.getOperand(1)) &&
16847 Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
16848 return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
16849 DAG.getAllOnesConstant(DL, VT));
16850
16851 return SDValue();
16852}
16853
16854namespace {
16855// Forward declaration of the structure holding the necessary information to
16856// apply a combine.
16857struct CombineResult;
16858
16859enum ExtKind : uint8_t {
16860 ZExt = 1 << 0,
16861 SExt = 1 << 1,
16862 FPExt = 1 << 2,
16863 BF16Ext = 1 << 3
16864};
16865/// Helper class for folding sign/zero extensions.
16866/// In particular, this class is used for the following combines:
16867/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
16868/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
16869/// mul | mul_vl -> vwmul(u) | vwmul_su
16870/// shl | shl_vl -> vwsll
16871/// fadd -> vfwadd | vfwadd_w
16872/// fsub -> vfwsub | vfwsub_w
16873/// fmul -> vfwmul
16874/// An object of this class represents an operand of the operation we want to
16875/// combine.
16876/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
16877/// NodeExtensionHelper for `a` and one for `b`.
16878///
16879/// This class abstracts away how the extension is materialized and
16880/// how its number of users affects the combines.
16881///
16882/// In particular:
16883/// - VWADD_W is conceptually == add(op0, sext(op1))
16884/// - VWADDU_W == add(op0, zext(op1))
16885/// - VWSUB_W == sub(op0, sext(op1))
16886/// - VWSUBU_W == sub(op0, zext(op1))
16887/// - VFWADD_W == fadd(op0, fpext(op1))
16888/// - VFWSUB_W == fsub(op0, fpext(op1))
16889/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
16890/// zext|sext(smaller_value).
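/// For example, (add_vl (vsext_vl a), (vsext_vl b)) can be rebuilt as
/// vwadd_vl(a, b), while (add_vl y, (vzext_vl b)) with an arbitrary wide y
/// can still use the _W form and become vwaddu_w_vl(y, b).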
16891struct NodeExtensionHelper {
16892 /// Records if this operand is like being zero extended.
16893 bool SupportsZExt;
16894 /// Records if this operand is like being sign extended.
16895 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
16896 /// instance, a splat constant (e.g., 3), would support being both sign and
16897 /// zero extended.
16898 bool SupportsSExt;
16899 /// Records if this operand is like being floating point extended.
16900 bool SupportsFPExt;
16901 /// Records if this operand is extended from bf16.
16902 bool SupportsBF16Ext;
16903 /// This boolean captures whether we care if this operand would still be
16904 /// around after the folding happens.
16905 bool EnforceOneUse;
16906 /// Original value that this NodeExtensionHelper represents.
16907 SDValue OrigOperand;
16908
16909 /// Get the value feeding the extension or the value itself.
16910 /// E.g., for zext(a), this would return a.
16911 SDValue getSource() const {
16912 switch (OrigOperand.getOpcode()) {
16913 case ISD::ZERO_EXTEND:
16914 case ISD::SIGN_EXTEND:
16915 case RISCVISD::VSEXT_VL:
16916 case RISCVISD::VZEXT_VL:
16917 case RISCVISD::FP_EXTEND_VL:
16918 return OrigOperand.getOperand(0);
16919 default:
16920 return OrigOperand;
16921 }
16922 }
16923
16924 /// Check if this instance represents a splat.
16925 bool isSplat() const {
16926 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
16927 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
16928 }
16929
16930 /// Get the extended opcode.
16931 unsigned getExtOpc(ExtKind SupportsExt) const {
16932 switch (SupportsExt) {
16933 case ExtKind::SExt:
16934 return RISCVISD::VSEXT_VL;
16935 case ExtKind::ZExt:
16936 return RISCVISD::VZEXT_VL;
16937 case ExtKind::FPExt:
16938 case ExtKind::BF16Ext:
16939 return RISCVISD::FP_EXTEND_VL;
16940 }
16941 llvm_unreachable("Unknown ExtKind enum");
16942 }
16943
16944 /// Get or create a value that can feed \p Root with the given extension \p
16945 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the
16946 /// original operand unchanged. \see ::getSource().
16947 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
16948 const RISCVSubtarget &Subtarget,
16949 std::optional<ExtKind> SupportsExt) const {
16950 if (!SupportsExt.has_value())
16951 return OrigOperand;
16952
16953 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
16954
16955 SDValue Source = getSource();
16956 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
16957 if (Source.getValueType() == NarrowVT)
16958 return Source;
16959
16960 unsigned ExtOpc = getExtOpc(*SupportsExt);
16961
16962 // If we need an extension, we should be changing the type.
16963 SDLoc DL(OrigOperand);
16964 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
16965 switch (OrigOperand.getOpcode()) {
16966 case ISD::ZERO_EXTEND:
16967 case ISD::SIGN_EXTEND:
16968 case RISCVISD::VSEXT_VL:
16969 case RISCVISD::VZEXT_VL:
16970 case RISCVISD::FP_EXTEND_VL:
16971 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
16972 case ISD::SPLAT_VECTOR:
16973 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
16974 case RISCVISD::VMV_V_X_VL:
16975 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
16976 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
16977 case RISCVISD::VFMV_V_F_VL:
16978 Source = Source.getOperand(1);
16979 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
16980 Source = Source.getOperand(0);
16981 assert(Source.getValueType() == NarrowVT.getVectorElementType());
16982 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
16983 DAG.getUNDEF(NarrowVT), Source, VL);
16984 default:
16985 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
16986 // and that operand should already have the right NarrowVT so no
16987 // extension should be required at this point.
16988 llvm_unreachable("Unsupported opcode");
16989 }
16990 }
16991
16992 /// Helper function to get the narrow type for \p Root.
16993 /// The narrow type is the type of \p Root where we divided the size of each
16994 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
16995 /// \pre Both the narrow type and the original type should be legal.
16996 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
16997 MVT VT = Root->getSimpleValueType(0);
16998
16999 // Determine the narrow size.
17000 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17001
17002 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
17003 : SupportsExt == ExtKind::FPExt
17004 ? MVT::getFloatingPointVT(NarrowSize)
17005 : MVT::getIntegerVT(NarrowSize);
17006
17007 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
17008 "Trying to extend something we can't represent");
17009 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
17010 return NarrowVT;
17011 }
17012
17013 /// Get the opcode to materialize:
17014 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
17015 static unsigned getSExtOpcode(unsigned Opcode) {
17016 switch (Opcode) {
17017 case ISD::ADD:
17018 case RISCVISD::ADD_VL:
17019 case RISCVISD::VWADD_W_VL:
17020 case RISCVISD::VWADDU_W_VL:
17021 case ISD::OR:
17022 case RISCVISD::OR_VL:
17023 return RISCVISD::VWADD_VL;
17024 case ISD::SUB:
17025 case RISCVISD::SUB_VL:
17026 case RISCVISD::VWSUB_W_VL:
17027 case RISCVISD::VWSUBU_W_VL:
17028 return RISCVISD::VWSUB_VL;
17029 case ISD::MUL:
17030 case RISCVISD::MUL_VL:
17031 return RISCVISD::VWMUL_VL;
17032 default:
17033 llvm_unreachable("Unexpected opcode");
17034 }
17035 }
17036
17037 /// Get the opcode to materialize:
17038 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
17039 static unsigned getZExtOpcode(unsigned Opcode) {
17040 switch (Opcode) {
17041 case ISD::ADD:
17042 case RISCVISD::ADD_VL:
17043 case RISCVISD::VWADD_W_VL:
17044 case RISCVISD::VWADDU_W_VL:
17045 case ISD::OR:
17046 case RISCVISD::OR_VL:
17047 return RISCVISD::VWADDU_VL;
17048 case ISD::SUB:
17049 case RISCVISD::SUB_VL:
17050 case RISCVISD::VWSUB_W_VL:
17051 case RISCVISD::VWSUBU_W_VL:
17052 return RISCVISD::VWSUBU_VL;
17053 case ISD::MUL:
17054 case RISCVISD::MUL_VL:
17055 return RISCVISD::VWMULU_VL;
17056 case ISD::SHL:
17057 case RISCVISD::SHL_VL:
17058 return RISCVISD::VWSLL_VL;
17059 default:
17060 llvm_unreachable("Unexpected opcode");
17061 }
17062 }
17063
17064 /// Get the opcode to materialize:
17065 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
17066 static unsigned getFPExtOpcode(unsigned Opcode) {
17067 switch (Opcode) {
17068 case RISCVISD::FADD_VL:
17069 case RISCVISD::VFWADD_W_VL:
17070 return RISCVISD::VFWADD_VL;
17071 case RISCVISD::FSUB_VL:
17072 case RISCVISD::VFWSUB_W_VL:
17073 return RISCVISD::VFWSUB_VL;
17074 case RISCVISD::FMUL_VL:
17075 return RISCVISD::VFWMUL_VL;
17076 case RISCVISD::VFMADD_VL:
17077 return RISCVISD::VFWMADD_VL;
17078 case RISCVISD::VFMSUB_VL:
17079 return RISCVISD::VFWMSUB_VL;
17080 case RISCVISD::VFNMADD_VL:
17081 return RISCVISD::VFWNMADD_VL;
17082 case RISCVISD::VFNMSUB_VL:
17083 return RISCVISD::VFWNMSUB_VL;
17084 default:
17085 llvm_unreachable("Unexpected opcode");
17086 }
17087 }
17088
17089 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
17090 /// newOpcode(a, b).
17091 static unsigned getSUOpcode(unsigned Opcode) {
17092 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
17093 "SU is only supported for MUL");
17094 return RISCVISD::VWMULSU_VL;
17095 }
17096
17097 /// Get the opcode to materialize
17098 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
17099 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
17100 switch (Opcode) {
17101 case ISD::ADD:
17102 case RISCVISD::ADD_VL:
17103 case ISD::OR:
17104 case RISCVISD::OR_VL:
17105 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
17106 : RISCVISD::VWADDU_W_VL;
17107 case ISD::SUB:
17108 case RISCVISD::SUB_VL:
17109 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
17110 : RISCVISD::VWSUBU_W_VL;
17111 case RISCVISD::FADD_VL:
17112 return RISCVISD::VFWADD_W_VL;
17113 case RISCVISD::FSUB_VL:
17114 return RISCVISD::VFWSUB_W_VL;
17115 default:
17116 llvm_unreachable("Unexpected opcode");
17117 }
17118 }
17119
17120 using CombineToTry = std::function<std::optional<CombineResult>(
17121 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
17122 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
17123 const RISCVSubtarget &)>;
17124
17125 /// Check if this node needs to be fully folded or extended for all users.
17126 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
17127
17128 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
17129 const RISCVSubtarget &Subtarget) {
17130 unsigned Opc = OrigOperand.getOpcode();
17131 MVT VT = OrigOperand.getSimpleValueType();
17132
17133 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
17134 "Unexpected Opcode");
17135
17136 // The passthru must be undef for tail agnostic.
17137 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
17138 return;
17139
17140 // Get the scalar value.
17141 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
17142 : OrigOperand.getOperand(1);
17143
17144 // See if we have enough sign bits or zero bits in the scalar to use a
17145 // widening opcode by splatting to smaller element size.
17146 unsigned EltBits = VT.getScalarSizeInBits();
17147 unsigned ScalarBits = Op.getValueSizeInBits();
17148 // If we're not getting all bits from the element, we need special handling.
17149 if (ScalarBits < EltBits) {
17150 // This should only occur on RV32.
17151 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
17152 !Subtarget.is64Bit() && "Unexpected splat");
17153 // vmv.v.x sign extends narrow inputs.
17154 SupportsSExt = true;
17155
17156 // If the input is positive, then sign extend is also zero extend.
17157 if (DAG.SignBitIsZero(Op))
17158 SupportsZExt = true;
17159
17160 EnforceOneUse = false;
17161 return;
17162 }
17163
17164 unsigned NarrowSize = EltBits / 2;
17165 // If the narrow type cannot be expressed with a legal VMV,
17166 // this is not a valid candidate.
17167 if (NarrowSize < 8)
17168 return;
17169
17170 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
17171 SupportsSExt = true;
17172
17173 if (DAG.MaskedValueIsZero(Op,
17174 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
17175 SupportsZExt = true;
17176
17177 EnforceOneUse = false;
17178 }
17179
17180 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17181 return (NarrowEltVT == MVT::f32 ||
17182 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
17183 }
17184
17185 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17186 return NarrowEltVT == MVT::bf16 && Subtarget.hasStdExtZvfbfwma();
17187 }
17188
17189 /// Helper method to set the various fields of this struct based on the
17190 /// type of \p Root.
17191 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
17192 const RISCVSubtarget &Subtarget) {
17193 SupportsZExt = false;
17194 SupportsSExt = false;
17195 SupportsFPExt = false;
17196 SupportsBF16Ext = false;
17197 EnforceOneUse = true;
17198 unsigned Opc = OrigOperand.getOpcode();
17199 // For the nodes we handle below, we end up using their inputs directly: see
17200 // getSource(). However since they either don't have a passthru or we check
17201 // that their passthru is undef, we can safely ignore their mask and VL.
17202 switch (Opc) {
17203 case ISD::ZERO_EXTEND:
17204 case ISD::SIGN_EXTEND: {
17205 MVT VT = OrigOperand.getSimpleValueType();
17206 if (!VT.isVector())
17207 break;
17208
17209 SDValue NarrowElt = OrigOperand.getOperand(0);
17210 MVT NarrowVT = NarrowElt.getSimpleValueType();
17211 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
17212 if (NarrowVT.getVectorElementType() == MVT::i1)
17213 break;
17214
17215 SupportsZExt = Opc == ISD::ZERO_EXTEND;
17216 SupportsSExt = Opc == ISD::SIGN_EXTEND;
17217 break;
17218 }
17219 case RISCVISD::VZEXT_VL:
17220 SupportsZExt = true;
17221 break;
17222 case RISCVISD::VSEXT_VL:
17223 SupportsSExt = true;
17224 break;
17225 case RISCVISD::FP_EXTEND_VL: {
17226 MVT NarrowEltVT =
17227 OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
17228 if (isSupportedFPExtend(NarrowEltVT, Subtarget))
17229 SupportsFPExt = true;
17230 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
17231 SupportsBF16Ext = true;
17232
17233 break;
17234 }
17235 case ISD::SPLAT_VECTOR:
17236 case RISCVISD::VMV_V_X_VL:
17237 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
17238 break;
17239 case RISCVISD::VFMV_V_F_VL: {
17240 MVT VT = OrigOperand.getSimpleValueType();
17241
17242 if (!OrigOperand.getOperand(0).isUndef())
17243 break;
17244
17245 SDValue Op = OrigOperand.getOperand(1);
17246 if (Op.getOpcode() != ISD::FP_EXTEND)
17247 break;
17248
17249 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17250 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
17251 if (NarrowSize != ScalarBits)
17252 break;
17253
17254 if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
17255 SupportsFPExt = true;
17256 if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
17257 Subtarget))
17258 SupportsBF16Ext = true;
17259 break;
17260 }
17261 default:
17262 break;
17263 }
17264 }
17265
17266 /// Check if \p Root supports any extension folding combines.
17267 static bool isSupportedRoot(const SDNode *Root,
17268 const RISCVSubtarget &Subtarget) {
17269 switch (Root->getOpcode()) {
17270 case ISD::ADD:
17271 case ISD::SUB:
17272 case ISD::MUL: {
17273 return Root->getValueType(0).isScalableVector();
17274 }
17275 case ISD::OR: {
17276 return Root->getValueType(0).isScalableVector() &&
17277 Root->getFlags().hasDisjoint();
17278 }
17279 // Vector Widening Integer Add/Sub/Mul Instructions
17280 case RISCVISD::ADD_VL:
17281 case RISCVISD::MUL_VL:
17282 case RISCVISD::VWADD_W_VL:
17283 case RISCVISD::VWADDU_W_VL:
17284 case RISCVISD::SUB_VL:
17285 case RISCVISD::VWSUB_W_VL:
17286 case RISCVISD::VWSUBU_W_VL:
17287 // Vector Widening Floating-Point Add/Sub/Mul Instructions
17288 case RISCVISD::FADD_VL:
17289 case RISCVISD::FSUB_VL:
17290 case RISCVISD::FMUL_VL:
17291 case RISCVISD::VFWADD_W_VL:
17292 case RISCVISD::VFWSUB_W_VL:
17293 return true;
17294 case RISCVISD::OR_VL:
17295 return Root->getFlags().hasDisjoint();
17296 case ISD::SHL:
17297 return Root->getValueType(0).isScalableVector() &&
17298 Subtarget.hasStdExtZvbb();
17299 case RISCVISD::SHL_VL:
17300 return Subtarget.hasStdExtZvbb();
17301 case RISCVISD::VFMADD_VL:
17302 case RISCVISD::VFNMSUB_VL:
17303 case RISCVISD::VFNMADD_VL:
17304 case RISCVISD::VFMSUB_VL:
17305 return true;
17306 default:
17307 return false;
17308 }
17309 }
17310
17311 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
17312 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
17313 const RISCVSubtarget &Subtarget) {
17314 assert(isSupportedRoot(Root, Subtarget) &&
17315 "Trying to build a helper with an "
17316 "unsupported root");
17317 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
17319 OrigOperand = Root->getOperand(OperandIdx);
17320
17321 unsigned Opc = Root->getOpcode();
17322 switch (Opc) {
17323 // We consider
17324 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
17325 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
17326 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
17327 case RISCVISD::VWADD_W_VL:
17328 case RISCVISD::VWADDU_W_VL:
17329 case RISCVISD::VWSUB_W_VL:
17330 case RISCVISD::VWSUBU_W_VL:
17331 case RISCVISD::VFWADD_W_VL:
17332 case RISCVISD::VFWSUB_W_VL:
17333 if (OperandIdx == 1) {
17334 SupportsZExt =
17335 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
17336 SupportsSExt =
17337 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
17338 SupportsFPExt =
17339 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
17340 // There's no existing extension here, so we don't have to worry about
17341 // making sure it gets removed.
17342 EnforceOneUse = false;
17343 break;
17344 }
17345 [[fallthrough]];
17346 default:
17347 fillUpExtensionSupport(Root, DAG, Subtarget);
17348 break;
17349 }
17350 }
17351
17352 /// Helper function to get the Mask and VL from \p Root.
17353 static std::pair<SDValue, SDValue>
17354 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
17355 const RISCVSubtarget &Subtarget) {
17356 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
17357 switch (Root->getOpcode()) {
17358 case ISD::ADD:
17359 case ISD::SUB:
17360 case ISD::MUL:
17361 case ISD::OR:
17362 case ISD::SHL: {
17363 SDLoc DL(Root);
17364 MVT VT = Root->getSimpleValueType(0);
17365 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
17366 }
17367 default:
17368 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
17369 }
17370 }
17371
17372 /// Helper function to check if \p N is commutative with respect to the
17373 /// foldings that are supported by this class.
17374 static bool isCommutative(const SDNode *N) {
17375 switch (N->getOpcode()) {
17376 case ISD::ADD:
17377 case ISD::MUL:
17378 case ISD::OR:
17379 case RISCVISD::ADD_VL:
17380 case RISCVISD::MUL_VL:
17381 case RISCVISD::OR_VL:
17382 case RISCVISD::VWADD_W_VL:
17383 case RISCVISD::VWADDU_W_VL:
17384 case RISCVISD::FADD_VL:
17385 case RISCVISD::FMUL_VL:
17386 case RISCVISD::VFWADD_W_VL:
17387 case RISCVISD::VFMADD_VL:
17388 case RISCVISD::VFNMSUB_VL:
17389 case RISCVISD::VFNMADD_VL:
17390 case RISCVISD::VFMSUB_VL:
17391 return true;
17392 case ISD::SUB:
17393 case RISCVISD::SUB_VL:
17394 case RISCVISD::VWSUB_W_VL:
17395 case RISCVISD::VWSUBU_W_VL:
17396 case RISCVISD::FSUB_VL:
17397 case RISCVISD::VFWSUB_W_VL:
17398 case ISD::SHL:
17399 case RISCVISD::SHL_VL:
17400 return false;
17401 default:
17402 llvm_unreachable("Unexpected opcode");
17403 }
17404 }
17405
17406 /// Get a list of combine to try for folding extensions in \p Root.
17407 /// Note that each returned CombineToTry function doesn't actually modify
17408 /// anything. Instead they produce an optional CombineResult that, if not
17409 /// std::nullopt, needs to be materialized for the combine to be applied.
17410 /// \see CombineResult::materialize.
17411 /// If the related CombineToTry function returns std::nullopt, that means the
17412 /// combine didn't match.
17413 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
17414};
17415
17416/// Helper structure that holds all the necessary information to materialize a
17417/// combine that does some extension folding.
17418struct CombineResult {
17419 /// Opcode to be generated when materializing the combine.
17420 unsigned TargetOpcode;
17421 // No value means no extension is needed.
17422 std::optional<ExtKind> LHSExt;
17423 std::optional<ExtKind> RHSExt;
17424 /// Root of the combine.
17425 SDNode *Root;
17426 /// LHS of the TargetOpcode.
17427 NodeExtensionHelper LHS;
17428 /// RHS of the TargetOpcode.
17429 NodeExtensionHelper RHS;
17430
17431 CombineResult(unsigned TargetOpcode, SDNode *Root,
17432 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
17433 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
17434 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
17435 LHS(LHS), RHS(RHS) {}
17436
17437 /// Return a value that uses TargetOpcode and that can be used to replace
17438 /// Root.
17439 /// The actual replacement is *not* done in that method.
17440 SDValue materialize(SelectionDAG &DAG,
17441 const RISCVSubtarget &Subtarget) const {
17442 SDValue Mask, VL, Passthru;
17443 std::tie(Mask, VL) =
17444 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
17445 switch (Root->getOpcode()) {
17446 default:
17447 Passthru = Root->getOperand(2);
17448 break;
17449 case ISD::ADD:
17450 case ISD::SUB:
17451 case ISD::MUL:
17452 case ISD::OR:
17453 case ISD::SHL:
17454 Passthru = DAG.getUNDEF(Root->getValueType(0));
17455 break;
17456 }
17457 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
17458 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
17459 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
17460 Passthru, Mask, VL);
17461 }
17462};
17463
17464/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17465/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17466/// are zext) and LHS and RHS can be folded into Root.
17467/// AllowExtMask defines which form `ext` can take in this pattern.
17468///
17469/// \note If the pattern can match with both zext and sext, the returned
17470/// CombineResult will feature the zext result.
17471///
17472/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17473/// can be used to apply the pattern.
17474static std::optional<CombineResult>
17475canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
17476 const NodeExtensionHelper &RHS,
17477 uint8_t AllowExtMask, SelectionDAG &DAG,
17478 const RISCVSubtarget &Subtarget) {
17479 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
17480 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17481 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17482 /*RHSExt=*/{ExtKind::ZExt});
17483 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
17484 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17485 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17486 /*RHSExt=*/{ExtKind::SExt});
17487 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
17488 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17489 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17490 /*RHSExt=*/{ExtKind::FPExt});
17491 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
17492 RHS.SupportsBF16Ext)
17493 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17494 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
17495 /*RHSExt=*/{ExtKind::BF16Ext});
17496 return std::nullopt;
17497}
17498
17499/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17500/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17501/// are zext) and LHS and RHS can be folded into Root.
17502///
17503/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17504/// can be used to apply the pattern.
17505static std::optional<CombineResult>
17506canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
17507 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17508 const RISCVSubtarget &Subtarget) {
17509 return canFoldToVWWithSameExtensionImpl(
17510 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
17511 Subtarget);
17512}
17513
17514/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
17515///
17516/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17517/// can be used to apply the pattern.
17518static std::optional<CombineResult>
17519canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
17520 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17521 const RISCVSubtarget &Subtarget) {
17522 if (RHS.SupportsFPExt)
17523 return CombineResult(
17524 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
17525 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
17526
17527 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
17528 // sext/zext?
17529 // Control this behavior behind an option (AllowSplatInVW_W) for testing
17530 // purposes.
17531 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
17532 return CombineResult(
17533 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
17534 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
17535 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
17536 return CombineResult(
17537 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
17538 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
17539 return std::nullopt;
17540}
17541
17542/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
17543///
17544/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17545/// can be used to apply the pattern.
17546static std::optional<CombineResult>
17547canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17548 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17549 const RISCVSubtarget &Subtarget) {
17550 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
17551 Subtarget);
17552}
17553
17554/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
17555///
17556/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17557/// can be used to apply the pattern.
17558static std::optional<CombineResult>
17559canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17560 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17561 const RISCVSubtarget &Subtarget) {
17562 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
17563 Subtarget);
17564}
17565
17566/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
17567///
17568/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17569/// can be used to apply the pattern.
17570static std::optional<CombineResult>
17571canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17572 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17573 const RISCVSubtarget &Subtarget) {
17574 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
17575 Subtarget);
17576}
17577
17578/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
17579///
17580/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17581/// can be used to apply the pattern.
17582static std::optional<CombineResult>
17583canFoldToVWWithBF16EXT(SDNode *Root, const NodeExtensionHelper &LHS,
17584 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17585 const RISCVSubtarget &Subtarget) {
17586 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
17587 Subtarget);
17588}
17589
17590/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
17591///
17592/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17593/// can be used to apply the pattern.
17594static std::optional<CombineResult>
17595canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
17596 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17597 const RISCVSubtarget &Subtarget) {
17598
17599 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
17600 return std::nullopt;
17601 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
17602 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17603 /*RHSExt=*/{ExtKind::ZExt});
17604}
17605
17606SmallVector<NodeExtensionHelper::CombineToTry>
17607NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
17608 SmallVector<CombineToTry> Strategies;
17609 switch (Root->getOpcode()) {
17610 case ISD::ADD:
17611 case ISD::SUB:
17612 case ISD::OR:
17613 case RISCVISD::ADD_VL:
17614 case RISCVISD::SUB_VL:
17615 case RISCVISD::OR_VL:
17616 case RISCVISD::FADD_VL:
17617 case RISCVISD::FSUB_VL:
17618 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
17619 Strategies.push_back(canFoldToVWWithSameExtension);
17620 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
17621 Strategies.push_back(canFoldToVW_W);
17622 break;
17623 case RISCVISD::FMUL_VL:
17624 case RISCVISD::VFMADD_VL:
17625 case RISCVISD::VFMSUB_VL:
17626 case RISCVISD::VFNMADD_VL:
17627 case RISCVISD::VFNMSUB_VL:
17628 Strategies.push_back(canFoldToVWWithSameExtension);
17629 if (Root->getOpcode() == RISCVISD::VFMADD_VL)
17630 Strategies.push_back(canFoldToVWWithBF16EXT);
17631 break;
17632 case ISD::MUL:
17633 case RISCVISD::MUL_VL:
17634 // mul -> vwmul(u)
17635 Strategies.push_back(canFoldToVWWithSameExtension);
17636 // mul -> vwmulsu
17637 Strategies.push_back(canFoldToVW_SU);
17638 break;
17639 case ISD::SHL:
17640 case RISCVISD::SHL_VL:
17641 // shl -> vwsll
17642 Strategies.push_back(canFoldToVWWithZEXT);
17643 break;
17644 case RISCVISD::VWADD_W_VL:
17645 case RISCVISD::VWSUB_W_VL:
17646 // vwadd_w|vwsub_w -> vwadd|vwsub
17647 Strategies.push_back(canFoldToVWWithSEXT);
17648 break;
17649 case RISCVISD::VWADDU_W_VL:
17650 case RISCVISD::VWSUBU_W_VL:
17651 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
17652 Strategies.push_back(canFoldToVWWithZEXT);
17653 break;
17654 case RISCVISD::VFWADD_W_VL:
17655 case RISCVISD::VFWSUB_W_VL:
17656 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
17657 Strategies.push_back(canFoldToVWWithFPEXT);
17658 break;
17659 default:
17660 llvm_unreachable("Unexpected opcode");
17661 }
17662 return Strategies;
17663}
17664} // End anonymous namespace.
17665
17667 // TODO: Extend this to other binops using generic identity logic
17668 assert(N->getOpcode() == RISCVISD::ADD_VL);
17669 SDValue A = N->getOperand(0);
17670 SDValue B = N->getOperand(1);
17671 SDValue Passthru = N->getOperand(2);
17672 if (!Passthru.isUndef())
17673 // TODO: This could be a vmerge instead
17674 return SDValue();
17675 ;
17676 if (ISD::isConstantSplatVectorAllZeros(B.getNode()))
17677 return A;
17678 // Peek through fixed to scalable
17679 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
17680 ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
17681 return A;
17682 return SDValue();
17683}
17684
17685/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
17686/// The supported combines are:
17687/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
17688/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17689/// mul | mul_vl -> vwmul(u) | vwmul_su
17690/// shl | shl_vl -> vwsll
17691/// fadd_vl -> vfwadd | vfwadd_w
17692/// fsub_vl -> vfwsub | vfwsub_w
17693/// fmul_vl -> vfwmul
17694/// vwadd_w(u) -> vwadd(u)
17695/// vwsub_w(u) -> vwsub(u)
17696/// vfwadd_w -> vfwadd
17697/// vfwsub_w -> vfwsub
17698static SDValue combineOp_VLToVWOp_VL(SDNode *N,
17699 TargetLowering::DAGCombinerInfo &DCI,
17700 const RISCVSubtarget &Subtarget) {
17701 SelectionDAG &DAG = DCI.DAG;
17702 if (DCI.isBeforeLegalize())
17703 return SDValue();
17704
17705 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
17706 return SDValue();
17707
17708 SmallVector<SDNode *> Worklist;
17709 SmallPtrSet<SDNode *, 8> Inserted;
17710 Worklist.push_back(N);
17711 Inserted.insert(N);
17712 SmallVector<CombineResult> CombinesToApply;
17713
17714 while (!Worklist.empty()) {
17715 SDNode *Root = Worklist.pop_back_val();
17716
17717 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
17718 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
17719 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
17720 &Inserted](const NodeExtensionHelper &Op) {
17721 if (Op.needToPromoteOtherUsers()) {
17722 for (SDUse &Use : Op.OrigOperand->uses()) {
17723 SDNode *TheUser = Use.getUser();
17724 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
17725 return false;
17726 // We only support the first 2 operands of FMA.
17727 if (Use.getOperandNo() >= 2)
17728 return false;
17729 if (Inserted.insert(TheUser).second)
17730 Worklist.push_back(TheUser);
17731 }
17732 }
17733 return true;
17734 };
17735
17736 // Control the compile time by limiting the number of node we look at in
17737 // total.
17738 if (Inserted.size() > ExtensionMaxWebSize)
17739 return SDValue();
17740
17741 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
17742 NodeExtensionHelper::getSupportedFoldings(Root);
17743
17744 assert(!FoldingStrategies.empty() && "Nothing to be folded");
17745 bool Matched = false;
17746 for (int Attempt = 0;
17747 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
17748 ++Attempt) {
17749
17750 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
17751 FoldingStrategies) {
17752 std::optional<CombineResult> Res =
17753 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
17754 if (Res) {
17755 Matched = true;
17756 CombinesToApply.push_back(*Res);
17757 // All the inputs that are extended need to be folded, otherwise
17758 // we would be leaving the old input (since it may still be used),
17759 // and the new one.
17760 if (Res->LHSExt.has_value())
17761 if (!AppendUsersIfNeeded(LHS))
17762 return SDValue();
17763 if (Res->RHSExt.has_value())
17764 if (!AppendUsersIfNeeded(RHS))
17765 return SDValue();
17766 break;
17767 }
17768 }
17769 std::swap(LHS, RHS);
17770 }
17771 // Right now we do an all or nothing approach.
17772 if (!Matched)
17773 return SDValue();
17774 }
17775 // Store the value for the replacement of the input node separately.
17776 SDValue InputRootReplacement;
17777 // We do the RAUW after we materialize all the combines, because some replaced
17778 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
17779 // some of these nodes may appear in the NodeExtensionHelpers of some of the
17780 // yet-to-be-visited CombinesToApply roots.
17781 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
17782 ValuesToReplace.reserve(CombinesToApply.size());
17783 for (CombineResult Res : CombinesToApply) {
17784 SDValue NewValue = Res.materialize(DAG, Subtarget);
17785 if (!InputRootReplacement) {
17786 assert(Res.Root == N &&
17787 "First element is expected to be the current node");
17788 InputRootReplacement = NewValue;
17789 } else {
17790 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
17791 }
17792 }
17793 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
17794 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
17795 DCI.AddToWorklist(OldNewValues.second.getNode());
17796 }
17797 return InputRootReplacement;
17798}
17799
17800// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
17801// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
17802// y will be the Passthru and cond will be the Mask.
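// Rationale: adding a value that the vmerge zeroed out in its inactive lanes
// is the same as a masked widening add whose inactive lanes fall back to y,
// so y can serve as the passthru and cond as the mask.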
17803static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
17804 unsigned Opc = N->getOpcode();
17805 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17806 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17807
17808 SDValue Y = N->getOperand(0);
17809 SDValue MergeOp = N->getOperand(1);
17810 unsigned MergeOpc = MergeOp.getOpcode();
17811
17812 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
17813 return SDValue();
17814
17815 SDValue X = MergeOp->getOperand(1);
17816
17817 if (!MergeOp.hasOneUse())
17818 return SDValue();
17819
17820 // Passthru should be undef
17821 SDValue Passthru = N->getOperand(2);
17822 if (!Passthru.isUndef())
17823 return SDValue();
17824
17825 // Mask should be all ones
17826 SDValue Mask = N->getOperand(3);
17827 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
17828 return SDValue();
17829
17830 // False value of MergeOp should be all zeros
17831 SDValue Z = MergeOp->getOperand(2);
17832
17833 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
17834 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
17835 Z = Z.getOperand(1);
17836
17837 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
17838 return SDValue();
17839
17840 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
17841 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
17842 N->getFlags());
17843}
17844
17845static SDValue performVWADDSUBW_VLCombine(SDNode *N,
17846 TargetLowering::DAGCombinerInfo &DCI,
17847 const RISCVSubtarget &Subtarget) {
17848 [[maybe_unused]] unsigned Opc = N->getOpcode();
17849 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17850 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17851
17852 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17853 return V;
17854
17855 return combineVWADDSUBWSelect(N, DCI.DAG);
17856}
17857
17858// Helper function for performMemPairCombine.
17859// Try to combine the memory loads/stores LSNode1 and LSNode2
17860// into a single memory pair operation.
17861static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
17862 LSBaseSDNode *LSNode2, SDValue BasePtr,
17863 uint64_t Imm) {
17864 SmallPtrSet<const SDNode *, 32> Visited;
17865 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
17866
17867 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
17868 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
17869 return SDValue();
17870
17871 MachineFunction &MF = DAG.getMachineFunction();
17872 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17873
17874 // The new operation has twice the width.
17875 MVT XLenVT = Subtarget.getXLenVT();
17876 EVT MemVT = LSNode1->getMemoryVT();
17877 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
17878 MachineMemOperand *MMO = LSNode1->getMemOperand();
17879 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
17880 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
17881
17882 if (LSNode1->getOpcode() == ISD::LOAD) {
17883 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
17884 unsigned Opcode;
17885 if (MemVT == MVT::i32)
17886 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
17887 else
17888 Opcode = RISCVISD::TH_LDD;
17889
17890 SDValue Res = DAG.getMemIntrinsicNode(
17891 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
17892 {LSNode1->getChain(), BasePtr,
17893 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
17894 NewMemVT, NewMMO);
17895
17896 SDValue Node1 =
17897 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
17898 SDValue Node2 =
17899 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
17900
17901 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
17902 return Node1;
17903 } else {
17904 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
17905
17906 SDValue Res = DAG.getMemIntrinsicNode(
17907 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
17908 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
17909 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
17910 NewMemVT, NewMMO);
17911
17912 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
17913 return Res;
17914 }
17915}
17916
17917// Try to combine two adjacent loads/stores to a single pair instruction from
17918// the XTHeadMemPair vendor extension.
17919static SDValue performMemPairCombine(SDNode *N,
17920 TargetLowering::DAGCombinerInfo &DCI) {
17921 SelectionDAG &DAG = DCI.DAG;
17922 MachineFunction &MF = DAG.getMachineFunction();
17923 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17924
17925 // Target does not support load/store pair.
17926 if (!Subtarget.hasVendorXTHeadMemPair())
17927 return SDValue();
17928
17929 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
17930 EVT MemVT = LSNode1->getMemoryVT();
17931 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
17932
17933 // No volatile, indexed or atomic loads/stores.
17934 if (!LSNode1->isSimple() || LSNode1->isIndexed())
17935 return SDValue();
17936
17937 // Function to get a base + constant representation from a memory value.
17938 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
17939 if (Ptr->getOpcode() == ISD::ADD)
17940 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
17941 return {Ptr->getOperand(0), C1->getZExtValue()};
17942 return {Ptr, 0};
17943 };
17944
17945 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
17946
17947 SDValue Chain = N->getOperand(0);
17948 for (SDUse &Use : Chain->uses()) {
17949 if (Use.getUser() != N && Use.getResNo() == 0 &&
17950 Use.getUser()->getOpcode() == N->getOpcode()) {
17951 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
17952
17953 // No volatile, indexed or atomic loads/stores.
17954 if (!LSNode2->isSimple() || LSNode2->isIndexed())
17955 continue;
17956
17957 // Check if LSNode1 and LSNode2 have the same type and extension.
17958 if (LSNode1->getOpcode() == ISD::LOAD)
17959 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
17960 cast<LoadSDNode>(LSNode1)->getExtensionType())
17961 continue;
17962
17963 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
17964 continue;
17965
17966 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
17967
17968 // Check if the base pointer is the same for both instruction.
17969 if (Base1 != Base2)
17970 continue;
17971
17972 // Check if the offsets match the XTHeadMemPair encoding constraints.
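// i.e. the lower offset must be a 2-bit field scaled by the pair width:
// one of {0, 8, 16, 24} for an i32 pair, or {0, 16, 32, 48} for an i64 pair.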
17973 bool Valid = false;
17974 if (MemVT == MVT::i32) {
17975 // Check for adjacent i32 values and a 2-bit index.
17976 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
17977 Valid = true;
17978 } else if (MemVT == MVT::i64) {
17979 // Check for adjacent i64 values and a 2-bit index.
17980 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
17981 Valid = true;
17982 }
17983
17984 if (!Valid)
17985 continue;
17986
17987 // Try to combine.
17988 if (SDValue Res =
17989 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
17990 return Res;
17991 }
17992 }
17993
17994 return SDValue();
17995}
17996
17997// Fold
17998// (fp_to_int (froundeven X)) -> fcvt X, rne
17999// (fp_to_int (ftrunc X)) -> fcvt X, rtz
18000// (fp_to_int (ffloor X)) -> fcvt X, rdn
18001// (fp_to_int (fceil X)) -> fcvt X, rup
18002// (fp_to_int (fround X)) -> fcvt X, rmm
18003// (fp_to_int (frint X)) -> fcvt X
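// For instance, on RV64 a (fp_to_sint (ffloor X)) of an f32 collapses to a
// single fcvt.w.s with the rdn static rounding mode, instead of rounding to
// a float first and converting afterwards.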
18004static SDValue performFP_TO_INTCombine(SDNode *N,
18005 TargetLowering::DAGCombinerInfo &DCI,
18006 const RISCVSubtarget &Subtarget) {
18007 SelectionDAG &DAG = DCI.DAG;
18008 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18009 MVT XLenVT = Subtarget.getXLenVT();
18010
18011 SDValue Src = N->getOperand(0);
18012
18013 // Don't do this for strict-fp Src.
18014 if (Src->isStrictFPOpcode())
18015 return SDValue();
18016
18017 // Ensure the FP type is legal.
18018 if (!TLI.isTypeLegal(Src.getValueType()))
18019 return SDValue();
18020
18021 // Don't do this for f16 with Zfhmin and not Zfh.
18022 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18023 return SDValue();
18024
18025 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18026 // If the result is invalid, we didn't find a foldable instruction.
18027 if (FRM == RISCVFPRndMode::Invalid)
18028 return SDValue();
18029
18030 SDLoc DL(N);
18031 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
18032 EVT VT = N->getValueType(0);
18033
18034 if (VT.isVector() && TLI.isTypeLegal(VT)) {
18035 MVT SrcVT = Src.getSimpleValueType();
18036 MVT SrcContainerVT = SrcVT;
18037 MVT ContainerVT = VT.getSimpleVT();
18038 SDValue XVal = Src.getOperand(0);
18039
18040 // For widening and narrowing conversions we just combine it into a
18041 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
18042 // end up getting lowered to their appropriate pseudo instructions based on
18043 // their operand types
18044 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
18045 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
18046 return SDValue();
18047
18048 // Make fixed-length vectors scalable first
18049 if (SrcVT.isFixedLengthVector()) {
18050 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
18051 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
18052 ContainerVT =
18053 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
18054 }
18055
18056 auto [Mask, VL] =
18057 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
18058
18059 SDValue FpToInt;
18060 if (FRM == RISCVFPRndMode::RTZ) {
18061 // Use the dedicated trunc static rounding mode if we're truncating so we
18062 // don't need to generate calls to fsrmi/fsrm
18063 unsigned Opc =
18064 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
18065 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
18066 } else {
18067 unsigned Opc =
18068 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
18069 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
18070 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
18071 }
18072
18073 // If converted from fixed-length to scalable, convert back
18074 if (VT.isFixedLengthVector())
18075 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
18076
18077 return FpToInt;
18078 }
18079
18080 // Only handle XLen or i32 types. Other types narrower than XLen will
18081 // eventually be legalized to XLenVT.
18082 if (VT != MVT::i32 && VT != XLenVT)
18083 return SDValue();
18084
18085 unsigned Opc;
18086 if (VT == XLenVT)
18087 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18088 else
18089 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18090
18091 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
18092 DAG.getTargetConstant(FRM, DL, XLenVT));
18093 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
18094}
18095
18096// Fold
18097// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
18098// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
18099// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
18100// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
18101// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
18102// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
18103static SDValue performFP_TO_INT_SATCombine(SDNode *N,
18104 TargetLowering::DAGCombinerInfo &DCI,
18105 const RISCVSubtarget &Subtarget) {
18106 SelectionDAG &DAG = DCI.DAG;
18107 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18108 MVT XLenVT = Subtarget.getXLenVT();
18109
18110 // Only handle XLen types. Other types narrower than XLen will eventually be
18111 // legalized to XLenVT.
18112 EVT DstVT = N->getValueType(0);
18113 if (DstVT != XLenVT)
18114 return SDValue();
18115
18116 SDValue Src = N->getOperand(0);
18117
18118 // Don't do this for strict-fp Src.
18119 if (Src->isStrictFPOpcode())
18120 return SDValue();
18121
18122 // Ensure the FP type is also legal.
18123 if (!TLI.isTypeLegal(Src.getValueType()))
18124 return SDValue();
18125
18126 // Don't do this for f16 with Zfhmin and not Zfh.
18127 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18128 return SDValue();
18129
18130 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18131
18132 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18133 if (FRM == RISCVFPRndMode::Invalid)
18134 return SDValue();
18135
18136 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
18137
18138 unsigned Opc;
18139 if (SatVT == DstVT)
18140 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18141 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
18142 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18143 else
18144 return SDValue();
18145 // FIXME: Support other SatVTs by clamping before or after the conversion.
18146
18147 Src = Src.getOperand(0);
18148
18149 SDLoc DL(N);
18150 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
18151 DAG.getTargetConstant(FRM, DL, XLenVT));
18152
18153 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
18154 // extend.
18155 if (Opc == RISCVISD::FCVT_WU_RV64)
18156 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
18157
18158 // RISC-V FP-to-int conversions saturate to the destination register size, but
18159 // don't produce 0 for nan.
18160 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
18161 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
18162}
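// Illustrative instance of the fold above (types chosen for exposition): on
// RV64, (fp_to_sint_sat i64 (ftrunc f64 %x)) becomes
//   (select (setuo %x, %x), 0, (FCVT_X %x, rtz))
// i.e. a single fcvt using the static RTZ rounding mode plus a NaN check.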
18163
18164// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
18165// smaller than XLenVT.
18166 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
18167 const RISCVSubtarget &Subtarget) {
18168 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
18169
18170 SDValue Src = N->getOperand(0);
18171 if (Src.getOpcode() != ISD::BSWAP)
18172 return SDValue();
18173
18174 EVT VT = N->getValueType(0);
18175 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
18176 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
18177 return SDValue();
18178
18179 SDLoc DL(N);
18180 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
18181}
18182
18183 static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
18184 const RISCVSubtarget &Subtarget) {
18185 // Fold:
18186 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
18187
18188 // Check if its first operand is a vp.load.
18189 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
18190 if (!VPLoad)
18191 return SDValue();
18192
18193 EVT LoadVT = VPLoad->getValueType(0);
18194 // We do not have a strided_load version for masks, and the evl of vp.reverse
18195 // and vp.load should always be the same.
18196 if (!LoadVT.getVectorElementType().isByteSized() ||
18197 N->getOperand(2) != VPLoad->getVectorLength() ||
18198 !N->getOperand(0).hasOneUse())
18199 return SDValue();
18200
18201 // Check if the mask of the outer vp.reverse is all 1's.
18202 if (!isOneOrOneSplat(N->getOperand(1)))
18203 return SDValue();
18204
18205 SDValue LoadMask = VPLoad->getMask();
18206 // If Mask is all ones, then load is unmasked and can be reversed.
18207 if (!isOneOrOneSplat(LoadMask)) {
18208 // If the mask is not all ones, we can reverse the load if the mask was also
18209 // reversed by an unmasked vp.reverse with the same EVL.
18210 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18211 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
18212 LoadMask.getOperand(2) != VPLoad->getVectorLength())
18213 return SDValue();
18214 LoadMask = LoadMask.getOperand(0);
18215 }
18216
18217 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
18218 SDLoc DL(N);
18219 MVT XLenVT = Subtarget.getXLenVT();
18220 SDValue NumElem = VPLoad->getVectorLength();
18221 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
18222
18223 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18224 DAG.getConstant(1, DL, XLenVT));
18225 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18226 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18227 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
18228 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18229
18230 MachineFunction &MF = DAG.getMachineFunction();
18231 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
18232 MachineMemOperand *MMO = MF.getMachineMemOperand(
18233 PtrInfo, VPLoad->getMemOperand()->getFlags(),
18234 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
18235
18236 SDValue Ret = DAG.getStridedLoadVP(
18237 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
18238 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
18239
18240 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
18241
18242 return Ret;
18243}
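// Illustrative example of the combine above (concrete values chosen for
// exposition): for a vp.reverse of an unmasked vp.load of <vscale x 2 x i32>
// at address p with EVL e, this emits a strided load with base p + (e - 1) * 4
// and stride -4, so element 0 of the result is the last element loaded.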
18244
18245 static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG,
18246 const RISCVSubtarget &Subtarget) {
18247 // Fold:
18248 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
18249 // -1, MASK)
18250 auto *VPStore = cast<VPStoreSDNode>(N);
18251
18252 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
18253 return SDValue();
18254
18255 SDValue VPReverse = VPStore->getValue();
18256 EVT ReverseVT = VPReverse->getValueType(0);
18257
18258 // We do not have a strided_store version for masks, and the evl of vp.reverse
18259 // and vp.store should always be the same.
18260 if (!ReverseVT.getVectorElementType().isByteSized() ||
18261 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
18262 !VPReverse.hasOneUse())
18263 return SDValue();
18264
18265 SDValue StoreMask = VPStore->getMask();
18266 // If Mask is all ones, then the store is unmasked and can be reversed.
18267 if (!isOneOrOneSplat(StoreMask)) {
18268 // If the mask is not all ones, we can reverse the store if the mask was
18269 // also reversed by an unmasked vp.reverse with the same EVL.
18270 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18271 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
18272 StoreMask.getOperand(2) != VPStore->getVectorLength())
18273 return SDValue();
18274 StoreMask = StoreMask.getOperand(0);
18275 }
18276
18277 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
18278 SDLoc DL(N);
18279 MVT XLenVT = Subtarget.getXLenVT();
18280 SDValue NumElem = VPStore->getVectorLength();
18281 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
18282
18283 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18284 DAG.getConstant(1, DL, XLenVT));
18285 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18286 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18287 SDValue Base =
18288 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
18289 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18290
18291 MachineFunction &MF = DAG.getMachineFunction();
18292 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
18293 MachineMemOperand *MMO = MF.getMachineMemOperand(
18294 PtrInfo, VPStore->getMemOperand()->getFlags(),
18295 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
18296
18297 return DAG.getStridedStoreVP(
18298 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
18299 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
18300 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
18301 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
18302}
18303
18304// Peephole avgceil pattern.
18305// %1 = zext <N x i8> %a to <N x i32>
18306// %2 = zext <N x i8> %b to <N x i32>
18307// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
18308// %4 = add nuw nsw <N x i32> %3, %2
18309// %5 = lshr <N x i32> %4, splat (i32 1)
18310// %6 = trunc <N x i32> %5 to <N x i8>
18311 static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG,
18312 const RISCVSubtarget &Subtarget) {
18313 EVT VT = N->getValueType(0);
18314
18315 // Ignore fixed vectors.
18316 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18317 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
18318 return SDValue();
18319
18320 SDValue In = N->getOperand(0);
18321 SDValue Mask = N->getOperand(1);
18322 SDValue VL = N->getOperand(2);
18323
18324 // Input should be a vp_srl with same mask and VL.
18325 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
18326 In.getOperand(3) != VL)
18327 return SDValue();
18328
18329 // Shift amount should be 1.
18330 if (!isOneOrOneSplat(In.getOperand(1)))
18331 return SDValue();
18332
18333 // Shifted value should be a vp_add with same mask and VL.
18334 SDValue LHS = In.getOperand(0);
18335 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
18336 LHS.getOperand(3) != VL)
18337 return SDValue();
18338
18339 SDValue Operands[3];
18340
18341 // Matches another VP_ADD with same VL and Mask.
18342 auto FindAdd = [&](SDValue V, SDValue Other) {
18343 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
18344 V.getOperand(3) != VL)
18345 return false;
18346
18347 Operands[0] = Other;
18348 Operands[1] = V.getOperand(1);
18349 Operands[2] = V.getOperand(0);
18350 return true;
18351 };
18352
18353 // We need to find another VP_ADD in one of the operands.
18354 SDValue LHS0 = LHS.getOperand(0);
18355 SDValue LHS1 = LHS.getOperand(1);
18356 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
18357 return SDValue();
18358
18359 // Now we have three operands of two additions. Check that one of them is a
18360 // constant vector with ones.
18361 auto I = llvm::find_if(Operands,
18362 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
18363 if (I == std::end(Operands))
18364 return SDValue();
18365 // We found a vector with ones, move it to the end of the Operands array.
18366 std::swap(*I, Operands[2]);
18367
18368 // Make sure the other 2 operands can be promoted from the result type.
18369 for (SDValue Op : drop_end(Operands)) {
18370 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
18371 Op.getOperand(2) != VL)
18372 return SDValue();
18373 // Input must be the same size or smaller than our result.
18374 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
18375 return SDValue();
18376 }
18377
18378 // Pattern is detected.
18379 // Rebuild the zero extends in case the inputs are smaller than our result.
18380 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
18381 Operands[0].getOperand(0), Mask, VL);
18382 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
18383 Operands[1].getOperand(0), Mask, VL);
18384 // Build an AVGCEILU_VL, which will be selected as a VAADDU with RNU rounding
18385 // mode.
18386 SDLoc DL(N);
18387 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
18388 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
18389}
18390
18391// Convert from one FMA opcode to another based on whether we are negating the
18392// multiply result and/or the accumulator.
18393// NOTE: Only supports RVV operations with VL.
18394static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
18395 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
18396 if (NegMul) {
18397 // clang-format off
18398 switch (Opcode) {
18399 default: llvm_unreachable("Unexpected opcode");
18400 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18401 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18402 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18403 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18404 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18405 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18406 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18407 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18408 }
18409 // clang-format on
18410 }
18411
18412 // Negating the accumulator changes ADD<->SUB.
18413 if (NegAcc) {
18414 // clang-format off
18415 switch (Opcode) {
18416 default: llvm_unreachable("Unexpected opcode");
18417 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18418 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18419 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18420 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18421 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18422 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18423 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18424 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18425 }
18426 // clang-format on
18427 }
18428
18429 return Opcode;
18430}
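// For example, reading straight off the tables above:
//   negateFMAOpcode(VFMADD_VL, /*NegMul=*/true,  /*NegAcc=*/false) == VFNMSUB_VL
//   negateFMAOpcode(VFMADD_VL, /*NegMul=*/false, /*NegAcc=*/true)  == VFMSUB_VL
//   negateFMAOpcode(VFMADD_VL, /*NegMul=*/true,  /*NegAcc=*/true)  == VFNMADD_VL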
18431
18432 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
18433 // Fold FNEG_VL into FMA opcodes.
18434 // The first operand of strict-fp is chain.
18435 bool IsStrict =
18436 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
18437 unsigned Offset = IsStrict ? 1 : 0;
18438 SDValue A = N->getOperand(0 + Offset);
18439 SDValue B = N->getOperand(1 + Offset);
18440 SDValue C = N->getOperand(2 + Offset);
18441 SDValue Mask = N->getOperand(3 + Offset);
18442 SDValue VL = N->getOperand(4 + Offset);
18443
18444 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
18445 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
18446 V.getOperand(2) == VL) {
18447 // Return the negated input.
18448 V = V.getOperand(0);
18449 return true;
18450 }
18451
18452 return false;
18453 };
18454
18455 bool NegA = invertIfNegative(A);
18456 bool NegB = invertIfNegative(B);
18457 bool NegC = invertIfNegative(C);
18458
18459 // If no operands are negated, we're done.
18460 if (!NegA && !NegB && !NegC)
18461 return SDValue();
18462
18463 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
18464 if (IsStrict)
18465 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
18466 {N->getOperand(0), A, B, C, Mask, VL});
18467 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
18468 VL);
18469}
18470
18471 static SDValue performVFMADD_VLCombine(SDNode *N,
18472 TargetLowering::DAGCombinerInfo &DCI,
18473 const RISCVSubtarget &Subtarget) {
18474 SelectionDAG &DAG = DCI.DAG;
18475
18476 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
18477 return V;
18478
18479 // FIXME: Ignore strict opcodes for now.
18480 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
18481 return SDValue();
18482
18483 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18484}
18485
18486 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
18487 const RISCVSubtarget &Subtarget) {
18488 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
18489
18490 EVT VT = N->getValueType(0);
18491
18492 if (VT != Subtarget.getXLenVT())
18493 return SDValue();
18494
18495 if (!isa<ConstantSDNode>(N->getOperand(1)))
18496 return SDValue();
18497 uint64_t ShAmt = N->getConstantOperandVal(1);
18498
18499 SDValue N0 = N->getOperand(0);
18500
18501 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
18502 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
18503 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
18504 unsigned ExtSize =
18505 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
18506 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
18507 N0.getOperand(0).hasOneUse() &&
18508 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
18509 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
18510 if (LShAmt < ExtSize) {
18511 unsigned Size = VT.getSizeInBits();
18512 SDLoc ShlDL(N0.getOperand(0));
18513 SDValue Shl =
18514 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
18515 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
18516 SDLoc DL(N);
18517 return DAG.getNode(ISD::SRA, DL, VT, Shl,
18518 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
18519 }
18520 }
18521 }
18522
18523 if (ShAmt > 32 || VT != MVT::i64)
18524 return SDValue();
18525
18526 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
18527 // FIXME: Should this be a generic combine? There's a similar combine on X86.
18528 //
18529 // Also try these folds where an add or sub is in the middle.
18530 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
18531 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
18532 SDValue Shl;
18533 ConstantSDNode *AddC = nullptr;
18534
18535 // We might have an ADD or SUB between the SRA and SHL.
18536 bool IsAdd = N0.getOpcode() == ISD::ADD;
18537 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
18538 // Other operand needs to be a constant we can modify.
18539 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
18540 if (!AddC)
18541 return SDValue();
18542
18543 // AddC needs to have at least 32 trailing zeros.
18544 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
18545 return SDValue();
18546
18547 // All users should be a shift by constant less than or equal to 32. This
18548 // ensures we'll do this optimization for each of them to produce an
18549 // add/sub+sext_inreg they can all share.
18550 for (SDNode *U : N0->users()) {
18551 if (U->getOpcode() != ISD::SRA ||
18552 !isa<ConstantSDNode>(U->getOperand(1)) ||
18553 U->getConstantOperandVal(1) > 32)
18554 return SDValue();
18555 }
18556
18557 Shl = N0.getOperand(IsAdd ? 0 : 1);
18558 } else {
18559 // Not an ADD or SUB.
18560 Shl = N0;
18561 }
18562
18563 // Look for a shift left by 32.
18564 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
18565 Shl.getConstantOperandVal(1) != 32)
18566 return SDValue();
18567
18568 // If we didn't look through an add/sub, then the shl should have one use.
18569 // If we did look through an add/sub, the sext_inreg we create is free so
18570 // we're only creating 2 new instructions. It's enough to only remove the
18571 // original sra+add/sub.
18572 if (!AddC && !Shl.hasOneUse())
18573 return SDValue();
18574
18575 SDLoc DL(N);
18576 SDValue In = Shl.getOperand(0);
18577
18578 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
18579 // constant.
18580 if (AddC) {
18581 SDValue ShiftedAddC =
18582 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
18583 if (IsAdd)
18584 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
18585 else
18586 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
18587 }
18588
18589 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
18590 DAG.getValueType(MVT::i32));
18591 if (ShAmt == 32)
18592 return SExt;
18593
18594 return DAG.getNode(
18595 ISD::SHL, DL, MVT::i64, SExt,
18596 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
18597}
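// Illustrative instance of the combine above (shift amounts chosen for
// exposition): with ShAmt = 29, (sra (shl X, 32), 29) becomes
// (shl (sext_inreg X, i32), 3).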
18598
18599 // Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
18600// the result is used as the condition of a br_cc or select_cc we can invert,
18601// inverting the setcc is free, and Z is 0/1. Caller will invert the
18602// br_cc/select_cc.
18603 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
18604 bool IsAnd = Cond.getOpcode() == ISD::AND;
18605 if (!IsAnd && Cond.getOpcode() != ISD::OR)
18606 return SDValue();
18607
18608 if (!Cond.hasOneUse())
18609 return SDValue();
18610
18611 SDValue Setcc = Cond.getOperand(0);
18612 SDValue Xor = Cond.getOperand(1);
18613 // Canonicalize setcc to LHS.
18614 if (Setcc.getOpcode() != ISD::SETCC)
18615 std::swap(Setcc, Xor);
18616 // LHS should be a setcc and RHS should be an xor.
18617 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
18618 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
18619 return SDValue();
18620
18621 // If the condition is an And, SimplifyDemandedBits may have changed
18622 // (xor Z, 1) to (not Z).
18623 SDValue Xor1 = Xor.getOperand(1);
18624 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
18625 return SDValue();
18626
18627 EVT VT = Cond.getValueType();
18628 SDValue Xor0 = Xor.getOperand(0);
18629
18630 // The LHS of the xor needs to be 0/1.
18631 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
18632 if (!DAG.MaskedValueIsZero(Xor0, Mask))
18633 return SDValue();
18634
18635 // We can only invert integer setccs.
18636 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
18637 if (!SetCCOpVT.isScalarInteger())
18638 return SDValue();
18639
18640 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
18641 if (ISD::isIntEqualitySetCC(CCVal)) {
18642 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
18643 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
18644 Setcc.getOperand(1), CCVal);
18645 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
18646 // Invert (setlt 0, X) by converting to (setlt X, 1).
18647 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
18648 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
18649 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
18650 // Invert (setlt X, 1) by converting to (setlt 0, X).
18651 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
18652 DAG.getConstant(0, SDLoc(Setcc), VT),
18653 Setcc.getOperand(0), CCVal);
18654 } else
18655 return SDValue();
18656
18657 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
18658 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
18659}
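// Illustrative example (names chosen for exposition): for a condition
//   (and (setcc eq A, B), (xor Z, 1))   with Z known to be 0 or 1,
// this returns (or (setcc ne A, B), Z); the caller then inverts the
// br_cc/select_cc condition, so the xor needs no extra instructions.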
18660
18661// Perform common combines for BR_CC and SELECT_CC conditions.
18662static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
18663 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
18664 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
18665
18666 // Since an arithmetic right shift always preserves the sign bit,
18667 // the shift can be omitted.
18668 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
18669 // setge (sra X, N), 0 -> setge X, 0
18670 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
18671 LHS.getOpcode() == ISD::SRA) {
18672 LHS = LHS.getOperand(0);
18673 return true;
18674 }
18675
18676 if (!ISD::isIntEqualitySetCC(CCVal))
18677 return false;
18678
18679 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
18680 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
18681 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
18682 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
18683 // If we're looking for eq 0 instead of ne 0, we need to invert the
18684 // condition.
18685 bool Invert = CCVal == ISD::SETEQ;
18686 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
18687 if (Invert)
18688 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18689
18690 RHS = LHS.getOperand(1);
18691 LHS = LHS.getOperand(0);
18692 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
18693
18694 CC = DAG.getCondCode(CCVal);
18695 return true;
18696 }
18697
18698 // If XOR is reused and has an immediate that will fit in XORI,
18699 // do not fold.
18700 auto isXorImmediate = [](const SDValue &Op) -> bool {
18701 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
18702 return isInt<12>(XorCnst->getSExtValue());
18703 return false;
18704 };
18705 // Fold (X(i1) ^ 1) == 0 -> X != 0
18706 auto singleBitOp = [&DAG](const SDValue &VarOp,
18707 const SDValue &ConstOp) -> bool {
18708 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
18709 const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
18710 return (XorCnst->getSExtValue() == 1) &&
18711 DAG.MaskedValueIsZero(VarOp, Mask);
18712 }
18713 return false;
18714 };
18715 auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
18716 for (const SDNode *UserNode : Op->users()) {
18717 const unsigned Opcode = UserNode->getOpcode();
18718 if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
18719 return false;
18720 }
18721 return true;
18722 };
18723 auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
18724 const SDValue &LHS, const SDValue &RHS) -> bool {
18725 return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
18726 (!isXorImmediate(LHS.getOperand(1)) ||
18727 singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
18728 onlyUsedBySelectOrBR(LHS));
18729 };
18730 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
18731 if (isFoldableXorEq(LHS, RHS)) {
18732 RHS = LHS.getOperand(1);
18733 LHS = LHS.getOperand(0);
18734 return true;
18735 }
18736 // Fold ((sext (xor X, C)), 0, eq/ne) -> ((sext X), C, eq/ne)
18737 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
18738 const SDValue LHS0 = LHS.getOperand(0);
18739 if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
18740 // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y))
18741 RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18742 LHS0.getOperand(1), LHS.getOperand(1));
18743 LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18744 LHS0.getOperand(0), LHS.getOperand(1));
18745 return true;
18746 }
18747 }
18748
18749 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
18750 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
18751 LHS.getOperand(1).getOpcode() == ISD::Constant) {
18752 SDValue LHS0 = LHS.getOperand(0);
18753 if (LHS0.getOpcode() == ISD::AND &&
18754 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
18755 uint64_t Mask = LHS0.getConstantOperandVal(1);
18756 uint64_t ShAmt = LHS.getConstantOperandVal(1);
18757 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
18758 // XAndesPerf supports branch on test bit.
18759 if (Subtarget.hasVendorXAndesPerf()) {
18760 LHS =
18761 DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
18762 DAG.getConstant(Mask, DL, LHS.getValueType()));
18763 return true;
18764 }
18765
18766 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
18767 CC = DAG.getCondCode(CCVal);
18768
18769 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
18770 LHS = LHS0.getOperand(0);
18771 if (ShAmt != 0)
18772 LHS =
18773 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
18774 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
18775 return true;
18776 }
18777 }
18778 }
18779
18780 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
18781 // This can occur when legalizing some floating point comparisons.
18782 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
18783 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
18784 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18785 CC = DAG.getCondCode(CCVal);
18786 RHS = DAG.getConstant(0, DL, LHS.getValueType());
18787 return true;
18788 }
18789
18790 if (isNullConstant(RHS)) {
18791 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
18792 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18793 CC = DAG.getCondCode(CCVal);
18794 LHS = NewCond;
18795 return true;
18796 }
18797 }
18798
18799 return false;
18800}
18801
18802// Fold
18803// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
18804// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
18805// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
18806// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
18807// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
18808// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
18809 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
18810 SDValue TrueVal, SDValue FalseVal,
18811 bool Swapped) {
18812 bool Commutative = true;
18813 unsigned Opc = TrueVal.getOpcode();
18814 switch (Opc) {
18815 default:
18816 return SDValue();
18817 case ISD::SHL:
18818 case ISD::SRA:
18819 case ISD::SRL:
18820 case ISD::SUB:
18821 case ISD::ROTL:
18822 case ISD::ROTR:
18823 Commutative = false;
18824 break;
18825 case ISD::ADD:
18826 case ISD::OR:
18827 case ISD::XOR:
18828 case ISD::UMIN:
18829 case ISD::UMAX:
18830 break;
18831 }
18832
18833 if (!TrueVal.hasOneUse())
18834 return SDValue();
18835
18836 unsigned OpToFold;
18837 if (FalseVal == TrueVal.getOperand(0))
18838 OpToFold = 0;
18839 else if (Commutative && FalseVal == TrueVal.getOperand(1))
18840 OpToFold = 1;
18841 else
18842 return SDValue();
18843
18844 EVT VT = N->getValueType(0);
18845 SDLoc DL(N);
18846 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
18847 EVT OtherOpVT = OtherOp.getValueType();
18848 SDValue IdentityOperand =
18849 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
18850 if (!Commutative)
18851 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
18852 assert(IdentityOperand && "No identity operand!");
18853
18854 if (Swapped)
18855 std::swap(OtherOp, IdentityOperand);
18856 SDValue NewSel =
18857 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
18858 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
18859}
18860
18861 // This tries to get rid of the `select` and `icmp` that are being used to
18862 // handle targets that do not support `cttz(0)`/`ctlz(0)`.
18863 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
18864 SDValue Cond = N->getOperand(0);
18865
18866 // This represents either CTTZ or CTLZ instruction.
18867 SDValue CountZeroes;
18868
18869 SDValue ValOnZero;
18870
18871 if (Cond.getOpcode() != ISD::SETCC)
18872 return SDValue();
18873
18874 if (!isNullConstant(Cond->getOperand(1)))
18875 return SDValue();
18876
18877 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
18878 if (CCVal == ISD::CondCode::SETEQ) {
18879 CountZeroes = N->getOperand(2);
18880 ValOnZero = N->getOperand(1);
18881 } else if (CCVal == ISD::CondCode::SETNE) {
18882 CountZeroes = N->getOperand(1);
18883 ValOnZero = N->getOperand(2);
18884 } else {
18885 return SDValue();
18886 }
18887
18888 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
18889 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
18890 CountZeroes = CountZeroes.getOperand(0);
18891
18892 if (CountZeroes.getOpcode() != ISD::CTTZ &&
18893 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
18894 CountZeroes.getOpcode() != ISD::CTLZ &&
18895 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
18896 return SDValue();
18897
18898 if (!isNullConstant(ValOnZero))
18899 return SDValue();
18900
18901 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
18902 if (Cond->getOperand(0) != CountZeroesArgument)
18903 return SDValue();
18904
18905 unsigned BitWidth = CountZeroes.getValueSizeInBits();
18906 if (!isPowerOf2_32(BitWidth))
18907 return SDValue();
18908
18909 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
18910 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
18911 CountZeroes.getValueType(), CountZeroesArgument);
18912 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
18913 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
18914 CountZeroes.getValueType(), CountZeroesArgument);
18915 }
18916
18917 SDValue BitWidthMinusOne =
18918 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
18919
18920 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
18921 CountZeroes, BitWidthMinusOne);
18922 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
18923}
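// Illustrative example (i32 width chosen for exposition):
//   (select (seteq X, 0), 0, (cttz X))
// becomes (and (cttz X), 31): when X is zero, ISD::CTTZ returns the bit width
// (32), and masking with 31 yields the required 0, so the compare and select
// disappear.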
18924
18925 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
18926 const RISCVSubtarget &Subtarget) {
18927 SDValue Cond = N->getOperand(0);
18928 SDValue True = N->getOperand(1);
18929 SDValue False = N->getOperand(2);
18930 SDLoc DL(N);
18931 EVT VT = N->getValueType(0);
18932 EVT CondVT = Cond.getValueType();
18933
18934 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
18935 return SDValue();
18936
18937 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
18938 // BEXTI, where C is a power of 2.
18939 if (Subtarget.hasBEXTILike() && VT.isScalarInteger() &&
18940 (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
18941 SDValue LHS = Cond.getOperand(0);
18942 SDValue RHS = Cond.getOperand(1);
18943 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18944 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
18945 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
18946 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
18947 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
18948 return DAG.getSelect(DL, VT,
18949 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
18950 False, True);
18951 }
18952 }
18953 return SDValue();
18954}
18955
18956static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
18957 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
18958 return false;
18959
18960 SwapCC = false;
18961 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
18962 std::swap(TrueVal, FalseVal);
18963 SwapCC = true;
18964 }
18965
18966 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
18967 return false;
18968
18969 SDValue A = FalseVal.getOperand(0);
18970 SDValue B = FalseVal.getOperand(1);
18971 // Add is commutative, so check both orders
18972 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
18973 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
18974}
18975
18976/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
18977 /// This allows us to match a vadd.vv fed by a masked vrsub, which reduces
18978/// register pressure over the add followed by masked vsub sequence.
18979 static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
18980 SDLoc DL(N);
18981 EVT VT = N->getValueType(0);
18982 SDValue CC = N->getOperand(0);
18983 SDValue TrueVal = N->getOperand(1);
18984 SDValue FalseVal = N->getOperand(2);
18985
18986 bool SwapCC;
18987 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
18988 return SDValue();
18989
18990 SDValue Sub = SwapCC ? TrueVal : FalseVal;
18991 SDValue A = Sub.getOperand(0);
18992 SDValue B = Sub.getOperand(1);
18993
18994 // Arrange the select such that we can match a masked
18995 // vrsub.vi to perform the conditional negate
18996 SDValue NegB = DAG.getNegative(B, DL, VT);
18997 if (!SwapCC)
18998 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
18999 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19000 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19001}
19002
19003 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
19004 const RISCVSubtarget &Subtarget) {
19005 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
19006 return Folded;
19007
19008 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
19009 return V;
19010
19011 if (Subtarget.hasConditionalMoveFusion())
19012 return SDValue();
19013
19014 SDValue TrueVal = N->getOperand(1);
19015 SDValue FalseVal = N->getOperand(2);
19016 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
19017 return V;
19018 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
19019}
19020
19021/// If we have a build_vector where each lane is binop X, C, where C
19022/// is a constant (but not necessarily the same constant on all lanes),
19023/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
19024 /// We assume that materializing a constant build vector will be no more
19025 /// expensive than performing O(n) binops.
19026 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
19027 const RISCVSubtarget &Subtarget,
19028 const RISCVTargetLowering &TLI) {
19029 SDLoc DL(N);
19030 EVT VT = N->getValueType(0);
19031
19032 assert(!VT.isScalableVector() && "unexpected build vector");
19033
19034 if (VT.getVectorNumElements() == 1)
19035 return SDValue();
19036
19037 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
19038 if (!TLI.isBinOp(Opcode))
19039 return SDValue();
19040
19041 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
19042 return SDValue();
19043
19044 // This BUILD_VECTOR involves an implicit truncation, and sinking
19045 // truncates through binops is non-trivial.
19046 if (N->op_begin()->getValueType() != VT.getVectorElementType())
19047 return SDValue();
19048
19049 SmallVector<SDValue> LHSOps;
19050 SmallVector<SDValue> RHSOps;
19051 for (SDValue Op : N->ops()) {
19052 if (Op.isUndef()) {
19053 // We can't form a divide or remainder from undef.
19054 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
19055 return SDValue();
19056
19057 LHSOps.push_back(Op);
19058 RHSOps.push_back(Op);
19059 continue;
19060 }
19061
19062 // TODO: We can handle operations which have a neutral rhs value
19063 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
19064 // of profit in a more explicit manner.
19065 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
19066 return SDValue();
19067
19068 LHSOps.push_back(Op.getOperand(0));
19069 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
19070 !isa<ConstantFPSDNode>(Op.getOperand(1)))
19071 return SDValue();
19072 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19073 // have different LHS and RHS types.
19074 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
19075 return SDValue();
19076
19077 RHSOps.push_back(Op.getOperand(1));
19078 }
19079
19080 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
19081 DAG.getBuildVector(VT, DL, RHSOps));
19082}
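// Illustrative example (operands chosen for exposition): a v4i32
//   build_vector (add a, 1), (add b, 2), (add c, 3), (add d, 4)
// is rewritten to
//   add (build_vector a, b, c, d), (build_vector 1, 2, 3, 4)
// so that a single vector add replaces four scalar adds.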
19083
19084 static MVT getQDOTXResultType(MVT OpVT) {
19085 ElementCount OpEC = OpVT.getVectorElementCount();
19086 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
19087 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
19088}
19089
19090/// Given fixed length vectors A and B with equal element types, but possibly
19091/// different number of elements, return A + B where either A or B is zero
19092/// padded to the larger number of elements.
19093 static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B,
19094 SelectionDAG &DAG) {
19095 // NOTE: Manually doing the extract/add/insert scheme produces
19096 // significantly better codegen than the naive pad with zeros
19097 // and add scheme.
19098 EVT AVT = A.getValueType();
19099 EVT BVT = B.getValueType();
19100 assert(AVT.getVectorElementType() == BVT.getVectorElementType());
19101 if (AVT.getVectorNumElements() > BVT.getVectorNumElements()) {
19102 std::swap(A, B);
19103 std::swap(AVT, BVT);
19104 }
19105
19106 SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
19107 SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
19108 return DAG.getInsertSubvector(DL, B, Res, 0);
19109}
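// Illustrative example (types chosen for exposition): for A : v4i32 and
// B : v8i32, the helper above extracts the low v4i32 of B, adds it to A, and
// inserts the sum back into B at index 0, which is the zero-padded add of the
// two vectors.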
19110
19111 static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
19112 SelectionDAG &DAG,
19113 const RISCVSubtarget &Subtarget,
19114 const RISCVTargetLowering &TLI) {
19115 using namespace SDPatternMatch;
19116 // Note: We intentionally do not check the legality of the reduction type.
19117 // We want to handle the m4/m8 *src* types, and thus need to let illegal
19118 // intermediate types flow through here.
19119 if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
19120 !InVec.getValueType().getVectorElementCount().isKnownMultipleOf(4))
19121 return SDValue();
19122
19123 // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
19124 // form).
19125 SDValue A, B;
19126 if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
19127 SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
19128 SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
19129 if (AOpt || BOpt) {
19130 if (AOpt)
19131 A = AOpt;
19132 if (BOpt)
19133 B = BOpt;
19134 // From here, we're doing A + B with mixed types, implicitly zero
19135 // padded to the wider type. Note that we *don't* need the result
19136 // type to be the original VT, and in fact prefer narrower ones
19137 // if possible.
19138 return getZeroPaddedAdd(DL, A, B, DAG);
19139 }
19140 }
19141
19142 // zext a <--> partial_reduce_umla 0, a, 1
19143 // sext a <--> partial_reduce_smla 0, a, 1
19144 if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
19145 InVec.getOpcode() == ISD::SIGN_EXTEND) {
19146 SDValue A = InVec.getOperand(0);
19147 EVT OpVT = A.getValueType();
19148 if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
19149 return SDValue();
19150
19151 MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
19152 SDValue B = DAG.getConstant(0x1, DL, OpVT);
19153 bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
19154 unsigned Opc =
19155 IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
19156 return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
19157 }
19158
19159 // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
19160 // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
19161 // mul (sext a, zext b) -> partial_reduce_sumla 0, a, b
19162 // mul (zext a, sext b) -> partial_reduce_sumla 0, b, a (swapped)
19163 if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
19164 return SDValue();
19165
19166 if (!ISD::isExtOpcode(A.getOpcode()))
19167 return SDValue();
19168
19169 EVT OpVT = A.getOperand(0).getValueType();
19170 if (OpVT.getVectorElementType() != MVT::i8 ||
19171 OpVT != B.getOperand(0).getValueType() ||
19172 !TLI.isTypeLegal(A.getValueType()))
19173 return SDValue();
19174
19175 unsigned Opc;
19176 if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
19177 Opc = ISD::PARTIAL_REDUCE_SMLA;
19178 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19179 B.getOpcode() == ISD::ZERO_EXTEND)
19180 Opc = ISD::PARTIAL_REDUCE_UMLA;
19181 else if (A.getOpcode() == ISD::SIGN_EXTEND &&
19182 B.getOpcode() == ISD::ZERO_EXTEND)
19183 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19184 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19185 B.getOpcode() == ISD::SIGN_EXTEND) {
19186 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19187 std::swap(A, B);
19188 } else
19189 return SDValue();
19190
19191 MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
19192 return DAG.getNode(
19193 Opc, DL, ResVT,
19194 {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
19195}
19196
19197 static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG,
19198 const RISCVSubtarget &Subtarget,
19199 const RISCVTargetLowering &TLI) {
19200 if (!Subtarget.hasStdExtZvqdotq())
19201 return SDValue();
19202
19203 SDLoc DL(N);
19204 EVT VT = N->getValueType(0);
19205 SDValue InVec = N->getOperand(0);
19206 if (SDValue V = foldReduceOperandViaVQDOT(InVec, DL, DAG, Subtarget, TLI))
19207 return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
19208 return SDValue();
19209}
19210
19211 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
19212 const RISCVSubtarget &Subtarget,
19213 const RISCVTargetLowering &TLI) {
19214 SDValue InVec = N->getOperand(0);
19215 SDValue InVal = N->getOperand(1);
19216 SDValue EltNo = N->getOperand(2);
19217 SDLoc DL(N);
19218
19219 EVT VT = InVec.getValueType();
19220 if (VT.isScalableVector())
19221 return SDValue();
19222
19223 if (!InVec.hasOneUse())
19224 return SDValue();
19225
19226 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
19227 // move the insert_vector_elts into the arms of the binop. Note that
19228 // the new RHS must be a constant.
19229 const unsigned InVecOpcode = InVec->getOpcode();
19230 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
19231 InVal.hasOneUse()) {
19232 SDValue InVecLHS = InVec->getOperand(0);
19233 SDValue InVecRHS = InVec->getOperand(1);
19234 SDValue InValLHS = InVal->getOperand(0);
19235 SDValue InValRHS = InVal->getOperand(1);
19236
19237 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
19238 return SDValue();
19239 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
19240 return SDValue();
19241 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19242 // have different LHS and RHS types.
19243 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
19244 return SDValue();
19245 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19246 InVecLHS, InValLHS, EltNo);
19247 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19248 InVecRHS, InValRHS, EltNo);
19249 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
19250 }
19251
19252 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
19253 // move the insert_vector_elt to the source operand of the concat_vector.
19254 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
19255 return SDValue();
19256
19257 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19258 if (!IndexC)
19259 return SDValue();
19260 unsigned Elt = IndexC->getZExtValue();
19261
19262 EVT ConcatVT = InVec.getOperand(0).getValueType();
19263 if (ConcatVT.getVectorElementType() != InVal.getValueType())
19264 return SDValue();
19265 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19266 unsigned NewIdx = Elt % ConcatNumElts;
19267
19268 unsigned ConcatOpIdx = Elt / ConcatNumElts;
19269 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
19270 ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);
19271
19272 SmallVector<SDValue> ConcatOps(InVec->ops());
19273 ConcatOps[ConcatOpIdx] = ConcatOp;
19274 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19275}
19276
19277// If we're concatenating a series of vector loads like
19278// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
19279// Then we can turn this into a strided load by widening the vector elements
19280// vlse32 p, stride=n
19281 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
19282 const RISCVSubtarget &Subtarget,
19283 const RISCVTargetLowering &TLI) {
19284 SDLoc DL(N);
19285 EVT VT = N->getValueType(0);
19286
19287 // Only perform this combine on legal MVTs.
19288 if (!TLI.isTypeLegal(VT))
19289 return SDValue();
19290
19291 // TODO: Potentially extend this to scalable vectors
19292 if (VT.isScalableVector())
19293 return SDValue();
19294
19295 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
19296 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
19297 !SDValue(BaseLd, 0).hasOneUse())
19298 return SDValue();
19299
19300 EVT BaseLdVT = BaseLd->getValueType(0);
19301
19302 // Go through the loads and check that they're strided
19303 SmallVector<LoadSDNode *> Lds;
19304 Lds.push_back(BaseLd);
19305 Align Align = BaseLd->getAlign();
19306 for (SDValue Op : N->ops().drop_front()) {
19307 auto *Ld = dyn_cast<LoadSDNode>(Op);
19308 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
19309 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
19310 Ld->getValueType(0) != BaseLdVT)
19311 return SDValue();
19312
19313 Lds.push_back(Ld);
19314
19315 // The common alignment is the most restrictive (smallest) of all the loads
19316 Align = std::min(Align, Ld->getAlign());
19317 }
19318
19319 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
19320 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
19321 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
19322 // If the load ptrs can be decomposed into a common (Base + Index) with a
19323 // common constant stride, then return the constant stride.
19324 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
19325 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
19326 if (BIO1.equalBaseIndex(BIO2, DAG))
19327 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
19328
19329 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
19330 SDValue P1 = Ld1->getBasePtr();
19331 SDValue P2 = Ld2->getBasePtr();
19332 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
19333 return {{P2.getOperand(1), false}};
19334 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
19335 return {{P1.getOperand(1), true}};
19336
19337 return std::nullopt;
19338 };
19339
19340 // Get the distance between the first and second loads
19341 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
19342 if (!BaseDiff)
19343 return SDValue();
19344
19345 // Check all the loads are the same distance apart
19346 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
19347 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
19348 return SDValue();
19349
19350 // TODO: At this point, we've successfully matched a generalized gather
19351 // load. Maybe we should emit that, and then move the specialized
19352 // matchers above and below into a DAG combine?
19353
19354 // Get the widened scalar type, e.g. v4i8 -> i64
19355 unsigned WideScalarBitWidth =
19356 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
19357 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
19358
19359 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
19360 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
19361 if (!TLI.isTypeLegal(WideVecVT))
19362 return SDValue();
19363
19364 // Check that the operation is legal
19365 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
19366 return SDValue();
19367
19368 auto [StrideVariant, MustNegateStride] = *BaseDiff;
19369 SDValue Stride =
19370 std::holds_alternative<SDValue>(StrideVariant)
19371 ? std::get<SDValue>(StrideVariant)
19372 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
19373 Lds[0]->getOffset().getValueType());
19374 if (MustNegateStride)
19375 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
19376
19377 SDValue AllOneMask =
19378 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
19379 DAG.getConstant(1, DL, MVT::i1));
19380
19381 uint64_t MemSize;
19382 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
19383 ConstStride && ConstStride->getSExtValue() >= 0)
19384 // total size = (elsize * n) + (stride - elsize) * (n-1)
19385 // = elsize + stride * (n-1)
19386 MemSize = WideScalarVT.getSizeInBits() +
19387 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
19388 else
19389 // If Stride isn't constant, then we can't know how much it will load
19390 MemSize = MemoryLocation::UnknownSize;
19391
19392 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
19393 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
19394 Align);
19395
19396 SDValue StridedLoad = DAG.getStridedLoadVP(
19397 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
19398 AllOneMask,
19399 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
19400
19401 for (SDValue Ld : N->ops())
19402 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
19403
19404 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
19405}
19406
19407 static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
19408 const RISCVSubtarget &Subtarget,
19409 const RISCVTargetLowering &TLI) {
19410 SDLoc DL(N);
19411 EVT VT = N->getValueType(0);
19412 const unsigned ElementSize = VT.getScalarSizeInBits();
19413 const unsigned NumElts = VT.getVectorNumElements();
19414 SDValue V1 = N->getOperand(0);
19415 SDValue V2 = N->getOperand(1);
19416 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
19417 MVT XLenVT = Subtarget.getXLenVT();
19418
19419 // Recognize a disguised select of add/sub.
19420 bool SwapCC;
19421 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
19422 matchSelectAddSub(V1, V2, SwapCC)) {
19423 SDValue Sub = SwapCC ? V1 : V2;
19424 SDValue A = Sub.getOperand(0);
19425 SDValue B = Sub.getOperand(1);
19426
19427 SmallVector<SDValue> MaskVals;
19428 for (int MaskIndex : Mask) {
19429 bool SelectMaskVal = (MaskIndex < (int)NumElts);
19430 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
19431 }
19432 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
19433 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
19434 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
19435
19436 // Arrange the select such that we can match a masked
19437 // vrsub.vi to perform the conditional negate
19438 SDValue NegB = DAG.getNegative(B, DL, VT);
19439 if (!SwapCC)
19440 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19441 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19442 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19443 }
19444
19445 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
19446 // during the combine phase before type legalization, and relies on
19447 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
19448 // for the source mask.
19449 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
19450 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
19451 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
19452 return SDValue();
19453
19454 SmallVector<int, 8> NewMask;
19455 narrowShuffleMaskElts(2, Mask, NewMask);
19456
19457 LLVMContext &C = *DAG.getContext();
19458 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
19459 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
19460 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
19461 DAG.getBitcast(NewVT, V2), NewMask);
19462 return DAG.getBitcast(VT, Res);
19463}
19464
19465 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
19466 const RISCVSubtarget &Subtarget) {
19467 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19468
19469 if (N->getValueType(0).isFixedLengthVector())
19470 return SDValue();
19471
19472 SDValue Addend = N->getOperand(0);
19473 SDValue MulOp = N->getOperand(1);
19474
19475 if (N->getOpcode() == RISCVISD::ADD_VL) {
19476 SDValue AddPassthruOp = N->getOperand(2);
19477 if (!AddPassthruOp.isUndef())
19478 return SDValue();
19479 }
19480
19481 auto IsVWMulOpc = [](unsigned Opc) {
19482 switch (Opc) {
19483 case RISCVISD::VWMUL_VL:
19484 case RISCVISD::VWMULU_VL:
19485 case RISCVISD::VWMULSU_VL:
19486 return true;
19487 default:
19488 return false;
19489 }
19490 };
19491
19492 if (!IsVWMulOpc(MulOp.getOpcode()))
19493 std::swap(Addend, MulOp);
19494
19495 if (!IsVWMulOpc(MulOp.getOpcode()))
19496 return SDValue();
19497
19498 SDValue MulPassthruOp = MulOp.getOperand(2);
19499
19500 if (!MulPassthruOp.isUndef())
19501 return SDValue();
19502
19503 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19504 const RISCVSubtarget &Subtarget) {
19505 if (N->getOpcode() == ISD::ADD) {
19506 SDLoc DL(N);
19507 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19508 Subtarget);
19509 }
19510 return std::make_pair(N->getOperand(3), N->getOperand(4));
19511 }(N, DAG, Subtarget);
19512
19513 SDValue MulMask = MulOp.getOperand(3);
19514 SDValue MulVL = MulOp.getOperand(4);
19515
19516 if (AddMask != MulMask || AddVL != MulVL)
19517 return SDValue();
19518
19519 const auto &TSInfo =
19520 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
19521 unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());
19522
19523 SDLoc DL(N);
19524 EVT VT = N->getValueType(0);
19525 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
19526 AddVL};
19527 return DAG.getNode(Opc, DL, VT, Ops);
19528}
19529
19530 static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG,
19531 const RISCVSubtarget &Subtarget) {
19532
19533 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19534
19535 if (!N->getValueType(0).isVector())
19536 return SDValue();
19537
19538 SDValue Addend = N->getOperand(0);
19539 SDValue DotOp = N->getOperand(1);
19540
19541 if (N->getOpcode() == RISCVISD::ADD_VL) {
19542 SDValue AddPassthruOp = N->getOperand(2);
19543 if (!AddPassthruOp.isUndef())
19544 return SDValue();
19545 }
19546
19547 auto IsVqdotqOpc = [](unsigned Opc) {
19548 switch (Opc) {
19549 case RISCVISD::VQDOT_VL:
19550 case RISCVISD::VQDOTU_VL:
19551 case RISCVISD::VQDOTSU_VL:
19552 return true;
19553 default:
19554 return false;
19555 }
19556 };
19557
19558 if (!IsVqdotqOpc(DotOp.getOpcode()))
19559 std::swap(Addend, DotOp);
19560
19561 if (!IsVqdotqOpc(DotOp.getOpcode()))
19562 return SDValue();
19563
19564 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19565 const RISCVSubtarget &Subtarget) {
19566 if (N->getOpcode() == ISD::ADD) {
19567 SDLoc DL(N);
19568 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19569 Subtarget);
19570 }
19571 return std::make_pair(N->getOperand(3), N->getOperand(4));
19572 }(N, DAG, Subtarget);
19573
19574 SDValue MulVL = DotOp.getOperand(4);
19575 if (AddVL != MulVL)
19576 return SDValue();
19577
19578 if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
19579 AddMask.getOperand(0) != MulVL)
19580 return SDValue();
19581
19582 SDValue AccumOp = DotOp.getOperand(2);
19583 SDLoc DL(N);
19584 EVT VT = N->getValueType(0);
19585 Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
19586 DAG.getUNDEF(VT), AddMask, AddVL);
19587
19588 SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
19589 DotOp.getOperand(3), DotOp->getOperand(4)};
19590 return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
19591}
19592
19593static bool
19594 legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
19595 ISD::MemIndexType &IndexType,
19596 RISCVTargetLowering::DAGCombinerInfo &DCI) {
19597 if (!DCI.isBeforeLegalize())
19598 return false;
19599
19600 SelectionDAG &DAG = DCI.DAG;
19601 const MVT XLenVT =
19602 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
19603
19604 const EVT IndexVT = Index.getValueType();
19605
19606 // RISC-V indexed loads only support the "unsigned unscaled" addressing
19607 // mode, so anything else must be manually legalized.
19608 if (!isIndexTypeSigned(IndexType))
19609 return false;
19610
19611 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
19612 // Any index legalization should first promote to XLenVT, so we don't lose
19613 // bits when scaling. This may create an illegal index type so we let
19614 // LLVM's legalization take care of the splitting.
19615 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
19616 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
19617 IndexVT.changeVectorElementType(XLenVT), Index);
19618 }
19619 IndexType = ISD::UNSIGNED_SCALED;
19620 return true;
19621}
19622
19623/// Match the index vector of a scatter or gather node as the shuffle mask
19624/// which performs the rearrangement if possible. Will only match if
19625/// all lanes are touched, and thus replacing the scatter or gather with
19626/// a unit strided access and shuffle is legal.
19627static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
19628 SmallVector<int> &ShuffleMask) {
19629 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19630 return false;
19631 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19632 return false;
19633
19634 const unsigned ElementSize = VT.getScalarStoreSize();
19635 const unsigned NumElems = VT.getVectorNumElements();
19636
19637 // Create the shuffle mask and check all bits active
19638 assert(ShuffleMask.empty());
19639 BitVector ActiveLanes(NumElems);
19640 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19641 // TODO: We've found an active bit of UB, and could be
19642 // more aggressive here if desired.
19643 if (Index->getOperand(i)->isUndef())
19644 return false;
19645 uint64_t C = Index->getConstantOperandVal(i);
19646 if (C % ElementSize != 0)
19647 return false;
19648 C = C / ElementSize;
19649 if (C >= NumElems)
19650 return false;
19651 ShuffleMask.push_back(C);
19652 ActiveLanes.set(C);
19653 }
19654 return ActiveLanes.all();
19655}
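// Illustrative example (a v4i32 gather, byte offsets chosen for exposition):
// an all-ones mask with constant index <0, 8, 4, 12> touches every lane once,
// so ShuffleMask becomes <0, 2, 1, 3> and the gather can instead be lowered as
// a unit-strided load followed by that shuffle.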
19656
19657/// Match the index of a gather or scatter operation as an operation
19658/// with twice the element width and half the number of elements. This is
19659/// generally profitable (if legal) because these operations are linear
19660/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
19661/// come out ahead.
19662static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
19663 Align BaseAlign, const RISCVSubtarget &ST) {
19664 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19665 return false;
19666 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19667 return false;
19668
19669 // Attempt a doubling. If we can use an element type 4x or 8x in
19670 // size, this will happen via multiple iterations of the transform.
19671 const unsigned NumElems = VT.getVectorNumElements();
19672 if (NumElems % 2 != 0)
19673 return false;
19674
19675 const unsigned ElementSize = VT.getScalarStoreSize();
19676 const unsigned WiderElementSize = ElementSize * 2;
19677 if (WiderElementSize > ST.getELen()/8)
19678 return false;
19679
19680 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
19681 return false;
19682
19683 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19684 // TODO: We've found an active bit of UB, and could be
19685 // more aggressive here if desired.
19686 if (Index->getOperand(i)->isUndef())
19687 return false;
19688 // TODO: This offset check is too strict if we support fully
19689 // misaligned memory operations.
19690 uint64_t C = Index->getConstantOperandVal(i);
19691 if (i % 2 == 0) {
19692 if (C % WiderElementSize != 0)
19693 return false;
19694 continue;
19695 }
19696 uint64_t Last = Index->getConstantOperandVal(i-1);
19697 if (C != Last + ElementSize)
19698 return false;
19699 }
19700 return true;
19701}
19702
19703// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
19704// This is beneficial for the cases where X and Y are both the same value
19705// type of low-precision vectors. Since the truncate would be lowered into
19706// n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
19707// restriction, such a pattern would be expanded into a series of "vsetvli"
19708// and "vnsrl" instructions later to reach this point.
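// For example (illustrative): with X and Y both v4i8, truncating
// (sra (sext X to v4i32), (zext Y to v4i32)) back to v4i8 becomes
// (sra X, (smin Y, 7)), 7 being scalarsize(Y) - 1 for i8 elements.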
19709static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
19710 SDValue Mask = N->getOperand(1);
19711 SDValue VL = N->getOperand(2);
19712
19713 bool IsVLMAX = isAllOnesConstant(VL) ||
19714 (isa<RegisterSDNode>(VL) &&
19715 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
19716 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
19717 Mask.getOperand(0) != VL)
19718 return SDValue();
19719
19720 auto IsTruncNode = [&](SDValue V) {
19721 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19722 V.getOperand(1) == Mask && V.getOperand(2) == VL;
19723 };
19724
19725 SDValue Op = N->getOperand(0);
19726
19727 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
19728 // to distinguish such pattern.
19729 while (IsTruncNode(Op)) {
19730 if (!Op.hasOneUse())
19731 return SDValue();
19732 Op = Op.getOperand(0);
19733 }
19734
19735 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
19736 return SDValue();
19737
19738 SDValue N0 = Op.getOperand(0);
19739 SDValue N1 = Op.getOperand(1);
19740 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
19741 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
19742 return SDValue();
19743
19744 SDValue N00 = N0.getOperand(0);
19745 SDValue N10 = N1.getOperand(0);
19746 if (!N00.getValueType().isVector() ||
19747 N00.getValueType() != N10.getValueType() ||
19748 N->getValueType(0) != N10.getValueType())
19749 return SDValue();
19750
19751 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
19752 SDValue SMin =
19753 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
19754 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
19755 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
19756}
19757
19758// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
19759// maximum value for the truncated type.
19760// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
19761// is the signed maximum value for the truncated type and C2 is the signed
19762// minimum value.
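// For example (illustrative): when truncating i16 elements to i8 elements,
// C is 255 for the vnclipu form, while C1 is 127 and C2 is -128 for the
// vnclip form.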
19763static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
19764 const RISCVSubtarget &Subtarget) {
19765 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
19766
19767 MVT VT = N->getSimpleValueType(0);
19768
19769 SDValue Mask = N->getOperand(1);
19770 SDValue VL = N->getOperand(2);
19771
19772 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
19773 APInt &SplatVal) {
19774 if (V.getOpcode() != Opc &&
19775 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
19776 V.getOperand(3) == Mask && V.getOperand(4) == VL))
19777 return SDValue();
19778
19779 SDValue Op = V.getOperand(1);
19780
19781 // Peek through conversion between fixed and scalable vectors.
19782 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
19783 isNullConstant(Op.getOperand(2)) &&
19784 Op.getOperand(1).getValueType().isFixedLengthVector() &&
19785 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19786 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
19787 isNullConstant(Op.getOperand(1).getOperand(1)))
19788 Op = Op.getOperand(1).getOperand(0);
19789
19790 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
19791 return V.getOperand(0);
19792
19793 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
19794 Op.getOperand(2) == VL) {
19795 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
19796 SplatVal =
19797 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
19798 return V.getOperand(0);
19799 }
19800 }
19801
19802 return SDValue();
19803 };
19804
19805 SDLoc DL(N);
19806
19807 auto DetectUSatPattern = [&](SDValue V) {
19808 APInt LoC, HiC;
19809
19810 // Simple case, V is a UMIN.
19811 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
19812 if (HiC.isMask(VT.getScalarSizeInBits()))
19813 return UMinOp;
19814
19815 // If we have an SMAX that removes negative numbers first, then we can match
19816 // SMIN instead of UMIN.
19817 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19818 if (SDValue SMaxOp =
19819 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19820 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
19821 return SMinOp;
19822
19823 // If we have an SMIN before an SMAX and the SMAX constant is less than or
19824 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
19825 // first.
19826 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19827 if (SDValue SMinOp =
19828 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19829 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
19830 HiC.uge(LoC))
19831 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
19832 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
19833 Mask, VL);
19834
19835 return SDValue();
19836 };
19837
19838 auto DetectSSatPattern = [&](SDValue V) {
19839 unsigned NumDstBits = VT.getScalarSizeInBits();
19840 unsigned NumSrcBits = V.getScalarValueSizeInBits();
19841 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
19842 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
19843
19844 APInt HiC, LoC;
19845 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19846 if (SDValue SMaxOp =
19847 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19848 if (HiC == SignedMax && LoC == SignedMin)
19849 return SMaxOp;
19850
19851 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19852 if (SDValue SMinOp =
19853 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19854 if (HiC == SignedMax && LoC == SignedMin)
19855 return SMinOp;
19856
19857 return SDValue();
19858 };
19859
19860 SDValue Src = N->getOperand(0);
19861
19862 // Look through multiple layers of truncates.
19863 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19864 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
19865 Src.hasOneUse())
19866 Src = Src.getOperand(0);
19867
19868 SDValue Val;
19869 unsigned ClipOpc;
19870 if ((Val = DetectUSatPattern(Src)))
19871 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
19872 else if ((Val = DetectSSatPattern(Src)))
19873 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
19874 else
19875 return SDValue();
19876
19877 MVT ValVT = Val.getSimpleValueType();
19878
19879 do {
19880 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
19881 ValVT = ValVT.changeVectorElementType(ValEltVT);
19882 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
19883 } while (ValVT != VT);
19884
19885 return Val;
19886}
19887
19888// Convert
19889// (iX ctpop (bitcast (vXi1 A)))
19890// ->
19891// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19892// and
19893// (iN reduce.add (zext (vXi1 A to vXiN))
19894// ->
19895// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19896// FIXME: It's complicated to match all the variations of this after type
19897// legalization so we only handle the pre-type legalization pattern, but that
19898// requires the fixed vector type to be legal.
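// For example (illustrative): counting the set bits of a v16i1 mask A,
// whether written as (ctpop (bitcast A to i16)) or as
// (vecreduce_add (zext A to v16i32)), becomes a single vcpop.m whose XLenVT
// result is then zero-extended or truncated to the destination type.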
19899static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
19900 const RISCVSubtarget &Subtarget) {
19901 unsigned Opc = N->getOpcode();
19902 assert((Opc == ISD::CTPOP || Opc == ISD::VECREDUCE_ADD) &&
19903 "Unexpected opcode");
19904 EVT VT = N->getValueType(0);
19905 if (!VT.isScalarInteger())
19906 return SDValue();
19907
19908 SDValue Src = N->getOperand(0);
19909
19910 if (Opc == ISD::CTPOP) {
19911 // Peek through zero_extend. It doesn't change the count.
19912 if (Src.getOpcode() == ISD::ZERO_EXTEND)
19913 Src = Src.getOperand(0);
19914
19915 if (Src.getOpcode() != ISD::BITCAST)
19916 return SDValue();
19917 Src = Src.getOperand(0);
19918 } else if (Opc == ISD::VECREDUCE_ADD) {
19919 if (Src.getOpcode() != ISD::ZERO_EXTEND)
19920 return SDValue();
19921 Src = Src.getOperand(0);
19922 }
19923
19924 EVT SrcEVT = Src.getValueType();
19925 if (!SrcEVT.isSimple())
19926 return SDValue();
19927
19928 MVT SrcMVT = SrcEVT.getSimpleVT();
19929 // Make sure the input is an i1 vector.
19930 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
19931 return SDValue();
19932
19933 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19934 if (!TLI.isTypeLegal(SrcMVT))
19935 return SDValue();
19936
19937 // Check that destination type is large enough to hold result without
19938 // overflow.
19939 if (Opc == ISD::VECREDUCE_ADD) {
19940 unsigned EltSize = SrcMVT.getScalarSizeInBits();
19941 unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
19942 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
19943 unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
19944 ? SrcMVT.getVectorNumElements()
19945 : RISCVTargetLowering::computeVLMAX(
19946 VectorBitsMax, EltSize, MinSize);
19947 if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
19948 return SDValue();
19949 }
19950
19951 MVT ContainerVT = SrcMVT;
19952 if (SrcMVT.isFixedLengthVector()) {
19953 ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
19954 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
19955 }
19956
19957 SDLoc DL(N);
19958 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
19959
19960 MVT XLenVT = Subtarget.getXLenVT();
19961 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
19962 return DAG.getZExtOrTrunc(Pop, DL, VT);
19963}
19964
19965static SDValue performSHLCombine(SDNode *N,
19966 TargetLowering::DAGCombinerInfo &DCI,
19967 const RISCVSubtarget &Subtarget) {
19968 // (shl (zext x), y) -> (vwsll x, y)
19969 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
19970 return V;
19971
19972 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
19973 // (shl (zext x), C) -> (vwmulu x, 1u << C)
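 // For example (illustrative): a v4i16 (shl (sext v4i8 x), splat 3) has a
 // narrow element width of 8 > 3, so it becomes (vwmulsu x, 8), letting the
 // widening multiply produce the i16 result directly.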
19974
19975 if (!DCI.isAfterLegalizeDAG())
19976 return SDValue();
19977
19978 SDValue LHS = N->getOperand(0);
19979 if (!LHS.hasOneUse())
19980 return SDValue();
19981 unsigned Opcode;
19982 switch (LHS.getOpcode()) {
19983 case ISD::SIGN_EXTEND:
19984 case RISCVISD::VSEXT_VL:
19985 Opcode = RISCVISD::VWMULSU_VL;
19986 break;
19987 case ISD::ZERO_EXTEND:
19988 case RISCVISD::VZEXT_VL:
19989 Opcode = RISCVISD::VWMULU_VL;
19990 break;
19991 default:
19992 return SDValue();
19993 }
19994
19995 SDValue RHS = N->getOperand(1);
19996 APInt ShAmt;
19997 uint64_t ShAmtInt;
19998 if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
19999 ShAmtInt = ShAmt.getZExtValue();
20000 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
20001 RHS.getOperand(1).getOpcode() == ISD::Constant)
20002 ShAmtInt = RHS.getConstantOperandVal(1);
20003 else
20004 return SDValue();
20005
20006 // Better foldings:
20007 // (shl (sext x), 1) -> (vwadd x, x)
20008 // (shl (zext x), 1) -> (vwaddu x, x)
20009 if (ShAmtInt <= 1)
20010 return SDValue();
20011
20012 SDValue NarrowOp = LHS.getOperand(0);
20013 MVT NarrowVT = NarrowOp.getSimpleValueType();
20014 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
20015 if (ShAmtInt >= NarrowBits)
20016 return SDValue();
20017 MVT VT = N->getSimpleValueType(0);
20018 if (NarrowBits * 2 != VT.getScalarSizeInBits())
20019 return SDValue();
20020
20021 SelectionDAG &DAG = DCI.DAG;
20022 SDLoc DL(N);
20023 SDValue Passthru, Mask, VL;
20024 switch (N->getOpcode()) {
20025 case ISD::SHL:
20026 Passthru = DAG.getUNDEF(VT);
20027 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
20028 break;
20029 case RISCVISD::SHL_VL:
20030 Passthru = N->getOperand(2);
20031 Mask = N->getOperand(3);
20032 VL = N->getOperand(4);
20033 break;
20034 default:
20035 llvm_unreachable("Expected SHL");
20036 }
20037 return DAG.getNode(Opcode, DL, VT, NarrowOp,
20038 DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
20039 Passthru, Mask, VL);
20040}
20041
20042SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
20043 DAGCombinerInfo &DCI) const {
20044 SelectionDAG &DAG = DCI.DAG;
20045 const MVT XLenVT = Subtarget.getXLenVT();
20046 SDLoc DL(N);
20047
20048 // Helper to call SimplifyDemandedBits on an operand of N where only some low
20049 // bits are demanded. N will be added to the Worklist if it was not deleted.
20050 // Caller should return SDValue(N, 0) if this returns true.
20051 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
20052 SDValue Op = N->getOperand(OpNo);
20053 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
20054 if (!SimplifyDemandedBits(Op, Mask, DCI))
20055 return false;
20056
20057 if (N->getOpcode() != ISD::DELETED_NODE)
20058 DCI.AddToWorklist(N);
20059 return true;
20060 };
20061
20062 switch (N->getOpcode()) {
20063 default:
20064 break;
20065 case RISCVISD::SplitF64: {
20066 SDValue Op0 = N->getOperand(0);
20067 // If the input to SplitF64 is just BuildPairF64 then the operation is
20068 // redundant. Instead, use BuildPairF64's operands directly.
20069 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
20070 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
20071
20072 if (Op0->isUndef()) {
20073 SDValue Lo = DAG.getUNDEF(MVT::i32);
20074 SDValue Hi = DAG.getUNDEF(MVT::i32);
20075 return DCI.CombineTo(N, Lo, Hi);
20076 }
20077
20078 // It's cheaper to materialise two 32-bit integers than to load a double
20079 // from the constant pool and transfer it to integer registers through the
20080 // stack.
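 // For example (illustrative): splitting the f64 constant 1.0 (bit pattern
 // 0x3FF0000000000000) gives Lo = 0x0 and Hi = 0x3FF00000, both cheap to
 // materialise in integer registers.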
20081 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
20082 APInt V = C->getValueAPF().bitcastToAPInt();
20083 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
20084 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
20085 return DCI.CombineTo(N, Lo, Hi);
20086 }
20087
20088 // This is a target-specific version of a DAGCombine performed in
20089 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20090 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20091 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20092 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20093 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
20094 break;
20095 SDValue NewSplitF64 =
20096 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
20097 Op0.getOperand(0));
20098 SDValue Lo = NewSplitF64.getValue(0);
20099 SDValue Hi = NewSplitF64.getValue(1);
20100 APInt SignBit = APInt::getSignMask(32);
20101 if (Op0.getOpcode() == ISD::FNEG) {
20102 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
20103 DAG.getConstant(SignBit, DL, MVT::i32));
20104 return DCI.CombineTo(N, Lo, NewHi);
20105 }
20106 assert(Op0.getOpcode() == ISD::FABS);
20107 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
20108 DAG.getConstant(~SignBit, DL, MVT::i32));
20109 return DCI.CombineTo(N, Lo, NewHi);
20110 }
20111 case RISCVISD::SLLW:
20112 case RISCVISD::SRAW:
20113 case RISCVISD::SRLW:
20114 case RISCVISD::RORW:
20115 case RISCVISD::ROLW: {
20116 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
20117 if (SimplifyDemandedLowBitsHelper(0, 32) ||
20118 SimplifyDemandedLowBitsHelper(1, 5))
20119 return SDValue(N, 0);
20120
20121 break;
20122 }
20123 case RISCVISD::CLZW:
20124 case RISCVISD::CTZW: {
20125 // Only the lower 32 bits of the first operand are read
20126 if (SimplifyDemandedLowBitsHelper(0, 32))
20127 return SDValue(N, 0);
20128 break;
20129 }
20130 case RISCVISD::FMV_W_X_RV64: {
20131 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
20132 // conversion is unnecessary and can be replaced with the
20133 // FMV_X_ANYEXTW_RV64 operand.
20134 SDValue Op0 = N->getOperand(0);
20135 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
20136 return Op0.getOperand(0);
20137 break;
20138 }
20139 case RISCVISD::FMV_X_ANYEXTH:
20140 case RISCVISD::FMV_X_ANYEXTW_RV64: {
20141 SDLoc DL(N);
20142 SDValue Op0 = N->getOperand(0);
20143 MVT VT = N->getSimpleValueType(0);
20144
20145 // Constant fold.
20146 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
20147 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
20148 return DAG.getConstant(Val, DL, VT);
20149 }
20150
20151 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
20152 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
20153 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
20154 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
20155 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
20156 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
20157 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
20158 assert(Op0.getOperand(0).getValueType() == VT &&
20159 "Unexpected value type!");
20160 return Op0.getOperand(0);
20161 }
20162
20163 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
20164 cast<LoadSDNode>(Op0)->isSimple()) {
20165 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
20166 auto *LN0 = cast<LoadSDNode>(Op0);
20167 SDValue Load =
20168 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
20169 LN0->getBasePtr(), IVT, LN0->getMemOperand());
20170 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
20171 return Load;
20172 }
20173
20174 // This is a target-specific version of a DAGCombine performed in
20175 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20176 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20177 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20178 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20179 !Op0.getNode()->hasOneUse())
20180 break;
20181 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
20182 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
20183 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
20184 if (Op0.getOpcode() == ISD::FNEG)
20185 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
20186 DAG.getConstant(SignBit, DL, VT));
20187
20188 assert(Op0.getOpcode() == ISD::FABS);
20189 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
20190 DAG.getConstant(~SignBit, DL, VT));
20191 }
20192 case ISD::ABS: {
20193 EVT VT = N->getValueType(0);
20194 SDValue N0 = N->getOperand(0);
20195 // abs (sext) -> zext (abs)
20196 // abs (zext) -> zext (handled elsewhere)
20197 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
20198 SDValue Src = N0.getOperand(0);
20199 SDLoc DL(N);
20200 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
20201 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
20202 }
20203 break;
20204 }
20205 case ISD::ADD: {
20206 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20207 return V;
20208 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
20209 return V;
20210 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20211 return V;
20212 return performADDCombine(N, DCI, Subtarget);
20213 }
20214 case ISD::SUB: {
20215 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20216 return V;
20217 return performSUBCombine(N, DAG, Subtarget);
20218 }
20219 case ISD::AND:
20220 return performANDCombine(N, DCI, Subtarget);
20221 case ISD::OR: {
20222 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20223 return V;
20224 return performORCombine(N, DCI, Subtarget);
20225 }
20226 case ISD::XOR:
20227 return performXORCombine(N, DAG, Subtarget);
20228 case ISD::MUL:
20229 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20230 return V;
20231 return performMULCombine(N, DAG, DCI, Subtarget);
20232 case ISD::SDIV:
20233 case ISD::UDIV:
20234 case ISD::SREM:
20235 case ISD::UREM:
20236 if (SDValue V = combineBinOpOfZExt(N, DAG))
20237 return V;
20238 break;
20239 case ISD::FMUL: {
20240 using namespace SDPatternMatch;
20241 SDLoc DL(N);
20242 EVT VT = N->getValueType(0);
20243 SDValue X, Y;
20244 // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
20245 // hoistFNegAboveFMulFDiv.
20246 // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
20247 if (sd_match(N, m_FMul(m_Value(X), m_FNeg(m_Value(Y)))))
20248 return DAG.getNode(ISD::FNEG, DL, VT,
20249 DAG.getNode(ISD::FMUL, DL, VT, X, Y));
20250
20251 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
20252 SDValue N0 = N->getOperand(0);
20253 SDValue N1 = N->getOperand(1);
20254 if (N0->getOpcode() != ISD::FCOPYSIGN)
20255 std::swap(N0, N1);
20256 if (N0->getOpcode() != ISD::FCOPYSIGN)
20257 return SDValue();
20258 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
20259 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
20260 return SDValue();
20261 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
20262 return SDValue();
20263 SDValue Sign = N0->getOperand(1);
20264 if (Sign.getValueType() != VT)
20265 return SDValue();
20266 return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
20267 }
20268 case ISD::FADD:
20269 case ISD::UMAX:
20270 case ISD::UMIN:
20271 case ISD::SMAX:
20272 case ISD::SMIN:
20273 case ISD::FMAXNUM:
20274 case ISD::FMINNUM: {
20275 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
20276 return V;
20277 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
20278 return V;
20279 return SDValue();
20280 }
20281 case ISD::SETCC:
20282 return performSETCCCombine(N, DCI, Subtarget);
20283 case ISD::SIGN_EXTEND_INREG:
20284 return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
20285 case ISD::ZERO_EXTEND:
20286 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
20287 // type legalization. This is safe because fp_to_uint produces poison if
20288 // it overflows.
20289 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
20290 SDValue Src = N->getOperand(0);
20291 if (Src.getOpcode() == ISD::FP_TO_UINT &&
20292 isTypeLegal(Src.getOperand(0).getValueType()))
20293 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
20294 Src.getOperand(0));
20295 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
20296 isTypeLegal(Src.getOperand(1).getValueType())) {
20297 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
20298 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
20299 Src.getOperand(0), Src.getOperand(1));
20300 DCI.CombineTo(N, Res);
20301 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
20302 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
20303 return SDValue(N, 0); // Return N so it doesn't get rechecked.
20304 }
20305 }
20306 return SDValue();
20307 case RISCVISD::TRUNCATE_VECTOR_VL:
20308 if (SDValue V = combineTruncOfSraSext(N, DAG))
20309 return V;
20310 return combineTruncToVnclip(N, DAG, Subtarget);
20311 case ISD::VP_TRUNCATE:
20312 return performVP_TRUNCATECombine(N, DAG, Subtarget);
20313 case ISD::TRUNCATE:
20314 return performTRUNCATECombine(N, DAG, Subtarget);
20315 case ISD::SELECT:
20316 return performSELECTCombine(N, DAG, Subtarget);
20317 case ISD::VSELECT:
20318 return performVSELECTCombine(N, DAG);
20319 case RISCVISD::CZERO_EQZ:
20320 case RISCVISD::CZERO_NEZ: {
20321 SDValue Val = N->getOperand(0);
20322 SDValue Cond = N->getOperand(1);
20323
20324 unsigned Opc = N->getOpcode();
20325
20326 // czero_eqz x, x -> x
20327 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
20328 return Val;
20329
20330 unsigned InvOpc =
20331 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
20332
20333 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
20334 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
20335 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
20336 SDValue NewCond = Cond.getOperand(0);
20337 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
20338 if (DAG.MaskedValueIsZero(NewCond, Mask))
20339 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
20340 }
20341 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
20342 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
20343 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
20344 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
20345 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
20346 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
20347 if (ISD::isIntEqualitySetCC(CCVal))
20348 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
20349 N->getValueType(0), Val, Cond.getOperand(0));
20350 }
20351 return SDValue();
20352 }
20353 case RISCVISD::SELECT_CC: {
20354 // Transform
20355 SDValue LHS = N->getOperand(0);
20356 SDValue RHS = N->getOperand(1);
20357 SDValue CC = N->getOperand(2);
20358 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
20359 SDValue TrueV = N->getOperand(3);
20360 SDValue FalseV = N->getOperand(4);
20361 SDLoc DL(N);
20362 EVT VT = N->getValueType(0);
20363
20364 // If the True and False values are the same, we don't need a select_cc.
20365 if (TrueV == FalseV)
20366 return TrueV;
20367
20368 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
20369 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
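 // For example (illustrative): on RV64, (select (x < 0), 3, 7) becomes
 // ((x >> 63) & (3 - 7)) + 7: the arithmetic shift gives all-ones when x is
 // negative (selecting 3) and zero otherwise (selecting 7).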
20370 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
20371 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
20372 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
20373 if (CCVal == ISD::CondCode::SETGE)
20374 std::swap(TrueV, FalseV);
20375
20376 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
20377 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
20378 // Only handle simm12; if it is not in this range, it can be considered as
20379 // a register.
20380 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
20381 isInt<12>(TrueSImm - FalseSImm)) {
20382 SDValue SRA =
20383 DAG.getNode(ISD::SRA, DL, VT, LHS,
20384 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
20385 SDValue AND =
20386 DAG.getNode(ISD::AND, DL, VT, SRA,
20387 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
20388 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
20389 }
20390
20391 if (CCVal == ISD::CondCode::SETGE)
20392 std::swap(TrueV, FalseV);
20393 }
20394
20395 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20396 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
20397 {LHS, RHS, CC, TrueV, FalseV});
20398
20399 if (!Subtarget.hasConditionalMoveFusion()) {
20400 // (select c, -1, y) -> -c | y
20401 if (isAllOnesConstant(TrueV)) {
20402 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20403 SDValue Neg = DAG.getNegative(C, DL, VT);
20404 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
20405 }
20406 // (select c, y, -1) -> -!c | y
20407 if (isAllOnesConstant(FalseV)) {
20408 SDValue C =
20409 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20410 SDValue Neg = DAG.getNegative(C, DL, VT);
20411 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
20412 }
20413
20414 // (select c, 0, y) -> -!c & y
20415 if (isNullConstant(TrueV)) {
20416 SDValue C =
20417 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20418 SDValue Neg = DAG.getNegative(C, DL, VT);
20419 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
20420 }
20421 // (select c, y, 0) -> -c & y
20422 if (isNullConstant(FalseV)) {
20423 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20424 SDValue Neg = DAG.getNegative(C, DL, VT);
20425 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
20426 }
20427 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
20428 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
20429 if (((isOneConstant(FalseV) && LHS == TrueV &&
20430 CCVal == ISD::CondCode::SETNE) ||
20431 (isOneConstant(TrueV) && LHS == FalseV &&
20432 CCVal == ISD::CondCode::SETEQ)) &&
20433 isNullConstant(RHS)) {
20434 // freeze it to be safe.
20435 LHS = DAG.getFreeze(LHS);
20436 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
20437 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
20438 }
20439 }
20440
20441 // If both true/false are an xor with 1, pull through the select.
20442 // This can occur after op legalization if both operands are setccs that
20443 // require an xor to invert.
20444 // FIXME: Generalize to other binary ops with identical operand?
20445 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
20446 TrueV.getOperand(1) == FalseV.getOperand(1) &&
20447 isOneConstant(TrueV.getOperand(1)) &&
20448 TrueV.hasOneUse() && FalseV.hasOneUse()) {
20449 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
20450 TrueV.getOperand(0), FalseV.getOperand(0));
20451 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
20452 }
20453
20454 return SDValue();
20455 }
20456 case RISCVISD::BR_CC: {
20457 SDValue LHS = N->getOperand(1);
20458 SDValue RHS = N->getOperand(2);
20459 SDValue CC = N->getOperand(3);
20460 SDLoc DL(N);
20461
20462 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20463 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
20464 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
20465
20466 return SDValue();
20467 }
20468 case ISD::BITREVERSE:
20469 return performBITREVERSECombine(N, DAG, Subtarget);
20470 case ISD::FP_TO_SINT:
20471 case ISD::FP_TO_UINT:
20472 return performFP_TO_INTCombine(N, DCI, Subtarget);
20473 case ISD::FP_TO_SINT_SAT:
20474 case ISD::FP_TO_UINT_SAT:
20475 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
20476 case ISD::FCOPYSIGN: {
20477 EVT VT = N->getValueType(0);
20478 if (!VT.isVector())
20479 break;
20480 // There is a form of VFSGNJ which injects the negated sign of its second
20481 // operand. Try and bubble any FNEG up after the extend/round to produce
20482 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
20483 // TRUNC=1.
20484 SDValue In2 = N->getOperand(1);
20485 // Avoid cases where the extend/round has multiple uses, as duplicating
20486 // those is typically more expensive than removing a fneg.
20487 if (!In2.hasOneUse())
20488 break;
20489 if (In2.getOpcode() != ISD::FP_EXTEND &&
20490 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
20491 break;
20492 In2 = In2.getOperand(0);
20493 if (In2.getOpcode() != ISD::FNEG)
20494 break;
20495 SDLoc DL(N);
20496 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
20497 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
20498 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
20499 }
20500 case ISD::MGATHER: {
20501 const auto *MGN = cast<MaskedGatherSDNode>(N);
20502 const EVT VT = N->getValueType(0);
20503 SDValue Index = MGN->getIndex();
20504 SDValue ScaleOp = MGN->getScale();
20505 ISD::MemIndexType IndexType = MGN->getIndexType();
20506 assert(!MGN->isIndexScaled() &&
20507 "Scaled gather/scatter should not be formed");
20508
20509 SDLoc DL(N);
20510 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20511 return DAG.getMaskedGather(
20512 N->getVTList(), MGN->getMemoryVT(), DL,
20513 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20514 MGN->getBasePtr(), Index, ScaleOp},
20515 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20516
20517 if (narrowIndex(Index, IndexType, DAG))
20518 return DAG.getMaskedGather(
20519 N->getVTList(), MGN->getMemoryVT(), DL,
20520 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20521 MGN->getBasePtr(), Index, ScaleOp},
20522 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20523
20524 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
20525 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
20526 // The sequence will be XLenVT, not the type of Index. Tell
20527 // isSimpleVIDSequence this so we avoid overflow.
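 // For example (illustrative): an index build_vector of <8, 24, 40, 56> for
 // a v4i32 gather is a VID sequence with Addend 8 and StepNumerator 16, so
 // the gather becomes a strided VP load from base+8 with stride 16.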
20528 if (std::optional<VIDSequence> SimpleVID =
20529 isSimpleVIDSequence(Index, Subtarget.getXLen());
20530 SimpleVID && SimpleVID->StepDenominator == 1) {
20531 const int64_t StepNumerator = SimpleVID->StepNumerator;
20532 const int64_t Addend = SimpleVID->Addend;
20533
20534 // Note: We don't need to check alignment here since (by assumption
20535 // from the existence of the gather), our offsets must be sufficiently
20536 // aligned.
20537
20538 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
20539 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
20540 assert(IndexType == ISD::UNSIGNED_SCALED);
20541 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
20542 DAG.getSignedConstant(Addend, DL, PtrVT));
20543
20544 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
20545 VT.getVectorElementCount());
20546 SDValue StridedLoad = DAG.getStridedLoadVP(
20547 VT, DL, MGN->getChain(), BasePtr,
20548 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
20549 EVL, MGN->getMemOperand());
20550 SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad,
20551 MGN->getPassThru());
20552 return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)},
20553 DL);
20554 }
20555 }
20556
20557 SmallVector<int> ShuffleMask;
20558 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20559 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
20560 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
20561 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
20562 MGN->getMask(), DAG.getUNDEF(VT),
20563 MGN->getMemoryVT(), MGN->getMemOperand(),
20564 ISD::UNINDEXED, ISD::NON_EXTLOAD);
20565 SDValue Shuffle =
20566 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
20567 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
20568 }
20569
20570 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20571 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
20572 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
20573 SmallVector<SDValue> NewIndices;
20574 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
20575 NewIndices.push_back(Index.getOperand(i));
20576 EVT IndexVT = Index.getValueType()
20577 .getHalfNumVectorElementsVT(*DAG.getContext());
20578 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
20579
20580 unsigned ElementSize = VT.getScalarStoreSize();
20581 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
20582 auto EltCnt = VT.getVectorElementCount();
20583 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
20584 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
20585 EltCnt.divideCoefficientBy(2));
20586 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
20587 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
20588 EltCnt.divideCoefficientBy(2));
20589 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
20590
20591 SDValue Gather =
20592 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
20593 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
20594 Index, ScaleOp},
20595 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
20596 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
20597 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
20598 }
20599 break;
20600 }
20601 case ISD::MSCATTER: {
20602 const auto *MSN = cast<MaskedScatterSDNode>(N);
20603 SDValue Index = MSN->getIndex();
20604 SDValue ScaleOp = MSN->getScale();
20605 ISD::MemIndexType IndexType = MSN->getIndexType();
20606 assert(!MSN->isIndexScaled() &&
20607 "Scaled gather/scatter should not be formed");
20608
20609 SDLoc DL(N);
20610 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20611 return DAG.getMaskedScatter(
20612 N->getVTList(), MSN->getMemoryVT(), DL,
20613 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20614 Index, ScaleOp},
20615 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20616
20617 if (narrowIndex(Index, IndexType, DAG))
20618 return DAG.getMaskedScatter(
20619 N->getVTList(), MSN->getMemoryVT(), DL,
20620 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20621 Index, ScaleOp},
20622 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20623
20624 EVT VT = MSN->getValue()->getValueType(0);
20625 SmallVector<int> ShuffleMask;
20626 if (!MSN->isTruncatingStore() &&
20627 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
20628 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
20629 DAG.getUNDEF(VT), ShuffleMask);
20630 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
20631 DAG.getUNDEF(XLenVT), MSN->getMask(),
20632 MSN->getMemoryVT(), MSN->getMemOperand(),
20633 ISD::UNINDEXED, false);
20634 }
20635 break;
20636 }
20637 case ISD::VP_GATHER: {
20638 const auto *VPGN = cast<VPGatherSDNode>(N);
20639 SDValue Index = VPGN->getIndex();
20640 SDValue ScaleOp = VPGN->getScale();
20641 ISD::MemIndexType IndexType = VPGN->getIndexType();
20642 assert(!VPGN->isIndexScaled() &&
20643 "Scaled gather/scatter should not be formed");
20644
20645 SDLoc DL(N);
20646 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20647 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20648 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20649 ScaleOp, VPGN->getMask(),
20650 VPGN->getVectorLength()},
20651 VPGN->getMemOperand(), IndexType);
20652
20653 if (narrowIndex(Index, IndexType, DAG))
20654 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20655 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20656 ScaleOp, VPGN->getMask(),
20657 VPGN->getVectorLength()},
20658 VPGN->getMemOperand(), IndexType);
20659
20660 break;
20661 }
20662 case ISD::VP_SCATTER: {
20663 const auto *VPSN = cast<VPScatterSDNode>(N);
20664 SDValue Index = VPSN->getIndex();
20665 SDValue ScaleOp = VPSN->getScale();
20666 ISD::MemIndexType IndexType = VPSN->getIndexType();
20667 assert(!VPSN->isIndexScaled() &&
20668 "Scaled gather/scatter should not be formed");
20669
20670 SDLoc DL(N);
20671 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20672 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20673 {VPSN->getChain(), VPSN->getValue(),
20674 VPSN->getBasePtr(), Index, ScaleOp,
20675 VPSN->getMask(), VPSN->getVectorLength()},
20676 VPSN->getMemOperand(), IndexType);
20677
20678 if (narrowIndex(Index, IndexType, DAG))
20679 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20680 {VPSN->getChain(), VPSN->getValue(),
20681 VPSN->getBasePtr(), Index, ScaleOp,
20682 VPSN->getMask(), VPSN->getVectorLength()},
20683 VPSN->getMemOperand(), IndexType);
20684 break;
20685 }
20686 case RISCVISD::SHL_VL:
20687 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20688 return V;
20689 [[fallthrough]];
20690 case RISCVISD::SRA_VL:
20691 case RISCVISD::SRL_VL: {
20692 SDValue ShAmt = N->getOperand(1);
20693 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20694 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20695 SDLoc DL(N);
20696 SDValue VL = N->getOperand(4);
20697 EVT VT = N->getValueType(0);
20698 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20699 ShAmt.getOperand(1), VL);
20700 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
20701 N->getOperand(2), N->getOperand(3), N->getOperand(4));
20702 }
20703 break;
20704 }
20705 case ISD::SRA:
20706 if (SDValue V = performSRACombine(N, DAG, Subtarget))
20707 return V;
20708 [[fallthrough]];
20709 case ISD::SRL:
20710 case ISD::SHL: {
20711 if (N->getOpcode() == ISD::SHL) {
20712 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20713 return V;
20714 }
20715 SDValue ShAmt = N->getOperand(1);
20716 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20717 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20718 SDLoc DL(N);
20719 EVT VT = N->getValueType(0);
20720 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20721 ShAmt.getOperand(1),
20722 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
20723 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
20724 }
20725 break;
20726 }
20727 case RISCVISD::ADD_VL:
20728 if (SDValue V = simplifyOp_VL(N))
20729 return V;
20730 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20731 return V;
20732 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20733 return V;
20734 return combineToVWMACC(N, DAG, Subtarget);
20735 case RISCVISD::VWADD_W_VL:
20736 case RISCVISD::VWADDU_W_VL:
20737 case RISCVISD::VWSUB_W_VL:
20738 case RISCVISD::VWSUBU_W_VL:
20739 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
20740 case RISCVISD::OR_VL:
20741 case RISCVISD::SUB_VL:
20742 case RISCVISD::MUL_VL:
20743 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20744 case RISCVISD::VFMADD_VL:
20745 case RISCVISD::VFNMADD_VL:
20746 case RISCVISD::VFMSUB_VL:
20747 case RISCVISD::VFNMSUB_VL:
20748 case RISCVISD::STRICT_VFMADD_VL:
20749 case RISCVISD::STRICT_VFNMADD_VL:
20750 case RISCVISD::STRICT_VFMSUB_VL:
20751 case RISCVISD::STRICT_VFNMSUB_VL:
20752 return performVFMADD_VLCombine(N, DCI, Subtarget);
20753 case RISCVISD::FADD_VL:
20754 case RISCVISD::FSUB_VL:
20755 case RISCVISD::FMUL_VL:
20756 case RISCVISD::VFWADD_W_VL:
20757 case RISCVISD::VFWSUB_W_VL:
20758 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20759 case ISD::LOAD:
20760 case ISD::STORE: {
20761 if (DCI.isAfterLegalizeDAG())
20762 if (SDValue V = performMemPairCombine(N, DCI))
20763 return V;
20764
20765 if (N->getOpcode() != ISD::STORE)
20766 break;
20767
20768 auto *Store = cast<StoreSDNode>(N);
20769 SDValue Chain = Store->getChain();
20770 EVT MemVT = Store->getMemoryVT();
20771 SDValue Val = Store->getValue();
20772 SDLoc DL(N);
20773
20774 bool IsScalarizable =
20775 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
20776 Store->isSimple() &&
20777 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
20778 isPowerOf2_64(MemVT.getSizeInBits()) &&
20779 MemVT.getSizeInBits() <= Subtarget.getXLen();
20780
20781 // If sufficiently aligned we can scalarize stores of constant vectors of
20782 // any power-of-two size up to XLen bits, provided that they aren't too
20783 // expensive to materialize.
20784 // vsetivli zero, 2, e8, m1, ta, ma
20785 // vmv.v.i v8, 4
20786 // vse64.v v8, (a0)
20787 // ->
20788 // li a1, 1028
20789 // sh a1, 0(a0)
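 // (Illustrative: the two i8 elements of value 4 pack into the i16 constant
 // 0x0404 = 1028, which a single li can materialise.)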
20790 if (DCI.isBeforeLegalize() && IsScalarizable &&
20791 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
20792 // Get the constant vector bits
20793 APInt NewC(Val.getValueSizeInBits(), 0);
20794 uint64_t EltSize = Val.getScalarValueSizeInBits();
20795 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
20796 if (Val.getOperand(i).isUndef())
20797 continue;
20798 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
20799 i * EltSize);
20800 }
20801 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20802
20803 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
20804 true) <= 2 &&
20805 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20806 NewVT, *Store->getMemOperand())) {
20807 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
20808 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
20809 Store->getPointerInfo(), Store->getBaseAlign(),
20810 Store->getMemOperand()->getFlags());
20811 }
20812 }
20813
20814 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
20815 // vsetivli zero, 2, e16, m1, ta, ma
20816 // vle16.v v8, (a0)
20817 // vse16.v v8, (a1)
20818 if (auto *L = dyn_cast<LoadSDNode>(Val);
20819 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
20820 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
20821 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
20822 L->getMemoryVT() == MemVT) {
20823 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20824 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20825 NewVT, *Store->getMemOperand()) &&
20826 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20827 NewVT, *L->getMemOperand())) {
20828 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
20829 L->getPointerInfo(), L->getBaseAlign(),
20830 L->getMemOperand()->getFlags());
20831 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
20832 Store->getPointerInfo(), Store->getBaseAlign(),
20833 Store->getMemOperand()->getFlags());
20834 }
20835 }
20836
20837 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
20838 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
20839 // any illegal types.
20840 if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
20841 (DCI.isAfterLegalizeDAG() &&
20842 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20843 isNullConstant(Val.getOperand(1)))) &&
20844 Val.hasOneUse()) {
20845 SDValue Src = Val.getOperand(0);
20846 MVT VecVT = Src.getSimpleValueType();
20847 // VecVT should be scalable and memory VT should match the element type.
20848 if (!Store->isIndexed() && VecVT.isScalableVector() &&
20849 MemVT == VecVT.getVectorElementType()) {
20850 SDLoc DL(N);
20851 MVT MaskVT = getMaskTypeFor(VecVT);
20852 return DAG.getStoreVP(
20853 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
20854 DAG.getConstant(1, DL, MaskVT),
20855 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
20856 Store->getMemOperand(), Store->getAddressingMode(),
20857 Store->isTruncatingStore(), /*IsCompress*/ false);
20858 }
20859 }
20860
20861 break;
20862 }
20863 case ISD::SPLAT_VECTOR: {
20864 EVT VT = N->getValueType(0);
20865 // Only perform this combine on legal MVT types.
20866 if (!isTypeLegal(VT))
20867 break;
20868 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
20869 DAG, Subtarget))
20870 return Gather;
20871 break;
20872 }
20873 case ISD::BUILD_VECTOR:
20874 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
20875 return V;
20876 break;
20877 case ISD::CONCAT_VECTORS:
20878 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
20879 return V;
20880 break;
20881 case ISD::VECTOR_SHUFFLE:
20882 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
20883 return V;
20884 break;
20885 case ISD::INSERT_VECTOR_ELT:
20886 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
20887 return V;
20888 break;
20889 case RISCVISD::VFMV_V_F_VL: {
20890 const MVT VT = N->getSimpleValueType(0);
20891 SDValue Passthru = N->getOperand(0);
20892 SDValue Scalar = N->getOperand(1);
20893 SDValue VL = N->getOperand(2);
20894
20895 // If VL is 1, we can use vfmv.s.f.
20896 if (isOneConstant(VL))
20897 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
20898 break;
20899 }
20900 case RISCVISD::VMV_V_X_VL: {
20901 const MVT VT = N->getSimpleValueType(0);
20902 SDValue Passthru = N->getOperand(0);
20903 SDValue Scalar = N->getOperand(1);
20904 SDValue VL = N->getOperand(2);
20905
20906 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
20907 // scalar input.
20908 unsigned ScalarSize = Scalar.getValueSizeInBits();
20909 unsigned EltWidth = VT.getScalarSizeInBits();
20910 if (ScalarSize > EltWidth && Passthru.isUndef())
20911 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
20912 return SDValue(N, 0);
20913
20914 // If VL is 1 and the scalar value won't benefit from immediate, we can
20915 // use vmv.s.x.
20916 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
20917 if (isOneConstant(VL) &&
20918 (!Const || Const->isZero() ||
20919 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
20920 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
20921
20922 break;
20923 }
20924 case RISCVISD::VFMV_S_F_VL: {
20925 SDValue Src = N->getOperand(1);
20926 // Try to remove vector->scalar->vector if the scalar->vector is inserting
20927 // into an undef vector.
20928 // TODO: Could use a vslide or vmv.v.v for non-undef.
20929 if (N->getOperand(0).isUndef() &&
20930 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20931 isNullConstant(Src.getOperand(1)) &&
20932 Src.getOperand(0).getValueType().isScalableVector()) {
20933 EVT VT = N->getValueType(0);
20934 SDValue EVSrc = Src.getOperand(0);
20935 EVT EVSrcVT = EVSrc.getValueType();
20937 // Widths match, just return the original vector.
20938 if (EVSrcVT == VT)
20939 return EVSrc;
20940 SDLoc DL(N);
20941 // The width is narrower, so use insert_subvector.
20942 if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
20943 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
20944 EVSrc,
20945 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
20946 }
20947 // The width is wider, so use extract_subvector.
20948 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
20949 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
20950 }
20951 [[fallthrough]];
20952 }
20953 case RISCVISD::VMV_S_X_VL: {
20954 const MVT VT = N->getSimpleValueType(0);
20955 SDValue Passthru = N->getOperand(0);
20956 SDValue Scalar = N->getOperand(1);
20957 SDValue VL = N->getOperand(2);
20958
20959 // The vmv.s.x instruction copies the scalar integer register to element 0
20960 // of the destination vector register. If SEW < XLEN, the least-significant
20961 // bits are copied and the upper XLEN-SEW bits are ignored.
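 // For example (illustrative): inserting an i8 scalar on RV64 demands only
 // the low 8 bits of the GPR, so SimplifyDemandedBits below may strip any
 // masking or extension of the upper 56 bits.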
20962 unsigned ScalarSize = Scalar.getValueSizeInBits();
20963 unsigned EltWidth = VT.getScalarSizeInBits();
20964 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
20965 return SDValue(N, 0);
20966
20967 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
20968 Scalar.getOperand(0).getValueType() == N->getValueType(0))
20969 return Scalar.getOperand(0);
20970
20971 // Use M1 or smaller to avoid over-constraining register allocation.
20972 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
20973 if (M1VT.bitsLT(VT)) {
20974 SDValue M1Passthru = DAG.getExtractSubvector(DL, M1VT, Passthru, 0);
20975 SDValue Result =
20976 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
20977 Result = DAG.getInsertSubvector(DL, Passthru, Result, 0);
20978 return Result;
20979 }
20980
20981 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
20982 // higher would involve overly constraining the register allocator for
20983 // no purpose.
20984 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
20985 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
20986 VT.bitsLE(RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
20987 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
20988
20989 break;
20990 }
20991 case RISCVISD::VMV_X_S: {
20992 SDValue Vec = N->getOperand(0);
20993 MVT VecVT = N->getOperand(0).getSimpleValueType();
20994 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
20995 if (M1VT.bitsLT(VecVT)) {
20996 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, 0);
20997 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
20998 }
20999 break;
21000 }
21001 case ISD::INTRINSIC_VOID:
21002 case ISD::INTRINSIC_W_CHAIN:
21003 case ISD::INTRINSIC_WO_CHAIN: {
21004 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
21005 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
21006 switch (IntNo) {
21007 // By default we do not combine any intrinsic.
21008 default:
21009 return SDValue();
21010 case Intrinsic::riscv_vcpop:
21011 case Intrinsic::riscv_vcpop_mask:
21012 case Intrinsic::riscv_vfirst:
21013 case Intrinsic::riscv_vfirst_mask: {
21014 SDValue VL = N->getOperand(2);
21015 if (IntNo == Intrinsic::riscv_vcpop_mask ||
21016 IntNo == Intrinsic::riscv_vfirst_mask)
21017 VL = N->getOperand(3);
21018 if (!isNullConstant(VL))
21019 return SDValue();
21020 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
21021 SDLoc DL(N);
21022 EVT VT = N->getValueType(0);
21023 if (IntNo == Intrinsic::riscv_vfirst ||
21024 IntNo == Intrinsic::riscv_vfirst_mask)
21025 return DAG.getAllOnesConstant(DL, VT);
21026 return DAG.getConstant(0, DL, VT);
21027 }
21028 case Intrinsic::riscv_vsseg2_mask:
21029 case Intrinsic::riscv_vsseg3_mask:
21030 case Intrinsic::riscv_vsseg4_mask:
21031 case Intrinsic::riscv_vsseg5_mask:
21032 case Intrinsic::riscv_vsseg6_mask:
21033 case Intrinsic::riscv_vsseg7_mask:
21034 case Intrinsic::riscv_vsseg8_mask: {
21035 SDValue Tuple = N->getOperand(2);
21036 unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21037
21038 if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
21039 Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
21040 !Tuple.getOperand(0).isUndef())
21041 return SDValue();
21042
21043 SDValue Val = Tuple.getOperand(1);
21044 unsigned Idx = Tuple.getConstantOperandVal(2);
21045
21046 unsigned SEW = Val.getValueType().getScalarSizeInBits();
21047 assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
21048 "Type mismatch without bitcast?");
21049 unsigned Stride = SEW / 8 * NF;
21050 unsigned Offset = SEW / 8 * Idx;
21051
21052 SDValue Ops[] = {
21053 /*Chain=*/N->getOperand(0),
21054 /*IntID=*/
21055 DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
21056 /*StoredVal=*/Val,
21057 /*Ptr=*/
21058 DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
21059 DAG.getConstant(Offset, DL, XLenVT)),
21060 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21061 /*Mask=*/N->getOperand(4),
21062 /*VL=*/N->getOperand(5)};
21063
21064 auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
21065 // Match getTgtMemIntrinsic for non-unit stride case
21066 EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
21067 MachineFunction &MF = DAG.getMachineFunction();
21068 MachineMemOperand *MMO = MF.getMachineMemOperand(
21069 OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21070
21071 SDVTList VTs = DAG.getVTList(MVT::Other);
21072 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
21073 MMO);
21074 }
21075 }
21076 }
21077 case ISD::EXPERIMENTAL_VP_REVERSE:
21078 return performVP_REVERSECombine(N, DAG, Subtarget);
21079 case ISD::VP_STORE:
21080 return performVP_STORECombine(N, DAG, Subtarget);
21081 case ISD::BITCAST: {
21082 assert(Subtarget.useRVVForFixedLengthVectors());
21083 SDValue N0 = N->getOperand(0);
21084 EVT VT = N->getValueType(0);
21085 EVT SrcVT = N0.getValueType();
21086 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
21087 unsigned NF = VT.getRISCVVectorTupleNumFields();
21088 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
21089 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
21090 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
21091
21092 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
21093
21094 SDValue Result = DAG.getUNDEF(VT);
21095 for (unsigned i = 0; i < NF; ++i)
21096 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
21097 DAG.getTargetConstant(i, DL, MVT::i32));
21098 return Result;
21099 }
21100 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
21101 // type, widen both sides to avoid a trip through memory.
21102 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
21103 VT.isScalarInteger()) {
21104 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
21105 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
21106 Ops[0] = N0;
21107 SDLoc DL(N);
21108 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
21109 N0 = DAG.getBitcast(MVT::i8, N0);
21110 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
21111 }
21112
21113 return SDValue();
21114 }
21115 case ISD::VECREDUCE_ADD:
21116 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, *this))
21117 return V;
21118 [[fallthrough]];
21119 case ISD::CTPOP:
21120 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
21121 return V;
21122 break;
21123 case RISCVISD::VRGATHER_VX_VL: {
21124 // Note this assumes that out of bounds indices produce poison
21125 // and can thus be replaced without having to prove them in bounds.
21126 EVT VT = N->getValueType(0);
21127 SDValue Src = N->getOperand(0);
21128 SDValue Idx = N->getOperand(1);
21129 SDValue Passthru = N->getOperand(2);
21130 SDValue VL = N->getOperand(4);
21131
21132 // Warning: Unlike most cases where we strip an insert_subvector, this one
21133 // does not require the first operand to be undef.
21134 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
21135 isNullConstant(Src.getOperand(2)))
21136 Src = Src.getOperand(1);
21137
21138 switch (Src.getOpcode()) {
21139 default:
21140 break;
21141 case RISCVISD::VMV_V_X_VL:
21142 case RISCVISD::VFMV_V_F_VL:
21143 // Drop a redundant vrgather_vx.
21144 // TODO: Remove the type restriction if we find a motivating
21145 // test case?
21146 if (Passthru.isUndef() && VL == Src.getOperand(2) &&
21147 Src.getValueType() == VT)
21148 return Src;
21149 break;
21150 case RISCVISD::VMV_S_X_VL:
21151 case RISCVISD::VFMV_S_F_VL:
21152 // If this use only demands lane zero from the source vmv.s.x, and
21153 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
21154 // a vmv.v.x. Note that there can be other uses of the original
21155 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
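      // Illustrative example (not in the original source): with an undef
      // passthru, a vrgather.vx with index 0 whose source is (vmv.s.x undef, a0)
      // reads only lane 0 and is therefore equivalent to vmv.v.x vd, a0.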
21156 if (isNullConstant(Idx) && Passthru.isUndef() &&
21157 VL == Src.getOperand(2)) {
21158 unsigned Opc =
21159 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
21160 return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
21161 VL);
21162 }
21163 break;
21164 }
21165 break;
21166 }
21167 case RISCVISD::TUPLE_EXTRACT: {
21168 EVT VT = N->getValueType(0);
21169 SDValue Tuple = N->getOperand(0);
21170 unsigned Idx = N->getConstantOperandVal(1);
21171 if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
21172 break;
21173
21174 unsigned NF = 0;
21175 switch (Tuple.getConstantOperandVal(1)) {
21176 default:
21177 break;
21178 case Intrinsic::riscv_vlseg2_mask:
21179 case Intrinsic::riscv_vlseg3_mask:
21180 case Intrinsic::riscv_vlseg4_mask:
21181 case Intrinsic::riscv_vlseg5_mask:
21182 case Intrinsic::riscv_vlseg6_mask:
21183 case Intrinsic::riscv_vlseg7_mask:
21184 case Intrinsic::riscv_vlseg8_mask:
21185 NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21186 break;
21187 }
21188
21189 if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
21190 break;
21191
21192 unsigned SEW = VT.getScalarSizeInBits();
21193 assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
21194 "Type mismatch without bitcast?");
21195 unsigned Stride = SEW / 8 * NF;
21196 unsigned Offset = SEW / 8 * Idx;
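    // Worked example (illustrative, not in the source): extracting field 2 of a
    // vlseg4 result with 32-bit elements (SEW = 32, NF = 4) becomes a strided
    // load with Stride = 4 * 4 = 16 bytes and Offset = 4 * 2 = 8 bytes.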
21197
21198 SDValue Ops[] = {
21199 /*Chain=*/Tuple.getOperand(0),
21200 /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
21201 /*Passthru=*/Tuple.getOperand(2),
21202 /*Ptr=*/
21203 DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
21204 DAG.getConstant(Offset, DL, XLenVT)),
21205 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21206 /*Mask=*/Tuple.getOperand(4),
21207 /*VL=*/Tuple.getOperand(5),
21208 /*Policy=*/Tuple.getOperand(6)};
21209
21210 auto *TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
21211 // Match getTgtMemIntrinsic for non-unit stride case
21212 EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
21213     MachineFunction &MF = DAG.getMachineFunction();
21214     MachineMemOperand *MMO = MF.getMachineMemOperand(
21215         TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21216
21217 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
21218     SDValue Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
21219                                              Ops, MemVT, MMO);
21220 DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
21221 return Result.getValue(0);
21222 }
21223 case RISCVISD::TUPLE_INSERT: {
21224 // tuple_insert tuple, undef, idx -> tuple
21225 if (N->getOperand(1).isUndef())
21226 return N->getOperand(0);
21227 break;
21228 }
21229 case RISCVISD::VSLIDE1UP_VL:
21230 case RISCVISD::VFSLIDE1UP_VL: {
21231 using namespace SDPatternMatch;
21232 SDValue SrcVec;
21233 SDLoc DL(N);
21234 MVT VT = N->getSimpleValueType(0);
21235 // If the scalar we're sliding in was extracted from the first element of a
21236 // vector, we can use that vector as the passthru in a normal slideup of 1.
21236     // This saves us an extract_element instruction (e.g. vfmv.f.s or vmv.x.s).
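    // Illustrative example (not in the source): if the scalar is (vfmv.f.s v10),
    // the slide1up becomes a vslideup of 1 with v10 as the passthru, so v10's
    // element 0 supplies lane 0 and the scalar move is no longer needed.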
21238 if (!N->getOperand(0).isUndef() ||
21239 !sd_match(N->getOperand(2),
21240 m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
21241 m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
21242 break;
21243
21244 MVT SrcVecVT = SrcVec.getSimpleValueType();
21245 if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
21246 break;
21247 // Adapt the value type of source vector.
21248 if (SrcVecVT.isFixedLengthVector()) {
21249 SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
21250 SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
21251 }
21253 SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
21254 else
21255 SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
21256
21257 return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
21258 DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
21259 N->getOperand(4));
21260 }
21261 }
21262
21263 return SDValue();
21264}
21265
21266 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
21267     EVT XVT, unsigned KeptBits) const {
21268   // For vectors, we don't have a preference.
21269 if (XVT.isVector())
21270 return false;
21271
21272 if (XVT != MVT::i32 && XVT != MVT::i64)
21273 return false;
21274
21275 // We can use sext.w for RV64 or an srai 31 on RV32.
21276 if (KeptBits == 32 || KeptBits == 64)
21277 return true;
21278
21279 // With Zbb we can use sext.h/sext.b.
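  // Illustrative example (not in the source): with Zbb, a "does x fit in i16?"
  // check can be lowered as sext.h t0, a0; beq t0, a0, ... instead of a pair of
  // shifts.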
21280 return Subtarget.hasStdExtZbb() &&
21281 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
21282 KeptBits == 16);
21283}
21284
21285 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
21286     const SDNode *N, CombineLevel Level) const {
21287 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
21288 N->getOpcode() == ISD::SRL) &&
21289 "Expected shift op");
21290
21291 // The following folds are only desirable if `(OP _, c1 << c2)` can be
21292 // materialised in fewer instructions than `(OP _, c1)`:
21293 //
21294 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
21295 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
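  // Worked example (illustrative, not in the source): with c1 = 2047 and c2 = 4,
  // c1 fits in a simm12 but c1 << c2 = 32752 does not, so the fold would force
  // an extra constant materialisation and is rejected below.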
21296 SDValue N0 = N->getOperand(0);
21297 EVT Ty = N0.getValueType();
21298
21299   // Loads and stores can fold a constant offset themselves, so if the add is
21300   // also used by a load/store, the folding optimization above can still be
21301   // performed.
21302 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
21303 for (SDNode *Use : X->users()) {
21304 // This use is the one we're on right now. Skip it
21305 if (Use == User || Use->getOpcode() == ISD::SELECT)
21306 continue;
21307       if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
21308         return false;
21309 }
21310 return true;
21311 };
21312
21313 if (Ty.isScalarInteger() &&
21314 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
21315 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
21316 return isUsedByLdSt(N0.getNode(), N);
21317
21318 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
21319 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
21320
21321 // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
21322 if (C2 && Subtarget.hasShlAdd(C2->getZExtValue()) && N->hasOneUse() &&
21323 N->user_begin()->getOpcode() == ISD::ADD &&
21324 !isUsedByLdSt(*N->user_begin(), nullptr) &&
21325 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
21326 return false;
21327
21328 if (C1 && C2) {
21329 const APInt &C1Int = C1->getAPIntValue();
21330 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
21331
21332 // We can materialise `c1 << c2` into an add immediate, so it's "free",
21333 // and the combine should happen, to potentially allow further combines
21334 // later.
21335 if (ShiftedC1Int.getSignificantBits() <= 64 &&
21336 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
21337 return true;
21338
21339 // We can materialise `c1` in an add immediate, so it's "free", and the
21340 // combine should be prevented.
21341 if (C1Int.getSignificantBits() <= 64 &&
21342           isLegalAddImmediate(C1Int.getSExtValue()))
21343         return false;
21344
21345 // Neither constant will fit into an immediate, so find materialisation
21346 // costs.
21347 int C1Cost =
21348 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
21349 /*CompressionCost*/ true);
21350 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
21351 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
21352 /*CompressionCost*/ true);
21353
21354 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
21355 // combine should be prevented.
21356 if (C1Cost < ShiftedC1Cost)
21357 return false;
21358 }
21359 }
21360
21361 if (!N0->hasOneUse())
21362 return false;
21363
21364 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
21365 N0->getOperand(0)->getOpcode() == ISD::ADD &&
21366 !N0->getOperand(0)->hasOneUse())
21367 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
21368
21369 return true;
21370}
21371
21372 bool RISCVTargetLowering::targetShrinkDemandedConstant(
21373     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
21374 TargetLoweringOpt &TLO) const {
21375 // Delay this optimization as late as possible.
21376 if (!TLO.LegalOps)
21377 return false;
21378
21379 EVT VT = Op.getValueType();
21380 if (VT.isVector())
21381 return false;
21382
21383 unsigned Opcode = Op.getOpcode();
21384 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
21385 return false;
21386
21387 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
21388 if (!C)
21389 return false;
21390
21391 const APInt &Mask = C->getAPIntValue();
21392
21393 // Clear all non-demanded bits initially.
21394 APInt ShrunkMask = Mask & DemandedBits;
21395
21396 // Try to make a smaller immediate by setting undemanded bits.
21397
21398 APInt ExpandedMask = Mask | ~DemandedBits;
21399
21400 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
21401 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
21402 };
21403 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
21404 if (NewMask == Mask)
21405 return true;
21406 SDLoc DL(Op);
21407 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
21408 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
21409 Op.getOperand(0), NewC);
21410 return TLO.CombineTo(Op, NewOp);
21411 };
21412
21413 // If the shrunk mask fits in sign extended 12 bits, let the target
21414 // independent code apply it.
21415 if (ShrunkMask.isSignedIntN(12))
21416 return false;
21417
21418   // AND has a few special cases for zext.
21419 if (Opcode == ISD::AND) {
21420 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
21421 // otherwise use SLLI + SRLI.
21422 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
21423 if (IsLegalMask(NewMask))
21424 return UseMask(NewMask);
21425
21426 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
21427 if (VT == MVT::i64) {
21428 APInt NewMask = APInt(64, 0xffffffff);
21429 if (IsLegalMask(NewMask))
21430 return UseMask(NewMask);
21431 }
21432 }
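  // Illustrative example (not in the source): if only bits 4..11 of
  // (and x, 0xff0) are demanded, the mask can be widened to 0xffff, which
  // selects to zext.h (or slli+srli) instead of materialising 0xff0 in a
  // register.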
21433
21434 // For the remaining optimizations, we need to be able to make a negative
21435 // number through a combination of mask and undemanded bits.
21436 if (!ExpandedMask.isNegative())
21437 return false;
21438
21439   // What is the minimum number of bits needed to represent the negative number?
21440 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
21441
21442 // Try to make a 12 bit negative immediate. If that fails try to make a 32
21443 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
21444 // If we can't create a simm12, we shouldn't change opaque constants.
21445 APInt NewMask = ShrunkMask;
21446 if (MinSignedBits <= 12)
21447 NewMask.setBitsFrom(11);
21448 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
21449 NewMask.setBitsFrom(31);
21450 else
21451 return false;
21452
21453 // Check that our new mask is a subset of the demanded mask.
21454 assert(IsLegalMask(NewMask));
21455 return UseMask(NewMask);
21456}
21457
21458static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
21459 static const uint64_t GREVMasks[] = {
21460 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
21461 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
21462
21463 for (unsigned Stage = 0; Stage != 6; ++Stage) {
21464 unsigned Shift = 1 << Stage;
21465 if (ShAmt & Shift) {
21466 uint64_t Mask = GREVMasks[Stage];
21467 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
21468 if (IsGORC)
21469 Res |= x;
21470 x = Res;
21471 }
21472 }
21473
21474 return x;
21475}
21476
21477 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
21478                                                         KnownBits &Known,
21479 const APInt &DemandedElts,
21480 const SelectionDAG &DAG,
21481 unsigned Depth) const {
21482 unsigned BitWidth = Known.getBitWidth();
21483 unsigned Opc = Op.getOpcode();
21484   assert((Opc >= ISD::BUILTIN_OP_END ||
21485           Opc == ISD::INTRINSIC_WO_CHAIN ||
21486           Opc == ISD::INTRINSIC_W_CHAIN ||
21487           Opc == ISD::INTRINSIC_VOID) &&
21488          "Should use MaskedValueIsZero if you don't know whether Op"
21489          " is a target node!");
21490
21491 Known.resetAll();
21492 switch (Opc) {
21493 default: break;
21494 case RISCVISD::SELECT_CC: {
21495 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
21496 // If we don't know any bits, early out.
21497 if (Known.isUnknown())
21498 break;
21499 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
21500
21501 // Only known if known in both the LHS and RHS.
21502 Known = Known.intersectWith(Known2);
21503 break;
21504 }
21505 case RISCVISD::VCPOP_VL: {
21506 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
21507 Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
21508 break;
21509 }
21510 case RISCVISD::CZERO_EQZ:
21511 case RISCVISD::CZERO_NEZ:
21512 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21513 // Result is either all zero or operand 0. We can propagate zeros, but not
21514 // ones.
21515 Known.One.clearAllBits();
21516 break;
21517 case RISCVISD::REMUW: {
21518 KnownBits Known2;
21519 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21520 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21521 // We only care about the lower 32 bits.
21522 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
21523 // Restore the original width by sign extending.
21524 Known = Known.sext(BitWidth);
21525 break;
21526 }
21527 case RISCVISD::DIVUW: {
21528 KnownBits Known2;
21529 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21530 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21531 // We only care about the lower 32 bits.
21532 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
21533 // Restore the original width by sign extending.
21534 Known = Known.sext(BitWidth);
21535 break;
21536 }
21537 case RISCVISD::SLLW: {
21538 KnownBits Known2;
21539 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21540 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21541 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
21542 // Restore the original width by sign extending.
21543 Known = Known.sext(BitWidth);
21544 break;
21545 }
21546 case RISCVISD::SRLW: {
21547 KnownBits Known2;
21548 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21549 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21550 Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32));
21551 // Restore the original width by sign extending.
21552 Known = Known.sext(BitWidth);
21553 break;
21554 }
21555 case RISCVISD::SRAW: {
21556 KnownBits Known2;
21557 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21558 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21559 Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
21560 // Restore the original width by sign extending.
21561 Known = Known.sext(BitWidth);
21562 break;
21563 }
21564 case RISCVISD::CTZW: {
21565 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21566 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
21567 unsigned LowBits = llvm::bit_width(PossibleTZ);
21568 Known.Zero.setBitsFrom(LowBits);
21569 break;
21570 }
21571 case RISCVISD::CLZW: {
21572 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21573 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
21574 unsigned LowBits = llvm::bit_width(PossibleLZ);
21575 Known.Zero.setBitsFrom(LowBits);
21576 break;
21577 }
21578 case RISCVISD::BREV8:
21579 case RISCVISD::ORC_B: {
21580 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
21581 // control value of 7 is equivalent to brev8 and orc.b.
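    // Illustrative example (not in the source): orc.b turns every non-zero byte
    // into 0xff (0x00103000 -> 0x00ffff00), while brev8 reverses the bits within
    // each byte (0x01 -> 0x80).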
21582 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21583 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21584 // To compute zeros for ORC_B, we need to invert the value and invert it
21585 // back after. This inverting is harmless for BREV8.
21586 Known.Zero =
21587 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
21588 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
21589 break;
21590 }
21591 case RISCVISD::READ_VLENB: {
21592 // We can use the minimum and maximum VLEN values to bound VLENB. We
21593 // know VLEN must be a power of two.
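    // Illustrative example (not in the source): if VLEN is known to be in
    // [128, 512], VLENB is in [16, 64], so bits 0..3 and every bit above bit 6
    // of the result are known zero.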
21594 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
21595 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
21596 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
21597 Known.Zero.setLowBits(Log2_32(MinVLenB));
21598 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
21599 if (MaxVLenB == MinVLenB)
21600 Known.One.setBit(Log2_32(MinVLenB));
21601 break;
21602 }
21603 case RISCVISD::FCLASS: {
21604 // fclass will only set one of the low 10 bits.
21605 Known.Zero.setBitsFrom(10);
21606 break;
21607 }
21608   case ISD::INTRINSIC_WO_CHAIN:
21609   case ISD::INTRINSIC_W_CHAIN: {
21610     unsigned IntNo =
21611 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
21612 switch (IntNo) {
21613 default:
21614 // We can't do anything for most intrinsics.
21615 break;
21616 case Intrinsic::riscv_vsetvli:
21617 case Intrinsic::riscv_vsetvlimax: {
21618 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
21619 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
21620 RISCVVType::VLMUL VLMUL =
21621 static_cast<RISCVVType::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
21622 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
21623 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
21624 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
21625 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
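      // Worked example (illustrative, not in the source): with VLEN <= 512,
      // SEW = 32 and LMUL = 2, MaxVL = 512 / 32 * 2 = 32, so every bit above
      // bit 5 of the result is known zero.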
21626
21627       // The result of vsetvli must not be larger than AVL.
21628 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
21629 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
21630
21631 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
21632 if (BitWidth > KnownZeroFirstBit)
21633 Known.Zero.setBitsFrom(KnownZeroFirstBit);
21634 break;
21635 }
21636 }
21637 break;
21638 }
21639 }
21640}
21641
21642 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
21643     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21644 unsigned Depth) const {
21645 switch (Op.getOpcode()) {
21646 default:
21647 break;
21648 case RISCVISD::SELECT_CC: {
21649 unsigned Tmp =
21650 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
21651 if (Tmp == 1) return 1; // Early out.
21652 unsigned Tmp2 =
21653 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
21654 return std::min(Tmp, Tmp2);
21655 }
21656 case RISCVISD::CZERO_EQZ:
21657 case RISCVISD::CZERO_NEZ:
21658 // Output is either all zero or operand 0. We can propagate sign bit count
21659 // from operand 0.
21660 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21661 case RISCVISD::ABSW: {
21662 // We expand this at isel to negw+max. The result will have 33 sign bits
21663 // if the input has at least 33 sign bits.
21664 unsigned Tmp =
21665 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21666 if (Tmp < 33) return 1;
21667 return 33;
21668 }
21669 case RISCVISD::SRAW: {
21670 unsigned Tmp =
21671 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21672     // sraw produces at least 33 sign bits. If the input already has more than
21673     // 33 sign bits, sraw will preserve them.
21674 // TODO: A more precise answer could be calculated depending on known bits
21675 // in the shift amount.
21676 return std::max(Tmp, 33U);
21677 }
21678 case RISCVISD::SLLW:
21679 case RISCVISD::SRLW:
21680 case RISCVISD::DIVW:
21681 case RISCVISD::DIVUW:
21682 case RISCVISD::REMUW:
21683 case RISCVISD::ROLW:
21684 case RISCVISD::RORW:
21685 case RISCVISD::FCVT_W_RV64:
21686 case RISCVISD::FCVT_WU_RV64:
21687 case RISCVISD::STRICT_FCVT_W_RV64:
21688 case RISCVISD::STRICT_FCVT_WU_RV64:
21689 // TODO: As the result is sign-extended, this is conservatively correct.
21690 return 33;
21691 case RISCVISD::VMV_X_S: {
21692 // The number of sign bits of the scalar result is computed by obtaining the
21693 // element type of the input vector operand, subtracting its width from the
21694 // XLEN, and then adding one (sign bit within the element type). If the
21695 // element type is wider than XLen, the least-significant XLEN bits are
21696 // taken.
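    // Illustrative example (not in the source): vmv.x.s from a vector of i8
    // elements on RV64 yields at least 64 - 8 + 1 = 57 sign bits.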
21697 unsigned XLen = Subtarget.getXLen();
21698 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
21699 if (EltBits <= XLen)
21700 return XLen - EltBits + 1;
21701 break;
21702 }
21703   case ISD::INTRINSIC_W_CHAIN: {
21704     unsigned IntNo = Op.getConstantOperandVal(1);
21705 switch (IntNo) {
21706 default:
21707 break;
21708 case Intrinsic::riscv_masked_atomicrmw_xchg:
21709 case Intrinsic::riscv_masked_atomicrmw_add:
21710 case Intrinsic::riscv_masked_atomicrmw_sub:
21711 case Intrinsic::riscv_masked_atomicrmw_nand:
21712 case Intrinsic::riscv_masked_atomicrmw_max:
21713 case Intrinsic::riscv_masked_atomicrmw_min:
21714 case Intrinsic::riscv_masked_atomicrmw_umax:
21715 case Intrinsic::riscv_masked_atomicrmw_umin:
21716 case Intrinsic::riscv_masked_cmpxchg:
21717 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
21718 // narrow atomic operation. These are implemented using atomic
21719 // operations at the minimum supported atomicrmw/cmpxchg width whose
21720 // result is then sign extended to XLEN. With +A, the minimum width is
21721       // 32 for both RV64 and RV32.
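      // Illustrative example (not in the source): on RV64 the sign-extended i32
      // result therefore has at least 64 - 31 = 33 sign bits.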
21723 assert(Subtarget.hasStdExtA());
21724 return Op.getValueSizeInBits() - 31;
21725 }
21726 break;
21727 }
21728 }
21729
21730 return 1;
21731}
21732
21733 bool RISCVTargetLowering::SimplifyDemandedBitsForTargetNode(
21734     SDValue Op, const APInt &OriginalDemandedBits,
21735 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
21736 unsigned Depth) const {
21737 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
21738
21739 switch (Op.getOpcode()) {
21740 case RISCVISD::BREV8:
21741 case RISCVISD::ORC_B: {
21742 KnownBits Known2;
21743 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21744 // For BREV8, we need to do BREV8 on the demanded bits.
21745     // For ORC_B, any bit in the output demands all bits from the same byte.
21746 // So we need to do ORC_B on the demanded bits.
21747     APInt DemandedBits =
21748         APInt(BitWidth, computeGREVOrGORC(OriginalDemandedBits.getZExtValue(),
21749 7, IsGORC));
21750 if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits,
21751 OriginalDemandedElts, Known2, TLO, Depth + 1))
21752 return true;
21753
21754 // To compute zeros for ORC_B, we need to invert the value and invert it
21755 // back after. This inverting is harmless for BREV8.
21756 Known.Zero = ~computeGREVOrGORC(~Known2.Zero.getZExtValue(), 7, IsGORC);
21757 Known.One = computeGREVOrGORC(Known2.One.getZExtValue(), 7, IsGORC);
21758 return false;
21759 }
21760 }
21761
21762   return TargetLowering::SimplifyDemandedBitsForTargetNode(
21763       Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
21764}
21765
21766 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
21767     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21768 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
21769
21770 // TODO: Add more target nodes.
21771 switch (Op.getOpcode()) {
21772 case RISCVISD::SLLW:
21773 case RISCVISD::SRAW:
21774 case RISCVISD::SRLW:
21775 case RISCVISD::RORW:
21776 case RISCVISD::ROLW:
21777     // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
21778     // amount is in bounds.
21779 return false;
21780 case RISCVISD::SELECT_CC:
21781 // Integer comparisons cannot create poison.
21782 assert(Op.getOperand(0).getValueType().isInteger() &&
21783 "RISCVISD::SELECT_CC only compares integers");
21784 return false;
21785 }
21786   return TargetLowering::canCreateUndefOrPoisonForTargetNode(
21787       Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
21788}
21789
21790const Constant *
21791 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
21792   assert(Ld && "Unexpected null LoadSDNode");
21793 if (!ISD::isNormalLoad(Ld))
21794 return nullptr;
21795
21796 SDValue Ptr = Ld->getBasePtr();
21797
21798 // Only constant pools with no offset are supported.
21799 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
21800 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
21801 if (!CNode || CNode->isMachineConstantPoolEntry() ||
21802 CNode->getOffset() != 0)
21803 return nullptr;
21804
21805 return CNode;
21806 };
21807
21808 // Simple case, LLA.
21809 if (Ptr.getOpcode() == RISCVISD::LLA) {
21810 auto *CNode = GetSupportedConstantPool(Ptr.getOperand(0));
21811 if (!CNode || CNode->getTargetFlags() != 0)
21812 return nullptr;
21813
21814 return CNode->getConstVal();
21815 }
21816
21817 // Look for a HI and ADD_LO pair.
21818 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
21819 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
21820 return nullptr;
21821
21822 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
21823 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
21824
21825 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
21826 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
21827 return nullptr;
21828
21829 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
21830 return nullptr;
21831
21832 return CNodeLo->getConstVal();
21833}
21834
21835 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
21836                                                     MachineBasicBlock *BB) {
21837 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
21838
21839 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
21840 // Should the count have wrapped while it was being read, we need to try
21841 // again.
21842 // For example:
21843 // ```
21844 // read:
21845 // csrrs x3, counterh # load high word of counter
21846 // csrrs x2, counter # load low word of counter
21847 // csrrs x4, counterh # load high word of counter
21848 // bne x3, x4, read # check if high word reads match, otherwise try again
21849 // ```
21850
21851 MachineFunction &MF = *BB->getParent();
21852 const BasicBlock *LLVMBB = BB->getBasicBlock();
21853   MachineFunction::iterator It = ++BB->getIterator();
21854
21855 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
21856 MF.insert(It, LoopMBB);
21857
21858 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
21859 MF.insert(It, DoneMBB);
21860
21861 // Transfer the remainder of BB and its successor edges to DoneMBB.
21862 DoneMBB->splice(DoneMBB->begin(), BB,
21863 std::next(MachineBasicBlock::iterator(MI)), BB->end());
21864   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
21865
21866 BB->addSuccessor(LoopMBB);
21867
21869 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
21870 Register LoReg = MI.getOperand(0).getReg();
21871 Register HiReg = MI.getOperand(1).getReg();
21872 int64_t LoCounter = MI.getOperand(2).getImm();
21873 int64_t HiCounter = MI.getOperand(3).getImm();
21874 DebugLoc DL = MI.getDebugLoc();
21875
21877 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
21878 .addImm(HiCounter)
21879 .addReg(RISCV::X0);
21880 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
21881 .addImm(LoCounter)
21882 .addReg(RISCV::X0);
21883 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
21884 .addImm(HiCounter)
21885 .addReg(RISCV::X0);
21886
21887 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
21888 .addReg(HiReg)
21889 .addReg(ReadAgainReg)
21890 .addMBB(LoopMBB);
21891
21892 LoopMBB->addSuccessor(LoopMBB);
21893 LoopMBB->addSuccessor(DoneMBB);
21894
21895 MI.eraseFromParent();
21896
21897 return DoneMBB;
21898}
21899
21900 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
21901                                              MachineBasicBlock *BB,
21902                                              const RISCVSubtarget &Subtarget) {
21903 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
21904
21905 MachineFunction &MF = *BB->getParent();
21906 DebugLoc DL = MI.getDebugLoc();
21909 Register LoReg = MI.getOperand(0).getReg();
21910 Register HiReg = MI.getOperand(1).getReg();
21911 Register SrcReg = MI.getOperand(2).getReg();
21912
21913 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
21914 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
21915
21916 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
21917 RI, Register());
21918   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
21919   MachineMemOperand *MMOLo =
21920       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
21921   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
21922       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
21923 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
21924 .addFrameIndex(FI)
21925 .addImm(0)
21926 .addMemOperand(MMOLo);
21927 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
21928 .addFrameIndex(FI)
21929 .addImm(4)
21930 .addMemOperand(MMOHi);
21931 MI.eraseFromParent(); // The pseudo instruction is gone now.
21932 return BB;
21933}
21934
21935 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
21936                                                  MachineBasicBlock *BB,
21937                                                  const RISCVSubtarget &Subtarget) {
21938 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
21939 "Unexpected instruction");
21940
21941 MachineFunction &MF = *BB->getParent();
21942 DebugLoc DL = MI.getDebugLoc();
21945 Register DstReg = MI.getOperand(0).getReg();
21946 Register LoReg = MI.getOperand(1).getReg();
21947 Register HiReg = MI.getOperand(2).getReg();
21948
21949 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
21950 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
21951
21952   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
21953   MachineMemOperand *MMOLo =
21954       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
21955   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
21956       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
21957 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
21958 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
21959 .addFrameIndex(FI)
21960 .addImm(0)
21961 .addMemOperand(MMOLo);
21962 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
21963 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
21964 .addFrameIndex(FI)
21965 .addImm(4)
21966 .addMemOperand(MMOHi);
21967 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
21968 MI.eraseFromParent(); // The pseudo instruction is gone now.
21969 return BB;
21970}
21971
21972 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
21973                                         unsigned RelOpcode, unsigned EqOpcode,
21974 const RISCVSubtarget &Subtarget) {
21975 DebugLoc DL = MI.getDebugLoc();
21976 Register DstReg = MI.getOperand(0).getReg();
21977 Register Src1Reg = MI.getOperand(1).getReg();
21978 Register Src2Reg = MI.getOperand(2).getReg();
21980 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
21982
21983 // Save the current FFLAGS.
21984 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
21985
21986 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
21987 .addReg(Src1Reg)
21988 .addReg(Src2Reg);
21991
21992 // Restore the FFLAGS.
21993 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
21994 .addReg(SavedFFlags, RegState::Kill);
21995
21996 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
21997 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
21998 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
21999 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
22002
22003 // Erase the pseudoinstruction.
22004 MI.eraseFromParent();
22005 return BB;
22006}
22007
22008static MachineBasicBlock *
22009 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
22010                           MachineBasicBlock *ThisMBB,
22011 const RISCVSubtarget &Subtarget) {
22012   // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22013 // Without this, custom-inserter would have generated:
22014 //
22015 // A
22016 // | \
22017 // | B
22018 // | /
22019 // C
22020 // | \
22021 // | D
22022 // | /
22023 // E
22024 //
22025 // A: X = ...; Y = ...
22026 // B: empty
22027 // C: Z = PHI [X, A], [Y, B]
22028 // D: empty
22029 // E: PHI [X, C], [Z, D]
22030 //
22031 // If we lower both Select_FPRX_ in a single step, we can instead generate:
22032 //
22033 // A
22034 // | \
22035 // | C
22036 // | /|
22037 // |/ |
22038 // | |
22039 // | D
22040 // | /
22041 // E
22042 //
22043 // A: X = ...; Y = ...
22044 // D: empty
22045 // E: PHI [X, A], [X, C], [Y, D]
22046
22047 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22048 const DebugLoc &DL = First.getDebugLoc();
22049 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
22050 MachineFunction *F = ThisMBB->getParent();
22051 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
22052 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
22053 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
22054 MachineFunction::iterator It = ++ThisMBB->getIterator();
22055 F->insert(It, FirstMBB);
22056 F->insert(It, SecondMBB);
22057 F->insert(It, SinkMBB);
22058
22059 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
22060 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
22062 ThisMBB->end());
22063 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
22064
22065 // Fallthrough block for ThisMBB.
22066 ThisMBB->addSuccessor(FirstMBB);
22067 // Fallthrough block for FirstMBB.
22068 FirstMBB->addSuccessor(SecondMBB);
22069 ThisMBB->addSuccessor(SinkMBB);
22070 FirstMBB->addSuccessor(SinkMBB);
22071 // This is fallthrough.
22072 SecondMBB->addSuccessor(SinkMBB);
22073
22074 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
22075 Register FLHS = First.getOperand(1).getReg();
22076 Register FRHS = First.getOperand(2).getReg();
22077 // Insert appropriate branch.
22078 BuildMI(FirstMBB, DL, TII.get(RISCVCC::getBrCond(FirstCC, First.getOpcode())))
22079 .addReg(FLHS)
22080 .addReg(FRHS)
22081 .addMBB(SinkMBB);
22082
22083 Register SLHS = Second.getOperand(1).getReg();
22084 Register SRHS = Second.getOperand(2).getReg();
22085 Register Op1Reg4 = First.getOperand(4).getReg();
22086 Register Op1Reg5 = First.getOperand(5).getReg();
22087
22088 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
22089 // Insert appropriate branch.
22090 BuildMI(ThisMBB, DL,
22091 TII.get(RISCVCC::getBrCond(SecondCC, Second.getOpcode())))
22092 .addReg(SLHS)
22093 .addReg(SRHS)
22094 .addMBB(SinkMBB);
22095
22096 Register DestReg = Second.getOperand(0).getReg();
22097 Register Op2Reg4 = Second.getOperand(4).getReg();
22098 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
22099 .addReg(Op2Reg4)
22100 .addMBB(ThisMBB)
22101 .addReg(Op1Reg4)
22102 .addMBB(FirstMBB)
22103 .addReg(Op1Reg5)
22104 .addMBB(SecondMBB);
22105
22106 // Now remove the Select_FPRX_s.
22107 First.eraseFromParent();
22108 Second.eraseFromParent();
22109 return SinkMBB;
22110}
22111
22112 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
22113                                            MachineBasicBlock *BB,
22114                                            const RISCVSubtarget &Subtarget) {
22115 // To "insert" Select_* instructions, we actually have to insert the triangle
22116 // control-flow pattern. The incoming instructions know the destination vreg
22117 // to set, the condition code register to branch on, the true/false values to
22118 // select between, and the condcode to use to select the appropriate branch.
22119 //
22120 // We produce the following control flow:
22121 // HeadMBB
22122 // | \
22123 // | IfFalseMBB
22124 // | /
22125 // TailMBB
22126 //
22127 // When we find a sequence of selects we attempt to optimize their emission
22128 // by sharing the control flow. Currently we only handle cases where we have
22129 // multiple selects with the exact same condition (same LHS, RHS and CC).
22130 // The selects may be interleaved with other instructions if the other
22131 // instructions meet some requirements we deem safe:
22132 // - They are not pseudo instructions.
22133   // - They are debug instructions, or otherwise:
22134 // - They do not have side-effects, do not access memory and their inputs do
22135 // not depend on the results of the select pseudo-instructions.
22136 // The TrueV/FalseV operands of the selects cannot depend on the result of
22137 // previous selects in the sequence.
22138 // These conditions could be further relaxed. See the X86 target for a
22139 // related approach and more information.
22140 //
22141 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22142 // is checked here and handled by a separate function -
22143 // EmitLoweredCascadedSelect.
22144
22145 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
22146 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
22147 MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
22148 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
22149 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
22150 Next->getOperand(5).isKill())
22151 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
22152
22153 Register LHS = MI.getOperand(1).getReg();
22154 Register RHS;
22155 if (MI.getOperand(2).isReg())
22156 RHS = MI.getOperand(2).getReg();
22157 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
22158
22159 SmallVector<MachineInstr *, 4> SelectDebugValues;
22160 SmallSet<Register, 4> SelectDests;
22161 SelectDests.insert(MI.getOperand(0).getReg());
22162
22163 MachineInstr *LastSelectPseudo = &MI;
22164 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
22165 SequenceMBBI != E; ++SequenceMBBI) {
22166 if (SequenceMBBI->isDebugInstr())
22167 continue;
22168 if (RISCVInstrInfo::isSelectPseudo(*SequenceMBBI)) {
22169 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
22170 !SequenceMBBI->getOperand(2).isReg() ||
22171 SequenceMBBI->getOperand(2).getReg() != RHS ||
22172 SequenceMBBI->getOperand(3).getImm() != CC ||
22173 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
22174 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
22175 break;
22176 LastSelectPseudo = &*SequenceMBBI;
22177 SequenceMBBI->collectDebugValues(SelectDebugValues);
22178 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
22179 continue;
22180 }
22181 if (SequenceMBBI->hasUnmodeledSideEffects() ||
22182 SequenceMBBI->mayLoadOrStore() ||
22183 SequenceMBBI->usesCustomInsertionHook())
22184 break;
22185 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
22186 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
22187 }))
22188 break;
22189 }
22190
22191 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22192 const BasicBlock *LLVM_BB = BB->getBasicBlock();
22193 DebugLoc DL = MI.getDebugLoc();
22195
22196 MachineBasicBlock *HeadMBB = BB;
22197 MachineFunction *F = BB->getParent();
22198 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
22199 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
22200
22201 F->insert(I, IfFalseMBB);
22202 F->insert(I, TailMBB);
22203
22204 // Set the call frame size on entry to the new basic blocks.
22205 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
22206 IfFalseMBB->setCallFrameSize(CallFrameSize);
22207 TailMBB->setCallFrameSize(CallFrameSize);
22208
22209 // Transfer debug instructions associated with the selects to TailMBB.
22210 for (MachineInstr *DebugInstr : SelectDebugValues) {
22211 TailMBB->push_back(DebugInstr->removeFromParent());
22212 }
22213
22214 // Move all instructions after the sequence to TailMBB.
22215 TailMBB->splice(TailMBB->end(), HeadMBB,
22216 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
22217 // Update machine-CFG edges by transferring all successors of the current
22218 // block to the new block which will contain the Phi nodes for the selects.
22219 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
22220 // Set the successors for HeadMBB.
22221 HeadMBB->addSuccessor(IfFalseMBB);
22222 HeadMBB->addSuccessor(TailMBB);
22223
22224 // Insert appropriate branch.
22225 if (MI.getOperand(2).isImm())
22226 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22227 .addReg(LHS)
22228 .addImm(MI.getOperand(2).getImm())
22229 .addMBB(TailMBB);
22230 else
22231 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22232 .addReg(LHS)
22233 .addReg(RHS)
22234 .addMBB(TailMBB);
22235
22236 // IfFalseMBB just falls through to TailMBB.
22237 IfFalseMBB->addSuccessor(TailMBB);
22238
22239 // Create PHIs for all of the select pseudo-instructions.
22240 auto SelectMBBI = MI.getIterator();
22241 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
22242 auto InsertionPoint = TailMBB->begin();
22243 while (SelectMBBI != SelectEnd) {
22244 auto Next = std::next(SelectMBBI);
22245 if (RISCVInstrInfo::isSelectPseudo(*SelectMBBI)) {
22246 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
22247 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
22248 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
22249 .addReg(SelectMBBI->getOperand(4).getReg())
22250 .addMBB(HeadMBB)
22251 .addReg(SelectMBBI->getOperand(5).getReg())
22252 .addMBB(IfFalseMBB);
22253 SelectMBBI->eraseFromParent();
22254 }
22255 SelectMBBI = Next;
22256 }
22257
22258 F->getProperties().resetNoPHIs();
22259 return TailMBB;
22260}
22261
22262// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
22263static const RISCV::RISCVMaskedPseudoInfo *
22264lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW) {
22265   const RISCVVInversePseudosTable::PseudoToVInst *Inverse =
22266       RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
22267 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
22268   const RISCV::RISCVMaskedPseudoInfo *Masked =
22269       RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
22270 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
22271 return Masked;
22272}
22273
22274 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
22275                                                     MachineBasicBlock *BB,
22276                                                     unsigned CVTXOpc) {
22277 DebugLoc DL = MI.getDebugLoc();
22278
22280
22282 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22283
22284 // Save the old value of FFLAGS.
22285 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
22286
22287 assert(MI.getNumOperands() == 7);
22288
22289 // Emit a VFCVT_X_F
22290 const TargetRegisterInfo *TRI =
22292 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
22293 Register Tmp = MRI.createVirtualRegister(RC);
22294 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
22295 .add(MI.getOperand(1))
22296 .add(MI.getOperand(2))
22297 .add(MI.getOperand(3))
22298 .add(MachineOperand::CreateImm(7)) // frm = DYN
22299 .add(MI.getOperand(4))
22300 .add(MI.getOperand(5))
22301 .add(MI.getOperand(6))
22302 .add(MachineOperand::CreateReg(RISCV::FRM,
22303 /*IsDef*/ false,
22304 /*IsImp*/ true));
22305
22306 // Emit a VFCVT_F_X
22307 RISCVVType::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
22308 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
22309 // There is no E8 variant for VFCVT_F_X.
22310 assert(Log2SEW >= 4);
22311 unsigned CVTFOpc =
22312 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
22313 ->MaskedPseudo;
22314
22315 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
22316 .add(MI.getOperand(0))
22317 .add(MI.getOperand(1))
22318 .addReg(Tmp)
22319 .add(MI.getOperand(3))
22320 .add(MachineOperand::CreateImm(7)) // frm = DYN
22321 .add(MI.getOperand(4))
22322 .add(MI.getOperand(5))
22323 .add(MI.getOperand(6))
22324 .add(MachineOperand::CreateReg(RISCV::FRM,
22325 /*IsDef*/ false,
22326 /*IsImp*/ true));
22327
22328 // Restore FFLAGS.
22329 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22330 .addReg(SavedFFLAGS, RegState::Kill);
22331
22332 // Erase the pseudoinstruction.
22333 MI.eraseFromParent();
22334 return BB;
22335}
22336
22337 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
22338                                      const RISCVSubtarget &Subtarget) {
22339 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
22340 const TargetRegisterClass *RC;
22341 switch (MI.getOpcode()) {
22342 default:
22343 llvm_unreachable("Unexpected opcode");
22344 case RISCV::PseudoFROUND_H:
22345 CmpOpc = RISCV::FLT_H;
22346 F2IOpc = RISCV::FCVT_W_H;
22347 I2FOpc = RISCV::FCVT_H_W;
22348 FSGNJOpc = RISCV::FSGNJ_H;
22349 FSGNJXOpc = RISCV::FSGNJX_H;
22350 RC = &RISCV::FPR16RegClass;
22351 break;
22352 case RISCV::PseudoFROUND_H_INX:
22353 CmpOpc = RISCV::FLT_H_INX;
22354 F2IOpc = RISCV::FCVT_W_H_INX;
22355 I2FOpc = RISCV::FCVT_H_W_INX;
22356 FSGNJOpc = RISCV::FSGNJ_H_INX;
22357 FSGNJXOpc = RISCV::FSGNJX_H_INX;
22358 RC = &RISCV::GPRF16RegClass;
22359 break;
22360 case RISCV::PseudoFROUND_S:
22361 CmpOpc = RISCV::FLT_S;
22362 F2IOpc = RISCV::FCVT_W_S;
22363 I2FOpc = RISCV::FCVT_S_W;
22364 FSGNJOpc = RISCV::FSGNJ_S;
22365 FSGNJXOpc = RISCV::FSGNJX_S;
22366 RC = &RISCV::FPR32RegClass;
22367 break;
22368 case RISCV::PseudoFROUND_S_INX:
22369 CmpOpc = RISCV::FLT_S_INX;
22370 F2IOpc = RISCV::FCVT_W_S_INX;
22371 I2FOpc = RISCV::FCVT_S_W_INX;
22372 FSGNJOpc = RISCV::FSGNJ_S_INX;
22373 FSGNJXOpc = RISCV::FSGNJX_S_INX;
22374 RC = &RISCV::GPRF32RegClass;
22375 break;
22376 case RISCV::PseudoFROUND_D:
22377 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22378 CmpOpc = RISCV::FLT_D;
22379 F2IOpc = RISCV::FCVT_L_D;
22380 I2FOpc = RISCV::FCVT_D_L;
22381 FSGNJOpc = RISCV::FSGNJ_D;
22382 FSGNJXOpc = RISCV::FSGNJX_D;
22383 RC = &RISCV::FPR64RegClass;
22384 break;
22385 case RISCV::PseudoFROUND_D_INX:
22386 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22387 CmpOpc = RISCV::FLT_D_INX;
22388 F2IOpc = RISCV::FCVT_L_D_INX;
22389 I2FOpc = RISCV::FCVT_D_L_INX;
22390 FSGNJOpc = RISCV::FSGNJ_D_INX;
22391 FSGNJXOpc = RISCV::FSGNJX_D_INX;
22392 RC = &RISCV::GPRRegClass;
22393 break;
22394 }
22395
22396 const BasicBlock *BB = MBB->getBasicBlock();
22397 DebugLoc DL = MI.getDebugLoc();
22398 MachineFunction::iterator I = ++MBB->getIterator();
22399
22400 MachineFunction *F = MBB->getParent();
22401 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
22402 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
22403
22404 F->insert(I, CvtMBB);
22405 F->insert(I, DoneMBB);
22406 // Move all instructions after the sequence to DoneMBB.
22407 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
22408 MBB->end());
22409 // Update machine-CFG edges by transferring all successors of the current
22410 // block to the new block which will contain the Phi nodes for the selects.
22412 // Set the successors for MBB.
22413 MBB->addSuccessor(CvtMBB);
22414 MBB->addSuccessor(DoneMBB);
22415
22416 Register DstReg = MI.getOperand(0).getReg();
22417 Register SrcReg = MI.getOperand(1).getReg();
22418 Register MaxReg = MI.getOperand(2).getReg();
22419 int64_t FRM = MI.getOperand(3).getImm();
22420
22421 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22422 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
22423
22424 Register FabsReg = MRI.createVirtualRegister(RC);
22425 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
22426
22427 // Compare the FP value to the max value.
22428 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22429 auto MIB =
22430 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
22433
22434 // Insert branch.
22435 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
22436 .addReg(CmpReg)
22437 .addReg(RISCV::X0)
22438 .addMBB(DoneMBB);
22439
22440 CvtMBB->addSuccessor(DoneMBB);
22441
22442 // Convert to integer.
22443 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22444 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
22447
22448 // Convert back to FP.
22449 Register I2FReg = MRI.createVirtualRegister(RC);
22450 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
22453
22454 // Restore the sign bit.
22455 Register CvtReg = MRI.createVirtualRegister(RC);
22456 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
22457
22458 // Merge the results.
22459 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
22460 .addReg(SrcReg)
22461 .addMBB(MBB)
22462 .addReg(CvtReg)
22463 .addMBB(CvtMBB);
22464
22465 MI.eraseFromParent();
22466 return DoneMBB;
22467}
22468
22469 MachineBasicBlock *
22470 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
22471                                                  MachineBasicBlock *BB) const {
22472 switch (MI.getOpcode()) {
22473 default:
22474 llvm_unreachable("Unexpected instr type to insert");
22475 case RISCV::ReadCounterWide:
22476 assert(!Subtarget.is64Bit() &&
22477 "ReadCounterWide is only to be used on riscv32");
22478 return emitReadCounterWidePseudo(MI, BB);
22479 case RISCV::Select_GPR_Using_CC_GPR:
22480 case RISCV::Select_GPR_Using_CC_SImm5_CV:
22481 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
22482 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
22483 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
22484 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
22485 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
22486 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
22487 case RISCV::Select_FPR16_Using_CC_GPR:
22488 case RISCV::Select_FPR16INX_Using_CC_GPR:
22489 case RISCV::Select_FPR32_Using_CC_GPR:
22490 case RISCV::Select_FPR32INX_Using_CC_GPR:
22491 case RISCV::Select_FPR64_Using_CC_GPR:
22492 case RISCV::Select_FPR64INX_Using_CC_GPR:
22493 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
22494 return emitSelectPseudo(MI, BB, Subtarget);
22495 case RISCV::BuildPairF64Pseudo:
22496 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
22497 case RISCV::SplitF64Pseudo:
22498 return emitSplitF64Pseudo(MI, BB, Subtarget);
22499 case RISCV::PseudoQuietFLE_H:
22500 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
22501 case RISCV::PseudoQuietFLE_H_INX:
22502 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
22503 case RISCV::PseudoQuietFLT_H:
22504 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
22505 case RISCV::PseudoQuietFLT_H_INX:
22506 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
22507 case RISCV::PseudoQuietFLE_S:
22508 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
22509 case RISCV::PseudoQuietFLE_S_INX:
22510 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
22511 case RISCV::PseudoQuietFLT_S:
22512 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
22513 case RISCV::PseudoQuietFLT_S_INX:
22514 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
22515 case RISCV::PseudoQuietFLE_D:
22516 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
22517 case RISCV::PseudoQuietFLE_D_INX:
22518 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
22519 case RISCV::PseudoQuietFLE_D_IN32X:
22520 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
22521 Subtarget);
22522 case RISCV::PseudoQuietFLT_D:
22523 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
22524 case RISCV::PseudoQuietFLT_D_INX:
22525 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
22526 case RISCV::PseudoQuietFLT_D_IN32X:
22527 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
22528 Subtarget);
22529
22530 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
22531 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
22532 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
22533 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
22534 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
22535 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
22536 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
22537 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
22538 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
22539 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
22540 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
22541 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
22542 case RISCV::PseudoFROUND_H:
22543 case RISCV::PseudoFROUND_H_INX:
22544 case RISCV::PseudoFROUND_S:
22545 case RISCV::PseudoFROUND_S_INX:
22546 case RISCV::PseudoFROUND_D:
22547 case RISCV::PseudoFROUND_D_INX:
22548 case RISCV::PseudoFROUND_D_IN32X:
22549 return emitFROUND(MI, BB, Subtarget);
22550 case RISCV::PROBED_STACKALLOC_DYN:
22551 return emitDynamicProbedAlloc(MI, BB);
22552 case TargetOpcode::STATEPOINT:
22553 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
22554     // while the jal call instruction (to which the statepoint is eventually
22555     // lowered) has an implicit def. This def is early-clobber as it will be set
22556     // at the moment of the call and earlier than any use is read.
22556 // the moment of the call and earlier than any use is read.
22557 // Add this implicit dead def here as a workaround.
22558 MI.addOperand(*MI.getMF(),
22559                   MachineOperand::CreateReg(
22560                       RISCV::X1, /*isDef*/ true,
22561 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
22562 /*isUndef*/ false, /*isEarlyClobber*/ true));
22563 [[fallthrough]];
22564 case TargetOpcode::STACKMAP:
22565 case TargetOpcode::PATCHPOINT:
22566 if (!Subtarget.is64Bit())
22567 reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only "
22568 "supported on 64-bit targets");
22569 return emitPatchPoint(MI, BB);
22570 }
22571}
22572
22573 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
22574                                                         SDNode *Node) const {
22575   // If the instruction defines an FRM operand, conservatively mark it as
22576   // non-dead to express a data dependency with FRM users and prevent incorrect
22577   // instruction reordering.
22578 if (auto *FRMDef = MI.findRegisterDefOperand(RISCV::FRM, /*TRI=*/nullptr)) {
22579 FRMDef->setIsDead(false);
22580 return;
22581 }
22582 // Add FRM dependency to any instructions with dynamic rounding mode.
22583 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
22584 if (Idx < 0) {
22585 // Vector pseudos have FRM index indicated by TSFlags.
22586 Idx = RISCVII::getFRMOpNum(MI.getDesc());
22587 if (Idx < 0)
22588 return;
22589 }
22590 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
22591 return;
22592 // If the instruction already reads FRM, don't add another read.
22593 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
22594 return;
22595 MI.addOperand(
22596 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
22597}
22598
22599void RISCVTargetLowering::analyzeInputArgs(
22600 MachineFunction &MF, CCState &CCInfo,
22601 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
22602 RISCVCCAssignFn Fn) const {
22603 for (const auto &[Idx, In] : enumerate(Ins)) {
22604 MVT ArgVT = In.VT;
22605 ISD::ArgFlagsTy ArgFlags = In.Flags;
22606
22607 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22608 In.OrigTy)) {
22609 LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
22610 << ArgVT << '\n');
22611 llvm_unreachable(nullptr);
22612 }
22613 }
22614}
22615
22616void RISCVTargetLowering::analyzeOutputArgs(
22617 MachineFunction &MF, CCState &CCInfo,
22618 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
22619 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
22620 for (const auto &[Idx, Out] : enumerate(Outs)) {
22621 MVT ArgVT = Out.VT;
22622 ISD::ArgFlagsTy ArgFlags = Out.Flags;
22623
22624 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22625 Out.OrigTy)) {
22626 LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
22627 << ArgVT << "\n");
22628 llvm_unreachable(nullptr);
22629 }
22630 }
22631}
22632
22633// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
22634// values.
22635 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
22636                                    const CCValAssign &VA, const SDLoc &DL,
22637 const RISCVSubtarget &Subtarget) {
22638 if (VA.needsCustom()) {
22639 if (VA.getLocVT().isInteger() &&
22640 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22641 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
22642 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
22643 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
22644     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
22645       return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
22646 llvm_unreachable("Unexpected Custom handling.");
22647 }
22648
22649 switch (VA.getLocInfo()) {
22650 default:
22651 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22652 case CCValAssign::Full:
22653 break;
22654 case CCValAssign::BCvt:
22655 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
22656 break;
22657 }
22658 return Val;
22659}
22660
22661// The caller is responsible for loading the full value if the argument is
22662// passed with CCValAssign::Indirect.
22663 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
22664                                 const CCValAssign &VA, const SDLoc &DL,
22665 const ISD::InputArg &In,
22666 const RISCVTargetLowering &TLI) {
22667   MachineFunction &MF = DAG.getMachineFunction();
22668   MachineRegisterInfo &RegInfo = MF.getRegInfo();
22669   EVT LocVT = VA.getLocVT();
22670 SDValue Val;
22671 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
22672 Register VReg = RegInfo.createVirtualRegister(RC);
22673 RegInfo.addLiveIn(VA.getLocReg(), VReg);
22674 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
22675
22676 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
22677 if (In.isOrigArg()) {
22678 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
22679 if (OrigArg->getType()->isIntegerTy()) {
22680 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
22681 // An input zero extended from i31 can also be considered sign extended.
22682 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
22683 (BitWidth < 32 && In.Flags.isZExt())) {
22684         RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
22685         RVFI->addSExt32Register(VReg);
22686 }
22687 }
22688 }
22689
22690   if (VA.getLocInfo() == CCValAssign::Indirect)
22691     return Val;
22692
22693 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
22694}
22695
22696 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
22697                                    const CCValAssign &VA, const SDLoc &DL,
22698 const RISCVSubtarget &Subtarget) {
22699 EVT LocVT = VA.getLocVT();
22700
22701 if (VA.needsCustom()) {
22702 if (LocVT.isInteger() &&
22703 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22704 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
22705 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
22706 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
22707 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
22708 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
22709 llvm_unreachable("Unexpected Custom handling.");
22710 }
22711
22712 switch (VA.getLocInfo()) {
22713 default:
22714 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22715 case CCValAssign::Full:
22716 break;
22717 case CCValAssign::BCvt:
22718 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
22719 break;
22720 }
22721 return Val;
22722}
22723
22724// The caller is responsible for loading the full value if the argument is
22725// passed with CCValAssign::Indirect.
22726static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
22727 const CCValAssign &VA, const SDLoc &DL) {
22729 MachineFrameInfo &MFI = MF.getFrameInfo();
22730 EVT LocVT = VA.getLocVT();
22731 EVT ValVT = VA.getValVT();
22733 if (VA.getLocInfo() == CCValAssign::Indirect) {
22734 // When the value is a scalable vector, we save the pointer that points to
22735 // the scalable vector value on the stack. The ValVT will then be the
22736 // pointer type, instead of the scalable vector type.
22737 ValVT = LocVT;
22738 }
22739 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
22740 /*IsImmutable=*/true);
22741 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22742 SDValue Val;
22743
22745 switch (VA.getLocInfo()) {
22746 default:
22747 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22748 case CCValAssign::Full:
22750 case CCValAssign::BCvt:
22751 break;
22752 }
22753 Val = DAG.getExtLoad(
22754 ExtType, DL, LocVT, Chain, FIN,
22756 return Val;
22757}
22758
22759static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
22760 const CCValAssign &VA,
22761 const CCValAssign &HiVA,
22762 const SDLoc &DL) {
22763 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
22764 "Unexpected VA");
22766 MachineFrameInfo &MFI = MF.getFrameInfo();
22768
22769 assert(VA.isRegLoc() && "Expected register VA assignment");
22770
22771 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22772 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
22773 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
22774 SDValue Hi;
22775 if (HiVA.isMemLoc()) {
22776 // Second half of f64 is passed on the stack.
22777 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
22778 /*IsImmutable=*/true);
22779 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
22780 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
22782 } else {
22783 // Second half of f64 is passed in another GPR.
22784 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22785 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
22786 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
22787 }
22788 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
22789}
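
The helpers above implement the RV32 soft-float convention of carrying an f64 in two 32-bit pieces. As a rough standalone illustration of the same bit-level split and rebuild (hypothetical helper names, ordinary C++ rather than SelectionDAG nodes):

  #include <cstdint>
  #include <cstring>

  struct F64Halves { uint32_t Lo, Hi; };

  // Mirror of SplitF64: the low half goes in the first GPR, the high half in
  // the next GPR or a stack slot.
  F64Halves splitF64Bits(double D) {
    uint64_t Bits;
    std::memcpy(&Bits, &D, sizeof(Bits));
    return {static_cast<uint32_t>(Bits), static_cast<uint32_t>(Bits >> 32)};
  }

  // Mirror of BuildPairF64: reassemble the f64 from the two halves.
  double buildPairF64Bits(uint32_t Lo, uint32_t Hi) {
    uint64_t Bits = (static_cast<uint64_t>(Hi) << 32) | Lo;
    double D;
    std::memcpy(&D, &Bits, sizeof(D));
    return D;
  }
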
22790
22791// Transform physical registers into virtual registers.
22792SDValue RISCVTargetLowering::LowerFormalArguments(
22793 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
22794 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
22795 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
22796
22798
22799 switch (CallConv) {
22800 default:
22801 reportFatalUsageError("Unsupported calling convention");
22802 case CallingConv::C:
22803 case CallingConv::Fast:
22806 case CallingConv::GRAAL:
22808#define CC_VLS_CASE(ABI_VLEN) case CallingConv::RISCV_VLSCall_##ABI_VLEN:
22809 CC_VLS_CASE(32)
22810 CC_VLS_CASE(64)
22811 CC_VLS_CASE(128)
22812 CC_VLS_CASE(256)
22813 CC_VLS_CASE(512)
22814 CC_VLS_CASE(1024)
22815 CC_VLS_CASE(2048)
22816 CC_VLS_CASE(4096)
22817 CC_VLS_CASE(8192)
22818 CC_VLS_CASE(16384)
22819 CC_VLS_CASE(32768)
22820 CC_VLS_CASE(65536)
22821#undef CC_VLS_CASE
22822 break;
22823 case CallingConv::GHC:
22824 if (Subtarget.hasStdExtE())
22825 reportFatalUsageError("GHC calling convention is not supported on RVE!");
22826 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
22827 reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and "
22828 "(Zdinx/D) instruction set extensions");
22829 }
22830
22831 const Function &Func = MF.getFunction();
22832 if (Func.hasFnAttribute("interrupt")) {
22833 if (!Func.arg_empty())
22835 "Functions with the interrupt attribute cannot have arguments!");
22836
22837 StringRef Kind =
22838 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
22839
22840 constexpr StringLiteral SupportedInterruptKinds[] = {
22841 "machine",
22842 "supervisor",
22843 "rnmi",
22844 "qci-nest",
22845 "qci-nonest",
22846 "SiFive-CLIC-preemptible",
22847 "SiFive-CLIC-stack-swap",
22848 "SiFive-CLIC-preemptible-stack-swap",
22849 };
22850 if (!llvm::is_contained(SupportedInterruptKinds, Kind))
22852 "Function interrupt attribute argument not supported!");
22853
22854 if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint())
22856 "'qci-*' interrupt kinds require Xqciint extension");
22857
22858 if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
22860 "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");
22861
22862 if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
22863 reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension");
22864 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
22865 if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
22866 reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
22867 "have a frame pointer");
22868 }
22869
22870 EVT PtrVT = getPointerTy(DAG.getDataLayout());
22871 MVT XLenVT = Subtarget.getXLenVT();
22872 unsigned XLenInBytes = Subtarget.getXLen() / 8;
22873 // Used with varargs to accumulate store chains.
22874 std::vector<SDValue> OutChains;
22875
22876 // Assign locations to all of the incoming arguments.
22878 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
22879
22880 if (CallConv == CallingConv::GHC)
22882 else
22883 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
22885 : CC_RISCV);
22886
22887 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
22888 CCValAssign &VA = ArgLocs[i];
22889 SDValue ArgValue;
22890 // Passing f64 on RV32D with a soft float ABI must be handled as a special
22891 // case.
22892 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
22893 assert(VA.needsCustom());
22894 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
22895 } else if (VA.isRegLoc())
22896 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
22897 else
22898 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
22899
22900 if (VA.getLocInfo() == CCValAssign::Indirect) {
22901 // If the original argument was split and passed by reference (e.g. i128
22902 // on RV32), we need to load all parts of it here (using the same
22903 // address). Vectors may be partly split to registers and partly to the
22904 // stack, in which case the base address is partly offset and subsequent
22905 // stores are relative to that.
22906 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
22908 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
22909 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
22910 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
22911 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
22912 CCValAssign &PartVA = ArgLocs[i + 1];
22913 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
22914 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
22915 if (PartVA.getValVT().isScalableVector())
22916 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
22917 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
22918 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
22920 ++i;
22921 ++InsIdx;
22922 }
22923 continue;
22924 }
22925 InVals.push_back(ArgValue);
22926 }
22927
22928 if (any_of(ArgLocs,
22929 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
22931
22932 if (IsVarArg) {
22933 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
22934 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
22935 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
22936 MachineFrameInfo &MFI = MF.getFrameInfo();
22937 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22939
22940 // Size of the vararg save area. For now, the varargs save area is either
22941 // zero or large enough to hold a0-a7.
22942 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
22943 int FI;
22944
22945 // If all registers are allocated, then all varargs must be passed on the
22946 // stack and we don't need to save any argregs.
22947 if (VarArgsSaveSize == 0) {
22948 int VaArgOffset = CCInfo.getStackSize();
22949 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
22950 } else {
22951 int VaArgOffset = -VarArgsSaveSize;
22952 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
22953
22954 // If saving an odd number of registers, create an extra stack slot to
22955 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
22956 // offsets to even-numbered registers remain 2*XLEN-aligned.
22957 if (Idx % 2) {
22959 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
22960 VarArgsSaveSize += XLenInBytes;
22961 }
22962
22963 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22964
22965 // Copy the integer registers that may have been used for passing varargs
22966 // to the vararg save area.
22967 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
22968 const Register Reg = RegInfo.createVirtualRegister(RC);
22969 RegInfo.addLiveIn(ArgRegs[I], Reg);
22970 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
22971 SDValue Store = DAG.getStore(
22972 Chain, DL, ArgValue, FIN,
22973 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
22974 OutChains.push_back(Store);
22975 FIN =
22976 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
22977 }
22978 }
22979
22980 // Record the frame index of the first variable argument,
22981 // which is needed by VASTART.
22982 RVFI->setVarArgsFrameIndex(FI);
22983 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
22984 }
22985
22986 // All stores are grouped in one node to allow the matching between
22987 // the size of Ins and InVals. This only happens for vararg functions.
22988 if (!OutChains.empty()) {
22989 OutChains.push_back(Chain);
22990 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
22991 }
22992
22993 return Chain;
22994}
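
The vararg handling above reduces to simple arithmetic on the argument GPRs that remain unallocated after the fixed arguments. A small self-contained sketch with assumed numbers (RV64, fixed arguments occupying a0-a2):

  #include <cassert>

  int main() {
    const unsigned XLenInBytes = 8;       // RV64
    const unsigned NumArgGPRs = 8;        // a0-a7
    const unsigned FirstUnallocated = 3;  // assume a0-a2 hold fixed arguments

    // Registers a3-a7 must be spilled so va_arg can find them.
    int SaveSize = XLenInBytes * (NumArgGPRs - FirstUnallocated);
    int VaArgOffset = -SaveSize;          // save area sits just below the frame

    // An odd number of already-used registers gets one extra padding slot so
    // the frame pointer stays 2*XLEN aligned, as in the code above.
    if (FirstUnallocated % 2)
      SaveSize += XLenInBytes;            // extra slot at VaArgOffset - XLenInBytes

    assert(SaveSize == 48 && VaArgOffset == -40);
    return 0;
  }
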
22995
22996/// isEligibleForTailCallOptimization - Check whether the call is eligible
22997/// for tail call optimization.
22998/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
22999bool RISCVTargetLowering::isEligibleForTailCallOptimization(
23000 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
23001 const SmallVector<CCValAssign, 16> &ArgLocs) const {
23002
23003 auto CalleeCC = CLI.CallConv;
23004 auto &Outs = CLI.Outs;
23005 auto &Caller = MF.getFunction();
23006 auto CallerCC = Caller.getCallingConv();
23007
23008 // Exception-handling functions need a special set of instructions to
23009 // indicate a return to the hardware. Tail-calling another function would
23010 // probably break this.
23011 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
23012 // should be expanded as new function attributes are introduced.
23013 if (Caller.hasFnAttribute("interrupt"))
23014 return false;
23015
23016 // Do not tail call opt if the stack is used to pass parameters.
23017 if (CCInfo.getStackSize() != 0)
23018 return false;
23019
23020 // Do not tail call opt if any parameters need to be passed indirectly.
23021 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
23022 // passed indirectly. So the address of the value will be passed in a
23023 // register, or if not available, then the address is put on the stack. In
23024 // order to pass indirectly, space on the stack often needs to be allocated
23025 // in order to store the value. In this case the CCInfo.getStackSize()
23026 // != 0 check is not enough and we need to check if any CCValAssign in
23027 // ArgLocs is passed CCValAssign::Indirect.
23028 for (auto &VA : ArgLocs)
23029 if (VA.getLocInfo() == CCValAssign::Indirect)
23030 return false;
23031
23032 // Do not tail call opt if either caller or callee uses struct return
23033 // semantics.
23034 auto IsCallerStructRet = Caller.hasStructRetAttr();
23035 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
23036 if (IsCallerStructRet || IsCalleeStructRet)
23037 return false;
23038
23039 // The callee has to preserve all registers the caller needs to preserve.
23040 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
23041 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
23042 if (CalleeCC != CallerCC) {
23043 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
23044 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
23045 return false;
23046 }
23047
23048 // Byval parameters hand the function a pointer directly into the stack area
23049 // we want to reuse during a tail call. Working around this *is* possible
23050 // but less efficient and uglier in LowerCall.
23051 for (auto &Arg : Outs)
23052 if (Arg.Flags.isByVal())
23053 return false;
23054
23055 return true;
23056}
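
Roughly, a call survives these checks only when no argument or return value needs the caller's stack. A hedged source-level illustration (names are made up; whether the optimization actually fires also depends on the ABI and optimization level):

  struct Sret { long Field[16]; };   // large enough to be returned indirectly (sret)

  static long leaf(long X) { return X * 2; }
  static Sret makeBig() { return Sret{}; }

  // Likely eligible: the argument travels in a register and nothing is passed
  // on the caller's stack.
  long callsLeaf(long X) { return leaf(X + 1); }

  // Not eligible: the hidden sret pointer (struct-return semantics) trips the
  // IsCallerStructRet/IsCalleeStructRet check above.
  Sret callsMakeBig() { return makeBig(); }
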
23057
23059 return DAG.getDataLayout().getPrefTypeAlign(
23060 VT.getTypeForEVT(*DAG.getContext()));
23061}
23062
23063// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
23064// and output parameter nodes.
23065SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
23066 SmallVectorImpl<SDValue> &InVals) const {
23067 SelectionDAG &DAG = CLI.DAG;
23068 SDLoc &DL = CLI.DL;
23070 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
23072 SDValue Chain = CLI.Chain;
23073 SDValue Callee = CLI.Callee;
23074 bool &IsTailCall = CLI.IsTailCall;
23075 CallingConv::ID CallConv = CLI.CallConv;
23076 bool IsVarArg = CLI.IsVarArg;
23077 EVT PtrVT = getPointerTy(DAG.getDataLayout());
23078 MVT XLenVT = Subtarget.getXLenVT();
23079 const CallBase *CB = CLI.CB;
23080
23083
23084 // Set type id for call site info.
23085 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
23086 CSInfo = MachineFunction::CallSiteInfo(*CB);
23087
23088 // Analyze the operands of the call, assigning locations to each operand.
23090 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
23091
23092 if (CallConv == CallingConv::GHC) {
23093 if (Subtarget.hasStdExtE())
23094 reportFatalUsageError("GHC calling convention is not supported on RVE!");
23095 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
23096 } else
23097 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
23099 : CC_RISCV);
23100
23101 // Check if it's really possible to do a tail call.
23102 if (IsTailCall)
23103 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
23104
23105 if (IsTailCall)
23106 ++NumTailCalls;
23107 else if (CLI.CB && CLI.CB->isMustTailCall())
23108 reportFatalInternalError("failed to perform tail call elimination on a "
23109 "call site marked musttail");
23110
23111 // Get a count of how many bytes are to be pushed on the stack.
23112 unsigned NumBytes = ArgCCInfo.getStackSize();
23113
23114 // Create local copies for byval args
23115 SmallVector<SDValue, 8> ByValArgs;
23116 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23117 ISD::ArgFlagsTy Flags = Outs[i].Flags;
23118 if (!Flags.isByVal())
23119 continue;
23120
23121 SDValue Arg = OutVals[i];
23122 unsigned Size = Flags.getByValSize();
23123 Align Alignment = Flags.getNonZeroByValAlign();
23124
23125 int FI =
23126 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
23127 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
23128 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
23129
23130 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
23131 /*IsVolatile=*/false,
23132 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
23134 ByValArgs.push_back(FIPtr);
23135 }
23136
23137 if (!IsTailCall)
23138 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
23139
23140 // Copy argument values to their designated locations.
23142 SmallVector<SDValue, 8> MemOpChains;
23143 SDValue StackPtr;
23144 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
23145 ++i, ++OutIdx) {
23146 CCValAssign &VA = ArgLocs[i];
23147 SDValue ArgValue = OutVals[OutIdx];
23148 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
23149
23150 // Handle passing f64 on RV32D with a soft float ABI as a special case.
23151 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23152 assert(VA.isRegLoc() && "Expected register VA assignment");
23153 assert(VA.needsCustom());
23154 SDValue SplitF64 = DAG.getNode(
23155 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
23156 SDValue Lo = SplitF64.getValue(0);
23157 SDValue Hi = SplitF64.getValue(1);
23158
23159 Register RegLo = VA.getLocReg();
23160 RegsToPass.push_back(std::make_pair(RegLo, Lo));
23161
23162 // Get the CCValAssign for the Hi part.
23163 CCValAssign &HiVA = ArgLocs[++i];
23164
23165 if (HiVA.isMemLoc()) {
23166 // Second half of f64 is passed on the stack.
23167 if (!StackPtr.getNode())
23168 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23170 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23171 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
23172 // Emit the store.
23173 MemOpChains.push_back(DAG.getStore(
23174 Chain, DL, Hi, Address,
23176 } else {
23177 // Second half of f64 is passed in another GPR.
23178 Register RegHigh = HiVA.getLocReg();
23179 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
23180 }
23181 continue;
23182 }
23183
23184 // Promote the value if needed.
23185 // For now, only handle fully promoted and indirect arguments.
23186 if (VA.getLocInfo() == CCValAssign::Indirect) {
23187 // Store the argument in a stack slot and pass its address.
23188 Align StackAlign =
23189 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
23190 getPrefTypeAlign(ArgValue.getValueType(), DAG));
23191 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
23192 // If the original argument was split (e.g. i128), we need
23193 // to store the required parts of it here (and pass just one address).
23194 // Vectors may be partly split to registers and partly to the stack, in
23195 // which case the base address is partly offset and subsequent stores are
23196 // relative to that.
23197 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
23198 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
23199 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23200 // Calculate the total size to store. The only way to know what is
23201 // actually being stored is to walk the parts in the loop below and
23202 // accumulate their sizes.
23204 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
23205 SDValue PartValue = OutVals[OutIdx + 1];
23206 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
23207 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23208 EVT PartVT = PartValue.getValueType();
23209 if (PartVT.isScalableVector())
23210 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23211 StoredSize += PartVT.getStoreSize();
23212 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
23213 Parts.push_back(std::make_pair(PartValue, Offset));
23214 ++i;
23215 ++OutIdx;
23216 }
23217 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
23218 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
23219 MemOpChains.push_back(
23220 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
23222 for (const auto &Part : Parts) {
23223 SDValue PartValue = Part.first;
23224 SDValue PartOffset = Part.second;
23226 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
23227 MemOpChains.push_back(
23228 DAG.getStore(Chain, DL, PartValue, Address,
23230 }
23231 ArgValue = SpillSlot;
23232 } else {
23233 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
23234 }
23235
23236 // Use local copy if it is a byval arg.
23237 if (Flags.isByVal())
23238 ArgValue = ByValArgs[j++];
23239
23240 if (VA.isRegLoc()) {
23241 // Queue up the argument copies and emit them at the end.
23242 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
23243
23244 const TargetOptions &Options = DAG.getTarget().Options;
23245 if (Options.EmitCallSiteInfo)
23246 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
23247 } else {
23248 assert(VA.isMemLoc() && "Argument not register or memory");
23249 assert(!IsTailCall && "Tail call not allowed if stack is used "
23250 "for passing parameters");
23251
23252 // Work out the address of the stack slot.
23253 if (!StackPtr.getNode())
23254 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23256 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23258
23259 // Emit the store.
23260 MemOpChains.push_back(
23261 DAG.getStore(Chain, DL, ArgValue, Address,
23263 }
23264 }
23265
23266 // Join the stores, which are independent of one another.
23267 if (!MemOpChains.empty())
23268 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
23269
23270 SDValue Glue;
23271
23272 // Build a sequence of copy-to-reg nodes, chained and glued together.
23273 for (auto &Reg : RegsToPass) {
23274 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
23275 Glue = Chain.getValue(1);
23276 }
23277
23278 // Validate that none of the argument registers have been marked as
23279 // reserved; if so, report an error. Do the same for the return address if
23280 // this is not a tail call.
23281 validateCCReservedRegs(RegsToPass, MF);
23282 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
23284 MF.getFunction(),
23285 "Return address register required, but has been reserved."});
23286
23287 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
23288 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
23289 // split it and then direct call can be matched by PseudoCALL.
23290 bool CalleeIsLargeExternalSymbol = false;
23292 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
23293 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
23294 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23295 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
23296 CalleeIsLargeExternalSymbol = true;
23297 }
23298 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
23299 const GlobalValue *GV = S->getGlobal();
23300 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
23301 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23302 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
23303 }
23304
23305 // The first call operand is the chain and the second is the target address.
23307 Ops.push_back(Chain);
23308 Ops.push_back(Callee);
23309
23310 // Add argument registers to the end of the list so that they are
23311 // known live into the call.
23312 for (auto &Reg : RegsToPass)
23313 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
23314
23315 // Add a register mask operand representing the call-preserved registers.
23316 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
23317 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
23318 assert(Mask && "Missing call preserved mask for calling convention");
23319 Ops.push_back(DAG.getRegisterMask(Mask));
23320
23321 // Glue the call to the argument copies, if any.
23322 if (Glue.getNode())
23323 Ops.push_back(Glue);
23324
23325 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
23326 "Unexpected CFI type for a direct call");
23327
23328 // Emit the call.
23329 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
23330
23331 // Use a software-guarded branch for large code model non-indirect calls.
23332 // A tail call to an external symbol will have a null CLI.CB, so we need
23333 // another way to determine the callsite type.
23334 bool NeedSWGuarded = false;
23336 Subtarget.hasStdExtZicfilp() &&
23337 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
23338 NeedSWGuarded = true;
23339
23340 if (IsTailCall) {
23342 unsigned CallOpc =
23343 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
23344 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23345 if (CLI.CFIType)
23346 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23347 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
23348 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
23349 return Ret;
23350 }
23351
23352 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
23353 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23354 if (CLI.CFIType)
23355 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23356
23357 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
23358 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
23359 Glue = Chain.getValue(1);
23360
23361 // Mark the end of the call, which is glued to the call itself.
23362 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
23363 Glue = Chain.getValue(1);
23364
23365 // Assign locations to each value returned by this call.
23367 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
23368 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
23369
23370 // Copy all of the result registers out of their specified physreg.
23371 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
23372 auto &VA = RVLocs[i];
23373 // Copy the value out
23374 SDValue RetValue =
23375 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
23376 // Glue the RetValue to the end of the call sequence
23377 Chain = RetValue.getValue(1);
23378 Glue = RetValue.getValue(2);
23379
23380 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23381 assert(VA.needsCustom());
23382 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
23383 MVT::i32, Glue);
23384 Chain = RetValue2.getValue(1);
23385 Glue = RetValue2.getValue(2);
23386 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
23387 RetValue2);
23388 } else
23389 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
23390
23391 InVals.push_back(RetValue);
23392 }
23393
23394 return Chain;
23395}
23396
23397bool RISCVTargetLowering::CanLowerReturn(
23398 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
23399 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
23400 const Type *RetTy) const {
23402 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
23403
23404 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23405 MVT VT = Outs[i].VT;
23406 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
23407 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
23408 /*IsRet=*/true, Outs[i].OrigTy))
23409 return false;
23410 }
23411 return true;
23412}
23413
23414SDValue
23415RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
23416 bool IsVarArg,
23417 const SmallVectorImpl<ISD::OutputArg> &Outs,
23418 const SmallVectorImpl<SDValue> &OutVals,
23419 const SDLoc &DL, SelectionDAG &DAG) const {
23421 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
23422
23423 // Stores the assignment of the return value to a location.
23425
23426 // Info about the registers and stack slot.
23427 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
23428 *DAG.getContext());
23429
23430 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
23431 nullptr, CC_RISCV);
23432
23433 if (CallConv == CallingConv::GHC && !RVLocs.empty())
23434 reportFatalUsageError("GHC functions return void only");
23435
23436 SDValue Glue;
23437 SmallVector<SDValue, 4> RetOps(1, Chain);
23438
23439 // Copy the result values into the output registers.
23440 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
23441 SDValue Val = OutVals[OutIdx];
23442 CCValAssign &VA = RVLocs[i];
23443 assert(VA.isRegLoc() && "Can only return in registers!");
23444
23445 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23446 // Handle returning f64 on RV32D with a soft float ABI.
23447 assert(VA.isRegLoc() && "Expected return via registers");
23448 assert(VA.needsCustom());
23449 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
23450 DAG.getVTList(MVT::i32, MVT::i32), Val);
23451 SDValue Lo = SplitF64.getValue(0);
23452 SDValue Hi = SplitF64.getValue(1);
23453 Register RegLo = VA.getLocReg();
23454 Register RegHi = RVLocs[++i].getLocReg();
23455
23456 if (STI.isRegisterReservedByUser(RegLo) ||
23457 STI.isRegisterReservedByUser(RegHi))
23459 MF.getFunction(),
23460 "Return value register required, but has been reserved."});
23461
23462 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
23463 Glue = Chain.getValue(1);
23464 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
23465 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
23466 Glue = Chain.getValue(1);
23467 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
23468 } else {
23469 // Handle a 'normal' return.
23470 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
23471 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
23472
23473 if (STI.isRegisterReservedByUser(VA.getLocReg()))
23475 MF.getFunction(),
23476 "Return value register required, but has been reserved."});
23477
23478 // Guarantee that all emitted copies are stuck together.
23479 Glue = Chain.getValue(1);
23480 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
23481 }
23482 }
23483
23484 RetOps[0] = Chain; // Update chain.
23485
23486 // Add the glue node if we have it.
23487 if (Glue.getNode()) {
23488 RetOps.push_back(Glue);
23489 }
23490
23491 if (any_of(RVLocs,
23492 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
23494
23495 unsigned RetOpc = RISCVISD::RET_GLUE;
23496 // Interrupt service routines use different return instructions.
23497 const Function &Func = DAG.getMachineFunction().getFunction();
23498 if (Func.hasFnAttribute("interrupt")) {
23499 if (!Func.getReturnType()->isVoidTy())
23501 "Functions with the interrupt attribute must have void return type!");
23502
23504 StringRef Kind =
23505 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
23506
23507 if (Kind == "supervisor")
23508 RetOpc = RISCVISD::SRET_GLUE;
23509 else if (Kind == "rnmi") {
23510 assert(STI.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
23511 "Need Smrnmi extension for rnmi");
23512 RetOpc = RISCVISD::MNRET_GLUE;
23513 } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
23514 assert(STI.hasFeature(RISCV::FeatureVendorXqciint) &&
23515 "Need Xqciint for qci-(no)nest");
23516 RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
23517 } else
23518 RetOpc = RISCVISD::MRET_GLUE;
23519 }
23520
23521 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
23522}
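
The interrupt return opcodes selected here pair with the function attribute that front ends emit. A usage sketch in C/C++ (Clang/GCC attribute spelling; the handler must be a void function with no parameters, matching the checks in LowerFormalArguments above):

  // The backend emits an mret instead of a plain ret for this function.
  __attribute__((interrupt("machine")))
  void machineTimerHandler(void) {
    // acknowledge the interrupt, e.g. by writing a memory-mapped register
  }
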
23523
23524void RISCVTargetLowering::validateCCReservedRegs(
23525 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
23526 MachineFunction &MF) const {
23527 const Function &F = MF.getFunction();
23528 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
23529
23530 if (llvm::any_of(Regs, [&STI](auto Reg) {
23531 return STI.isRegisterReservedByUser(Reg.first);
23532 }))
23533 F.getContext().diagnose(DiagnosticInfoUnsupported{
23534 F, "Argument register required, but has been reserved."});
23535}
23536
23537// Check if the result of the node is only used as a return value, as
23538// otherwise we can't perform a tail-call.
23540 if (N->getNumValues() != 1)
23541 return false;
23542 if (!N->hasNUsesOfValue(1, 0))
23543 return false;
23544
23545 SDNode *Copy = *N->user_begin();
23546
23547 if (Copy->getOpcode() == ISD::BITCAST) {
23548 return isUsedByReturnOnly(Copy, Chain);
23549 }
23550
23551 // TODO: Handle additional opcodes in order to support tail-calling libcalls
23552 // with soft float ABIs.
23553 if (Copy->getOpcode() != ISD::CopyToReg) {
23554 return false;
23555 }
23556
23557 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
23558 // isn't safe to perform a tail call.
23559 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
23560 return false;
23561
23562 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
23563 bool HasRet = false;
23564 for (SDNode *Node : Copy->users()) {
23565 if (Node->getOpcode() != RISCVISD::RET_GLUE)
23566 return false;
23567 HasRet = true;
23568 }
23569 if (!HasRet)
23570 return false;
23571
23572 Chain = Copy->getOperand(0);
23573 return true;
23574}
23575
23577 return CI->isTailCall();
23578}
23579
23580/// getConstraintType - Given a constraint letter, return the type of
23581/// constraint it is for this target.
23582RISCVTargetLowering::ConstraintType
23583RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
23584 if (Constraint.size() == 1) {
23585 switch (Constraint[0]) {
23586 default:
23587 break;
23588 case 'f':
23589 case 'R':
23590 return C_RegisterClass;
23591 case 'I':
23592 case 'J':
23593 case 'K':
23594 return C_Immediate;
23595 case 'A':
23596 return C_Memory;
23597 case 's':
23598 case 'S': // A symbolic address
23599 return C_Other;
23600 }
23601 } else {
23602 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
23603 return C_RegisterClass;
23604 if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
23605 return C_RegisterClass;
23606 }
23607 return TargetLowering::getConstraintType(Constraint);
23608}
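
A brief GNU inline-asm sketch of where these single-letter constraints come from (meaningful only when compiling for a RISC-V target; the values are arbitrary, and 'f', 'A' and the vector constraints follow the same pattern):

  long constraintDemo(long X) {
    long R;
    // 'I': a 12-bit signed immediate, 'r': any general-purpose register.
    asm("addi %0, %1, %2" : "=r"(R) : "r"(X), "I"(-42));
    // 'K': a 5-bit unsigned immediate, here used as a shift amount.
    asm("slli %0, %0, %1" : "+r"(R) : "K"(3));
    return R;
  }
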
23609
23610std::pair<unsigned, const TargetRegisterClass *>
23612 StringRef Constraint,
23613 MVT VT) const {
23614 // First, see if this is a constraint that directly corresponds to a RISC-V
23615 // register class.
23616 if (Constraint.size() == 1) {
23617 switch (Constraint[0]) {
23618 case 'r':
23619 // TODO: Support fixed vectors up to XLen for P extension?
23620 if (VT.isVector())
23621 break;
23622 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23623 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23624 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23625 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23626 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23627 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23628 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23629 case 'f':
23630 if (VT == MVT::f16) {
23631 if (Subtarget.hasStdExtZfhmin())
23632 return std::make_pair(0U, &RISCV::FPR16RegClass);
23633 if (Subtarget.hasStdExtZhinxmin())
23634 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23635 } else if (VT == MVT::f32) {
23636 if (Subtarget.hasStdExtF())
23637 return std::make_pair(0U, &RISCV::FPR32RegClass);
23638 if (Subtarget.hasStdExtZfinx())
23639 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23640 } else if (VT == MVT::f64) {
23641 if (Subtarget.hasStdExtD())
23642 return std::make_pair(0U, &RISCV::FPR64RegClass);
23643 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23644 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23645 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23646 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23647 }
23648 break;
23649 case 'R':
23650 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23651 (VT == MVT::i128 && Subtarget.is64Bit()))
23652 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23653 break;
23654 default:
23655 break;
23656 }
23657 } else if (Constraint == "vr") {
23658 for (const auto *RC :
23659 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
23660 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
23661 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
23662 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
23663 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
23664 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
23665 &RISCV::VRN2M4RegClass}) {
23666 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23667 return std::make_pair(0U, RC);
23668
23669 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23670 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23671 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23672 return std::make_pair(0U, RC);
23673 }
23674 }
23675 } else if (Constraint == "vd") {
23676 for (const auto *RC :
23677 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
23678 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
23679 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
23680 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
23681 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
23682 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
23683 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
23684 &RISCV::VRN2M4NoV0RegClass}) {
23685 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23686 return std::make_pair(0U, RC);
23687
23688 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23689 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23690 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23691 return std::make_pair(0U, RC);
23692 }
23693 }
23694 } else if (Constraint == "vm") {
23695 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
23696 return std::make_pair(0U, &RISCV::VMV0RegClass);
23697
23698 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23699 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23700 // VT here might be coerced to vector with i8 elements, so we need to
23701 // check if this is a M1 register here instead of checking VMV0RegClass.
23702 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT))
23703 return std::make_pair(0U, &RISCV::VMV0RegClass);
23704 }
23705 } else if (Constraint == "cr") {
23706 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23707 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23708 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23709 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23710 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23711 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23712 if (!VT.isVector())
23713 return std::make_pair(0U, &RISCV::GPRCRegClass);
23714 } else if (Constraint == "cR") {
23715 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23716 (VT == MVT::i128 && Subtarget.is64Bit()))
23717 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23718 } else if (Constraint == "cf") {
23719 if (VT == MVT::f16) {
23720 if (Subtarget.hasStdExtZfhmin())
23721 return std::make_pair(0U, &RISCV::FPR16CRegClass);
23722 if (Subtarget.hasStdExtZhinxmin())
23723 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23724 } else if (VT == MVT::f32) {
23725 if (Subtarget.hasStdExtF())
23726 return std::make_pair(0U, &RISCV::FPR32CRegClass);
23727 if (Subtarget.hasStdExtZfinx())
23728 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23729 } else if (VT == MVT::f64) {
23730 if (Subtarget.hasStdExtD())
23731 return std::make_pair(0U, &RISCV::FPR64CRegClass);
23732 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23733 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23734 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23735 return std::make_pair(0U, &RISCV::GPRCRegClass);
23736 }
23737 }
23738
23739 // Clang will correctly decode the usage of register name aliases into their
23740 // official names. However, other frontends like `rustc` do not. This allows
23741 // users of these frontends to use the ABI names for registers in LLVM-style
23742 // register constraints.
23743 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
23744 .Case("{zero}", RISCV::X0)
23745 .Case("{ra}", RISCV::X1)
23746 .Case("{sp}", RISCV::X2)
23747 .Case("{gp}", RISCV::X3)
23748 .Case("{tp}", RISCV::X4)
23749 .Case("{t0}", RISCV::X5)
23750 .Case("{t1}", RISCV::X6)
23751 .Case("{t2}", RISCV::X7)
23752 .Cases("{s0}", "{fp}", RISCV::X8)
23753 .Case("{s1}", RISCV::X9)
23754 .Case("{a0}", RISCV::X10)
23755 .Case("{a1}", RISCV::X11)
23756 .Case("{a2}", RISCV::X12)
23757 .Case("{a3}", RISCV::X13)
23758 .Case("{a4}", RISCV::X14)
23759 .Case("{a5}", RISCV::X15)
23760 .Case("{a6}", RISCV::X16)
23761 .Case("{a7}", RISCV::X17)
23762 .Case("{s2}", RISCV::X18)
23763 .Case("{s3}", RISCV::X19)
23764 .Case("{s4}", RISCV::X20)
23765 .Case("{s5}", RISCV::X21)
23766 .Case("{s6}", RISCV::X22)
23767 .Case("{s7}", RISCV::X23)
23768 .Case("{s8}", RISCV::X24)
23769 .Case("{s9}", RISCV::X25)
23770 .Case("{s10}", RISCV::X26)
23771 .Case("{s11}", RISCV::X27)
23772 .Case("{t3}", RISCV::X28)
23773 .Case("{t4}", RISCV::X29)
23774 .Case("{t5}", RISCV::X30)
23775 .Case("{t6}", RISCV::X31)
23776 .Default(RISCV::NoRegister);
23777 if (XRegFromAlias != RISCV::NoRegister)
23778 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
23779
23780 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
23781 // TableGen record rather than the AsmName to choose registers for InlineAsm
23782 // constraints, plus we want to match those names to the widest floating point
23783 // register type available, manually select floating point registers here.
23784 //
23785 // The second case is the ABI name of the register, so that frontends can also
23786 // use the ABI names in register constraint lists.
23787 if (Subtarget.hasStdExtF()) {
23788 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
23789 .Cases("{f0}", "{ft0}", RISCV::F0_F)
23790 .Cases("{f1}", "{ft1}", RISCV::F1_F)
23791 .Cases("{f2}", "{ft2}", RISCV::F2_F)
23792 .Cases("{f3}", "{ft3}", RISCV::F3_F)
23793 .Cases("{f4}", "{ft4}", RISCV::F4_F)
23794 .Cases("{f5}", "{ft5}", RISCV::F5_F)
23795 .Cases("{f6}", "{ft6}", RISCV::F6_F)
23796 .Cases("{f7}", "{ft7}", RISCV::F7_F)
23797 .Cases("{f8}", "{fs0}", RISCV::F8_F)
23798 .Cases("{f9}", "{fs1}", RISCV::F9_F)
23799 .Cases("{f10}", "{fa0}", RISCV::F10_F)
23800 .Cases("{f11}", "{fa1}", RISCV::F11_F)
23801 .Cases("{f12}", "{fa2}", RISCV::F12_F)
23802 .Cases("{f13}", "{fa3}", RISCV::F13_F)
23803 .Cases("{f14}", "{fa4}", RISCV::F14_F)
23804 .Cases("{f15}", "{fa5}", RISCV::F15_F)
23805 .Cases("{f16}", "{fa6}", RISCV::F16_F)
23806 .Cases("{f17}", "{fa7}", RISCV::F17_F)
23807 .Cases("{f18}", "{fs2}", RISCV::F18_F)
23808 .Cases("{f19}", "{fs3}", RISCV::F19_F)
23809 .Cases("{f20}", "{fs4}", RISCV::F20_F)
23810 .Cases("{f21}", "{fs5}", RISCV::F21_F)
23811 .Cases("{f22}", "{fs6}", RISCV::F22_F)
23812 .Cases("{f23}", "{fs7}", RISCV::F23_F)
23813 .Cases("{f24}", "{fs8}", RISCV::F24_F)
23814 .Cases("{f25}", "{fs9}", RISCV::F25_F)
23815 .Cases("{f26}", "{fs10}", RISCV::F26_F)
23816 .Cases("{f27}", "{fs11}", RISCV::F27_F)
23817 .Cases("{f28}", "{ft8}", RISCV::F28_F)
23818 .Cases("{f29}", "{ft9}", RISCV::F29_F)
23819 .Cases("{f30}", "{ft10}", RISCV::F30_F)
23820 .Cases("{f31}", "{ft11}", RISCV::F31_F)
23821 .Default(RISCV::NoRegister);
23822 if (FReg != RISCV::NoRegister) {
23823 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
23824 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
23825 unsigned RegNo = FReg - RISCV::F0_F;
23826 unsigned DReg = RISCV::F0_D + RegNo;
23827 return std::make_pair(DReg, &RISCV::FPR64RegClass);
23828 }
23829 if (VT == MVT::f32 || VT == MVT::Other)
23830 return std::make_pair(FReg, &RISCV::FPR32RegClass);
23831 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
23832 unsigned RegNo = FReg - RISCV::F0_F;
23833 unsigned HReg = RISCV::F0_H + RegNo;
23834 return std::make_pair(HReg, &RISCV::FPR16RegClass);
23835 }
23836 }
23837 }
23838
23839 if (Subtarget.hasVInstructions()) {
23840 Register VReg = StringSwitch<Register>(Constraint.lower())
23841 .Case("{v0}", RISCV::V0)
23842 .Case("{v1}", RISCV::V1)
23843 .Case("{v2}", RISCV::V2)
23844 .Case("{v3}", RISCV::V3)
23845 .Case("{v4}", RISCV::V4)
23846 .Case("{v5}", RISCV::V5)
23847 .Case("{v6}", RISCV::V6)
23848 .Case("{v7}", RISCV::V7)
23849 .Case("{v8}", RISCV::V8)
23850 .Case("{v9}", RISCV::V9)
23851 .Case("{v10}", RISCV::V10)
23852 .Case("{v11}", RISCV::V11)
23853 .Case("{v12}", RISCV::V12)
23854 .Case("{v13}", RISCV::V13)
23855 .Case("{v14}", RISCV::V14)
23856 .Case("{v15}", RISCV::V15)
23857 .Case("{v16}", RISCV::V16)
23858 .Case("{v17}", RISCV::V17)
23859 .Case("{v18}", RISCV::V18)
23860 .Case("{v19}", RISCV::V19)
23861 .Case("{v20}", RISCV::V20)
23862 .Case("{v21}", RISCV::V21)
23863 .Case("{v22}", RISCV::V22)
23864 .Case("{v23}", RISCV::V23)
23865 .Case("{v24}", RISCV::V24)
23866 .Case("{v25}", RISCV::V25)
23867 .Case("{v26}", RISCV::V26)
23868 .Case("{v27}", RISCV::V27)
23869 .Case("{v28}", RISCV::V28)
23870 .Case("{v29}", RISCV::V29)
23871 .Case("{v30}", RISCV::V30)
23872 .Case("{v31}", RISCV::V31)
23873 .Default(RISCV::NoRegister);
23874 if (VReg != RISCV::NoRegister) {
23875 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
23876 return std::make_pair(VReg, &RISCV::VMRegClass);
23877 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
23878 return std::make_pair(VReg, &RISCV::VRRegClass);
23879 for (const auto *RC :
23880 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
23881 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
23882 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
23883 return std::make_pair(VReg, RC);
23884 }
23885 }
23886 }
23887 }
23888
23889 std::pair<Register, const TargetRegisterClass *> Res =
23891
23892 // If we picked one of the Zfinx register classes, remap it to the GPR class.
23893 // FIXME: When Zfinx is supported in CodeGen this will need to take the
23894 // Subtarget into account.
23895 if (Res.second == &RISCV::GPRF16RegClass ||
23896 Res.second == &RISCV::GPRF32RegClass ||
23897 Res.second == &RISCV::GPRPairRegClass)
23898 return std::make_pair(Res.first, &RISCV::GPRRegClass);
23899
23900 return Res;
23901}
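
The ABI-name aliases accepted above are what make GNU explicit-register variables work: the front end lowers them to "{a0}"-style constraint strings, which this function resolves to physical registers. A small sketch (RISC-V target and GNU extensions assumed):

  // `register ... asm("a0")` becomes the "{a0}" constraint in LLVM IR, which
  // the StringSwitch above resolves to RISCV::X10.
  long addOneInA0(long V) {
    register long In asm("a0") = V;
    register long Out asm("a1");
    asm("addi %0, %1, 1" : "=r"(Out) : "r"(In));
    return Out;
  }
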
23902
23905 // Currently only support length 1 constraints.
23906 if (ConstraintCode.size() == 1) {
23907 switch (ConstraintCode[0]) {
23908 case 'A':
23910 default:
23911 break;
23912 }
23913 }
23914
23915 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
23916}
23917
23919 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
23920 SelectionDAG &DAG) const {
23921 // Currently only support length 1 constraints.
23922 if (Constraint.size() == 1) {
23923 switch (Constraint[0]) {
23924 case 'I':
23925 // Validate & create a 12-bit signed immediate operand.
23926 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
23927 uint64_t CVal = C->getSExtValue();
23928 if (isInt<12>(CVal))
23929 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
23930 Subtarget.getXLenVT()));
23931 }
23932 return;
23933 case 'J':
23934 // Validate & create an integer zero operand.
23935 if (isNullConstant(Op))
23936 Ops.push_back(
23937 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
23938 return;
23939 case 'K':
23940 // Validate & create a 5-bit unsigned immediate operand.
23941 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
23942 uint64_t CVal = C->getZExtValue();
23943 if (isUInt<5>(CVal))
23944 Ops.push_back(
23945 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
23946 }
23947 return;
23948 case 'S':
23950 return;
23951 default:
23952 break;
23953 }
23954 }
23956}
23957
23959 Instruction *Inst,
23960 AtomicOrdering Ord) const {
23961 if (Subtarget.hasStdExtZtso()) {
23963 return Builder.CreateFence(Ord);
23964 return nullptr;
23965 }
23966
23968 return Builder.CreateFence(Ord);
23969 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
23970 return Builder.CreateFence(AtomicOrdering::Release);
23971 return nullptr;
23972}
23973
23975 Instruction *Inst,
23976 AtomicOrdering Ord) const {
23977 if (Subtarget.hasStdExtZtso()) {
23979 return Builder.CreateFence(Ord);
23980 return nullptr;
23981 }
23982
23983 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
23984 return Builder.CreateFence(AtomicOrdering::Acquire);
23985 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
23987 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
23988 return nullptr;
23989}
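
Together, the leading and trailing fence hooks implement the usual RISC-V WMO mapping when Ztso is not available: an acquire (or stronger) load is followed by an acquire fence, a seq_cst load additionally gets a leading full fence, and a release (or stronger) store gets a leading release fence. Source-level operations that exercise both paths:

  #include <atomic>

  int loadAcquire(const std::atomic<int> &A) {
    return A.load(std::memory_order_acquire);   // plain load + trailing fence
  }

  void storeRelease(std::atomic<int> &A, int V) {
    A.store(V, std::memory_order_release);      // leading fence + plain store
  }
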
23990
23993 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
23994 // point operations can't be used in an lr/sc sequence without breaking the
23995 // forward-progress guarantee.
23996 if (AI->isFloatingPointOperation() ||
24002
24003 // Don't expand forced atomics; we want to have __sync libcalls instead.
24004 if (Subtarget.hasForcedAtomics())
24006
24007 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
24008 if (AI->getOperation() == AtomicRMWInst::Nand) {
24009 if (Subtarget.hasStdExtZacas() &&
24010 (Size >= 32 || Subtarget.hasStdExtZabha()))
24012 if (Size < 32)
24014 }
24015
24016 if (Size < 32 && !Subtarget.hasStdExtZabha())
24018
24020}
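
In practice this means a sub-word read-modify-write like the one below is rewritten by the AtomicExpand pass into a masked word-sized LR/SC loop (using the intrinsics emitted further down) unless Zabha provides native byte/halfword AMOs:

  #include <atomic>

  unsigned char bumpCounter(std::atomic<unsigned char> &C) {
    // 8-bit atomicrmw add: MaskedIntrinsic expansion without Zabha.
    return C.fetch_add(1, std::memory_order_relaxed);
  }
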
24021
24022static Intrinsic::ID
24024 switch (BinOp) {
24025 default:
24026 llvm_unreachable("Unexpected AtomicRMW BinOp");
24028 return Intrinsic::riscv_masked_atomicrmw_xchg;
24029 case AtomicRMWInst::Add:
24030 return Intrinsic::riscv_masked_atomicrmw_add;
24031 case AtomicRMWInst::Sub:
24032 return Intrinsic::riscv_masked_atomicrmw_sub;
24034 return Intrinsic::riscv_masked_atomicrmw_nand;
24035 case AtomicRMWInst::Max:
24036 return Intrinsic::riscv_masked_atomicrmw_max;
24037 case AtomicRMWInst::Min:
24038 return Intrinsic::riscv_masked_atomicrmw_min;
24040 return Intrinsic::riscv_masked_atomicrmw_umax;
24042 return Intrinsic::riscv_masked_atomicrmw_umin;
24043 }
24044}
24045
24047 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
24048 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
24049 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
24050 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
24051 // mask, as this produces better code than the LR/SC loop emitted by
24052 // int_riscv_masked_atomicrmw_xchg.
24053 if (AI->getOperation() == AtomicRMWInst::Xchg &&
24056 if (CVal->isZero())
24057 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
24058 Builder.CreateNot(Mask, "Inv_Mask"),
24059 AI->getAlign(), Ord);
24060 if (CVal->isMinusOne())
24061 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
24062 AI->getAlign(), Ord);
24063 }
24064
24065 unsigned XLen = Subtarget.getXLen();
24066 Value *Ordering =
24067 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
24068 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24070 AI->getModule(),
24072
24073 if (XLen == 64) {
24074 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
24075 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24076 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
24077 }
24078
24079 Value *Result;
24080
24081 // Must pass the shift amount needed to sign extend the loaded value prior
24082 // to performing a signed comparison for min/max. ShiftAmt is the number of
24083 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
24084 // is the number of bits to left+right shift the value in order to
24085 // sign-extend.
24086 if (AI->getOperation() == AtomicRMWInst::Min ||
24088 const DataLayout &DL = AI->getDataLayout();
24089 unsigned ValWidth =
24090 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
24091 Value *SextShamt =
24092 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
24093 Result = Builder.CreateCall(LrwOpScwLoop,
24094 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
24095 } else {
24096 Result =
24097 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
24098 }
24099
24100 if (XLen == 64)
24101 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24102 return Result;
24103}
24104
24107 AtomicCmpXchgInst *CI) const {
24108 // Don't expand forced atomics; we want to have __sync libcalls instead.
24109 if (Subtarget.hasForcedAtomics())
24111
24113 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
24114 (Size == 8 || Size == 16))
24117}
24118
24120 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
24121 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
24122 unsigned XLen = Subtarget.getXLen();
24123 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
24124 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg;
24125 if (XLen == 64) {
24126 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
24127 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
24128 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24129 }
24130 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24131 Value *Result = Builder.CreateIntrinsic(
24132 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
24133 if (XLen == 64)
24134 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24135 return Result;
24136}
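
The masked compare-exchange intrinsic built here is what backs sub-word cmpxchg when Zabha and Zacas are not both available; a source-level example that ends up on this path:

  #include <atomic>

  bool claimSlot(std::atomic<short> &Slot) {
    short Expected = 0;
    // 16-bit cmpxchg: expanded through riscv_masked_cmpxchg unless Zabha and
    // Zacas provide a native sub-word compare-and-swap.
    return Slot.compare_exchange_strong(Expected, 1);
  }
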
24137
24139 EVT DataVT) const {
24140 // We have indexed loads for all supported EEW types. Indices are always
24141 // zero extended.
24142 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
24143 isTypeLegal(Extend.getValueType()) &&
24144 isTypeLegal(Extend.getOperand(0).getValueType()) &&
24145 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
24146}
24147
24149 EVT VT) const {
24150 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
24151 return false;
24152
24153 switch (FPVT.getSimpleVT().SimpleTy) {
24154 case MVT::f16:
24155 return Subtarget.hasStdExtZfhmin();
24156 case MVT::f32:
24157 return Subtarget.hasStdExtF();
24158 case MVT::f64:
24159 return Subtarget.hasStdExtD();
24160 default:
24161 return false;
24162 }
24163}
24164
24166 // If we are using the small code model, we can reduce the size of each
24167 // jump table entry to 4 bytes.
24168 if (Subtarget.is64Bit() && !isPositionIndependent() &&
24171 }
24173}
24174
24176 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
24177 unsigned uid, MCContext &Ctx) const {
24178 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
24180 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
24181}
24182
24184 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
24185 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
24186 // a power of two as well.
24187 // FIXME: This doesn't work for zve32, but that's already broken
24188 // elsewhere for the same reason.
24189 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
24190 static_assert(RISCV::RVVBitsPerBlock == 64,
24191 "RVVBitsPerBlock changed, audit needed");
24192 return true;
24193}
24194
24196 SDValue &Offset,
24198 SelectionDAG &DAG) const {
24199 // Target does not support indexed loads.
24200 if (!Subtarget.hasVendorXTHeadMemIdx())
24201 return false;
24202
24203 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
24204 return false;
24205
24206 Base = Op->getOperand(0);
24207 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
24208 int64_t RHSC = RHS->getSExtValue();
24209 if (Op->getOpcode() == ISD::SUB)
24210 RHSC = -(uint64_t)RHSC;
24211
24212 // The constants that can be encoded in the THeadMemIdx instructions
24213 // are of the form (sign_extend(imm5) << imm2).
24214 bool isLegalIndexedOffset = false;
24215 for (unsigned i = 0; i < 4; i++)
24216 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
24217 isLegalIndexedOffset = true;
24218 break;
24219 }
24220
24221 if (!isLegalIndexedOffset)
24222 return false;
24223
24224 Offset = Op->getOperand(1);
24225 return true;
24226 }
24227
24228 return false;
24229}
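
Restating the offset-legality test above as ordinary C++ (the helper name is invented): an offset is accepted when it is a sign-extended 5-bit immediate scaled by 1, 2, 4 or 8.

  #include <cstdint>

  bool isLegalTHeadMemIdxOffset(int64_t Off) {
    // Accept offsets of the form sign_extend(imm5) << imm2, imm2 in [0, 3].
    for (unsigned Shift = 0; Shift < 4; ++Shift) {
      if (Off % (INT64_C(1) << Shift) != 0)
        continue;
      int64_t Scaled = Off / (INT64_C(1) << Shift);
      if (Scaled >= -16 && Scaled <= 15)   // isInt<5>
        return true;
    }
    return false;
  }
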
24230
24232 SDValue &Offset,
24234 SelectionDAG &DAG) const {
24235 EVT VT;
24236 SDValue Ptr;
24237 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24238 VT = LD->getMemoryVT();
24239 Ptr = LD->getBasePtr();
24240 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24241 VT = ST->getMemoryVT();
24242 Ptr = ST->getBasePtr();
24243 } else
24244 return false;
24245
24246 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
24247 return false;
24248
24249 AM = ISD::PRE_INC;
24250 return true;
24251}
24252
24254 SDValue &Base,
24255 SDValue &Offset,
24257 SelectionDAG &DAG) const {
24258 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
24259 if (Op->getOpcode() != ISD::ADD)
24260 return false;
24261
24263 Base = LS->getBasePtr();
24264 else
24265 return false;
24266
24267 if (Base == Op->getOperand(0))
24268 Offset = Op->getOperand(1);
24269 else if (Base == Op->getOperand(1))
24270 Offset = Op->getOperand(0);
24271 else
24272 return false;
24273
24274 AM = ISD::POST_INC;
24275 return true;
24276 }
24277
24278 EVT VT;
24279 SDValue Ptr;
24280 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24281 VT = LD->getMemoryVT();
24282 Ptr = LD->getBasePtr();
24283 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24284 VT = ST->getMemoryVT();
24285 Ptr = ST->getBasePtr();
24286 } else
24287 return false;
24288
24289 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
24290 return false;
24291 // Post-indexing updates the base, so it's not a valid transform
24292 // if that's not the same as the load's pointer.
24293 if (Ptr != Base)
24294 return false;
24295
24296 AM = ISD::POST_INC;
24297 return true;
24298}
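
Loops of the following shape are the typical source of the load-plus-pointer-update pairs that these pre/post-indexed hooks try to fold into a single XTHeadMemIdx or XCVmem access (whether that happens depends on the enabled vendor extensions):

  long sumArray(const long *P, long N) {
    long S = 0;
    for (long I = 0; I < N; ++I)
      S += *P++;          // load followed by an add of the element size
    return S;
  }
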
24299
24301 EVT VT) const {
24302 EVT SVT = VT.getScalarType();
24303
24304 if (!SVT.isSimple())
24305 return false;
24306
24307 switch (SVT.getSimpleVT().SimpleTy) {
24308 case MVT::f16:
24309 return VT.isVector() ? Subtarget.hasVInstructionsF16()
24310 : Subtarget.hasStdExtZfhOrZhinx();
24311 case MVT::f32:
24312 return Subtarget.hasStdExtFOrZfinx();
24313 case MVT::f64:
24314 return Subtarget.hasStdExtDOrZdinx();
24315 default:
24316 break;
24317 }
24318
24319 return false;
24320}
24321
24322ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
24323 // Zacas will use amocas.w which does not require extension.
24324 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
24325}
24326
24327Register RISCVTargetLowering::getExceptionPointerRegister(
24328 const Constant *PersonalityFn) const {
24329 return RISCV::X10;
24330}
24331
24332Register RISCVTargetLowering::getExceptionSelectorRegister(
24333 const Constant *PersonalityFn) const {
24334 return RISCV::X11;
24335}
24336
24337bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
24338 // Return false to suppress the unnecessary extensions if the LibCall
24339 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
24340 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
24341 Type.getSizeInBits() < Subtarget.getXLen()))
24342 return false;
24343
24344 return true;
24345}
24346
24347bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty,
24348 bool IsSigned) const {
24349 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
24350 return true;
24351
24352 return IsSigned;
24353}
24354
24355bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
24356 SDValue C) const {
24357 // Check integral scalar types.
24358 if (!VT.isScalarInteger())
24359 return false;
24360
24361 // Omit the optimization if the subtarget has the Zmmul extension (which M
24362 // implies) and the data size exceeds XLen.
24363 const bool HasZmmul = Subtarget.hasStdExtZmmul();
24364 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
24365 return false;
24366
24367 auto *ConstNode = cast<ConstantSDNode>(C);
24368 const APInt &Imm = ConstNode->getAPIntValue();
24369
24370 // Don't do this if the Xqciac extension is enabled and the Imm is in simm12.
24371 if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
24372 return false;
24373
24374 // Break the MUL to a SLLI and an ADD/SUB.
24375 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
24376 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
24377 return true;
24378
24379 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
24380 if (Subtarget.hasShlAdd(3) && !Imm.isSignedIntN(12) &&
24381 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
24382 (Imm - 8).isPowerOf2()))
24383 return true;
24384
24385 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
24386 // a pair of LUI/ADDI.
24387 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
24388 ConstNode->hasOneUse()) {
24389 APInt ImmS = Imm.ashr(Imm.countr_zero());
24390 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
24391 (1 - ImmS).isPowerOf2())
24392 return true;
24393 }
24394
24395 return false;
24396}
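// Illustrative examples, not from the original file: the cases accepted above
// correspond roughly to these shift/add expansions (assuming the Xqciac
// special case does not apply):
//   x * 65   -> (x << 6) + x            // (Imm - 1) is a power of two
//   x * 63   -> (x << 6) - x            // (Imm + 1) is a power of two
//   x * 4100 -> sh2add(x, x << 12)      // (Imm - 4) is a power of two, Zba
//   x * 6144 -> ((x << 2) - x) << 11    // 6144 >> 11 == 3, two SLLI + a SUB
// Compile-time sanity checks of the last two identities (values chosen here):
static_assert((long long)7 * 4100 == ((long long)7 << 12) + ((long long)7 << 2),
              "x * 4100 == (x << 12) + (x << 2) for x == 7");
static_assert((long long)5 * 6144 == ((((long long)5 << 2) - 5) << 11),
              "x * 6144 == ((x << 2) - x) << 11 for x == 5");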
24397
24398bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
24399 SDValue ConstNode) const {
24400 // Let the DAGCombiner decide for vectors.
24401 EVT VT = AddNode.getValueType();
24402 if (VT.isVector())
24403 return true;
24404
24405 // Let the DAGCombiner decide for larger types.
24406 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
24407 return true;
24408
24409 // It is worse if c1 is simm12 while c1*c2 is not.
24410 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
24411 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
24412 const APInt &C1 = C1Node->getAPIntValue();
24413 const APInt &C2 = C2Node->getAPIntValue();
24414 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
24415 return false;
24416
24417 // Default to true and let the DAGCombiner decide.
24418 return true;
24419}
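// Worked example, not from the original file: simm12 covers [-2048, 2047].
// For (x + 100) * 10 the reassociated form x * 10 + 1000 keeps an addi-sized
// constant, so the default "true" stands; for (x + 100) * 100 the folded
// constant 10000 no longer fits, so the hook rejects the fold and the add
// keeps its cheap immediate.
static_assert(100 * 10 <= 2047 && 100 * 100 > 2047,
              "folding (x + 100) * 100 would lose the simm12 immediate");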
24420
24421bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
24422 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
24423 unsigned *Fast) const {
24424 if (!VT.isVector()) {
24425 if (Fast)
24426 *Fast = Subtarget.enableUnalignedScalarMem();
24427 return Subtarget.enableUnalignedScalarMem();
24428 }
24429
24430 // All vector implementations must support element alignment
24431 EVT ElemVT = VT.getVectorElementType();
24432 if (Alignment >= ElemVT.getStoreSize()) {
24433 if (Fast)
24434 *Fast = 1;
24435 return true;
24436 }
24437
24438 // Note: We lower an unmasked unaligned vector access to an equally sized
24439 // e8 element type access. Given this, we effectively support all unmasked
24440 // misaligned accesses. TODO: Work through the codegen implications of
24441 // allowing such accesses to be formed, and considered fast.
24442 if (Fast)
24443 *Fast = Subtarget.enableUnalignedVectorMem();
24444 return Subtarget.enableUnalignedVectorMem();
24445}
24446
24447EVT RISCVTargetLowering::getOptimalMemOpType(
24448 LLVMContext &Context, const MemOp &Op,
24449 const AttributeList &FuncAttributes) const {
24450 if (!Subtarget.hasVInstructions())
24451 return MVT::Other;
24452
24453 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
24454 return MVT::Other;
24455
24456 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
24457 // has an expansion threshold, and we want the number of hardware memory
24458 // operations to correspond roughly to that threshold. LMUL>1 operations
24459 // are typically expanded linearly internally, and thus correspond to more
24460 // than one actual memory operation. Note that store merging and load
24461 // combining will typically form larger LMUL operations from the LMUL1
24462 // operations emitted here, and that's okay because combining isn't
24463 // introducing new memory operations; it's just merging existing ones.
24464 // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
24465 const unsigned MinVLenInBytes =
24466 std::min(Subtarget.getRealMinVLen() / 8, 1024U);
24467
24468 if (Op.size() < MinVLenInBytes)
24469 // TODO: Figure out short memops. For the moment, do the default thing
24470 // which ends up using scalar sequences.
24471 return MVT::Other;
24472
24473 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
24474 // fixed vectors.
24475 if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
24476 return MVT::Other;
24477
24478 // Prefer i8 for non-zero memset as it allows us to avoid materializing
24479 // a large scalar constant and instead use vmv.v.x/i to do the
24480 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
24481 // maximize the chance we can encode the size in the vsetvli.
24482 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
24483 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
24484
24485 // Do we have sufficient alignment for our preferred VT? If not, revert
24486 // to largest size allowed by our alignment criteria.
24487 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
24488 Align RequiredAlign(PreferredVT.getStoreSize());
24489 if (Op.isFixedDstAlign())
24490 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
24491 if (Op.isMemcpy())
24492 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
24493 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
24494 }
24495 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
24496}
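// Illustrative example, not part of the original file: assuming VLEN = 256
// and ELEN = 64, the LMUL1 block is 32 bytes, so a suitably aligned memcpy of
// at least 32 bytes gets v4i64 from this hook, while a non-zero memset gets
// v32i8 so the stored byte can be splatted directly with vmv.v.x. The helper
// below is hypothetical and only restates the arithmetic.
static constexpr unsigned sketchNumElts(unsigned VLenBits, unsigned EltBits) {
  return (VLenBits / 8) / (EltBits / 8); // LMUL1 bytes / element bytes
}
static_assert(sketchNumElts(256, 64) == 4 && sketchNumElts(256, 8) == 32,
              "VLEN=256: v4i64 for memcpy, v32i8 for a non-zero memset");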
24497
24498bool RISCVTargetLowering::splitValueIntoRegisterParts(
24499 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
24500 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
24501 bool IsABIRegCopy = CC.has_value();
24502 EVT ValueVT = Val.getValueType();
24503
24504 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24505 if ((ValueVT == PairVT ||
24506 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24507 ValueVT == MVT::f64)) &&
24508 NumParts == 1 && PartVT == MVT::Untyped) {
24509 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24510 MVT XLenVT = Subtarget.getXLenVT();
24511 if (ValueVT == MVT::f64)
24512 Val = DAG.getBitcast(MVT::i64, Val);
24513 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
24514 // Always creating an MVT::Untyped part, so always use
24515 // RISCVISD::BuildGPRPair.
24516 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
24517 return true;
24518 }
24519
24520 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24521 PartVT == MVT::f32) {
24522 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
24523 // nan, and cast to f32.
24524 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
24525 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
24526 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
24527 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
24528 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24529 Parts[0] = Val;
24530 return true;
24531 }
24532
24533 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
24534#ifndef NDEBUG
24535 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
24536 [[maybe_unused]] unsigned ValLMUL =
24537 divideCeil(ValueVT.getSizeInBits().getKnownMinValue(),
24538 ValNF * RISCV::RVVBitsPerBlock);
24539 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
24540 [[maybe_unused]] unsigned PartLMUL =
24541 divideCeil(PartVT.getSizeInBits().getKnownMinValue(),
24542 PartNF * RISCV::RVVBitsPerBlock);
24543 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
24544 "RISC-V vector tuple type only accepts same register class type "
24545 "TUPLE_INSERT");
24546#endif
24547
24548 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
24549 Val, DAG.getTargetConstant(0, DL, MVT::i32));
24550 Parts[0] = Val;
24551 return true;
24552 }
24553
24554 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24555 PartVT.isScalableVector()) {
24556 if (ValueVT.isFixedLengthVector()) {
24557 ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
24558 Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
24559 }
24560 LLVMContext &Context = *DAG.getContext();
24561 EVT ValueEltVT = ValueVT.getVectorElementType();
24562 EVT PartEltVT = PartVT.getVectorElementType();
24563 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24564 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24565 if (PartVTBitSize % ValueVTBitSize == 0) {
24566 assert(PartVTBitSize >= ValueVTBitSize);
24567 // If the element types are different, bitcast to the same element type of
24568 // PartVT first.
24569 // For example, suppose we want to copy a <vscale x 1 x i8> value to
24570 // <vscale x 4 x i16>.
24571 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
24572 // subvector, then we can bitcast to <vscale x 4 x i16>.
24573 if (ValueEltVT != PartEltVT) {
24574 if (PartVTBitSize > ValueVTBitSize) {
24575 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24576 assert(Count != 0 && "The number of element should not be zero.");
24577 EVT SameEltTypeVT =
24578 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24579 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(SameEltTypeVT), Val, 0);
24580 }
24581 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24582 } else {
24583 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(PartVT), Val, 0);
24584 }
24585 Parts[0] = Val;
24586 return true;
24587 }
24588 }
24589
24590 return false;
24591}
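// Illustrative note, not part of the original file: the [b]f16 -> f32 ABI path
// above NaN-boxes the 16-bit payload into the low half of an f32 register by
// ORing in all-ones upper bits. For example, the f16 value 1.0 (bits 0x3C00)
// travels as the f32 NaN bit pattern 0xFFFF3C00; joinRegisterPartsIntoValue
// below simply truncates back to the low 16 bits.
static_assert((0xFFFF0000u | 0x3C00u) == 0xFFFF3C00u,
              "f16 1.0 NaN-boxed into an f32 register");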
24592
24593SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
24594 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
24595 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
24596 bool IsABIRegCopy = CC.has_value();
24597
24598 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24599 if ((ValueVT == PairVT ||
24600 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24601 ValueVT == MVT::f64)) &&
24602 NumParts == 1 && PartVT == MVT::Untyped) {
24603 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24604 MVT XLenVT = Subtarget.getXLenVT();
24605
24606 SDValue Val = Parts[0];
24607 // Always starting with an MVT::Untyped part, so always use
24608 // RISCVISD::SplitGPRPair
24609 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
24610 Val);
24611 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
24612 Val.getValue(1));
24613 if (ValueVT == MVT::f64)
24614 Val = DAG.getBitcast(ValueVT, Val);
24615 return Val;
24616 }
24617
24618 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24619 PartVT == MVT::f32) {
24620 SDValue Val = Parts[0];
24621
24622 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
24623 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
24624 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
24625 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
24626 return Val;
24627 }
24628
24629 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24630 PartVT.isScalableVector()) {
24631 LLVMContext &Context = *DAG.getContext();
24632 SDValue Val = Parts[0];
24633 EVT ValueEltVT = ValueVT.getVectorElementType();
24634 EVT PartEltVT = PartVT.getVectorElementType();
24635 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24636 if (ValueVT.isFixedLengthVector())
24637 ValueVTBitSize = getContainerForFixedLengthVector(ValueVT.getSimpleVT())
24638 .getSizeInBits()
24639 .getKnownMinValue();
24640 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24641 if (PartVTBitSize % ValueVTBitSize == 0) {
24642 assert(PartVTBitSize >= ValueVTBitSize);
24643 EVT SameEltTypeVT = ValueVT;
24644 // If the element types are different, convert it to the same element type
24645 // of PartVT.
24646 // For example, suppose we want to copy a <vscale x 1 x i8> value from
24647 // <vscale x 4 x i16>.
24648 // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
24649 // then we can extract <vscale x 1 x i8>.
24650 if (ValueEltVT != PartEltVT) {
24651 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24652 assert(Count != 0 && "The number of element should not be zero.");
24653 SameEltTypeVT =
24654 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24655 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
24656 }
24657 if (ValueVT.isFixedLengthVector())
24658 Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget);
24659 else
24660 Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
24661 return Val;
24662 }
24663 }
24664 return SDValue();
24665}
24666
24667bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
24668 // When aggressively optimizing for code size, we prefer to use a div
24669 // instruction, as it is usually smaller than the alternative sequence.
24670 // TODO: Add vector division?
24671 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
24672 return OptSize && !VT.isVector();
24673}
24674
24675bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
24676 // Scalarizing zero_ext and sign_ext might stop them from matching widening
24677 // instructions in some situations.
24678 unsigned Opc = N->getOpcode();
24679 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
24680 return false;
24681 return true;
24682}
24683
24684static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
24685 Module *M = IRB.GetInsertBlock()->getModule();
24686 Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration(
24687 M, Intrinsic::thread_pointer, IRB.getPtrTy());
24688 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
24689 IRB.CreateCall(ThreadPointerFunc), Offset);
24690}
24691
24692Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
24693 // Fuchsia provides a fixed TLS slot for the stack cookie.
24694 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
24695 if (Subtarget.isTargetFuchsia())
24696 return useTpOffset(IRB, -0x10);
24697
24698 // Android provides a fixed TLS slot for the stack cookie. See the definition
24699 // of TLS_SLOT_STACK_GUARD in
24700 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
24701 if (Subtarget.isTargetAndroid())
24702 return useTpOffset(IRB, -0x18);
24703
24704 Module *M = IRB.GetInsertBlock()->getModule();
24705
24706 if (M->getStackProtectorGuard() == "tls") {
24707 // Users must specify the offset explicitly
24708 int Offset = M->getStackProtectorGuardOffset();
24709 return useTpOffset(IRB, Offset);
24710 }
24711
24712 return TargetLowering::getIRStackGuard(IRB);
24713}
24714
24715bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
24716 Align Alignment) const {
24717 if (!Subtarget.hasVInstructions())
24718 return false;
24719
24720 // Only support fixed vectors if we know the minimum vector size.
24721 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
24722 return false;
24723
24724 EVT ScalarType = DataType.getScalarType();
24725 if (!isLegalElementTypeForRVV(ScalarType))
24726 return false;
24727
24728 if (!Subtarget.enableUnalignedVectorMem() &&
24729 Alignment < ScalarType.getStoreSize())
24730 return false;
24731
24732 return true;
24733}
24734
24735MachineInstr *
24736RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
24737 MachineBasicBlock::iterator &MBBI,
24738 const TargetInstrInfo *TII) const {
24739 assert(MBBI->isCall() && MBBI->getCFIType() &&
24740 "Invalid call instruction for a KCFI check");
24741 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
24742 MBBI->getOpcode()));
24743
24744 MachineOperand &Target = MBBI->getOperand(0);
24745 Target.setIsRenamable(false);
24746
24747 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
24748 .addReg(Target.getReg())
24749 .addImm(MBBI->getCFIType())
24750 .getInstr();
24751}
24752
24753#define GET_REGISTER_MATCHER
24754#include "RISCVGenAsmMatcher.inc"
24755
24756Register
24757RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
24758 const MachineFunction &MF) const {
24759 Register Reg = MatchRegisterAltName(RegName);
24760 if (!Reg)
24761 Reg = MatchRegisterName(RegName);
24762 if (!Reg)
24763 return Reg;
24764
24765 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
24766 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
24767 reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" +
24768 StringRef(RegName) + "\"."));
24769 return Reg;
24770}
24771
24772MachineMemOperand::Flags
24773RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
24774 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
24775
24776 if (NontemporalInfo == nullptr)
24777 return MachineMemOperand::MONone;
24778
24779 // 1 (the default value) works as __RISCV_NTLH_ALL
24780 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
24781 // 3 -> __RISCV_NTLH_ALL_PRIVATE
24782 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
24783 // 5 -> __RISCV_NTLH_ALL
24784 int NontemporalLevel = 5;
24785 const MDNode *RISCVNontemporalInfo =
24786 I.getMetadata("riscv-nontemporal-domain");
24787 if (RISCVNontemporalInfo != nullptr)
24788 NontemporalLevel =
24789 cast<ConstantInt>(
24790 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
24791 ->getValue())
24792 ->getZExtValue();
24793
24794 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
24795 "RISC-V target doesn't support this non-temporal domain.");
24796
24797 NontemporalLevel -= 2;
24798 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
24799 if (NontemporalLevel & 0b1)
24800 Flags |= MONontemporalBit0;
24801 if (NontemporalLevel & 0b10)
24802 Flags |= MONontemporalBit1;
24803
24804 return Flags;
24805}
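// Illustrative mapping, not from the original file: after the "level - 2"
// adjustment the two nontemporal MMO bits encode the domain as
//   INNERMOST_PRIVATE (2) -> 0b00, ALL_PRIVATE (3) -> 0b01,
//   INNERMOST_SHARED  (4) -> 0b10, ALL (5, also the default) -> 0b11.
static_assert(((5 - 2) & 0b11) == 0b11 && ((2 - 2) & 0b11) == 0b00,
              "__RISCV_NTLH_ALL sets both bits; INNERMOST_PRIVATE sets none");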
24806
24807MachineMemOperand::Flags
24808RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
24809
24810 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
24811 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
24812 TargetFlags |= (NodeFlags & MONontemporalBit0);
24813 TargetFlags |= (NodeFlags & MONontemporalBit1);
24814 return TargetFlags;
24815}
24816
24817bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
24818 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
24819 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
24820}
24821
24822bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
24823 if (VT.isScalableVector())
24824 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
24825 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
24826 return true;
24827 return Subtarget.hasCPOPLike() &&
24828 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
24829}
24830
24831unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
24832 ISD::CondCode Cond) const {
24833 return isCtpopFast(VT) ? 0 : 1;
24834}
24835
24836bool RISCVTargetLowering::shouldInsertFencesForAtomic(
24837 const Instruction *I) const {
24838 if (Subtarget.hasStdExtZalasr()) {
24839 if (Subtarget.hasStdExtZtso()) {
24840 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
24841 // should be lowered to plain load/store. The easiest way to do this is
24842 // to say we should insert fences for them, and the fence insertion code
24843 // will just not insert any fences
24844 auto *LI = dyn_cast<LoadInst>(I);
24845 auto *SI = dyn_cast<StoreInst>(I);
24846 if ((LI &&
24847 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
24848 (SI &&
24849 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
24850 // Here, this is a load or store which is seq_cst, and needs a .aq or
24851 // .rl therefore we shouldn't try to insert fences
24852 return false;
24853 }
24854 // Here, we are a TSO inst that isn't a seq_cst load/store
24855 return isa<LoadInst>(I) || isa<StoreInst>(I);
24856 }
24857 return false;
24858 }
24859 // Note that one specific case requires fence insertion for an
24860 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
24861 // than this hook due to limitations in the interface here.
24862 return isa<LoadInst>(I) || isa<StoreInst>(I);
24863}
24864
24865bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
24866
24867 // GISel support is in progress or complete for these opcodes.
24868 unsigned Op = Inst.getOpcode();
24869 if (Op == Instruction::Add || Op == Instruction::Sub ||
24870 Op == Instruction::And || Op == Instruction::Or ||
24871 Op == Instruction::Xor || Op == Instruction::InsertElement ||
24872 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
24873 Op == Instruction::Freeze || Op == Instruction::Store)
24874 return false;
24875
24876 if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
24877 // Mark RVV intrinsic as supported.
24878 if (RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(II->getIntrinsicID()))
24879 return false;
24880 }
24881
24882 if (Inst.getType()->isScalableTy())
24883 return true;
24884
24885 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
24886 if (Inst.getOperand(i)->getType()->isScalableTy() &&
24887 !isa<ReturnInst>(&Inst))
24888 return true;
24889
24890 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
24891 if (AI->getAllocatedType()->isScalableTy())
24892 return true;
24893 }
24894
24895 return false;
24896}
24897
24898SDValue
24899RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
24900 SelectionDAG &DAG,
24901 SmallVectorImpl<SDNode *> &Created) const {
24902 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
24903 if (isIntDivCheap(N->getValueType(0), Attr))
24904 return SDValue(N, 0); // Lower SDIV as SDIV
24905
24906 // Only perform this transform if short forward branch opt is supported.
24907 if (!Subtarget.hasShortForwardBranchOpt())
24908 return SDValue();
24909 EVT VT = N->getValueType(0);
24910 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
24911 return SDValue();
24912
24913 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
24914 if (Divisor.sgt(2048) || Divisor.slt(-2048))
24915 return SDValue();
24916 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
24917}
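// Illustrative note, not part of the original file: the CMOV-style expansion
// adds (2**k - 1) to negative dividends before the arithmetic shift, e.g.
// x / 8 becomes (x < 0 ? x + 7 : x) >> 3 to get the round-toward-zero result.
// Capping the divisor magnitude at 2048 keeps that adjustment (at most 2047)
// encodable in a single addi/addiw immediate.
static_assert((1 << 11) - 1 == 2047, "largest adjustment still fits in simm12");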
24918
24919bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
24920 EVT VT, const APInt &AndMask) const {
24921 if (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())
24922 return !Subtarget.hasBEXTILike() && AndMask.ugt(1024);
24923 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
24924}
24925
24926unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
24927 return Subtarget.getMinimumJumpTableEntries();
24928}
24929
24930SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
24931 SDValue Value, SDValue Addr,
24932 int JTI,
24933 SelectionDAG &DAG) const {
24934 if (Subtarget.hasStdExtZicfilp()) {
24935 // When Zicfilp enabled, we need to use software guarded branch for jump
24936 // table branch.
24937 SDValue Chain = Value;
24938 // Jump table debug info is only needed if CodeView is enabled.
24939 if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF())
24940 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
24941 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
24942 }
24943 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
24944}
24945
24946// If an output pattern produces multiple instructions, tablegen may pick an
24947// arbitrary type from an instruction's destination register class to use for the
24948// VT of that MachineSDNode. This VT may be used to look up the representative
24949// register class. If the type isn't legal, the default implementation will
24950// not find a register class.
24951//
24952// Some integer types smaller than XLen are listed in the GPR register class to
24953// support isel patterns for GISel, but are not legal in SelectionDAG. The
24954// arbitrary type tablegen picks may be one of these smaller types.
24955//
24956// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
24957// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
24958std::pair<const TargetRegisterClass *, uint8_t>
24959RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
24960 MVT VT) const {
24961 switch (VT.SimpleTy) {
24962 default:
24963 break;
24964 case MVT::i8:
24965 case MVT::i16:
24966 case MVT::i32:
24967 return TargetLowering::findRepresentativeClass(TRI, Subtarget.getXLenVT());
24968 case MVT::bf16:
24969 case MVT::f16:
24970 return TargetLowering::findRepresentativeClass(TRI, MVT::f16);
24971 }
24972
24973 return TargetLowering::findRepresentativeClass(TRI, VT);
24974}
24975
24976namespace llvm::RISCVVIntrinsicsTable {
24977
24978#define GET_RISCVVIntrinsicsTable_IMPL
24979#include "RISCVGenSearchableTables.inc"
24980
24981} // namespace llvm::RISCVVIntrinsicsTable
24982
24983bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
24984
24985 // If the function specifically requests inline stack probes, emit them.
24986 if (MF.getFunction().hasFnAttribute("probe-stack"))
24987 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
24988 "inline-asm";
24989
24990 return false;
24991}
24992
24993unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
24994 Align StackAlign) const {
24995 // The default stack probe size is 4096 if the function has no
24996 // stack-probe-size attribute.
24997 const Function &Fn = MF.getFunction();
24998 unsigned StackProbeSize =
24999 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
25000 // Round down to the stack alignment.
25001 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
25002 return StackProbeSize ? StackProbeSize : StackAlign.value();
25003}
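// Worked example, not from the original file: with a 16-byte stack alignment,
// a "stack-probe-size"="4100" attribute rounds down to 4096, and any value
// below the alignment rounds down to 0, in which case the alignment itself is
// used as the probe size.
static_assert((4100 / 16) * 16 == 4096, "alignDown(4100, 16) == 4096");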
25004
25005SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
25006 SelectionDAG &DAG) const {
25007 MachineFunction &MF = DAG.getMachineFunction();
25008 if (!hasInlineStackProbe(MF))
25009 return SDValue();
25010
25011 MVT XLenVT = Subtarget.getXLenVT();
25012 // Get the inputs.
25013 SDValue Chain = Op.getOperand(0);
25014 SDValue Size = Op.getOperand(1);
25015
25016 MaybeAlign Align =
25017 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
25018 SDLoc dl(Op);
25019 EVT VT = Op.getValueType();
25020
25021 // Construct the new SP value in a GPR.
25022 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
25023 Chain = SP.getValue(1);
25024 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
25025 if (Align)
25026 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
25027 DAG.getSignedConstant(-Align->value(), dl, VT));
25028
25029 // Set the real SP to the new value with a probing loop.
25030 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
25031 return DAG.getMergeValues({SP, Chain}, dl);
25032}
25033
25034MachineBasicBlock *
25035RISCVTargetLowering::emitDynamicProbedAlloc(MachineInstr &MI,
25036 MachineBasicBlock *MBB) const {
25037 MachineFunction &MF = *MBB->getParent();
25038 MachineBasicBlock::iterator MBBI = MI.getIterator();
25039 DebugLoc DL = MBB->findDebugLoc(MBBI);
25040 Register TargetReg = MI.getOperand(0).getReg();
25041
25042 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
25043 bool IsRV64 = Subtarget.is64Bit();
25044 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
25045 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
25046 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
25047
25048 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
25049 MachineBasicBlock *LoopTestMBB =
25050 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25051 MF.insert(MBBInsertPoint, LoopTestMBB);
25052 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25053 MF.insert(MBBInsertPoint, ExitMBB);
25054 Register SPReg = RISCV::X2;
25055 Register ScratchReg =
25056 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
25057
25058 // ScratchReg = ProbeSize
25059 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
25060
25061 // LoopTest:
25062 // SUB SP, SP, ProbeSize
25063 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
25064 .addReg(SPReg)
25065 .addReg(ScratchReg);
25066
25067 // s[d|w] zero, 0(sp)
25068 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
25069 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
25070 .addReg(RISCV::X0)
25071 .addReg(SPReg)
25072 .addImm(0);
25073
25074 // BLT TargetReg, SP, LoopTest
25075 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
25076 .addReg(TargetReg)
25077 .addReg(SPReg)
25078 .addMBB(LoopTestMBB);
25079
25080 // Adjust with: MV SP, TargetReg.
25081 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
25082 .addReg(TargetReg)
25083 .addImm(0);
25084
25085 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
25086 ExitMBB->transferSuccessorsAndUpdatePHIs(MBB);
25087
25088 LoopTestMBB->addSuccessor(ExitMBB);
25089 LoopTestMBB->addSuccessor(LoopTestMBB);
25090 MBB->addSuccessor(LoopTestMBB);
25091
25092 MI.eraseFromParent();
25093 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
25094 return ExitMBB->begin()->getParent();
25095}
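// Illustrative summary, not from the original file (register names are only
// for exposition; the scratch register is virtual and TargetReg comes from the
// pseudo). The loop built above expands roughly to:
//     li    t0, <probe size>
//   LoopTest:
//     sub   sp, sp, t0
//     sd    zero, 0(sp)        ; sw on RV32 - touch the newly exposed page
//     blt   t1, sp, LoopTest   ; t1 = requested final SP
//     mv    sp, t1
// so every ProbeSize-sized step between the old and the new SP is written.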
25096
25097ArrayRef<MCPhysReg> RISCVTargetLowering::getRoundingControlRegisters() const {
25098 if (Subtarget.hasStdExtFOrZfinx()) {
25099 static const MCPhysReg RCRegs[] = {RISCV::FRM, RISCV::FFLAGS};
25100 return RCRegs;
25101 }
25102 return {};
25103}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Definition CostModel.cpp:74
#define Check(C,...)
#define DEBUG_TYPE
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define CC_VLS_CASE(ABIVlen)
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
mir Rename Register Operands
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr MCPhysReg SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static bool isLowSourceShuffle(ArrayRef< int > Mask, int Span)
Is this mask only using elements from the first span of the input?
static bool isZipOdd(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipodd idiom.
static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2)
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static bool isLegalBitRotate(ArrayRef< int > Mask, EVT VT, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static SDValue simplifyOp_VL(SDNode *N)
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isAlternating(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned Factor, bool RequiredPolarity)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< bool > ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, cl::desc("Swap add and addi in cases where the add may " "be combined with a shift"), cl::init(true))
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue reduceANDOfAtomicLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool isLocalRepeatingShuffle(ArrayRef< int > Mask, int Span)
Is this mask local (i.e.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static bool isSpanSplatShuffle(ArrayRef< int > Mask, int Span)
Return true for a mask which performs an arbitrary shuffle within the first span, and then repeats th...
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
const uint64_t ModeMask64
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, SelectionDAG &DAG)
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static bool isValidVisniInsertExtractIndex(SDValue Idx)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B, SelectionDAG &DAG)
Given fixed length vectors A and B with equal element types, but possibly different number of element...
const uint32_t ModeMask32
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue lowerVectorXRINT_XROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSimm12Constant(SDValue V)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static bool isElementRotate(const std::array< std::pair< int, int >, 2 > &SrcInfo, unsigned NumElts)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match a single source shuffle which is an identity except that some particular element is repeated.
static bool isNonZeroAVL(SDValue AVL)
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getQDOTXResultType(MVT OpVT)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue foldConcatVector(SDValue V1, SDValue V2)
If concat_vector(V1,V2) could be folded away to some existing vector source, return it.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isZipEven(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipeven idiom.
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
#define ROTR(x, n)
Definition SHA256.cpp:32
static bool isCommutative(Instruction *I, Value *ValWithUses)
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1332
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1109
Class for arbitrary precision integers.
Definition APInt.h:78
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition APInt.cpp:1644
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1396
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:510
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
bool isMask(unsigned numBits) const
Definition APInt.h:488
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:334
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
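A small standalone sketch, not taken from this file, exercising a few of the APInt helpers listed above; the bit widths and values are arbitrary.
  #include "llvm/ADT/APInt.h"
  #include <cassert>
  using namespace llvm;

  static void apIntExamples() {
    APInt Low = APInt::getLowBitsSet(/*numBits=*/32, /*loBitsSet=*/12);   // 0x00000FFF
    assert(Low.isMask(12) && Low.countr_zero() == 0);
    APInt Splat = APInt::getSplat(/*NewLen=*/64, APInt(8, 0xAB));         // 0xABAB...AB
    assert(Splat.getBitWidth() == 64);
    APInt High = APInt::getHighBitsSet(/*numBits=*/32, /*hiBitsSet=*/4);  // 0xF0000000
    assert(High.isShiftedMask() && High.isNegative());
  }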
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
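For reference, a hedged sketch (not from this file) of the scalar update a few of the BinOp values above denote, written as plain C++ for an unsigned memory cell; the remaining operations are omitted.
  #include "llvm/IR/Instructions.h"
  #include <cstdint>
  using llvm::AtomicRMWInst;

  // 'Old' is the prior memory contents, 'V' the atomicrmw operand.
  static uint32_t applyBinOp(AtomicRMWInst::BinOp Op, uint32_t Old, uint32_t V) {
    switch (Op) {
    case AtomicRMWInst::Add:  return Old + V;
    case AtomicRMWInst::Sub:  return Old - V;
    case AtomicRMWInst::And:  return Old & V;
    case AtomicRMWInst::Nand: return ~(Old & V);
    case AtomicRMWInst::UMax: return Old > V ? Old : V;
    case AtomicRMWInst::UMin: return Old < V ? Old : V;
    default:                  return Old; // other operations not covered by this sketch
    }
  }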
This is an SDNode representing atomic operations.
const SDValue & getBasePtr() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool test(unsigned Idx) const
Definition BitVector.h:461
BitVector & set()
Definition BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
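The CCState entries above are typically driven as below. This is a generic sketch, not this file's code; CC_Placeholder stands in for a real CCAssignFn, and the other parameters are assumed to come from the surrounding LowerFormalArguments-style hook.
  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  static void analyzeFormals(CallingConv::ID CallConv, bool IsVarArg,
                             MachineFunction &MF,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             SelectionDAG &DAG, CCAssignFn CC_Placeholder) {
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
    CCInfo.AnalyzeFormalArguments(Ins, CC_Placeholder); // fills ArgLocs
    for (const CCValAssign &VA : ArgLocs) {
      if (VA.isRegLoc())
        (void)VA.getLocReg();       // value arrives in a physical register
      else
        (void)VA.getLocMemOffset(); // value arrives on the stack at this offset
    }
  }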
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:388
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:229
unsigned size() const
Definition DenseMap.h:108
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:205
Implements a dense probed hash-table based set.
Definition DenseSet.h:269
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Tagged union holding either a T or an Error.
Definition Error.h:485
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Argument * getArg(unsigned i) const
Definition Function.h:884
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
Helper struct to store a base, index and offset that forms an address.
bool isDSOLocal() const
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition IRBuilder.h:1936
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2508
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:605
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
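A hedged sketch (not from this file) combining the IRBuilder entries above: offset a pointer by a byte count and call through it; Callee and FTy are assumed to be supplied by the caller.
  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  static CallInst *emitOffsetCall(IRBuilderBase &IRB, Value *Ptr, unsigned ByteOffset,
                                  FunctionType *FTy, Value *Callee) {
    // An i8 GEP is a raw byte offset from Ptr.
    Value *Adjusted = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, ByteOffset);
    Module *M = IRB.GetInsertBlock()->getModule(); // e.g. to look up intrinsics
    (void)M;
    return IRB.CreateCall(FTy, Callee, {Adjusted});
  }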
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
MCContext & getContext() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1077
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1445
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the vector's number of elements is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
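A few of the MVT helpers above in action; a standalone sketch rather than code from this file.
  #include "llvm/CodeGen/ValueTypes.h"
  #include <cassert>
  using namespace llvm;

  static void mvtExamples() {
    MVT Fixed = MVT::getVectorVT(MVT::i32, 4);            // v4i32
    MVT Scalable = MVT::getScalableVectorVT(MVT::i32, 4); // nxv4i32
    assert(Fixed.isFixedLengthVector() && Scalable.isScalableVector());
    assert(Fixed.getVectorElementType() == MVT::i32);
    assert(Fixed.getFixedSizeInBits() == 128);
    MVT Half = Fixed.getHalfNumVectorElementsVT();        // v2i32
    assert(Half.getVectorNumElements() == 2);
  }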
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
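The MachineBasicBlock entries above are the building blocks of the usual custom-inserter block split. The sketch below shows that canonical pattern in generic form; it is an illustration, not a copy of this file's emitters.
  #include "llvm/CodeGen/MachineFunction.h"
  #include <iterator>
  using namespace llvm;

  // Split MBB after MI: everything following MI moves to a new fallthrough block.
  static MachineBasicBlock *splitAfter(MachineInstr &MI, MachineBasicBlock *MBB) {
    MachineFunction *MF = MBB->getParent();
    MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(MBB->getBasicBlock());
    MF->insert(std::next(MBB->getIterator()), TailMBB);
    TailMBB->splice(TailMBB->begin(), MBB,
                    std::next(MachineBasicBlock::iterator(MI)), MBB->end());
    TailMBB->transferSuccessorsAndUpdatePHIs(MBB); // CFG edges and PHIs follow the tail
    MBB->addSuccessor(TailMBB);                    // fall through into the tail block
    return TailMBB;
  }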
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
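A short sketch (not from this file) of creating frame objects with the MachineFrameInfo entries above; the sizes, alignment, and offsets are arbitrary.
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"
  using namespace llvm;

  static void makeFrameObjects(MachineFunction &MF) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int SpillFI = MFI.CreateStackObject(/*Size=*/8, Align(8), /*isSpillSlot=*/true);
    int ArgFI = MFI.CreateFixedObject(/*Size=*/4, /*SPOffset=*/0, /*IsImmutable=*/true);
    (void)SpillFI;
    (void)ArgFI;
    MFI.setFrameAddressIsTaken(true); // e.g. while lowering llvm.frameaddress
  }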
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
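A generic sketch of the MachineInstrBuilder interface above: materialize "DstReg = <Opc> SrcReg, Imm" in front of an insertion point. In the RISC-V backend Opc would typically be something like RISCV::ADDI; here it is a parameter so the snippet stays target-neutral and is not presented as this file's code.
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"
  using namespace llvm;

  static Register emitRegImmOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator IP,
                               const DebugLoc &DL, const TargetInstrInfo &TII,
                               MachineRegisterInfo &MRI, const TargetRegisterClass *RC,
                               unsigned Opc, Register SrcReg, int64_t Imm) {
    Register DstReg = MRI.createVirtualRegister(RC);
    BuildMI(MBB, IP, DL, TII.get(Opc), DstReg)
        .addReg(SrcReg)
        .addImm(Imm);
    return DstReg;
  }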
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
Definition Module.cpp:353
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtZfhOrZhinx() const
bool hasShlAdd(int64_t ShAmt) const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasVInstructionsBF16Minimal() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
bool hasBEXTILike() const
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
bool hasCZEROLike() const
unsigned getELen() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
static unsigned getRegClassIDForLMUL(RISCVVType::VLMUL LMul)
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Match a mask which "spreads" the leading elements of a vector evenly across the result.
static RISCVVType::VLMUL getLMUL(MVT VT)
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
std::optional< APInt > bitcastToAPInt() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
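A typical structural check built from the SDNode/SDValue accessors above (a sketch, not a combine from this file): recognize (add (shl X, C), Y) where the shift has a single use.
  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  static bool isAddOfOneUseShlByConstant(SDValue N) {
    if (N.getOpcode() != ISD::ADD)
      return false;
    SDValue Shl = N.getOperand(0);
    if (Shl.getOpcode() != ISD::SHL || !Shl.hasOneUse())
      return false;
    return isa<ConstantSDNode>(Shl.getOperand(1)); // shift amount must be a constant
  }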
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
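A compact sketch (not this file's code) of pairing getSetCC with getSelect to build an unsigned max; the operands, types, and DAG are assumed to come from the surrounding lowering code.
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/CodeGen/TargetLowering.h"
  using namespace llvm;

  static SDValue emitUMax(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                          SDValue LHS, SDValue RHS) {
    EVT CCVT = DAG.getTargetLoweringInfo().getSetCCResultType(
        DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue Cmp = DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUGT);
    return DAG.getSelect(DL, VT, Cmp, LHS, RHS); // LHS if LHS >u RHS, else RHS
  }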
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
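How getLoad and getStore chain together, as a sketch under the signatures listed above; the pointers, value type, and alignment are assumed to come from the caller.
  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  static SDValue copyValue(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Chain,
                           SDValue SrcPtr, SDValue DstPtr, Align A) {
    SDValue Load = DAG.getLoad(VT, DL, Chain, SrcPtr, MachinePointerInfo(), A);
    // Use the load's output chain (value #1) so the store is ordered after it.
    return DAG.getStore(Load.getValue(1), DL, Load, DstPtr, MachinePointerInfo(), A);
  }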
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
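A hedged sketch of the known-bits queries above: test whether the low NumBits of a value are provably zero, showing both the explicit computeKnownBits form and the MaskedValueIsZero shorthand.
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/Support/KnownBits.h"
  #include <cassert>
  using namespace llvm;

  static bool lowBitsKnownZero(SelectionDAG &DAG, SDValue Op, unsigned NumBits) {
    APInt LowMask = APInt::getLowBitsSet(Op.getScalarValueSizeInBits(), NumBits);
    KnownBits Known = DAG.computeKnownBits(Op);
    // MaskedValueIsZero is shorthand for the explicit known-bits containment test.
    assert(DAG.MaskedValueIsZero(Op, LowMask) == LowMask.isSubsetOf(Known.Zero));
    return LowMask.isSubsetOf(Known.Zero);
  }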
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static LLVM_ABI bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static LLVM_ABI bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
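A hedged sketch of how the static shuffle-mask classifiers above are typically queried (the wrapper name is illustrative):
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
// Returns true when Mask reads from a single source either in order or in
// reverse order; both helpers take the element count of one source vector.
static bool isTrivialSingleSourceMaskSketch(ArrayRef<int> Mask, int NumElts) {
  return ShuffleVectorSDNode::isIdentityMask(Mask, NumElts) ||
         ShuffleVectorSDNode::isReverseMask(Mask, NumElts);
}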
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
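For the SmallSet members listed above, a minimal usage sketch (the function name is illustrative):
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;
// Count distinct values; insert().second is true only for newly added elements.
static unsigned countUniqueSketch(ArrayRef<unsigned> Regs) {
  SmallSet<unsigned, 8> Seen;
  unsigned Unique = 0;
  for (unsigned R : Regs)
    if (Seen.insert(R).second)
      ++Unique;
  return Unique;
}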
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:862
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
LLVM_ABI std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
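A minimal StringSwitch sketch, with made-up case strings; this is the usual way constraint or operand-name strings are dispatched:
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;
static int classifyNameSketch(StringRef Name) {
  return StringSwitch<int>(Name)
      .Case("rd", 0)
      .Cases("rs1", "rs2", 1)
      .Default(-1); // Anything else is "unknown".
}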
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual MVT getVPExplicitVectorLengthTy() const
Returns the type to be used for the EVL/AVL operand of VP nodes: ISD::VP_ADD, ISD::VP_SUB,...
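The TargetLoweringBase setters listed above are normally invoked from a target's lowering constructor. The following is a hedged sketch for a fictional target (MyTargetLowering is not a real class, and the legalization choices shown are arbitrary examples, not RISC-V's configuration):
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;
class MyTargetLowering : public TargetLowering {
public:
  explicit MyTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {
    setBooleanContents(ZeroOrOneBooleanContent);        // i1 results are 0 or 1.
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand); // No combined div/rem op.
    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Legal);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setMinFunctionAlignment(Align(4));
    setPrefLoopAlignment(Align(16));
  }
};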
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by a power of 2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren, 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetOptions Options
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:774
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:62
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr bool isZero() const
Definition TypeSize.h:154
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
self_iterator getIterator()
Definition ilist_node.h:134
#define INT64_MIN
Definition DataTypes.h:74
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:622
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:682
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:952
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:627
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition ISDOpcodes.h:672
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:648
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively places vector elements based on mask e....
Definition ISDOpcodes.h:690
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition ISDOpcodes.h:611
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:853
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:857
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
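A hedged sketch of the IR-level PatternMatch combinators above (the predicate name and the pattern are illustrative):
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;
// Recognize V as (X << 1) + Y and capture both operands.
static bool matchShlOnePlusSketch(Value *V, Value *&X, Value *&Y) {
  return match(V, m_Add(m_Shl(m_Value(X), m_One()), m_Value(Y)));
}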
unsigned getBrCond(CondCode CC, unsigned SelectOpc=0)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
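A hedged sketch of the RISCVMatInt helpers above (the wrapper name is invented; the header path is assumed to be the backend's in-tree MCTargetDesc location):
#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/MC/MCSubtargetInfo.h"
using namespace llvm;
// The length of the LUI/ADDI/SLLI/... sequence that would materialize Imm is a
// simple proxy for its materialization cost.
static unsigned materializationLengthSketch(int64_t Imm,
                                            const MCSubtargetInfo &STI) {
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
  return Seq.size();
}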
static VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
static constexpr unsigned RVVBytesPerBlock
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
auto m_Node(unsigned Opcode, const OpndPreds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
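A hedged sketch of the SelectionDAG pattern matchers above (sd_match, m_Srl, m_ConstInt); the helper name is illustrative:
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
using namespace llvm::SDPatternMatch;
// True when N is a logical shift right by a constant (or constant splat).
static bool isSrlByConstantSketch(SDNode *N, const SelectionDAG &DAG) {
  return sd_match(N, &DAG, m_Srl(m_Value(), m_ConstInt()));
}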
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
initializer< Ty > init(const Ty &Val)
uint32_t read32le(const void *P)
Definition Endian.h:428
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1707
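As a quick sketch of the range-based STLExtras wrappers above (the function name is illustrative), these take a range directly instead of an iterator pair:
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
static bool allNonNegativeSketch(ArrayRef<int> Vals) {
  return all_of(Vals, [](int V) { return V >= 0; });
}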
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2454
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isStrongerThanMonotonic(AtomicOrdering AO)
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
static const MachineMemOperand::Flags MONontemporalBit0
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
Definition MathExtras.h:557
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1589
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1950
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1714
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:317
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:405
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
CombineLevel
Definition DAGCombine.h:15
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
@ Xor
Bitwise or logical XOR of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1943
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1740
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1879
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:583
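A hedged sketch of the MathExtras helpers referenced above, using the templated forms that take the bit width as a template argument (the 12-bit width is only an example):
#include "llvm/Support/MathExtras.h"
#include <cstdint>
using namespace llvm;
// Does Imm fit in a 12-bit signed immediate field?
static bool fitsSigned12Sketch(int64_t Imm) { return isInt<12>(Imm); }
// Sign-extend the low 12 bits of Raw to a full 64-bit value.
static int64_t signExtend12Sketch(uint64_t Raw) { return SignExtend64<12>(Raw); }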
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:86
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:207
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
#define NC
Definition regutils.h:42
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:324
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition ValueTypes.h:402
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return its num_fields.
Definition ValueTypes.h:364
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:419
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:292
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
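A hedged sketch combining several of the EVT queries above (the predicate is illustrative, not a check made by this file):
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;
// Fixed-length integer vector whose elements are at most 32 bits wide.
static bool isNarrowFixedIntVectorSketch(EVT VT) {
  return VT.isFixedLengthVector() && VT.getVectorElementType().isInteger() &&
         VT.getScalarSizeInBits() <= 32;
}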
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
static LLVM_ABI KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:267
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:154
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:165
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:289
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:304
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:173
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:273
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
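A hedged sketch of the KnownBits API above (the helper is illustrative): known-zero/known-one information composes through operations such as shifts.
#include "llvm/Support/KnownBits.h"
using namespace llvm;
// Compute known bits of (LHS << RHS) and report whether bit 0 of the result is
// provably zero.
static bool lowBitKnownZeroAfterShlSketch(const KnownBits &LHS,
                                          const KnownBits &RHS) {
  KnownBits Shifted = KnownBits::shl(LHS, RHS);
  return Shifted.Zero[0];
}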
Matching combinators.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...