1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/IntrinsicsRISCV.h"
45#include "llvm/Support/Debug.h"
51#include <optional>
52
53using namespace llvm;
54
55#define DEBUG_TYPE "riscv-lower"
56
57STATISTIC(NumTailCalls, "Number of tail calls");
58
59 static cl::opt<unsigned> ExtensionMaxWebSize(
60 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
61 cl::desc("Give the maximum size (in number of nodes) of the web of "
62 "instructions that we will consider for VW expansion"),
63 cl::init(18));
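// Since DEBUG_TYPE is "riscv-lower", this option can be tuned from the llc
// command line, e.g. "llc -riscv-lower-ext-max-web-size=32 ...".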
64
65static cl::opt<bool>
66 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
67 cl::desc("Allow the formation of VW_W operations (e.g., "
68 "VWADD_W) with splat constants"),
69 cl::init(false));
70
71 static cl::opt<unsigned> NumRepeatedDivisors(
72 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
73 cl::desc("Set the minimum number of repetitions of a divisor to allow "
74 "transformation to multiplications by the reciprocal"),
75 cl::init(2));
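// For example, with the default of 2, two divisions by the same value d
// (x/d and y/d, fast-math permitting) are rewritten to compute r = 1.0/d once
// and then multiply: x*r and y*r.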
76
77static cl::opt<int>
78 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
79 cl::desc("Give the maximum number of instructions that we will "
80 "use for creating a floating-point immediate value"),
81 cl::init(2));
82
83static cl::opt<bool>
84 ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
85 cl::desc("Swap add and addi in cases where the add may "
86 "be combined with a shift"),
87 cl::init(true));
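// Illustrative intent: prefer ((X + Y) + C) over ((X + C) + Y) when the
// register-register add can then fold with a shift into a Zba
// sh1add/sh2add/sh3add, leaving the constant for a trailing addi.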
88
89 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
90 const RISCVSubtarget &STI)
91 : TargetLowering(TM), Subtarget(STI) {
92
93 RISCVABI::ABI ABI = Subtarget.getTargetABI();
94 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
95
96 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
97 !Subtarget.hasStdExtF()) {
98 errs() << "Hard-float 'f' ABI can't be used for a target that "
99 "doesn't support the F instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
103 !Subtarget.hasStdExtD()) {
104 errs() << "Hard-float 'd' ABI can't be used for a target that "
105 "doesn't support the D instruction set extension (ignoring "
106 "target-abi)\n";
107 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
108 }
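// For example, "-target-abi=ilp32d" on a core without the D extension emits
// the diagnostic above and falls back to the plain ilp32 ABI.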
109
110 switch (ABI) {
111 default:
112 reportFatalUsageError("Don't know how to lower this ABI");
121 break;
122 }
123
124 MVT XLenVT = Subtarget.getXLenVT();
125
126 // Set up the register classes.
127 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
128
129 if (Subtarget.hasStdExtZfhmin())
130 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
131 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
132 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
133 if (Subtarget.hasStdExtF())
134 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
135 if (Subtarget.hasStdExtD())
136 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
137 if (Subtarget.hasStdExtZhinxmin())
138 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
139 if (Subtarget.hasStdExtZfinx())
140 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
141 if (Subtarget.hasStdExtZdinx()) {
142 if (Subtarget.is64Bit())
143 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
144 else
145 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
146 }
147
148 static const MVT::SimpleValueType BoolVecVTs[] = {
149 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
150 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
151 static const MVT::SimpleValueType IntVecVTs[] = {
152 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
153 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
154 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
155 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
156 MVT::nxv4i64, MVT::nxv8i64};
157 static const MVT::SimpleValueType F16VecVTs[] = {
158 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
159 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
160 static const MVT::SimpleValueType BF16VecVTs[] = {
161 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
162 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
163 static const MVT::SimpleValueType F32VecVTs[] = {
164 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
165 static const MVT::SimpleValueType F64VecVTs[] = {
166 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
167 static const MVT::SimpleValueType VecTupleVTs[] = {
168 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
169 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
170 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
171 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
172 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
173 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
174 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
175 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
176 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
177 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
178 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
179
180 if (Subtarget.hasVInstructions()) {
181 auto addRegClassForRVV = [this](MVT VT) {
182 // Disable the smallest fractional LMUL types if ELEN is less than
183 // RVVBitsPerBlock.
184 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
185 if (VT.getVectorMinNumElements() < MinElts)
186 return;
187
188 unsigned Size = VT.getSizeInBits().getKnownMinValue();
189 const TargetRegisterClass *RC;
190 if (Size <= RISCV::RVVBitsPerBlock)
191 RC = &RISCV::VRRegClass;
192 else if (Size == 2 * RISCV::RVVBitsPerBlock)
193 RC = &RISCV::VRM2RegClass;
194 else if (Size == 4 * RISCV::RVVBitsPerBlock)
195 RC = &RISCV::VRM4RegClass;
196 else if (Size == 8 * RISCV::RVVBitsPerBlock)
197 RC = &RISCV::VRM8RegClass;
198 else
199 llvm_unreachable("Unexpected size");
200
201 addRegisterClass(VT, RC);
202 };
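// Worked example: with RVVBitsPerBlock = 64, nxv4i32 has a known minimum size
// of 128 bits = 2 * RVVBitsPerBlock and lands in VRM2 (an LMUL=2 register
// group), while nxv1i32 (32 bits) stays in a single VR register.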
203
204 for (MVT VT : BoolVecVTs)
205 addRegClassForRVV(VT);
206 for (MVT VT : IntVecVTs) {
207 if (VT.getVectorElementType() == MVT::i64 &&
208 !Subtarget.hasVInstructionsI64())
209 continue;
210 addRegClassForRVV(VT);
211 }
212
213 if (Subtarget.hasVInstructionsF16Minimal() ||
214 Subtarget.hasVendorXAndesVPackFPH())
215 for (MVT VT : F16VecVTs)
216 addRegClassForRVV(VT);
217
218 if (Subtarget.hasVInstructionsBF16Minimal() ||
219 Subtarget.hasVendorXAndesVBFHCvt())
220 for (MVT VT : BF16VecVTs)
221 addRegClassForRVV(VT);
222
223 if (Subtarget.hasVInstructionsF32())
224 for (MVT VT : F32VecVTs)
225 addRegClassForRVV(VT);
226
227 if (Subtarget.hasVInstructionsF64())
228 for (MVT VT : F64VecVTs)
229 addRegClassForRVV(VT);
230
231 if (Subtarget.useRVVForFixedLengthVectors()) {
232 auto addRegClassForFixedVectors = [this](MVT VT) {
233 MVT ContainerVT = getContainerForFixedLengthVector(VT);
234 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
235 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
236 addRegisterClass(VT, TRI.getRegClass(RCID));
237 };
238 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
239 if (useRVVForFixedLengthVectorVT(VT))
240 addRegClassForFixedVectors(VT);
241
242 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
243 if (useRVVForFixedLengthVectorVT(VT))
244 addRegClassForFixedVectors(VT);
245 }
246
247 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
248 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
249 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
250 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
251 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
252 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
253 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
254 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
255 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
256 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
257 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
258 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
259 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
260 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
261 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
262 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
263 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
264 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
265 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
266 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
267 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
268 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
269 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
270 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
271 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
272 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
273 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
274 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
275 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
276 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
277 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
278 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
279 }
280
281 // Compute derived properties from the register classes.
282 computeRegisterProperties(STI.getRegisterInfo());
283
284 setStackPointerRegisterToSaveRestore(RISCV::X2);
285
286 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
287 MVT::i1, Promote);
288 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
289 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
290 MVT::i1, Promote);
291
292 // TODO: add all necessary setOperationAction calls.
293 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Custom);
294
295 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
296 setOperationAction(ISD::BR_CC, XLenVT, Expand);
297 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
299
304 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
307 }
308
309 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
310
311 setOperationAction(ISD::VASTART, MVT::Other, Custom);
312 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
313
314 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
315 !Subtarget.hasVendorXAndesPerf())
317
319
320 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
321 !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
322 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
323 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
324
325 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
326 setOperationAction(ISD::LOAD, MVT::i64, Custom);
327 setOperationAction(ISD::STORE, MVT::i64, Custom);
328 }
329
330 if (Subtarget.is64Bit()) {
332
333 setOperationAction(ISD::LOAD, MVT::i32, Custom);
335 MVT::i32, Custom);
337 if (!Subtarget.hasStdExtZbb())
340 Custom);
342 }
343 if (!Subtarget.hasStdExtZmmul()) {
345 } else if (Subtarget.is64Bit()) {
348 } else {
350 }
351
352 if (!Subtarget.hasStdExtM()) {
353 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, XLenVT,
354 Expand);
355 } else if (Subtarget.is64Bit()) {
357 {MVT::i8, MVT::i16, MVT::i32}, Custom);
358 }
359
362 Expand);
363
365 Custom);
366
367 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
368 if (Subtarget.is64Bit())
370 } else if (Subtarget.hasVendorXTHeadBb()) {
371 if (Subtarget.is64Bit())
374 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
376 } else {
378 }
379
380 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
381 // pattern match it directly in isel.
383 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
384 Subtarget.hasVendorXTHeadBb())
385 ? Legal
386 : Expand);
387
388 if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
389 !Subtarget.is64Bit()) {
391 } else {
392 // Zbkb can use rev8+brev8 to implement bitreverse.
394 Subtarget.hasStdExtZbkb() ? Custom : Expand);
395 if (Subtarget.hasStdExtZbkb())
397 }
398
399 if (Subtarget.hasStdExtZbb() ||
400 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
402 Legal);
403 }
404
405 if (Subtarget.hasStdExtZbb() ||
406 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
407 if (Subtarget.is64Bit())
409 } else {
411 // TODO: These should be set to LibCall, but this currently breaks
412 // the Linux kernel build. See #101786. Lacks i128 tests, too.
413 if (Subtarget.is64Bit())
415 else
418 }
419
420 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
421 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
422 // We need the custom lowering to make sure that the resulting sequence
423 // for the 32bit case is efficient on 64bit targets.
424 if (Subtarget.is64Bit())
426 } else {
428 }
429
430 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
432 } else if (Subtarget.hasShortForwardBranchOpt()) {
433 // We can use PseudoCCSUB to implement ABS.
435 } else if (Subtarget.is64Bit()) {
437 }
438
439 if (!Subtarget.useCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov() &&
440 !Subtarget.hasVendorXqcicm() && !Subtarget.hasVendorXqcics())
442
443 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
450 }
451
452 static const unsigned FPLegalNodeTypes[] = {
453 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM,
454 ISD::FMAXIMUMNUM, ISD::LRINT, ISD::LLRINT,
455 ISD::LROUND, ISD::LLROUND, ISD::STRICT_LRINT,
460
461 static const ISD::CondCode FPCCToExpand[] = {
465
466 static const unsigned FPOpToExpand[] = {
467 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
468 ISD::FREM};
469
470 static const unsigned FPRndMode[] = {
471 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
472 ISD::FROUNDEVEN};
473
474 static const unsigned ZfhminZfbfminPromoteOps[] = {
475 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM,
476 ISD::FMINIMUMNUM, ISD::FADD, ISD::FSUB,
481 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
482 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
483 ISD::FROUNDEVEN, ISD::FCANONICALIZE};
484
485 if (Subtarget.hasStdExtZfbfmin()) {
486 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
490 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
491 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
493 setOperationAction(ISD::FABS, MVT::bf16, Custom);
494 setOperationAction(ISD::FNEG, MVT::bf16, Custom);
498 }
499
500 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
501 if (Subtarget.hasStdExtZfhOrZhinx()) {
502 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
503 setOperationAction(FPRndMode, MVT::f16,
504 Subtarget.hasStdExtZfa() ? Legal : Custom);
506 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
507 Subtarget.hasStdExtZfa() ? Legal : Custom);
508 if (Subtarget.hasStdExtZfa())
510 } else {
511 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
512 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Promote);
513 for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
516 setOperationAction(Op, MVT::f16, Custom);
517 setOperationAction(ISD::FABS, MVT::f16, Custom);
518 setOperationAction(ISD::FNEG, MVT::f16, Custom);
522 }
523
524 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
525
528 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
531 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
532
534 ISD::FNEARBYINT, MVT::f16,
535 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
536 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
537 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
538 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
539 ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP},
540 MVT::f16, Promote);
541
542 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
543 // complete support for all operations in LegalizeDAG.
548 MVT::f16, Promote);
549
550 // We need to custom promote this.
551 if (Subtarget.is64Bit())
552 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
553 }
554
555 if (Subtarget.hasStdExtFOrZfinx()) {
556 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
557 setOperationAction(FPRndMode, MVT::f32,
558 Subtarget.hasStdExtZfa() ? Legal : Custom);
559 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
562 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
563 setOperationAction(FPOpToExpand, MVT::f32, Expand);
564 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
565 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
566 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
567 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
569 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
570 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
571 Subtarget.isSoftFPABI() ? LibCall : Custom);
572 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
573 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
574 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
575 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);
576
577 if (Subtarget.hasStdExtZfa()) {
579 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
580 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
581 } else {
582 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
583 }
584 }
585
586 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
587 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
588
589 if (Subtarget.hasStdExtDOrZdinx()) {
590 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
591
592 if (!Subtarget.is64Bit())
593 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
594
595 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
596 !Subtarget.is64Bit()) {
597 setOperationAction(ISD::LOAD, MVT::f64, Custom);
598 setOperationAction(ISD::STORE, MVT::f64, Custom);
599 }
600
601 if (Subtarget.hasStdExtZfa()) {
603 setOperationAction(FPRndMode, MVT::f64, Legal);
604 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
605 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
606 } else {
607 if (Subtarget.is64Bit())
608 setOperationAction(FPRndMode, MVT::f64, Custom);
609
610 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
611 }
612
615 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
618 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
619 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
620 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
621 setOperationAction(FPOpToExpand, MVT::f64, Expand);
622 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
623 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
624 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
625 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
627 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
628 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
629 Subtarget.isSoftFPABI() ? LibCall : Custom);
630 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
631 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
632 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Custom);
633 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
634 }
635
636 if (Subtarget.is64Bit()) {
639 MVT::i32, Custom);
640 setOperationAction(ISD::LROUND, MVT::i32, Custom);
641 }
642
643 if (Subtarget.hasStdExtFOrZfinx()) {
645 Custom);
646
647 // f16/bf16 require custom handling.
649 Custom);
651 Custom);
652
654 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
655 setOperationAction(ISD::GET_FPENV, XLenVT, Custom);
656 setOperationAction(ISD::SET_FPENV, XLenVT, Custom);
657 setOperationAction(ISD::RESET_FPENV, MVT::Other, Custom);
658 setOperationAction(ISD::GET_FPMODE, XLenVT, Custom);
659 setOperationAction(ISD::SET_FPMODE, XLenVT, Custom);
660 setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
661 }
662
665 XLenVT, Custom);
666
668
669 if (Subtarget.is64Bit())
671
672 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
673 // Unfortunately this can't be determined just from the ISA naming string.
674 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
675 Subtarget.is64Bit() ? Legal : Custom);
676 setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
677 Subtarget.is64Bit() ? Legal : Custom);
678
679 if (Subtarget.is64Bit()) {
680 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
681 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
682 }
683
684 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
686 if (Subtarget.is64Bit())
688
689 if (Subtarget.hasVendorXMIPSCBOP())
690 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
691 else if (Subtarget.hasStdExtZicbop())
692 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
693
694 if (Subtarget.hasStdExtA()) {
695 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
696 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
697 setMinCmpXchgSizeInBits(8);
698 else
699 setMinCmpXchgSizeInBits(32);
700 } else if (Subtarget.hasForcedAtomics()) {
701 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
702 } else {
703 setMaxAtomicSizeInBitsSupported(0);
704 }
705
706 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
707
709
710 if (getTargetMachine().getTargetTriple().isOSLinux()) {
711 // Custom lowering of llvm.clear_cache.
712 setOperationAction(ISD::CLEAR_CACHE, XLenVT, Custom);
713 }
714
715 if (Subtarget.hasVInstructions()) {
717
718 setOperationAction(ISD::VSCALE, XLenVT, Custom);
719
720 // RVV intrinsics may have illegal operands.
721 // We also need to custom legalize vmv.x.s.
724 {MVT::i8, MVT::i16}, Custom);
725 if (Subtarget.is64Bit())
727 MVT::i32, Custom);
728 else
730 MVT::i64, Custom);
731
733 MVT::Other, Custom);
734
735 static const unsigned IntegerVPOps[] = {
736 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
737 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
738 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
739 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
740 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
741 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
742 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
743 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
744 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
745 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
746 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
747 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
748 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
749 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
750 ISD::EXPERIMENTAL_VP_SPLAT};
751
752 static const unsigned FloatingPointVPOps[] = {
753 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
754 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
755 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
756 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
757 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
758 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
759 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
760 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
761 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
762 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
763 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
764 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
765 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
766
767 static const unsigned IntegerVecReduceOps[] = {
768 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
769 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
770 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
771
772 static const unsigned FloatingPointVecReduceOps[] = {
773 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
774 ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};
775
776 static const unsigned FloatingPointLibCallOps[] = {
777 ISD::FREM, ISD::FPOW, ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
778 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, ISD::FLOG10};
779
780 if (!Subtarget.is64Bit()) {
781 // We must custom-lower certain vXi64 operations on RV32 due to the vector
782 // element type being illegal.
784 MVT::i64, Custom);
785
786 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
787
788 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
789 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
790 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
791 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
792 MVT::i64, Custom);
793 }
794
795 for (MVT VT : BoolVecVTs) {
796 if (!isTypeLegal(VT))
797 continue;
798
800
801 // Mask VTs are custom-expanded into a series of standard nodes
805 VT, Custom);
806
808 Custom);
809
811 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
812 Expand);
813 setOperationAction(ISD::VP_MERGE, VT, Custom);
814
815 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
816 Custom);
817
818 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
819
821 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
822 Custom);
823
825 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
826 Custom);
827
828 // RVV has native int->float & float->int conversions where the
829 // element type sizes are within one power-of-two of each other. Any
830 // wider distances between type sizes have to be lowered as sequences
831 // which progressively narrow the gap in stages.
836 VT, Custom);
838 Custom);
839
840 // Expand all extending loads to types larger than this, and truncating
841 // stores from types larger than this.
843 setTruncStoreAction(VT, OtherVT, Expand);
845 OtherVT, Expand);
846 }
847
848 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
849 ISD::VP_TRUNCATE, ISD::VP_SETCC},
850 VT, Custom);
851
854
856
857 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
858 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
859 setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
860
863 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
864 }
865
866 for (MVT VT : IntVecVTs) {
867 if (!isTypeLegal(VT))
868 continue;
869
872
873 // Vectors implement MULHS/MULHU.
875
876 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
877 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
879
881 Legal);
882
884
885 // Custom-lower extensions and truncations from/to mask types.
887 VT, Custom);
888
889 // RVV has native int->float & float->int conversions where the
890 // element type sizes are within one power-of-two of each other. Any
891 // wider distances between type sizes have to be lowered as sequences
892 // which progressively narrow the gap in stages.
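// For example, an nxv2i8 -> nxv2f32 conversion (two element-size doublings
// apart) is custom-lowered in stages, e.g. by widening the integer type first
// and then converting, rather than as a single vfcvt.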
897 VT, Custom);
899 Custom);
903 VT, Legal);
904
905 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
906 // nodes which truncate by one power of two at a time.
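// For example, truncating nxv2i64 to nxv2i8 becomes a chain of three
// RISCVISD::TRUNCATE_VECTOR_VL nodes (i64 -> i32 -> i16 -> i8), since each
// narrowing step halves the element width.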
909 Custom);
910
911 // Custom-lower insert/extract operations to simplify patterns.
913 Custom);
914
915 // Custom-lower reduction operations to set up the corresponding custom
916 // nodes' operands.
917 setOperationAction(IntegerVecReduceOps, VT, Custom);
918
919 setOperationAction(IntegerVPOps, VT, Custom);
920
921 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
922
923 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
924 VT, Custom);
925
927 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
928 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
929 VT, Custom);
930 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
931
934 VT, Custom);
935
938
940
942 setTruncStoreAction(VT, OtherVT, Expand);
944 OtherVT, Expand);
945 }
946
949
950 // Splice
952
953 if (Subtarget.hasStdExtZvkb()) {
955 setOperationAction(ISD::VP_BSWAP, VT, Custom);
956 } else {
957 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
959 }
960
961 if (Subtarget.hasStdExtZvbb()) {
963 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
964 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
965 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
966 VT, Custom);
967 } else {
968 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
970 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
971 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
972 VT, Expand);
973
974 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
975 // range of f32.
976 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
977 if (isTypeLegal(FloatVT)) {
979 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
980 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
981 VT, Custom);
982 }
983 }
984
986 }
987
988 for (MVT VT : VecTupleVTs) {
989 if (!isTypeLegal(VT))
990 continue;
991
992 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
993 }
994
995 // Expand various CCs to best match the RVV ISA, which natively supports UNE
996 // but no other unordered comparisons, and supports all ordered comparisons
997 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
998 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
999 // and we pattern-match those back to the "original", swapping operands once
1000 // more. This way we catch both operations and both "vf" and "fv" forms with
1001 // fewer patterns.
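// For example, a SETOGT x, y mask comparison is expanded here to SETOLT y, x;
// isel then matches the swapped-operand form, so a single set of vmflt/vmfle
// patterns covers gt/ge as well as both the "vf" and "fv" scalar forms.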
1002 static const ISD::CondCode VFPCCToExpand[] = {
1006 };
1007
1008 // TODO: support more ops.
1009 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1010 ISD::FMINNUM,
1011 ISD::FMAXNUM,
1012 ISD::FMINIMUMNUM,
1013 ISD::FMAXIMUMNUM,
1014 ISD::FADD,
1015 ISD::FSUB,
1016 ISD::FMUL,
1017 ISD::FMA,
1018 ISD::FDIV,
1019 ISD::FSQRT,
1020 ISD::FCEIL,
1021 ISD::FTRUNC,
1022 ISD::FFLOOR,
1023 ISD::FROUND,
1024 ISD::FROUNDEVEN,
1025 ISD::FRINT,
1026 ISD::FNEARBYINT,
1028 ISD::SETCC,
1029 ISD::FMAXIMUM,
1030 ISD::FMINIMUM,
1037 ISD::VECREDUCE_FMIN,
1038 ISD::VECREDUCE_FMAX,
1039 ISD::VECREDUCE_FMINIMUM,
1040 ISD::VECREDUCE_FMAXIMUM};
1041
1042 // TODO: support more vp ops.
1043 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1044 ISD::VP_FADD,
1045 ISD::VP_FSUB,
1046 ISD::VP_FMUL,
1047 ISD::VP_FDIV,
1048 ISD::VP_FMA,
1049 ISD::VP_REDUCE_FMIN,
1050 ISD::VP_REDUCE_FMAX,
1051 ISD::VP_SQRT,
1052 ISD::VP_FMINNUM,
1053 ISD::VP_FMAXNUM,
1054 ISD::VP_FCEIL,
1055 ISD::VP_FFLOOR,
1056 ISD::VP_FROUND,
1057 ISD::VP_FROUNDEVEN,
1058 ISD::VP_FROUNDTOZERO,
1059 ISD::VP_FRINT,
1060 ISD::VP_FNEARBYINT,
1061 ISD::VP_SETCC,
1062 ISD::VP_FMINIMUM,
1063 ISD::VP_FMAXIMUM,
1064 ISD::VP_REDUCE_FMINIMUM,
1065 ISD::VP_REDUCE_FMAXIMUM};
1066
1067 // Sets common operation actions on RVV floating-point vector types.
1068 const auto SetCommonVFPActions = [&](MVT VT) {
1070 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1071 // sizes are within one power-of-two of each other. Therefore conversions
1072 // between vXf16 and vXf64 must be lowered as sequences which convert via
1073 // vXf32.
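// For example, an FP_EXTEND from nxv2f16 to nxv2f64 is emitted as
// nxv2f16 -> nxv2f32 -> nxv2f64, and an FP_ROUND from f64 to f16 likewise
// goes through f32.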
1074 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1075 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1076 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1077 // Custom-lower insert/extract operations to simplify patterns.
1079 Custom);
1080 // Expand various condition codes (explained above).
1081 setCondCodeAction(VFPCCToExpand, VT, Expand);
1082
1084 {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT,
1085 Legal);
1086 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
1087
1088 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1089 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
1091 VT, Custom);
1092
1093 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1094
1095 // Expand FP operations that need libcalls.
1096 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1097
1099
1100 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1101
1102 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
1103 VT, Custom);
1104
1106 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1107 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1108 VT, Custom);
1109 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1110
1113
1116 VT, Custom);
1117
1120
1122 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1123 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1124
1125 setOperationAction(FloatingPointVPOps, VT, Custom);
1126
1128 Custom);
1131 VT, Legal);
1136 VT, Custom);
1137
1139 };
1140
1141 // Sets common extload/truncstore actions on RVV floating-point vector
1142 // types.
1143 const auto SetCommonVFPExtLoadTruncStoreActions =
1144 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1145 for (auto SmallVT : SmallerVTs) {
1146 setTruncStoreAction(VT, SmallVT, Expand);
1147 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1148 }
1149 };
1150
1151 // Sets common actions for f16 and bf16 for when there's only
1152 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1153 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1154 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1156 Custom);
1157 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1158 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1159 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1160 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1161 Custom);
1163 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1169 VT, Custom);
1170 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1171 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1172 MVT EltVT = VT.getVectorElementType();
1173 if (isTypeLegal(EltVT))
1174 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1176 VT, Custom);
1177 else
1178 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1179 EltVT, Custom);
1180 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1181 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1182 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1183 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1184 ISD::VP_SCATTER},
1185 VT, Custom);
1186 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1187
1188 setOperationAction(ISD::FNEG, VT, Expand);
1189 setOperationAction(ISD::FABS, VT, Expand);
1191
1192 // Expand FP operations that need libcalls.
1193 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1194
1195 // Custom split nxv32[b]f16 since nxv32f32 is not legal.
1196 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1197 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1198 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1199 } else {
1200 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1201 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1202 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1203 }
1204 };
1205
1206 if (Subtarget.hasVInstructionsF16()) {
1207 for (MVT VT : F16VecVTs) {
1208 if (!isTypeLegal(VT))
1209 continue;
1210 SetCommonVFPActions(VT);
1211 }
1212 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1213 for (MVT VT : F16VecVTs) {
1214 if (!isTypeLegal(VT))
1215 continue;
1216 SetCommonPromoteToF32Actions(VT);
1217 }
1218 }
1219
1220 if (Subtarget.hasVInstructionsBF16Minimal()) {
1221 for (MVT VT : BF16VecVTs) {
1222 if (!isTypeLegal(VT))
1223 continue;
1224 SetCommonPromoteToF32Actions(VT);
1225 }
1226 }
1227
1228 if (Subtarget.hasVInstructionsF32()) {
1229 for (MVT VT : F32VecVTs) {
1230 if (!isTypeLegal(VT))
1231 continue;
1232 SetCommonVFPActions(VT);
1233 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1234 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1235 }
1236 }
1237
1238 if (Subtarget.hasVInstructionsF64()) {
1239 for (MVT VT : F64VecVTs) {
1240 if (!isTypeLegal(VT))
1241 continue;
1242 SetCommonVFPActions(VT);
1243 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1244 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1245 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1246 }
1247 }
1248
1249 if (Subtarget.useRVVForFixedLengthVectors()) {
1251 if (!useRVVForFixedLengthVectorVT(VT))
1252 continue;
1253
1254 // By default everything must be expanded.
1255 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1258 setTruncStoreAction(VT, OtherVT, Expand);
1260 OtherVT, Expand);
1261 }
1262
1263 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1264 // expansion to a build_vector of 0s.
1266
1267 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1269 Custom);
1270
1273 Custom);
1274
1276 VT, Custom);
1277
1279 VT, Custom);
1280
1282
1283 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1284
1286
1288
1291 Custom);
1292
1293 setOperationAction(ISD::BITCAST, VT, Custom);
1294
1296 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1297 Custom);
1298
1300 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1301 Custom);
1302
1304 {
1313 },
1314 VT, Custom);
1316 Custom);
1317
1319
1320 // Operations below are different between masks and other vectors.
1321 if (VT.getVectorElementType() == MVT::i1) {
1322 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1323 ISD::OR, ISD::XOR},
1324 VT, Custom);
1325
1326 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1327 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1328 VT, Custom);
1329
1330 setOperationAction(ISD::VP_MERGE, VT, Custom);
1331
1332 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1333 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1334 continue;
1335 }
1336
1337 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1338 // it before type legalization for i64 vectors on RV32. It will then be
1339 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1340 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1341 // improvements first.
1342 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1345
1346 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1348 }
1349
1351 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1352
1353 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1354 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1355 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1356 ISD::VP_SCATTER},
1357 VT, Custom);
1358 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1359
1363 VT, Custom);
1364
1367
1369
1370 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1371 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1373
1377 VT, Custom);
1378
1380
1383
1384 // Custom-lower reduction operations to set up the corresponding custom
1385 // nodes' operands.
1386 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1387 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1388 ISD::VECREDUCE_UMIN},
1389 VT, Custom);
1390
1391 setOperationAction(IntegerVPOps, VT, Custom);
1392
1393 if (Subtarget.hasStdExtZvkb())
1395
1396 if (Subtarget.hasStdExtZvbb()) {
1399 VT, Custom);
1400 } else {
1401 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1402 // range of f32.
1403 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1404 if (isTypeLegal(FloatVT))
1407 Custom);
1408 }
1409
1411 }
1412
1414 // There are no extending loads or truncating stores.
1415 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1416 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1417 setTruncStoreAction(VT, InnerVT, Expand);
1418 }
1419
1420 if (!useRVVForFixedLengthVectorVT(VT))
1421 continue;
1422
1423 // By default everything must be expanded.
1424 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1426
1427 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1428 // expansion to a build_vector of 0s.
1430
1435 VT, Custom);
1436 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1437 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1438
1440 VT, Custom);
1441
1442 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1443 ISD::MGATHER, ISD::MSCATTER},
1444 VT, Custom);
1445 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1446 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1447 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1448 VT, Custom);
1449 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1450
1451 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1453 Custom);
1454
1455 if (VT.getVectorElementType() == MVT::f16 &&
1456 !Subtarget.hasVInstructionsF16()) {
1457 setOperationAction(ISD::BITCAST, VT, Custom);
1458 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1460 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1461 Custom);
1462 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1463 Custom);
1464 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1465 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1466 if (Subtarget.hasStdExtZfhmin()) {
1468 } else {
1469 // We need to custom legalize f16 build vectors if Zfhmin isn't
1470 // available.
1472 }
1473 setOperationAction(ISD::FNEG, VT, Expand);
1474 setOperationAction(ISD::FABS, VT, Expand);
1476 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1477 // Don't promote f16 vector operations to f32 if f32 vector type is
1478 // not legal.
1479 // TODO: could split the f16 vector into two vectors and do promotion.
1480 if (!isTypeLegal(F32VecVT))
1481 continue;
1482 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1483 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1484 continue;
1485 }
1486
1487 if (VT.getVectorElementType() == MVT::bf16) {
1488 setOperationAction(ISD::BITCAST, VT, Custom);
1489 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1490 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1491 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1492 if (Subtarget.hasStdExtZfbfmin()) {
1494 } else {
1495 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1496 // available.
1498 }
1500 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1501 Custom);
1502 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1503 // Don't promote bf16 vector operations to f32 if the f32 vector type is
1504 // not legal.
1505 // TODO: could split the bf16 vector into two vectors and do promotion.
1506 if (!isTypeLegal(F32VecVT))
1507 continue;
1508 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1509 // TODO: Promote VP ops to fp32.
1510 continue;
1511 }
1512
1514 Custom);
1515
1517 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1518 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1519 ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::IS_FPCLASS,
1520 ISD::FMAXIMUM, ISD::FMINIMUM},
1521 VT, Custom);
1522
1523 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1524 ISD::FROUNDEVEN, ISD::FRINT, ISD::LRINT,
1525 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
1526 ISD::FNEARBYINT},
1527 VT, Custom);
1528
1529 setCondCodeAction(VFPCCToExpand, VT, Expand);
1530
1533
1534 setOperationAction(ISD::BITCAST, VT, Custom);
1535
1536 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1537
1538 setOperationAction(FloatingPointVPOps, VT, Custom);
1539
1546 VT, Custom);
1547 }
1548
1549 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1550 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1551 if (Subtarget.is64Bit())
1552 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1553 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1554 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1555 if (Subtarget.hasStdExtZfbfmin())
1556 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
1557 if (Subtarget.hasStdExtFOrZfinx())
1558 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1559 if (Subtarget.hasStdExtDOrZdinx())
1560 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1561 }
1562 }
1563
1564 if (Subtarget.hasStdExtA())
1565 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1566
1567 if (Subtarget.hasForcedAtomics()) {
1568 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1570 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1571 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1572 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1573 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1574 XLenVT, LibCall);
1575 }
1576
1577 if (Subtarget.hasVendorXTHeadMemIdx()) {
1578 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1579 setIndexedLoadAction(im, MVT::i8, Legal);
1580 setIndexedStoreAction(im, MVT::i8, Legal);
1581 setIndexedLoadAction(im, MVT::i16, Legal);
1582 setIndexedStoreAction(im, MVT::i16, Legal);
1583 setIndexedLoadAction(im, MVT::i32, Legal);
1584 setIndexedStoreAction(im, MVT::i32, Legal);
1585
1586 if (Subtarget.is64Bit()) {
1587 setIndexedLoadAction(im, MVT::i64, Legal);
1588 setIndexedStoreAction(im, MVT::i64, Legal);
1589 }
1590 }
1591 }
1592
1593 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1597
1601 }
1602
1603 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1604 if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1605 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1606 ISD::PARTIAL_REDUCE_UMLA,
1607 ISD::PARTIAL_REDUCE_SUMLA};
1608 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1609 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1610 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1611 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1612 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
1613
1614 if (Subtarget.useRVVForFixedLengthVectors()) {
1616 if (VT.getVectorElementType() != MVT::i32 ||
1617 !useRVVForFixedLengthVectorVT(VT))
1618 continue;
1619 ElementCount EC = VT.getVectorElementCount();
1620 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1621 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1622 }
1623 }
1624 }
1625
1626 // Customize load and store operations for bf16 if Zfh isn't enabled.
1627 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1628 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1629 setOperationAction(ISD::STORE, MVT::bf16, Custom);
1630 }
1631
1632 // Function alignments.
1633 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1634 setMinFunctionAlignment(FunctionAlignment);
1635 // Set preferred alignments.
1636 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1637 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1638
1644
1645 if (Subtarget.hasStdExtFOrZfinx())
1646 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM, ISD::FMUL});
1647
1648 if (Subtarget.hasStdExtZbb())
1650
1651 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1652 Subtarget.hasVInstructions())
1654
1655 if (Subtarget.hasStdExtZbkb())
1657
1658 if (Subtarget.hasStdExtFOrZfinx())
1661 if (Subtarget.hasVInstructions())
1663 {ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1664 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1665 ISD::SRL, ISD::SHL, ISD::STORE,
1667 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1671 ISD::VSELECT, ISD::VECREDUCE_ADD});
1672
1673 if (Subtarget.hasVendorXTHeadMemPair())
1674 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1675 if (Subtarget.useRVVForFixedLengthVectors())
1676 setTargetDAGCombine(ISD::BITCAST);
1677
1678 // Disable strict node mutation.
1679 IsStrictFPEnabled = true;
1680 EnableExtLdPromotion = true;
1681
1682 // Let the subtarget decide if a predictable select is more expensive than the
1683 // corresponding branch. This information is used in CGP/SelectOpt to decide
1684 // when to convert selects into branches.
1685 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1686
1687 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1688 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1689
1690 MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1691 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1692 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1693
1695 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1696 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1697
1698 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1699 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1700}
1701
1702 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1703 LLVMContext &Context,
1704 EVT VT) const {
1705 if (!VT.isVector())
1706 return getPointerTy(DL);
1707 if (Subtarget.hasVInstructions() &&
1708 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1709 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1710 return VT.changeVectorElementTypeToInteger();
1711}
1712
1713 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1714 return Subtarget.getXLenVT();
1715}
1716
1717// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1718bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1719 unsigned VF,
1720 bool IsScalable) const {
1721 if (!Subtarget.hasVInstructions())
1722 return true;
1723
1724 if (!IsScalable)
1725 return true;
1726
1727 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1728 return true;
1729
1730 // Don't allow VF=1 if those types aren't legal.
1731 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1732 return true;
1733
1734 // VLEN=32 support is incomplete.
1735 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1736 return true;
1737
1738 // The maximum VF is for the smallest element width with LMUL=8.
1739 // VF must be a power of 2.
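// Worked example: RVVBytesPerBlock is 8, so MaxVF is 64 (e8 elements at
// LMUL=8 with the minimum supported VLEN of 64); a scalable request with
// VF=16 is therefore lowered to vsetvli, while VF=3 (not a power of 2) or
// VF=128 is expanded generically.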
1740 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
1741 return VF > MaxVF || !isPowerOf2_32(VF);
1742}
1743
1744 bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1745 return !Subtarget.hasVInstructions() ||
1746 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1747}
1748
1749 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1750 const CallInst &I,
1751 MachineFunction &MF,
1752 unsigned Intrinsic) const {
1753 auto &DL = I.getDataLayout();
1754
1755 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1756 bool IsUnitStrided, bool UsePtrVal = false) {
1757 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1758 // We can't use ptrVal if the intrinsic can access memory before the
1759 // pointer. This means we can't use it for strided or indexed intrinsics.
1760 if (UsePtrVal)
1761 Info.ptrVal = I.getArgOperand(PtrOp);
1762 else
1763 Info.fallbackAddressSpace =
1764 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1765 Type *MemTy;
1766 if (IsStore) {
1767 // Store value is the first operand.
1768 MemTy = I.getArgOperand(0)->getType();
1769 } else {
1770 // Use the return type. If it's a segment load, the return type is a struct.
1771 MemTy = I.getType();
1772 if (MemTy->isStructTy())
1773 MemTy = MemTy->getStructElementType(0);
1774 }
1775 if (!IsUnitStrided)
1776 MemTy = MemTy->getScalarType();
1777
1778 Info.memVT = getValueType(DL, MemTy);
1779 if (MemTy->isTargetExtTy()) {
1780 // RISC-V vector tuple type's alignment type should be its element type.
1781 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1782 MemTy = Type::getIntNTy(
1783 MemTy->getContext(),
1784 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1785 ->getZExtValue());
1786 Info.align = DL.getABITypeAlign(MemTy);
1787 } else {
1788 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
1789 }
1790 Info.size = MemoryLocation::UnknownSize;
1791 Info.flags |=
1792 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1793 return true;
1794 };
1795
1796 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1797 Info.flags |= MachineMemOperand::MONonTemporal;
1798
1800 switch (Intrinsic) {
1801 default:
1802 return false;
1803 case Intrinsic::riscv_masked_atomicrmw_xchg:
1804 case Intrinsic::riscv_masked_atomicrmw_add:
1805 case Intrinsic::riscv_masked_atomicrmw_sub:
1806 case Intrinsic::riscv_masked_atomicrmw_nand:
1807 case Intrinsic::riscv_masked_atomicrmw_max:
1808 case Intrinsic::riscv_masked_atomicrmw_min:
1809 case Intrinsic::riscv_masked_atomicrmw_umax:
1810 case Intrinsic::riscv_masked_atomicrmw_umin:
1811 case Intrinsic::riscv_masked_cmpxchg:
1812 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
1813 // narrow atomic operation. These will be expanded to an LR/SC loop that
1814 // reads/writes to/from an aligned 4 byte location. And, or, shift, etc.
1815 // will be used to modify the appropriate part of the 4 byte data and
1816 // preserve the rest.
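// For example, an i8 "atomicrmw add" funnelled through these intrinsics ends
// up as an LR.W/SC.W loop on the containing aligned word, with the byte
// shifted into place and merged under the mask, which is why memVT is
// reported as i32 below.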
1817 Info.opc = ISD::INTRINSIC_W_CHAIN;
1818 Info.memVT = MVT::i32;
1819 Info.ptrVal = I.getArgOperand(0);
1820 Info.offset = 0;
1821 Info.align = Align(4);
1822 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1823 MachineMemOperand::MOVolatile;
1824 return true;
1825 case Intrinsic::riscv_seg2_load_mask:
1826 case Intrinsic::riscv_seg3_load_mask:
1827 case Intrinsic::riscv_seg4_load_mask:
1828 case Intrinsic::riscv_seg5_load_mask:
1829 case Intrinsic::riscv_seg6_load_mask:
1830 case Intrinsic::riscv_seg7_load_mask:
1831 case Intrinsic::riscv_seg8_load_mask:
1832 case Intrinsic::riscv_sseg2_load_mask:
1833 case Intrinsic::riscv_sseg3_load_mask:
1834 case Intrinsic::riscv_sseg4_load_mask:
1835 case Intrinsic::riscv_sseg5_load_mask:
1836 case Intrinsic::riscv_sseg6_load_mask:
1837 case Intrinsic::riscv_sseg7_load_mask:
1838 case Intrinsic::riscv_sseg8_load_mask:
1839 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1840 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1841 case Intrinsic::riscv_seg2_store_mask:
1842 case Intrinsic::riscv_seg3_store_mask:
1843 case Intrinsic::riscv_seg4_store_mask:
1844 case Intrinsic::riscv_seg5_store_mask:
1845 case Intrinsic::riscv_seg6_store_mask:
1846 case Intrinsic::riscv_seg7_store_mask:
1847 case Intrinsic::riscv_seg8_store_mask:
1848 // Operands are (vec, ..., vec, ptr, mask, vl)
1849 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1850 /*IsStore*/ true,
1851 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1852 case Intrinsic::riscv_sseg2_store_mask:
1853 case Intrinsic::riscv_sseg3_store_mask:
1854 case Intrinsic::riscv_sseg4_store_mask:
1855 case Intrinsic::riscv_sseg5_store_mask:
1856 case Intrinsic::riscv_sseg6_store_mask:
1857 case Intrinsic::riscv_sseg7_store_mask:
1858 case Intrinsic::riscv_sseg8_store_mask:
1859 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
1860 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1861 /*IsStore*/ true,
1862 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1863 case Intrinsic::riscv_vlm:
1864 return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
1865 /*IsStore*/ false,
1866 /*IsUnitStrided*/ true,
1867 /*UsePtrVal*/ true);
1868 case Intrinsic::riscv_vle:
1869 case Intrinsic::riscv_vle_mask:
1870 case Intrinsic::riscv_vleff:
1871 case Intrinsic::riscv_vleff_mask:
1872 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1873 /*IsStore*/ false,
1874 /*IsUnitStrided*/ true,
1875 /*UsePtrVal*/ true);
1876 case Intrinsic::riscv_vsm:
1877 case Intrinsic::riscv_vse:
1878 case Intrinsic::riscv_vse_mask:
1879 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1880 /*IsStore*/ true,
1881 /*IsUnitStrided*/ true,
1882 /*UsePtrVal*/ true);
1883 case Intrinsic::riscv_vlse:
1884 case Intrinsic::riscv_vlse_mask:
1885 case Intrinsic::riscv_vloxei:
1886 case Intrinsic::riscv_vloxei_mask:
1887 case Intrinsic::riscv_vluxei:
1888 case Intrinsic::riscv_vluxei_mask:
1889 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1890 /*IsStore*/ false,
1891 /*IsUnitStrided*/ false);
1892 case Intrinsic::riscv_vsse:
1893 case Intrinsic::riscv_vsse_mask:
1894 case Intrinsic::riscv_vsoxei:
1895 case Intrinsic::riscv_vsoxei_mask:
1896 case Intrinsic::riscv_vsuxei:
1897 case Intrinsic::riscv_vsuxei_mask:
1898 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1899 /*IsStore*/ true,
1900 /*IsUnitStrided*/ false);
1901 case Intrinsic::riscv_vlseg2:
1902 case Intrinsic::riscv_vlseg3:
1903 case Intrinsic::riscv_vlseg4:
1904 case Intrinsic::riscv_vlseg5:
1905 case Intrinsic::riscv_vlseg6:
1906 case Intrinsic::riscv_vlseg7:
1907 case Intrinsic::riscv_vlseg8:
1908 case Intrinsic::riscv_vlseg2ff:
1909 case Intrinsic::riscv_vlseg3ff:
1910 case Intrinsic::riscv_vlseg4ff:
1911 case Intrinsic::riscv_vlseg5ff:
1912 case Intrinsic::riscv_vlseg6ff:
1913 case Intrinsic::riscv_vlseg7ff:
1914 case Intrinsic::riscv_vlseg8ff:
1915 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1916 /*IsStore*/ false,
1917 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1918 case Intrinsic::riscv_vlseg2_mask:
1919 case Intrinsic::riscv_vlseg3_mask:
1920 case Intrinsic::riscv_vlseg4_mask:
1921 case Intrinsic::riscv_vlseg5_mask:
1922 case Intrinsic::riscv_vlseg6_mask:
1923 case Intrinsic::riscv_vlseg7_mask:
1924 case Intrinsic::riscv_vlseg8_mask:
1925 case Intrinsic::riscv_vlseg2ff_mask:
1926 case Intrinsic::riscv_vlseg3ff_mask:
1927 case Intrinsic::riscv_vlseg4ff_mask:
1928 case Intrinsic::riscv_vlseg5ff_mask:
1929 case Intrinsic::riscv_vlseg6ff_mask:
1930 case Intrinsic::riscv_vlseg7ff_mask:
1931 case Intrinsic::riscv_vlseg8ff_mask:
1932 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1933 /*IsStore*/ false,
1934 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1935 case Intrinsic::riscv_vlsseg2:
1936 case Intrinsic::riscv_vlsseg3:
1937 case Intrinsic::riscv_vlsseg4:
1938 case Intrinsic::riscv_vlsseg5:
1939 case Intrinsic::riscv_vlsseg6:
1940 case Intrinsic::riscv_vlsseg7:
1941 case Intrinsic::riscv_vlsseg8:
1942 case Intrinsic::riscv_vloxseg2:
1943 case Intrinsic::riscv_vloxseg3:
1944 case Intrinsic::riscv_vloxseg4:
1945 case Intrinsic::riscv_vloxseg5:
1946 case Intrinsic::riscv_vloxseg6:
1947 case Intrinsic::riscv_vloxseg7:
1948 case Intrinsic::riscv_vloxseg8:
1949 case Intrinsic::riscv_vluxseg2:
1950 case Intrinsic::riscv_vluxseg3:
1951 case Intrinsic::riscv_vluxseg4:
1952 case Intrinsic::riscv_vluxseg5:
1953 case Intrinsic::riscv_vluxseg6:
1954 case Intrinsic::riscv_vluxseg7:
1955 case Intrinsic::riscv_vluxseg8:
1956 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1957 /*IsStore*/ false,
1958 /*IsUnitStrided*/ false);
1959 case Intrinsic::riscv_vlsseg2_mask:
1960 case Intrinsic::riscv_vlsseg3_mask:
1961 case Intrinsic::riscv_vlsseg4_mask:
1962 case Intrinsic::riscv_vlsseg5_mask:
1963 case Intrinsic::riscv_vlsseg6_mask:
1964 case Intrinsic::riscv_vlsseg7_mask:
1965 case Intrinsic::riscv_vlsseg8_mask:
1966 case Intrinsic::riscv_vloxseg2_mask:
1967 case Intrinsic::riscv_vloxseg3_mask:
1968 case Intrinsic::riscv_vloxseg4_mask:
1969 case Intrinsic::riscv_vloxseg5_mask:
1970 case Intrinsic::riscv_vloxseg6_mask:
1971 case Intrinsic::riscv_vloxseg7_mask:
1972 case Intrinsic::riscv_vloxseg8_mask:
1973 case Intrinsic::riscv_vluxseg2_mask:
1974 case Intrinsic::riscv_vluxseg3_mask:
1975 case Intrinsic::riscv_vluxseg4_mask:
1976 case Intrinsic::riscv_vluxseg5_mask:
1977 case Intrinsic::riscv_vluxseg6_mask:
1978 case Intrinsic::riscv_vluxseg7_mask:
1979 case Intrinsic::riscv_vluxseg8_mask:
1980 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1981 /*IsStore*/ false,
1982 /*IsUnitStrided*/ false);
1983 case Intrinsic::riscv_vsseg2:
1984 case Intrinsic::riscv_vsseg3:
1985 case Intrinsic::riscv_vsseg4:
1986 case Intrinsic::riscv_vsseg5:
1987 case Intrinsic::riscv_vsseg6:
1988 case Intrinsic::riscv_vsseg7:
1989 case Intrinsic::riscv_vsseg8:
1990 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1991 /*IsStore*/ true,
1992 /*IsUnitStrided*/ false);
1993 case Intrinsic::riscv_vsseg2_mask:
1994 case Intrinsic::riscv_vsseg3_mask:
1995 case Intrinsic::riscv_vsseg4_mask:
1996 case Intrinsic::riscv_vsseg5_mask:
1997 case Intrinsic::riscv_vsseg6_mask:
1998 case Intrinsic::riscv_vsseg7_mask:
1999 case Intrinsic::riscv_vsseg8_mask:
2000 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2001 /*IsStore*/ true,
2002 /*IsUnitStrided*/ false);
2003 case Intrinsic::riscv_vssseg2:
2004 case Intrinsic::riscv_vssseg3:
2005 case Intrinsic::riscv_vssseg4:
2006 case Intrinsic::riscv_vssseg5:
2007 case Intrinsic::riscv_vssseg6:
2008 case Intrinsic::riscv_vssseg7:
2009 case Intrinsic::riscv_vssseg8:
2010 case Intrinsic::riscv_vsoxseg2:
2011 case Intrinsic::riscv_vsoxseg3:
2012 case Intrinsic::riscv_vsoxseg4:
2013 case Intrinsic::riscv_vsoxseg5:
2014 case Intrinsic::riscv_vsoxseg6:
2015 case Intrinsic::riscv_vsoxseg7:
2016 case Intrinsic::riscv_vsoxseg8:
2017 case Intrinsic::riscv_vsuxseg2:
2018 case Intrinsic::riscv_vsuxseg3:
2019 case Intrinsic::riscv_vsuxseg4:
2020 case Intrinsic::riscv_vsuxseg5:
2021 case Intrinsic::riscv_vsuxseg6:
2022 case Intrinsic::riscv_vsuxseg7:
2023 case Intrinsic::riscv_vsuxseg8:
2024 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2025 /*IsStore*/ true,
2026 /*IsUnitStrided*/ false);
2027 case Intrinsic::riscv_vssseg2_mask:
2028 case Intrinsic::riscv_vssseg3_mask:
2029 case Intrinsic::riscv_vssseg4_mask:
2030 case Intrinsic::riscv_vssseg5_mask:
2031 case Intrinsic::riscv_vssseg6_mask:
2032 case Intrinsic::riscv_vssseg7_mask:
2033 case Intrinsic::riscv_vssseg8_mask:
2034 case Intrinsic::riscv_vsoxseg2_mask:
2035 case Intrinsic::riscv_vsoxseg3_mask:
2036 case Intrinsic::riscv_vsoxseg4_mask:
2037 case Intrinsic::riscv_vsoxseg5_mask:
2038 case Intrinsic::riscv_vsoxseg6_mask:
2039 case Intrinsic::riscv_vsoxseg7_mask:
2040 case Intrinsic::riscv_vsoxseg8_mask:
2041 case Intrinsic::riscv_vsuxseg2_mask:
2042 case Intrinsic::riscv_vsuxseg3_mask:
2043 case Intrinsic::riscv_vsuxseg4_mask:
2044 case Intrinsic::riscv_vsuxseg5_mask:
2045 case Intrinsic::riscv_vsuxseg6_mask:
2046 case Intrinsic::riscv_vsuxseg7_mask:
2047 case Intrinsic::riscv_vsuxseg8_mask:
2048 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2049 /*IsStore*/ true,
2050 /*IsUnitStrided*/ false);
2051 }
2052}
2053
2054 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
2055 const AddrMode &AM, Type *Ty,
2056 unsigned AS,
2057 Instruction *I) const {
2058 // No global is ever allowed as a base.
2059 if (AM.BaseGV)
2060 return false;
2061
2062 // None of our addressing modes allows a scalable offset
2063 if (AM.ScalableOffset)
2064 return false;
2065
2066 // RVV instructions only support register addressing.
2067 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2068 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2069
2070 // Require a 12-bit signed offset.
2071 if (!isInt<12>(AM.BaseOffs))
2072 return false;
2073
2074 switch (AM.Scale) {
2075 case 0: // "r+i" or just "i", depending on HasBaseReg.
2076 break;
2077 case 1:
2078 if (!AM.HasBaseReg) // allow "r+i".
2079 break;
2080 return false; // disallow "r+r" or "r+r+i".
2081 default:
2082 return false;
2083 }
2084
2085 return true;
2086}
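// For example (illustrative): a load from "base + 2040" matches the r+i form
// above since the offset fits in a signed 12-bit immediate, while
// "base + 4*idx" (Scale == 4) or "base + idx + 8" (r+r+i) is rejected and has
// to be materialized with separate SLLI/ADD instructions before the access.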
2087
2088 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
2089 return isInt<12>(Imm);
2090}
2091
2092 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
2093 return isInt<12>(Imm);
2094}
2095
2096// On RV32, 64-bit integers are split into their high and low parts and held
2097// in two different registers, so the trunc is free since the low register can
2098// just be used.
2099// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2100// isTruncateFree?
2101 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
2102 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2103 return false;
2104 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2105 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2106 return (SrcBits == 64 && DestBits == 32);
2107}
2108
2109 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
2110 // We consider i64->i32 free on RV64 since we have good selection of W
2111 // instructions that make promoting operations back to i64 free in many cases.
2112 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2113 !DstVT.isInteger())
2114 return false;
2115 unsigned SrcBits = SrcVT.getSizeInBits();
2116 unsigned DestBits = DstVT.getSizeInBits();
2117 return (SrcBits == 64 && DestBits == 32);
2118}
2119
2120 bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const {
2121 EVT SrcVT = Val.getValueType();
2122 // A truncate is free if it can be folded into a vnsrl or vnsra.
2123 if (Subtarget.hasVInstructions() &&
2124 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2125 SrcVT.isVector() && VT2.isVector()) {
2126 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2127 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2128 if (SrcBits == DestBits * 2) {
2129 return true;
2130 }
2131 }
2132 return TargetLowering::isTruncateFree(Val, VT2);
2133}
2134
2135 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
2136 // Zexts are free if they can be combined with a load.
2137 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2138 // poorly with type legalization of compares preferring sext.
2139 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2140 EVT MemVT = LD->getMemoryVT();
2141 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2142 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2143 LD->getExtensionType() == ISD::ZEXTLOAD))
2144 return true;
2145 }
2146
2147 return TargetLowering::isZExtFree(Val, VT2);
2148}
2149
2150 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
2151 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2152}
2153
2154 bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
2155 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2156}
2157
2158 bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
2159 return Subtarget.hasStdExtZbb() ||
2160 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2161}
2162
2163 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
2164 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2165 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2166}
2167
2168 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
2169 const Instruction &AndI) const {
2170 // We expect to be able to match a bit extraction instruction if the Zbs
2171 // extension is supported and the mask is a power of two. However, we
2172 // conservatively return false if the mask would fit in an ANDI instruction,
2173 // on the basis that it's possible the sinking+duplication of the AND in
2174 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2175 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2176 if (!Subtarget.hasBEXTILike())
2177 return false;
2179 if (!Mask)
2180 return false;
2181 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2182}
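// Worked example: for (X & 0x1000) == 0 the mask is a power of two that does
// not fit in ANDI's simm12, so BEXTI+BNEZ is profitable and we return true.
// For (X & 0x400) == 0 the mask fits in simm12, so a plain ANDI is already
// optimal and we return false.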
2183
2184 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
2185 EVT VT = Y.getValueType();
2186
2187 if (VT.isVector())
2188 return false;
2189
2190 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2191 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2192}
2193
2194 bool RISCVTargetLowering::hasAndNot(SDValue Y) const {
2195 EVT VT = Y.getValueType();
2196
2197 if (!VT.isVector())
2198 return hasAndNotCompare(Y);
2199
2200 return Subtarget.hasStdExtZvkb();
2201}
2202
2203 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
2204 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2205 if (Subtarget.hasStdExtZbs())
2206 return X.getValueType().isScalarInteger();
2207 auto *C = dyn_cast<ConstantSDNode>(Y);
2208 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2209 if (Subtarget.hasVendorXTHeadBs())
2210 return C != nullptr;
2211 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2212 return C && C->getAPIntValue().ule(10);
2213}
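// For example: with Zbs, any single-bit test on a scalar integer can use
// BEXT/BEXTI plus SEQZ/SNEZ, for any bit position. The ANDI fallback only
// accepts bit positions 0..10 because the mask (1 << pos) must fit in a
// 12-bit signed immediate (1 << 10 == 1024 fits, 1 << 11 == 2048 does not).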
2214
2215 bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(
2216 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2217 SDValue Y) const {
2218 if (SelectOpcode != ISD::VSELECT)
2219 return false;
2220
2221 // Only enable for rvv.
2222 if (!VT.isVector() || !Subtarget.hasVInstructions())
2223 return false;
2224
2225 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2226 return false;
2227
2228 return true;
2229}
2230
2231 bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
2232 Type *Ty) const {
2233 assert(Ty->isIntegerTy());
2234
2235 unsigned BitSize = Ty->getIntegerBitWidth();
2236 if (BitSize > Subtarget.getXLen())
2237 return false;
2238
2239 // Fast path, assume 32-bit immediates are cheap.
2240 int64_t Val = Imm.getSExtValue();
2241 if (isInt<32>(Val))
2242 return true;
2243
2244 // A constant pool entry may be more aligned than the load we're trying to
2245 // replace. If we don't support unaligned scalar mem, prefer the constant
2246 // pool.
2247 // TODO: Can the caller pass down the alignment?
2248 if (!Subtarget.enableUnalignedScalarMem())
2249 return true;
2250
2251 // Prefer to keep the load if it would require many instructions.
2252 // This uses the same threshold we use for constant pools but doesn't
2253 // check useConstantPoolForLargeInts.
2254 // TODO: Should we keep the load only when we're definitely going to emit a
2255 // constant pool?
2256
2258 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2259}
2260
2264 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2265 SelectionDAG &DAG) const {
2266 // One interesting pattern that we'd want to form is 'bit extract':
2267 // ((1 >> Y) & 1) ==/!= 0
2268 // But we also need to be careful not to try to reverse that fold.
2269
2270 // Is this '((1 >> Y) & 1)'?
2271 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2272 return false; // Keep the 'bit extract' pattern.
2273
2274 // Will this be '((1 >> Y) & 1)' after the transform?
2275 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2276 return true; // Do form the 'bit extract' pattern.
2277
2278 // If 'X' is a constant, and we transform, then we will immediately
2279 // try to undo the fold, thus causing endless combine loop.
2280 // So only do the transform if X is not a constant. This matches the default
2281 // implementation of this function.
2282 return !XC;
2283}
2284
2285 bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2286 unsigned Opc = VecOp.getOpcode();
2287
2288 // Assume target opcodes can't be scalarized.
2289 // TODO - do we have any exceptions?
2290 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2291 return false;
2292
2293 // If the vector op is not supported, try to convert to scalar.
2294 EVT VecVT = VecOp.getValueType();
2296 return true;
2297
2298 // If the vector op is supported, but the scalar op is not, the transform may
2299 // not be worthwhile.
2300 // Permit a vector binary operation to be converted to a scalar binary
2301 // operation that is custom lowered with an illegal type.
2302 EVT ScalarVT = VecVT.getScalarType();
2303 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2304 isOperationCustom(Opc, ScalarVT);
2305}
2306
2307 bool RISCVTargetLowering::isOffsetFoldingLegal(
2308 const GlobalAddressSDNode *GA) const {
2309 // In order to maximise the opportunity for common subexpression elimination,
2310 // keep a separate ADD node for the global address offset instead of folding
2311 // it in the global address node. Later peephole optimisations may choose to
2312 // fold it back in when profitable.
2313 return false;
2314}
2315
2316// Returns 0-31 if the fli instruction is available for the type and this is
2317 // a legal FP immediate for the type. Returns -1 otherwise.
2318 int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
2319 if (!Subtarget.hasStdExtZfa())
2320 return -1;
2321
2322 bool IsSupportedVT = false;
2323 if (VT == MVT::f16) {
2324 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2325 } else if (VT == MVT::f32) {
2326 IsSupportedVT = true;
2327 } else if (VT == MVT::f64) {
2328 assert(Subtarget.hasStdExtD() && "Expect D extension");
2329 IsSupportedVT = true;
2330 }
2331
2332 if (!IsSupportedVT)
2333 return -1;
2334
2335 return RISCVLoadFPImm::getLoadFPImm(Imm);
2336}
2337
2338 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2339 bool ForCodeSize) const {
2340 bool IsLegalVT = false;
2341 if (VT == MVT::f16)
2342 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2343 else if (VT == MVT::f32)
2344 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2345 else if (VT == MVT::f64)
2346 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2347 else if (VT == MVT::bf16)
2348 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2349
2350 if (!IsLegalVT)
2351 return false;
2352
2353 if (getLegalZfaFPImm(Imm, VT) >= 0)
2354 return true;
2355
2356 // Some constants can be produced by fli+fneg.
2357 if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
2358 return true;
2359
2360 // Cannot create a 64-bit floating-point immediate value for RV32.
2361 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2362 // td can handle +0.0 or -0.0 already.
2363 // -0.0 can be created by fmv + fneg.
2364 return Imm.isZero();
2365 }
2366
2367 // Special case: fmv + fneg
2368 if (Imm.isNegZero())
2369 return true;
2370
2371 // Building an integer and then converting requires a fmv at the end of
2372 // the integer sequence. The fmv is not required for Zfinx.
2373 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2374 const int Cost =
2375 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2376 Subtarget.getXLen(), Subtarget);
2377 return Cost <= FPImmCost;
2378}
2379
2380// TODO: This is very conservative.
2382 unsigned Index) const {
2384 return false;
2385
2386 // Extracts from index 0 are just subreg extracts.
2387 if (Index == 0)
2388 return true;
2389
2390 // Only support extracting a fixed from a fixed vector for now.
2391 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2392 return false;
2393
2394 EVT EltVT = ResVT.getVectorElementType();
2395 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2396
2397 // The smallest type we can slide is i8.
2398 // TODO: We can extract index 0 from a mask vector without a slide.
2399 if (EltVT == MVT::i1)
2400 return false;
2401
2402 unsigned ResElts = ResVT.getVectorNumElements();
2403 unsigned SrcElts = SrcVT.getVectorNumElements();
2404
2405 unsigned MinVLen = Subtarget.getRealMinVLen();
2406 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2407
2408 // If we're extracting only data from the first VLEN bits of the source
2409 // then we can always do this with an m1 vslidedown.vx. Restricting the
2410 // Index ensures we can use a vslidedown.vi.
2411 // TODO: We can generalize this when the exact VLEN is known.
2412 if (Index + ResElts <= MinVLMAX && Index < 31)
2413 return true;
2414
2415 // Conservatively only handle extracting half of a vector.
2416 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2417 // the upper half of a vector until we have more test coverage.
2418 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2419 // a cheap extract. However, this case is important in practice for
2420 // shuffled extracts of longer vectors. How should we resolve this?
2421 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2422}
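// For example: extracting the upper half (v4i32 at index 4 from v8i32) is
// considered cheap, as is any extract that reads only the first VLEN bits of
// the source with an index below 31 (a single m1 vslidedown.vi). An unaligned
// slice taken from the middle of a longer vector falls through to the final
// check above and is rejected.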
2423
2425 CallingConv::ID CC,
2426 EVT VT) const {
2427 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2428 // We might still end up using a GPR but that will be decided based on ABI.
2429 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2430 !Subtarget.hasStdExtZfhminOrZhinxmin())
2431 return MVT::f32;
2432
2433 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2434
2435 return PartVT;
2436}
2437
2438unsigned
2440 std::optional<MVT> RegisterVT) const {
2441 // Pair inline assembly operand
2442 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2443 *RegisterVT == MVT::Untyped)
2444 return 1;
2445
2446 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2447}
2448
2450 CallingConv::ID CC,
2451 EVT VT) const {
2452 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2453 // We might still end up using a GPR but that will be decided based on ABI.
2454 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2455 !Subtarget.hasStdExtZfhminOrZhinxmin())
2456 return 1;
2457
2458 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2459}
2460
2462 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2463 unsigned &NumIntermediates, MVT &RegisterVT) const {
2465 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2466
2467 return NumRegs;
2468}
2469
2470// Changes the condition code and swaps operands if necessary, so the SetCC
2471// operation matches one of the comparisons supported directly by branches
2472// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2473// with 1/-1.
2474 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2475 ISD::CondCode &CC, SelectionDAG &DAG,
2476 const RISCVSubtarget &Subtarget) {
2477 // If this is a single bit test that can't be handled by ANDI, shift the
2478 // bit to be tested to the MSB and perform a signed compare with 0.
2479 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2480 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2481 isa<ConstantSDNode>(LHS.getOperand(1)) &&
2482 // XAndesPerf supports branch on test bit.
2483 !Subtarget.hasVendorXAndesPerf()) {
2484 uint64_t Mask = LHS.getConstantOperandVal(1);
2485 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2486 unsigned ShAmt = 0;
2487 if (isPowerOf2_64(Mask)) {
2488 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2489 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2490 } else {
2491 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2492 }
2493
2494 LHS = LHS.getOperand(0);
2495 if (ShAmt != 0)
2496 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2497 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2498 return;
2499 }
2500 }
2501
2502 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2503 int64_t C = RHSC->getSExtValue();
2504 switch (CC) {
2505 default: break;
2506 case ISD::SETGT:
2507 // Convert X > -1 to X >= 0.
2508 if (C == -1) {
2509 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2510 CC = ISD::SETGE;
2511 return;
2512 }
2513 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
2514 // We have a branch immediate instruction for SETGE but not SETGT.
2515 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2516 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2517 CC = ISD::SETGE;
2518 return;
2519 }
2520 break;
2521 case ISD::SETLT:
2522 // Convert X < 1 to 0 >= X.
2523 if (C == 1) {
2524 RHS = LHS;
2525 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2526 CC = ISD::SETGE;
2527 return;
2528 }
2529 break;
2530 case ISD::SETUGT:
2531 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
2532 // We have a branch immediate instruction for SETUGE but not SETUGT.
2533 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
2534 // immediate.
2535 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2536 CC = ISD::SETUGE;
2537 return;
2538 }
2539 break;
2540 }
2541 }
2542
2543 switch (CC) {
2544 default:
2545 break;
2546 case ISD::SETGT:
2547 case ISD::SETLE:
2548 case ISD::SETUGT:
2549 case ISD::SETULE:
2550 CC = ISD::getSetCCSwappedOperands(CC);
2551 std::swap(LHS, RHS);
2552 break;
2553 }
2554}
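// Worked example (RV32): the single-bit test (X & 0x100000) != 0 cannot use
// ANDI because the mask exceeds simm12, so the code above shifts bit 20 into
// the sign bit and rewrites the branch condition as (X << 11) < 0, i.e. the
// SETNE-against-zero check becomes SETLT against zero.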
2555
2556 RISCVVType::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2557 if (VT.isRISCVVectorTuple()) {
2558 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2559 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2560 return RISCVVType::LMUL_F8;
2561 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2562 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2563 return RISCVVType::LMUL_F4;
2564 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2565 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2566 return RISCVVType::LMUL_F2;
2567 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2568 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2569 return RISCVVType::LMUL_1;
2570 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2571 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2572 return RISCVVType::LMUL_2;
2573 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2574 return RISCVVType::LMUL_4;
2575 llvm_unreachable("Invalid vector tuple type LMUL.");
2576 }
2577
2578 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2579 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2580 if (VT.getVectorElementType() == MVT::i1)
2581 KnownSize *= 8;
2582
2583 switch (KnownSize) {
2584 default:
2585 llvm_unreachable("Invalid LMUL.");
2586 case 8:
2587 return RISCVVType::LMUL_F8;
2588 case 16:
2589 return RISCVVType::LMUL_F4;
2590 case 32:
2591 return RISCVVType::LMUL_F2;
2592 case 64:
2593 return RISCVVType::LMUL_1;
2594 case 128:
2595 return RISCVVType::LMUL_2;
2596 case 256:
2597 return RISCVVType::LMUL_4;
2598 case 512:
2599 return RISCVVType::LMUL_8;
2600 }
2601}
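// For example: nxv4i32 has a known minimum size of 4 * 32 = 128 bits, i.e.
// two 64-bit vector blocks, so it maps to LMUL_2, while nxv1i8 (8 bits) maps
// to the fractional LMUL_F8.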
2602
2604 switch (LMul) {
2605 default:
2606 llvm_unreachable("Invalid LMUL.");
2607 case RISCVVType::LMUL_F8:
2608 case RISCVVType::LMUL_F4:
2609 case RISCVVType::LMUL_F2:
2610 case RISCVVType::LMUL_1:
2611 return RISCV::VRRegClassID;
2612 case RISCVVType::LMUL_2:
2613 return RISCV::VRM2RegClassID;
2614 case RISCVVType::LMUL_4:
2615 return RISCV::VRM4RegClassID;
2616 case RISCVVType::LMUL_8:
2617 return RISCV::VRM8RegClassID;
2618 }
2619}
2620
2621unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2622 RISCVVType::VLMUL LMUL = getLMUL(VT);
2623 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2624 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2625 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2626 "Unexpected subreg numbering");
2627 return RISCV::sub_vrm1_0 + Index;
2628 }
2629 if (LMUL == RISCVVType::LMUL_2) {
2630 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2631 "Unexpected subreg numbering");
2632 return RISCV::sub_vrm2_0 + Index;
2633 }
2634 if (LMUL == RISCVVType::LMUL_4) {
2635 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2636 "Unexpected subreg numbering");
2637 return RISCV::sub_vrm4_0 + Index;
2638 }
2639 llvm_unreachable("Invalid vector type.");
2640}
2641
2643 if (VT.isRISCVVectorTuple()) {
2644 unsigned NF = VT.getRISCVVectorTupleNumFields();
2645 unsigned RegsPerField =
2646 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2647 (NF * RISCV::RVVBitsPerBlock));
2648 switch (RegsPerField) {
2649 case 1:
2650 if (NF == 2)
2651 return RISCV::VRN2M1RegClassID;
2652 if (NF == 3)
2653 return RISCV::VRN3M1RegClassID;
2654 if (NF == 4)
2655 return RISCV::VRN4M1RegClassID;
2656 if (NF == 5)
2657 return RISCV::VRN5M1RegClassID;
2658 if (NF == 6)
2659 return RISCV::VRN6M1RegClassID;
2660 if (NF == 7)
2661 return RISCV::VRN7M1RegClassID;
2662 if (NF == 8)
2663 return RISCV::VRN8M1RegClassID;
2664 break;
2665 case 2:
2666 if (NF == 2)
2667 return RISCV::VRN2M2RegClassID;
2668 if (NF == 3)
2669 return RISCV::VRN3M2RegClassID;
2670 if (NF == 4)
2671 return RISCV::VRN4M2RegClassID;
2672 break;
2673 case 4:
2674 assert(NF == 2);
2675 return RISCV::VRN2M4RegClassID;
2676 default:
2677 break;
2678 }
2679 llvm_unreachable("Invalid vector tuple type RegClass.");
2680 }
2681
2682 if (VT.getVectorElementType() == MVT::i1)
2683 return RISCV::VRRegClassID;
2684 return getRegClassIDForLMUL(getLMUL(VT));
2685}
2686
2687// Attempt to decompose a subvector insert/extract between VecVT and
2688// SubVecVT via subregister indices. Returns the subregister index that
2689// can perform the subvector insert/extract with the given element index, as
2690// well as the index corresponding to any leftover subvectors that must be
2691// further inserted/extracted within the register class for SubVecVT.
2692std::pair<unsigned, unsigned>
2694 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2695 const RISCVRegisterInfo *TRI) {
2696 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2697 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2698 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2699 "Register classes not ordered");
2700 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2701 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2702
2703 // If VecVT is a vector tuple type, either it's the tuple type with the same
2704 // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2705 if (VecVT.isRISCVVectorTuple()) {
2706 if (VecRegClassID == SubRegClassID)
2707 return {RISCV::NoSubRegister, 0};
2708
2709 assert(SubVecVT.isScalableVector() &&
2710 "Only allow scalable vector subvector.");
2711 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2712 "Invalid vector tuple insert/extract for vector and subvector with "
2713 "different LMUL.");
2714 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2715 }
2716
2717 // Try to compose a subregister index that takes us from the incoming
2718 // LMUL>1 register class down to the outgoing one. At each step we halve
2719 // the LMUL:
2720 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2721 // Note that this is not guaranteed to find a subregister index, such as
2722 // when we are extracting from one VR type to another.
2723 unsigned SubRegIdx = RISCV::NoSubRegister;
2724 for (const unsigned RCID :
2725 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2726 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2727 VecVT = VecVT.getHalfNumVectorElementsVT();
2728 bool IsHi =
2729 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2730 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2731 getSubregIndexByMVT(VecVT, IsHi));
2732 if (IsHi)
2733 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2734 }
2735 return {SubRegIdx, InsertExtractIdx};
2736}
2737
2738// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2739// stores for those types.
2740bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2741 return !Subtarget.useRVVForFixedLengthVectors() ||
2742 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2743}
2744
2746 if (!ScalarTy.isSimple())
2747 return false;
2748 switch (ScalarTy.getSimpleVT().SimpleTy) {
2749 case MVT::iPTR:
2750 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2751 case MVT::i8:
2752 case MVT::i16:
2753 case MVT::i32:
2754 return true;
2755 case MVT::i64:
2756 return Subtarget.hasVInstructionsI64();
2757 case MVT::f16:
2758 return Subtarget.hasVInstructionsF16Minimal();
2759 case MVT::bf16:
2760 return Subtarget.hasVInstructionsBF16Minimal();
2761 case MVT::f32:
2762 return Subtarget.hasVInstructionsF32();
2763 case MVT::f64:
2764 return Subtarget.hasVInstructionsF64();
2765 default:
2766 return false;
2767 }
2768}
2769
2770
2772 return NumRepeatedDivisors;
2773}
2774
2776 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2777 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2778 "Unexpected opcode");
2779 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2780 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2782 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2783 if (!II)
2784 return SDValue();
2785 return Op.getOperand(II->VLOperand + 1 + HasChain);
2786}
2787
2788 static bool useRVVForFixedLengthVectorVT(MVT VT,
2789 const RISCVSubtarget &Subtarget) {
2790 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2791 if (!Subtarget.useRVVForFixedLengthVectors())
2792 return false;
2793
2794 // We only support a set of vector types with a consistent maximum fixed size
2795 // across all supported vector element types to avoid legalization issues.
2796 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2797 // fixed-length vector type we support is 1024 bytes.
2798 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
2799 return false;
2800
2801 unsigned MinVLen = Subtarget.getRealMinVLen();
2802
2803 MVT EltVT = VT.getVectorElementType();
2804
2805 // Don't use RVV for vectors we cannot scalarize if required.
2806 switch (EltVT.SimpleTy) {
2807 // i1 is supported but has different rules.
2808 default:
2809 return false;
2810 case MVT::i1:
2811 // Masks can only use a single register.
2812 if (VT.getVectorNumElements() > MinVLen)
2813 return false;
2814 MinVLen /= 8;
2815 break;
2816 case MVT::i8:
2817 case MVT::i16:
2818 case MVT::i32:
2819 break;
2820 case MVT::i64:
2821 if (!Subtarget.hasVInstructionsI64())
2822 return false;
2823 break;
2824 case MVT::f16:
2825 if (!Subtarget.hasVInstructionsF16Minimal())
2826 return false;
2827 break;
2828 case MVT::bf16:
2829 if (!Subtarget.hasVInstructionsBF16Minimal())
2830 return false;
2831 break;
2832 case MVT::f32:
2833 if (!Subtarget.hasVInstructionsF32())
2834 return false;
2835 break;
2836 case MVT::f64:
2837 if (!Subtarget.hasVInstructionsF64())
2838 return false;
2839 break;
2840 }
2841
2842 // Reject elements larger than ELEN.
2843 if (EltVT.getSizeInBits() > Subtarget.getELen())
2844 return false;
2845
2846 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2847 // Don't use RVV for types that don't fit.
2848 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2849 return false;
2850
2851 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2852 // the base fixed length RVV support in place.
2853 if (!VT.isPow2VectorType())
2854 return false;
2855
2856 return true;
2857}
2858
2859bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2860 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2861}
2862
2863// Return the largest legal scalable vector type that matches VT's element type.
2865 const RISCVSubtarget &Subtarget) {
2866 // This may be called before legal types are setup.
2867 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2868 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2869 "Expected legal fixed length vector!");
2870
2871 unsigned MinVLen = Subtarget.getRealMinVLen();
2872 unsigned MaxELen = Subtarget.getELen();
2873
2874 MVT EltVT = VT.getVectorElementType();
2875 switch (EltVT.SimpleTy) {
2876 default:
2877 llvm_unreachable("unexpected element type for RVV container");
2878 case MVT::i1:
2879 case MVT::i8:
2880 case MVT::i16:
2881 case MVT::i32:
2882 case MVT::i64:
2883 case MVT::bf16:
2884 case MVT::f16:
2885 case MVT::f32:
2886 case MVT::f64: {
2887 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2888 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2889 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2890 unsigned NumElts =
2892 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2893 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2894 return MVT::getScalableVectorVT(EltVT, NumElts);
2895 }
2896 }
2897}
2898
2900 const RISCVSubtarget &Subtarget) {
2902 Subtarget);
2903}
2904
2906 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2907}
2908
2909// Grow V to consume an entire RVV register.
2910 static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2911 const RISCVSubtarget &Subtarget) {
2912 assert(VT.isScalableVector() &&
2913 "Expected to convert into a scalable vector!");
2914 assert(V.getValueType().isFixedLengthVector() &&
2915 "Expected a fixed length vector operand!");
2916 SDLoc DL(V);
2917 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
2918}
2919
2920// Shrink V so it's just big enough to maintain a VT's worth of data.
2921 static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2922 const RISCVSubtarget &Subtarget) {
2923 assert(VT.isFixedLengthVector() &&
2924 "Expected to convert into a fixed length vector!");
2925 assert(V.getValueType().isScalableVector() &&
2926 "Expected a scalable vector operand!");
2927 SDLoc DL(V);
2928 return DAG.getExtractSubvector(DL, VT, V, 0);
2929}
2930
2931 /// Return the mask type suitable for masking the provided
2932/// vector type. This is simply an i1 element type vector of the same
2933/// (possibly scalable) length.
2934static MVT getMaskTypeFor(MVT VecVT) {
2935 assert(VecVT.isVector());
2937 return MVT::getVectorVT(MVT::i1, EC);
2938}
2939
2940/// Creates an all ones mask suitable for masking a vector of type VecTy with
2941 /// vector length VL.
2942static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2943 SelectionDAG &DAG) {
2944 MVT MaskVT = getMaskTypeFor(VecVT);
2945 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2946}
2947
2948static std::pair<SDValue, SDValue>
2950 const RISCVSubtarget &Subtarget) {
2951 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2952 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2953 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2954 return {Mask, VL};
2955}
2956
2957static std::pair<SDValue, SDValue>
2958getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2959 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2960 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2961 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2962 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2963 return {Mask, VL};
2964}
2965
2966// Gets the two common "VL" operands: an all-ones mask and the vector length.
2967// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2968// the vector type that the fixed-length vector is contained in. Otherwise if
2969// VecVT is scalable, then ContainerVT should be the same as VecVT.
2970static std::pair<SDValue, SDValue>
2971getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2972 const RISCVSubtarget &Subtarget) {
2973 if (VecVT.isFixedLengthVector())
2974 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2975 Subtarget);
2976 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2977 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2978}
2979
2981 SelectionDAG &DAG) const {
2982 assert(VecVT.isScalableVector() && "Expected scalable vector");
2983 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2984 VecVT.getVectorElementCount());
2985}
2986
2987std::pair<unsigned, unsigned>
2989 const RISCVSubtarget &Subtarget) {
2990 assert(VecVT.isScalableVector() && "Expected scalable vector");
2991
2992 unsigned EltSize = VecVT.getScalarSizeInBits();
2993 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2994
2995 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2996 unsigned MaxVLMAX =
2997 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2998
2999 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
3000 unsigned MinVLMAX =
3001 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
3002
3003 return std::make_pair(MinVLMAX, MaxVLMAX);
3004}
3005
3006// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
3007// of either is (currently) supported. This can get us into an infinite loop
3008// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
3009// as a ..., etc.
3010// Until either (or both) of these can reliably lower any node, reporting that
3011// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
3012// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
3013// which is not desirable.
3015 EVT VT, unsigned DefinedValues) const {
3016 return false;
3017}
3018
3020 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
3021 // implementation-defined.
3022 if (!VT.isVector())
3024 unsigned DLenFactor = Subtarget.getDLenFactor();
3025 unsigned Cost;
3026 if (VT.isScalableVector()) {
3027 unsigned LMul;
3028 bool Fractional;
3029 std::tie(LMul, Fractional) =
3031 if (Fractional)
3032 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
3033 else
3034 Cost = (LMul * DLenFactor);
3035 } else {
3036 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
3037 }
3038 return Cost;
3039}
3040
3041
3042/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
3043 /// may be quadratic in the number of vregs implied by LMUL, and is assumed
3044 /// to be so by default. VRGatherCostModel reflects the available options. Note
3045 /// that the operands (index and possibly mask) are handled separately.
3047 auto LMULCost = getLMULCost(VT);
3048 bool Log2CostModel =
3049 Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
3050 if (Log2CostModel && LMULCost.isValid()) {
3051 unsigned Log = Log2_64(LMULCost.getValue());
3052 if (Log > 0)
3053 return LMULCost * Log;
3054 }
3055 return LMULCost * LMULCost;
3056}
3057
3058/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3059/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3060/// or may track the vrgather.vv cost. It is implementation-dependent.
3064
3065/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3066/// for the type VT. (This does not cover the vslide1up or vslide1down
3067/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3068/// or may track the vrgather.vv cost. It is implementation-dependent.
3072
3073/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3074/// for the type VT. (This does not cover the vslide1up or vslide1down
3075/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3076/// or may track the vrgather.vv cost. It is implementation-dependent.
3080
3082 const RISCVSubtarget &Subtarget) {
3083 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3084 // bf16 conversions are always promoted to f32.
3085 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3086 Op.getValueType() == MVT::bf16) {
3087 bool IsStrict = Op->isStrictFPOpcode();
3088
3089 SDLoc DL(Op);
3090 if (IsStrict) {
3091 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
3092 {Op.getOperand(0), Op.getOperand(1)});
3093 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
3094 {Op.getValueType(), MVT::Other},
3095 {Val.getValue(1), Val.getValue(0),
3096 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
3097 }
3098 return DAG.getNode(
3099 ISD::FP_ROUND, DL, Op.getValueType(),
3100 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
3101 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3102 }
3103
3104 // Other operations are legal.
3105 return Op;
3106}
3107
3109 const RISCVSubtarget &Subtarget) {
3110 // RISC-V FP-to-int conversions saturate to the destination register size, but
3111 // don't produce 0 for nan. We can use a conversion instruction and fix the
3112 // nan case with a compare and a select.
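// For example, fcvt.w.s returns the largest positive i32 for a NaN input,
// whereas fptosi.sat requires 0, so the select emitted below uses a
// self-comparison of Src to detect NaN and substitutes 0 in that case.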
3113 SDValue Src = Op.getOperand(0);
3114
3115 MVT DstVT = Op.getSimpleValueType();
3116 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3117
3118 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3119
3120 if (!DstVT.isVector()) {
3121 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3122 // the result.
3123 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3124 Src.getValueType() == MVT::bf16) {
3125 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3126 }
3127
3128 unsigned Opc;
3129 if (SatVT == DstVT)
3130 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3131 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3132 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3133 else
3134 return SDValue();
3135 // FIXME: Support other SatVTs by clamping before or after the conversion.
3136
3137 SDLoc DL(Op);
3138 SDValue FpToInt = DAG.getNode(
3139 Opc, DL, DstVT, Src,
3141
3142 if (Opc == RISCVISD::FCVT_WU_RV64)
3143 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3144
3145 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3146 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3148 }
3149
3150 // Vectors.
3151
3152 MVT DstEltVT = DstVT.getVectorElementType();
3153 MVT SrcVT = Src.getSimpleValueType();
3154 MVT SrcEltVT = SrcVT.getVectorElementType();
3155 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3156 unsigned DstEltSize = DstEltVT.getSizeInBits();
3157
3158 // Only handle saturating to the destination type.
3159 if (SatVT != DstEltVT)
3160 return SDValue();
3161
3162 MVT DstContainerVT = DstVT;
3163 MVT SrcContainerVT = SrcVT;
3164 if (DstVT.isFixedLengthVector()) {
3165 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3166 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3167 assert(DstContainerVT.getVectorElementCount() ==
3168 SrcContainerVT.getVectorElementCount() &&
3169 "Expected same element count");
3170 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3171 }
3172
3173 SDLoc DL(Op);
3174
3175 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3176
3177 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3178 {Src, Src, DAG.getCondCode(ISD::SETNE),
3179 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3180
3181 // Need to widen by more than 1 step, promote the FP type, then do a widening
3182 // convert.
3183 if (DstEltSize > (2 * SrcEltSize)) {
3184 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3185 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3186 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3187 }
3188
3189 MVT CvtContainerVT = DstContainerVT;
3190 MVT CvtEltVT = DstEltVT;
3191 if (SrcEltSize > (2 * DstEltSize)) {
3192 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3193 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3194 }
3195
3196 unsigned RVVOpc =
3197 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3198 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3199
3200 while (CvtContainerVT != DstContainerVT) {
3201 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3202 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3203 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3204 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3205 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3206 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3207 }
3208
3209 SDValue SplatZero = DAG.getNode(
3210 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3211 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3212 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3213 Res, DAG.getUNDEF(DstContainerVT), VL);
3214
3215 if (DstVT.isFixedLengthVector())
3216 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3217
3218 return Res;
3219}
3220
3222 const RISCVSubtarget &Subtarget) {
3223 bool IsStrict = Op->isStrictFPOpcode();
3224 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3225
3226 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3227 // bf16 conversions are always promoted to f32.
3228 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3229 SrcVal.getValueType() == MVT::bf16) {
3230 SDLoc DL(Op);
3231 if (IsStrict) {
3232 SDValue Ext =
3233 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3234 {Op.getOperand(0), SrcVal});
3235 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3236 {Ext.getValue(1), Ext.getValue(0)});
3237 }
3238 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3239 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3240 }
3241
3242 // Other operations are legal.
3243 return Op;
3244}
3245
3247 switch (Opc) {
3248 case ISD::FROUNDEVEN:
3250 case ISD::VP_FROUNDEVEN:
3251 return RISCVFPRndMode::RNE;
3252 case ISD::FTRUNC:
3253 case ISD::STRICT_FTRUNC:
3254 case ISD::VP_FROUNDTOZERO:
3255 return RISCVFPRndMode::RTZ;
3256 case ISD::FFLOOR:
3257 case ISD::STRICT_FFLOOR:
3258 case ISD::VP_FFLOOR:
3259 return RISCVFPRndMode::RDN;
3260 case ISD::FCEIL:
3261 case ISD::STRICT_FCEIL:
3262 case ISD::VP_FCEIL:
3263 return RISCVFPRndMode::RUP;
3264 case ISD::FROUND:
3265 case ISD::LROUND:
3266 case ISD::LLROUND:
3267 case ISD::STRICT_FROUND:
3268 case ISD::STRICT_LROUND:
3270 case ISD::VP_FROUND:
3271 return RISCVFPRndMode::RMM;
3272 case ISD::FRINT:
3273 case ISD::LRINT:
3274 case ISD::LLRINT:
3275 case ISD::STRICT_FRINT:
3276 case ISD::STRICT_LRINT:
3277 case ISD::STRICT_LLRINT:
3278 case ISD::VP_FRINT:
3279 case ISD::VP_LRINT:
3280 case ISD::VP_LLRINT:
3281 return RISCVFPRndMode::DYN;
3282 }
3283
3285}
3286
3287// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3288// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3289// the integer domain and back. Taking care to avoid converting values that are
3290// nan or already correct.
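// For example, ffloor on -2.3f: |x| = 2.3 is below 2^23 (the f32 threshold
// computed below), so the element is converted to -3 with the RDN rounding
// mode, converted back to -3.0f, and the original sign is restored; elements
// with |x| >= 2^23 or NaN are already integral (or must be kept as-is) and
// are masked off from the conversion.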
3291static SDValue
3293 const RISCVSubtarget &Subtarget) {
3294 MVT VT = Op.getSimpleValueType();
3295 assert(VT.isVector() && "Unexpected type");
3296
3297 SDLoc DL(Op);
3298
3299 SDValue Src = Op.getOperand(0);
3300
3301 // Freeze the source since we are increasing the number of uses.
3302 Src = DAG.getFreeze(Src);
3303
3304 MVT ContainerVT = VT;
3305 if (VT.isFixedLengthVector()) {
3306 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3307 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3308 }
3309
3310 SDValue Mask, VL;
3311 if (Op->isVPOpcode()) {
3312 Mask = Op.getOperand(1);
3313 if (VT.isFixedLengthVector())
3314 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3315 Subtarget);
3316 VL = Op.getOperand(2);
3317 } else {
3318 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3319 }
3320
3321 // We do the conversion on the absolute value and fix the sign at the end.
3322 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3323
3324 // Determine the largest integer that can be represented exactly. This and
3325 // values larger than it don't have any fractional bits so don't need to
3326 // be converted.
3327 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3328 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3329 APFloat MaxVal = APFloat(FltSem);
3330 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3331 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3332 SDValue MaxValNode =
3333 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3334 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3335 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3336
3337 // If abs(Src) was larger than MaxVal or nan, keep it.
3338 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3339 Mask =
3340 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3341 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3342 Mask, Mask, VL});
3343
3344 // Truncate to integer and convert back to FP.
3345 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3346 MVT XLenVT = Subtarget.getXLenVT();
3347 SDValue Truncated;
3348
3349 switch (Op.getOpcode()) {
3350 default:
3351 llvm_unreachable("Unexpected opcode");
3352 case ISD::FRINT:
3353 case ISD::VP_FRINT:
3354 case ISD::FCEIL:
3355 case ISD::VP_FCEIL:
3356 case ISD::FFLOOR:
3357 case ISD::VP_FFLOOR:
3358 case ISD::FROUND:
3359 case ISD::FROUNDEVEN:
3360 case ISD::VP_FROUND:
3361 case ISD::VP_FROUNDEVEN:
3362 case ISD::VP_FROUNDTOZERO: {
3365 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3366 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3367 break;
3368 }
3369 case ISD::FTRUNC:
3370 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3371 Mask, VL);
3372 break;
3373 case ISD::FNEARBYINT:
3374 case ISD::VP_FNEARBYINT:
3375 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3376 Mask, VL);
3377 break;
3378 }
3379
3380 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3381 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3382 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3383 Mask, VL);
3384
3385 // Restore the original sign so that -0.0 is preserved.
3386 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3387 Src, Src, Mask, VL);
3388
3389 if (!VT.isFixedLengthVector())
3390 return Truncated;
3391
3392 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3393}
3394
3395// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3396 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting any sNaN in the source
3397 // to a qNaN and then converting the new source to integer and back to FP.
3398static SDValue
3400 const RISCVSubtarget &Subtarget) {
3401 SDLoc DL(Op);
3402 MVT VT = Op.getSimpleValueType();
3403 SDValue Chain = Op.getOperand(0);
3404 SDValue Src = Op.getOperand(1);
3405
3406 MVT ContainerVT = VT;
3407 if (VT.isFixedLengthVector()) {
3408 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3409 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3410 }
3411
3412 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3413
3414 // Freeze the source since we are increasing the number of uses.
3415 Src = DAG.getFreeze(Src);
3416
3417 // Convert sNaN to qNaN by computing x + x for every unordered element x in Src.
3418 MVT MaskVT = Mask.getSimpleValueType();
3419 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3420 DAG.getVTList(MaskVT, MVT::Other),
3421 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3422 DAG.getUNDEF(MaskVT), Mask, VL});
3423 Chain = Unorder.getValue(1);
3424 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3425 DAG.getVTList(ContainerVT, MVT::Other),
3426 {Chain, Src, Src, Src, Unorder, VL});
3427 Chain = Src.getValue(1);
3428
3429 // We do the conversion on the absolute value and fix the sign at the end.
3430 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3431
3432 // Determine the largest integer that can be represented exactly. This and
3433 // values larger than it don't have any fractional bits so don't need to
3434 // be converted.
3435 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3436 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3437 APFloat MaxVal = APFloat(FltSem);
3438 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3439 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3440 SDValue MaxValNode =
3441 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3442 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3443 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3444
3445 // If abs(Src) was larger than MaxVal or nan, keep it.
3446 Mask = DAG.getNode(
3447 RISCVISD::SETCC_VL, DL, MaskVT,
3448 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3449
3450 // Truncate to integer and convert back to FP.
3451 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3452 MVT XLenVT = Subtarget.getXLenVT();
3453 SDValue Truncated;
3454
3455 switch (Op.getOpcode()) {
3456 default:
3457 llvm_unreachable("Unexpected opcode");
3458 case ISD::STRICT_FCEIL:
3459 case ISD::STRICT_FFLOOR:
3460 case ISD::STRICT_FROUND:
3464 Truncated = DAG.getNode(
3465 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3466 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3467 break;
3468 }
3469 case ISD::STRICT_FTRUNC:
3470 Truncated =
3471 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3472 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3473 break;
3475 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3476 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3477 Mask, VL);
3478 break;
3479 }
3480 Chain = Truncated.getValue(1);
3481
3482 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3483 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3484 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3485 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3486 Truncated, Mask, VL);
3487 Chain = Truncated.getValue(1);
3488 }
3489
3490 // Restore the original sign so that -0.0 is preserved.
3491 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3492 Src, Src, Mask, VL);
3493
3494 if (VT.isFixedLengthVector())
3495 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3496 return DAG.getMergeValues({Truncated, Chain}, DL);
3497}
3498
3499static SDValue
3501 const RISCVSubtarget &Subtarget) {
3502 MVT VT = Op.getSimpleValueType();
3503 if (VT.isVector())
3504 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3505
3506 if (DAG.shouldOptForSize())
3507 return SDValue();
3508
3509 SDLoc DL(Op);
3510 SDValue Src = Op.getOperand(0);
3511
3512 // Create an integer the size of the mantissa with the MSB set. This and all
3513 // values larger than it don't have any fractional bits so don't need to be
3514 // converted.
3515 const fltSemantics &FltSem = VT.getFltSemantics();
3516 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3517 APFloat MaxVal = APFloat(FltSem);
3518 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3519 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3520 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3521
3523 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3524 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3525}
3526
3527// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3529 const RISCVSubtarget &Subtarget) {
3530 SDLoc DL(Op);
3531 MVT DstVT = Op.getSimpleValueType();
3532 SDValue Src = Op.getOperand(0);
3533 MVT SrcVT = Src.getSimpleValueType();
3534 assert(SrcVT.isVector() && DstVT.isVector() &&
3535 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3536 "Unexpected type");
3537
3538 MVT DstContainerVT = DstVT;
3539 MVT SrcContainerVT = SrcVT;
3540
3541 if (DstVT.isFixedLengthVector()) {
3542 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3543 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3544 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3545 }
3546
3547 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3548
3549 // [b]f16 -> f32
3550 MVT SrcElemType = SrcVT.getVectorElementType();
3551 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3552 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3553 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3554 }
3555
3556 SDValue Res =
3557 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3558 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3559 Subtarget.getXLenVT()),
3560 VL);
3561
3562 if (!DstVT.isFixedLengthVector())
3563 return Res;
3564
3565 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3566}
3567
3568static SDValue
3569getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3570 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3571 SDValue Offset, SDValue Mask, SDValue VL,
3572 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3573 if (Passthru.isUndef())
3574 Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3575 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3576 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3577 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3578}
3579
3580static SDValue
3581getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3582 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3583 SDValue VL,
3584 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3585 if (Passthru.isUndef())
3586 Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3587 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3588 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3589 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3590}
3591
3592struct VIDSequence {
3593 int64_t StepNumerator;
3594 unsigned StepDenominator;
3595 int64_t Addend;
3596};
3597
3598static std::optional<APInt> getExactInteger(const APFloat &APF,
3599 unsigned BitWidth) {
3600 // We will use a SINT_TO_FP to materialize this constant so we should use a
3601 // signed APSInt here.
3602 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3603 // We use an arbitrary rounding mode here. If a floating-point value is an
3604 // exact integer (e.g., 1.0), the rounding mode does not affect the output
3605 // value. If the rounding mode changes the output value, then it is not an
3606 // exact integer.
3607 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3608 bool IsExact;
3609 // If it is out of signed integer range, it will return an invalid operation.
3610 // If it is not an exact integer, IsExact is false.
3611 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3612 APFloatBase::opInvalidOp) ||
3613 !IsExact)
3614 return std::nullopt;
3615 return ValInt.extractBits(BitWidth, 0);
3616}
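// For example, with BitWidth = 8 (an arbitrary width chosen for illustration):
//   getExactInteger(2.0)    -> APInt(8, 2)
//   getExactInteger(-3.0)   -> APInt(8, 0xFD)  (two's complement -3)
//   getExactInteger(0.5)    -> std::nullopt    (not exact)
//   getExactInteger(1024.0) -> std::nullopt    (outside the signed 8-bit range)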
3617
3618// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3619 // to the (non-zero) step S and start value X. This can then be lowered as the
3620// RVV sequence (VID * S) + X, for example.
3621// The step S is represented as an integer numerator divided by a positive
3622// denominator. Note that the implementation currently only identifies
3623// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3624// cannot detect 2/3, for example.
3625// Note that this method will also match potentially unappealing index
3626 // sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3627// determine whether this is worth generating code for.
3628//
3629// EltSizeInBits is the size of the type that the sequence will be calculated
3630// in, i.e. SEW for build_vectors or XLEN for address calculations.
3631static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3632 unsigned EltSizeInBits) {
3633 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3634 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3635 return std::nullopt;
3636 bool IsInteger = Op.getValueType().isInteger();
3637
3638 std::optional<unsigned> SeqStepDenom;
3639 std::optional<APInt> SeqStepNum;
3640 std::optional<APInt> SeqAddend;
3641 std::optional<std::pair<APInt, unsigned>> PrevElt;
3642 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3643
3644 // First extract the ops into a list of constant integer values. This may not
3645 // be possible for floats if they're not all representable as integers.
3646 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3647 const unsigned OpSize = Op.getScalarValueSizeInBits();
3648 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3649 if (Elt.isUndef()) {
3650 Elts[Idx] = std::nullopt;
3651 continue;
3652 }
3653 if (IsInteger) {
3654 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3655 } else {
3656 auto ExactInteger =
3657 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3658 if (!ExactInteger)
3659 return std::nullopt;
3660 Elts[Idx] = *ExactInteger;
3661 }
3662 }
3663
3664 for (auto [Idx, Elt] : enumerate(Elts)) {
3665 // Assume undef elements match the sequence; we just have to be careful
3666 // when interpolating across them.
3667 if (!Elt)
3668 continue;
3669
3670 if (PrevElt) {
3671 // Calculate the step since the last non-undef element, and ensure
3672 // it's consistent across the entire sequence.
3673 unsigned IdxDiff = Idx - PrevElt->second;
3674 APInt ValDiff = *Elt - PrevElt->first;
3675
3676 // A zero value difference means that we're somewhere in the middle
3677 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3678 // step change before evaluating the sequence.
3679 if (ValDiff == 0)
3680 continue;
3681
3682 int64_t Remainder = ValDiff.srem(IdxDiff);
3683 // Normalize the step if it's greater than 1.
3684 if (Remainder != ValDiff.getSExtValue()) {
3685 // The difference must cleanly divide the element span.
3686 if (Remainder != 0)
3687 return std::nullopt;
3688 ValDiff = ValDiff.sdiv(IdxDiff);
3689 IdxDiff = 1;
3690 }
3691
3692 if (!SeqStepNum)
3693 SeqStepNum = ValDiff;
3694 else if (ValDiff != SeqStepNum)
3695 return std::nullopt;
3696
3697 if (!SeqStepDenom)
3698 SeqStepDenom = IdxDiff;
3699 else if (IdxDiff != *SeqStepDenom)
3700 return std::nullopt;
3701 }
3702
3703 // Record this non-undef element for later.
3704 if (!PrevElt || PrevElt->first != *Elt)
3705 PrevElt = std::make_pair(*Elt, Idx);
3706 }
3707
3708 // We need to have logged a step for this to count as a legal index sequence.
3709 if (!SeqStepNum || !SeqStepDenom)
3710 return std::nullopt;
3711
3712 // Loop back through the sequence and validate elements we might have skipped
3713 // while waiting for a valid step. While doing this, log any sequence addend.
3714 for (auto [Idx, Elt] : enumerate(Elts)) {
3715 if (!Elt)
3716 continue;
3717 APInt ExpectedVal =
3718 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3719 *SeqStepNum)
3720 .sdiv(*SeqStepDenom);
3721
3722 APInt Addend = *Elt - ExpectedVal;
3723 if (!SeqAddend)
3724 SeqAddend = Addend;
3725 else if (Addend != SeqAddend)
3726 return std::nullopt;
3727 }
3728
3729 assert(SeqAddend && "Must have an addend if we have a step");
3730
3731 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3732 SeqAddend->getSExtValue()};
3733}
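// A couple of worked examples (arbitrary constants, for illustration only):
//   <1, 3, 5, 7>       -> StepNumerator 2, StepDenominator 1, Addend 1
//                         (i.e. VID * 2 + 1)
//   <0, 0, 1, 1, 2, 2> -> StepNumerator 1, StepDenominator 2, Addend 0
//                         (i.e. VID / 2, a fractional step)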
3734
3735// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3736// and lower it as a VRGATHER_VX_VL from the source vector.
3737static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3738 SelectionDAG &DAG,
3739 const RISCVSubtarget &Subtarget) {
3740 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3741 return SDValue();
3742 SDValue Src = SplatVal.getOperand(0);
3743 // Don't perform this optimization for i1 vectors, or if the element types are
3744 // different
3745 // FIXME: Support i1 vectors, maybe by promoting to i8?
3746 MVT EltTy = VT.getVectorElementType();
3747 if (EltTy == MVT::i1 ||
3748 !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
3749 return SDValue();
3750 MVT SrcVT = Src.getSimpleValueType();
3751 if (EltTy != SrcVT.getVectorElementType())
3752 return SDValue();
3753 SDValue Idx = SplatVal.getOperand(1);
3754 // The index must be a legal type.
3755 if (Idx.getValueType() != Subtarget.getXLenVT())
3756 return SDValue();
3757
3758 // Check that we know Idx lies within VT
3759 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3760 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3761 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3762 return SDValue();
3763 }
3764
3765 // Convert fixed length vectors to scalable
3766 MVT ContainerVT = VT;
3767 if (VT.isFixedLengthVector())
3768 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3769
3770 MVT SrcContainerVT = SrcVT;
3771 if (SrcVT.isFixedLengthVector()) {
3772 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3773 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3774 }
3775
3776 // Put Vec in a VT sized vector
3777 if (SrcContainerVT.getVectorMinNumElements() <
3778 ContainerVT.getVectorMinNumElements())
3779 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
3780 else
3781 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
3782
3783 // We checked that Idx fits inside VT earlier
3784 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3785 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3786 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3787 if (VT.isFixedLengthVector())
3788 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3789 return Gather;
3790}
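// For instance, splatting (extract_vector_elt %src, 3) across a v8i32 result
// becomes roughly "vrgather.vi vd, vsrc, 3" on the container type (register
// names are illustrative), avoiding a vector->scalar->vector round trip
// through a GPR or FPR.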
3791
3792static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
3793 const RISCVSubtarget &Subtarget) {
3794 MVT VT = Op.getSimpleValueType();
3795 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3796
3797 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3798
3799 SDLoc DL(Op);
3800 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3801
3802 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3803 int64_t StepNumerator = SimpleVID->StepNumerator;
3804 unsigned StepDenominator = SimpleVID->StepDenominator;
3805 int64_t Addend = SimpleVID->Addend;
3806
3807 assert(StepNumerator != 0 && "Invalid step");
3808 bool Negate = false;
3809 int64_t SplatStepVal = StepNumerator;
3810 unsigned StepOpcode = ISD::MUL;
3811 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3812 // anyway as the shift of 63 won't fit in uimm5.
3813 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3814 isPowerOf2_64(std::abs(StepNumerator))) {
3815 Negate = StepNumerator < 0;
3816 StepOpcode = ISD::SHL;
3817 SplatStepVal = Log2_64(std::abs(StepNumerator));
3818 }
3819
3820 // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
3821 // since it's the immediate value many RVV instructions accept. There is
3822 // no vmul.vi instruction, so ensure the multiply constant can fit in a
3823 // single addi instruction. For the addend, we allow up to 32 bits.
3824 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3825 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3826 isPowerOf2_32(StepDenominator) &&
3827 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
3828 MVT VIDVT =
3829 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3830 MVT VIDContainerVT =
3831 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3832 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3833 // Convert right out of the scalable type so we can use standard ISD
3834 // nodes for the rest of the computation. If we used scalable types with
3835 // these, we'd lose the fixed-length vector info and generate worse
3836 // vsetvli code.
3837 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3838 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3839 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3840 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3841 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3842 }
3843 if (StepDenominator != 1) {
3844 SDValue SplatStep =
3845 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3846 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3847 }
3848 if (Addend != 0 || Negate) {
3849 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3850 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3851 VID);
3852 }
3853 if (VT.isFloatingPoint()) {
3854 // TODO: Use vfwcvt to reduce register pressure.
3855 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3856 }
3857 return VID;
3858 }
3859 }
3860
3861 return SDValue();
3862}
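// E.g. a v4i32 build_vector <0, 2, 4, 6> is matched above with step 2 and
// addend 0 and, since 2 is a power of two, lowers to roughly
//   vid.v   v8
//   vsll.vi v8, v8, 1
// (register chosen arbitrarily). A negative power-of-two step is handled the
// same way, with the final negation folded into the SUB path above.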
3863
3864/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3865/// which constitute a large proportion of the elements. In such cases we can
3866/// splat a vector with the dominant element and make up the shortfall with
3867 /// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3868/// Note that this includes vectors of 2 elements by association. The
3869/// upper-most element is the "dominant" one, allowing us to use a splat to
3870/// "insert" the upper element, and an insert of the lower element at position
3871/// 0, which improves codegen.
3872static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3873 const RISCVSubtarget &Subtarget) {
3874 MVT VT = Op.getSimpleValueType();
3875 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3876
3877 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3878
3879 SDLoc DL(Op);
3880 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3881
3882 MVT XLenVT = Subtarget.getXLenVT();
3883 unsigned NumElts = Op.getNumOperands();
3884
3885 SDValue DominantValue;
3886 unsigned MostCommonCount = 0;
3887 DenseMap<SDValue, unsigned> ValueCounts;
3888 unsigned NumUndefElts =
3889 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3890
3891 // Track the number of scalar loads we know we'd be inserting, estimated as
3892 // any non-zero floating-point constant. Other kinds of element are either
3893 // already in registers or are materialized on demand. The threshold at which
3894 // a vector load is more desirable than several scalar materialization and
3895 // vector-insertion instructions is not known.
3896 unsigned NumScalarLoads = 0;
3897
3898 for (SDValue V : Op->op_values()) {
3899 if (V.isUndef())
3900 continue;
3901
3902 unsigned &Count = ValueCounts[V];
3903 if (0 == Count)
3904 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3905 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3906
3907 // Is this value dominant? In case of a tie, prefer the highest element as
3908 // it's cheaper to insert near the beginning of a vector than it is at the
3909 // end.
3910 if (++Count >= MostCommonCount) {
3911 DominantValue = V;
3912 MostCommonCount = Count;
3913 }
3914 }
3915
3916 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3917 unsigned NumDefElts = NumElts - NumUndefElts;
3918 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3919
3920 // Don't perform this optimization when optimizing for size, since
3921 // materializing elements and inserting them tends to cause code bloat.
3922 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3923 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3924 ((MostCommonCount > DominantValueCountThreshold) ||
3925 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3926 // Start by splatting the most common element.
3927 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3928
3929 DenseSet<SDValue> Processed{DominantValue};
3930
3931 // We can handle an insert into the last element (of a splat) via
3932 // v(f)slide1down. This is slightly better than the vslideup insert
3933 // lowering as it avoids the need for a vector group temporary. It
3934 // is also better than using vmerge.vx as it avoids the need to
3935 // materialize the mask in a vector register.
3936 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3937 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3938 LastOp != DominantValue) {
3939 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3940 auto OpCode =
3941 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3942 if (!VT.isFloatingPoint())
3943 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3944 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3945 LastOp, Mask, VL);
3946 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3947 Processed.insert(LastOp);
3948 }
3949
3950 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3951 for (const auto &OpIdx : enumerate(Op->ops())) {
3952 const SDValue &V = OpIdx.value();
3953 if (V.isUndef() || !Processed.insert(V).second)
3954 continue;
3955 if (ValueCounts[V] == 1) {
3956 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
3957 } else {
3958 // Blend in all instances of this value using a VSELECT, using a
3959 // mask where each bit signals whether that element is the one
3960 // we're after.
3961 SmallVector<SDValue> Ops;
3962 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3963 return DAG.getConstant(V == V1, DL, XLenVT);
3964 });
3965 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3966 DAG.getBuildVector(SelMaskTy, DL, Ops),
3967 DAG.getSplatBuildVector(VT, DL, V), Vec);
3968 }
3969 }
3970
3971 return Vec;
3972 }
3973
3974 return SDValue();
3975}
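// Sketch of the idea (element names are placeholders): <a, b, a, a> splats
// the dominant value a and then inserts b at index 1, while <a, a, a, b>
// takes the v(f)slide1down special case above and appends b to a splat of a
// without needing a select mask or an extra vector temporary.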
3976
3977static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3978 const RISCVSubtarget &Subtarget) {
3979 MVT VT = Op.getSimpleValueType();
3980 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3981
3982 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3983
3984 SDLoc DL(Op);
3985 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3986
3987 MVT XLenVT = Subtarget.getXLenVT();
3988 unsigned NumElts = Op.getNumOperands();
3989
3990 if (VT.getVectorElementType() == MVT::i1) {
3991 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3992 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3993 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3994 }
3995
3996 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3997 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3998 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3999 }
4000
4001 // Lower constant mask BUILD_VECTORs via an integer vector type, in
4002 // scalar integer chunks whose bit-width depends on the number of mask
4003 // bits and XLEN.
4004 // First, determine the most appropriate scalar integer type to use. This
4005 // is at most XLenVT, but may be shrunk to a smaller vector element type
4006 // according to the size of the final vector - use i8 chunks rather than
4007 // XLenVT if we're producing a v8i1. This results in more consistent
4008 // codegen across RV32 and RV64.
4009 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4010 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4011 // If we have to use more than one INSERT_VECTOR_ELT then this
4012 // optimization is likely to increase code size; avoid performing it in
4013 // such a case. We can use a load from a constant pool in this case.
4014 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4015 return SDValue();
4016 // Now we can create our integer vector type. Note that it may be larger
4017 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4018 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4019 MVT IntegerViaVecVT =
4020 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4021 IntegerViaVecElts);
4022
4023 uint64_t Bits = 0;
4024 unsigned BitPos = 0, IntegerEltIdx = 0;
4025 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4026
4027 for (unsigned I = 0; I < NumElts;) {
4028 SDValue V = Op.getOperand(I);
4029 bool BitValue = !V.isUndef() && V->getAsZExtVal();
4030 Bits |= ((uint64_t)BitValue << BitPos);
4031 ++BitPos;
4032 ++I;
4033
4034 // Once we accumulate enough bits to fill our scalar type or process the
4035 // last element, insert into our vector and clear our accumulated data.
4036 if (I % NumViaIntegerBits == 0 || I == NumElts) {
4037 if (NumViaIntegerBits <= 32)
4038 Bits = SignExtend64<32>(Bits);
4039 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4040 Elts[IntegerEltIdx] = Elt;
4041 Bits = 0;
4042 BitPos = 0;
4043 IntegerEltIdx++;
4044 }
4045 }
4046
4047 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4048
4049 if (NumElts < NumViaIntegerBits) {
4050 // If we're producing a smaller vector than our minimum legal integer
4051 // type, bitcast to the equivalent (known-legal) mask type, and extract
4052 // our final mask.
4053 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4054 Vec = DAG.getBitcast(MVT::v8i1, Vec);
4055 Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4056 } else {
4057 // Else we must have produced an integer type with the same size as the
4058 // mask type; bitcast for the final result.
4059 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4060 Vec = DAG.getBitcast(VT, Vec);
4061 }
4062
4063 return Vec;
4064 }
4065
4066 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4067 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4068 : RISCVISD::VMV_V_X_VL;
4069 if (!VT.isFloatingPoint())
4070 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4071 Splat =
4072 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4073 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4074 }
4075
4076 // Try and match index sequences, which we can lower to the vid instruction
4077 // with optional modifications. An all-undef vector is matched by
4078 // getSplatValue, above.
4079 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4080 return Res;
4081
4082 // For very small build_vectors, use a single scalar insert of a constant.
4083 // TODO: Base this on constant rematerialization cost, not size.
4084 const unsigned EltBitSize = VT.getScalarSizeInBits();
4085 if (VT.getSizeInBits() <= 32 &&
4086 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
4087 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4088 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4089 "Unexpected sequence type");
4090 // If we can use the original VL with the modified element type, this
4091 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4092 // be moved into InsertVSETVLI?
4093 unsigned ViaVecLen =
4094 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4095 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4096
4097 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4098 uint64_t SplatValue = 0;
4099 // Construct the amalgamated value at this larger vector type.
4100 for (const auto &OpIdx : enumerate(Op->op_values())) {
4101 const auto &SeqV = OpIdx.value();
4102 if (!SeqV.isUndef())
4103 SplatValue |=
4104 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4105 }
4106
4107 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4108 // achieve better constant materialization.
4109 // On RV32, we need to sign-extend to use getSignedConstant.
4110 if (ViaIntVT == MVT::i32)
4111 SplatValue = SignExtend64<32>(SplatValue);
4112
4113 SDValue Vec = DAG.getInsertVectorElt(
4114 DL, DAG.getUNDEF(ViaVecVT),
4115 DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4116 if (ViaVecLen != 1)
4117 Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4118 return DAG.getBitcast(VT, Vec);
4119 }
4120
4121
4122 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4123 // when re-interpreted as a vector with a larger element type. For example,
4124 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4125 // could be instead splat as
4126 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4127 // TODO: This optimization could also work on non-constant splats, but it
4128 // would require bit-manipulation instructions to construct the splat value.
4129 SmallVector<SDValue> Sequence;
4130 const auto *BV = cast<BuildVectorSDNode>(Op);
4131 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4132 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
4133 BV->getRepeatedSequence(Sequence) &&
4134 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4135 unsigned SeqLen = Sequence.size();
4136 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4137 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4138 ViaIntVT == MVT::i64) &&
4139 "Unexpected sequence type");
4140
4141 // If we can use the original VL with the modified element type, this
4142 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4143 // be moved into InsertVSETVLI?
4144 const unsigned RequiredVL = NumElts / SeqLen;
4145 const unsigned ViaVecLen =
4146 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4147 NumElts : RequiredVL;
4148 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4149
4150 unsigned EltIdx = 0;
4151 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4152 uint64_t SplatValue = 0;
4153 // Construct the amalgamated value which can be splatted as this larger
4154 // vector type.
4155 for (const auto &SeqV : Sequence) {
4156 if (!SeqV.isUndef())
4157 SplatValue |=
4158 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4159 EltIdx++;
4160 }
4161
4162 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4163 // achieve better constant materialization.
4164 // On RV32, we need to sign-extend to use getSignedConstant.
4165 if (ViaIntVT == MVT::i32)
4166 SplatValue = SignExtend64<32>(SplatValue);
4167
4168 // Since we can't introduce illegal i64 types at this stage, we can only
4169 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4170 // way we can use RVV instructions to splat.
4171 assert((ViaIntVT.bitsLE(XLenVT) ||
4172 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4173 "Unexpected bitcast sequence");
4174 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4175 SDValue ViaVL =
4176 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4177 MVT ViaContainerVT =
4178 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4179 SDValue Splat =
4180 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4181 DAG.getUNDEF(ViaContainerVT),
4182 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4183 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4184 if (ViaVecLen != RequiredVL)
4185 Splat = DAG.getExtractSubvector(
4186 DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4187 return DAG.getBitcast(VT, Splat);
4188 }
4189 }
4190
4191 // If the number of sign bits allows, see if we can lower as a <N x i8>.
4192 // Our main goal here is to reduce LMUL (and thus work) required to
4193 // build the constant, but we will also narrow if the resulting
4194 // narrow vector is known to materialize cheaply.
4195 // TODO: We really should be costing the smaller vector. There are
4196 // profitable cases this misses.
4197 if (EltBitSize > 8 && VT.isInteger() &&
4198 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4199 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4200 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4201 DL, Op->ops());
4202 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4203 Source, DAG, Subtarget);
4204 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4205 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4206 }
4207
4208 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4209 return Res;
4210
4211 // For constant vectors, use generic constant pool lowering. Otherwise,
4212 // we'd have to materialize constants in GPRs just to move them into the
4213 // vector.
4214 return SDValue();
4215}
4216
4217static unsigned getPACKOpcode(unsigned DestBW,
4218 const RISCVSubtarget &Subtarget) {
4219 switch (DestBW) {
4220 default:
4221 llvm_unreachable("Unsupported pack size");
4222 case 16:
4223 return RISCV::PACKH;
4224 case 32:
4225 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4226 case 64:
4227 assert(Subtarget.is64Bit());
4228 return RISCV::PACK;
4229 }
4230}
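// As a reminder of the Zbkb semantics relied on here: packh concatenates the
// low bytes of rs1 (low half) and rs2 (high half) into bits [15:0] and
// zero-extends the rest, while pack/packw do the same with the low XLEN/2
// (respectively 16) bits of each source, packw sign-extending its 32-bit
// result on RV64.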
4231
4232/// Double the element size of the build vector to reduce the number
4233/// of vslide1down in the build vector chain. In the worst case, this
4234/// trades three scalar operations for 1 vector operation. Scalar
4235/// operations are generally lower latency, and for out-of-order cores
4236/// we also benefit from additional parallelism.
4237static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4238 const RISCVSubtarget &Subtarget) {
4239 SDLoc DL(Op);
4240 MVT VT = Op.getSimpleValueType();
4241 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4242 MVT ElemVT = VT.getVectorElementType();
4243 if (!ElemVT.isInteger())
4244 return SDValue();
4245
4246 // TODO: Relax these architectural restrictions, possibly with costing
4247 // of the actual instructions required.
4248 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4249 return SDValue();
4250
4251 unsigned NumElts = VT.getVectorNumElements();
4252 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4253 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4254 NumElts % 2 != 0)
4255 return SDValue();
4256
4257 // Produce [B,A] packed into a type twice as wide. Note that all
4258 // scalars are XLenVT, possibly masked (see below).
4259 MVT XLenVT = Subtarget.getXLenVT();
4260 SDValue Mask = DAG.getConstant(
4261 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4262 auto pack = [&](SDValue A, SDValue B) {
4263 // Bias the scheduling of the inserted operations to near the
4264 // definition of the element - this tends to reduce register
4265 // pressure overall.
4266 SDLoc ElemDL(B);
4267 if (Subtarget.hasStdExtZbkb())
4268 // Note that we're relying on the high bits of the result being
4269 // don't care. For PACKW, the result is *sign* extended.
4270 return SDValue(
4271 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4272 ElemDL, XLenVT, A, B),
4273 0);
4274
4275 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4276 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4277 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4278 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4279 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4281 };
4282
4283 SmallVector<SDValue> NewOperands;
4284 NewOperands.reserve(NumElts / 2);
4285 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4286 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4287 assert(NumElts == NewOperands.size() * 2);
4288 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4289 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4290 return DAG.getNode(ISD::BITCAST, DL, VT,
4291 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4292}
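// For illustration: a v8i8 build_vector (a,b,c,d,e,f,g,h) on RV64 with
// Zba/Zbb/Zbkb is packed into four packh results that form a v4i16
// build_vector, which is then bitcast back to v8i8, roughly halving the
// length of the vslide1down chain.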
4293
4294static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4295 const RISCVSubtarget &Subtarget) {
4296 MVT VT = Op.getSimpleValueType();
4297 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4298
4299 MVT EltVT = VT.getVectorElementType();
4300 MVT XLenVT = Subtarget.getXLenVT();
4301
4302 SDLoc DL(Op);
4303
4304 // Proper support for f16 requires Zvfh. bf16 always requires special
4305 // handling. We need to cast the scalar to integer and create an integer
4306 // build_vector.
4307 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4308 MVT IVT = VT.changeVectorElementType(MVT::i16);
4309 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4310 for (const auto &[I, U] : enumerate(Op->ops())) {
4311 SDValue Elem = U.get();
4312 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4313 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4314 // Called by LegalizeDAG, we need to use XLenVT operations since we
4315 // can't create illegal types.
4316 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4317 // Manually constant fold so the integer build_vector can be lowered
4318 // better. Waiting for DAGCombine will be too late.
4319 APInt V =
4320 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4321 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4322 } else {
4323 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4324 }
4325 } else {
4326 // Called by scalar type legalizer, we can use i16.
4327 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4328 }
4329 }
4330 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4331 return DAG.getBitcast(VT, Res);
4332 }
4333
4334 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4335 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4336 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4337
4338 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4339
4340 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4341
4342 if (VT.getVectorElementType() == MVT::i1) {
4343 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4344 // vector type, we have a legal equivalently-sized i8 type, so we can use
4345 // that.
4346 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4347 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4348
4349 SDValue WideVec;
4350 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4351 // For a splat, perform a scalar truncate before creating the wider
4352 // vector.
4353 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4354 DAG.getConstant(1, DL, Splat.getValueType()));
4355 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4356 } else {
4357 SmallVector<SDValue, 8> Ops(Op->op_values());
4358 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4359 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4360 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4361 }
4362
4363 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4364 }
4365
4366 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4367 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4368 return Gather;
4369
4370 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4371 // pressure at high LMUL.
4372 if (all_of(Op->ops().drop_front(),
4373 [](const SDUse &U) { return U.get().isUndef(); })) {
4374 unsigned Opc =
4375 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4376 if (!VT.isFloatingPoint())
4377 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4378 Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4379 Splat, VL);
4380 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4381 }
4382
4383 unsigned Opc =
4384 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4385 if (!VT.isFloatingPoint())
4386 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4387 Splat =
4388 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4389 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4390 }
4391
4392 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4393 return Res;
4394
4395 // If we're compiling for an exact VLEN value, we can split our work per
4396 // register in the register group.
4397 if (const auto VLen = Subtarget.getRealVLen();
4398 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4399 MVT ElemVT = VT.getVectorElementType();
4400 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4401 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4402 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4403 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4404 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4405
4406 // The following semantically builds up a fixed length concat_vector
4407 // of the component build_vectors. We eagerly lower to scalable and
4408 // insert_subvector here to avoid DAG combining it back to a large
4409 // build_vector.
4410 SmallVector<SDValue> BuildVectorOps(Op->ops());
4411 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4412 SDValue Vec = DAG.getUNDEF(ContainerVT);
4413 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4414 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4415 SDValue SubBV =
4416 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4417 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4418 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4419 Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4420 }
4421 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4422 }
4423
4424 // If we're about to resort to vslide1down (or stack usage), pack our
4425 // elements into the widest scalar type we can. This will force a VL/VTYPE
4426 // toggle, but reduces the critical path, the number of vslide1down ops
4427 // required, and possibly enables scalar folds of the values.
4428 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4429 return Res;
4430
4431 // For m1 vectors, if we have non-undef values in both halves of our vector,
4432 // split the vector into low and high halves, build them separately, then
4433 // use a vselect to combine them. For long vectors, this cuts the critical
4434 // path of the vslide1down sequence in half, and gives us an opportunity
4435 // to special case each half independently. Note that we don't change the
4436 // length of the sub-vectors here, so if both fallback to the generic
4437 // vslide1down path, we should be able to fold the vselect into the final
4438 // vslidedown (for the undef tail) for the first half w/ masking.
4439 unsigned NumElts = VT.getVectorNumElements();
4440 unsigned NumUndefElts =
4441 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4442 unsigned NumDefElts = NumElts - NumUndefElts;
4443 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4444 ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4445 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4446 SmallVector<SDValue> MaskVals;
4447 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4448 SubVecAOps.reserve(NumElts);
4449 SubVecBOps.reserve(NumElts);
4450 for (const auto &[Idx, U] : enumerate(Op->ops())) {
4451 SDValue Elem = U.get();
4452 if (Idx < NumElts / 2) {
4453 SubVecAOps.push_back(Elem);
4454 SubVecBOps.push_back(UndefElem);
4455 } else {
4456 SubVecAOps.push_back(UndefElem);
4457 SubVecBOps.push_back(Elem);
4458 }
4459 bool SelectMaskVal = (Idx < NumElts / 2);
4460 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4461 }
4462 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4463 MaskVals.size() == NumElts);
4464
4465 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4466 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4467 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4468 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4469 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4470 }
4471
4472 // Cap the cost at a value linear to the number of elements in the vector.
4473 // The default lowering is to use the stack. The vector store + scalar loads
4474 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4475 // being (at least) linear in LMUL. As a result, using the vslidedown
4476 // lowering for every element ends up being VL*LMUL.
4477 // TODO: Should we be directly costing the stack alternative? Doing so might
4478 // give us a more accurate upper bound.
4479 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4480
4481 // TODO: unify with TTI getSlideCost.
4482 InstructionCost PerSlideCost = 1;
4483 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4484 default: break;
4485 case RISCVVType::LMUL_2:
4486 PerSlideCost = 2;
4487 break;
4488 case RISCVVType::LMUL_4:
4489 PerSlideCost = 4;
4490 break;
4491 case RISCVVType::LMUL_8:
4492 PerSlideCost = 8;
4493 break;
4494 }
4495
4496 // TODO: Should we be using the build instseq then cost + evaluate scheme
4497 // we use for integer constants here?
4498 unsigned UndefCount = 0;
4499 for (const SDValue &V : Op->ops()) {
4500 if (V.isUndef()) {
4501 UndefCount++;
4502 continue;
4503 }
4504 if (UndefCount) {
4505 LinearBudget -= PerSlideCost;
4506 UndefCount = 0;
4507 }
4508 LinearBudget -= PerSlideCost;
4509 }
4510 if (UndefCount) {
4511 LinearBudget -= PerSlideCost;
4512 }
4513
4514 if (LinearBudget < 0)
4515 return SDValue();
4516
4517 assert((!VT.isFloatingPoint() ||
4518 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4519 "Illegal type which will result in reserved encoding");
4520
4521 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4522
4523 // General case: splat the first operand and slide other operands down one
4524 // by one to form a vector. Alternatively, if every operand is an
4525 // extraction from element 0 of a vector, we use that vector from the last
4526 // extraction as the start value and slide up instead of sliding down, so that
4527 // (1) we can avoid the initial splat and (2) we can later turn those vslide1up
4528 // into a vslideup of 1 and eliminate the vector-to-scalar movement, which is
4529 // something we cannot do with vslide1down/vslidedown.
4530 // Of course, using vslide1up/vslideup might increase the register pressure,
4531 // and that's why we conservatively limit to cases where every operand is an
4532 // extraction from the first element.
4533 SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
4534 SDValue EVec;
4535 bool SlideUp = false;
4536 auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
4537 SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
4538 if (SlideUp)
4539 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4540 Mask, VL, Policy);
4541 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4542 Mask, VL, Policy);
4543 };
4544
4545 // We don't use all_of here because we're also capturing EVec from the last
4546 // non-undef operand. If the std::execution_policy of the underlying
4547 // std::all_of were anything but std::sequenced_policy, we might capture the
4548 // wrong EVec.
4549 for (SDValue V : Operands) {
4550 using namespace SDPatternMatch;
4551 SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
4552 if (!SlideUp)
4553 break;
4554 }
4555
4556 if (SlideUp) {
4557 MVT EVecContainerVT = EVec.getSimpleValueType();
4558 // Make sure the original vector has scalable vector type.
4559 if (EVecContainerVT.isFixedLengthVector()) {
4560 EVecContainerVT =
4561 getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
4562 EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
4563 }
4564
4565 // Adapt EVec's type into ContainerVT.
4566 if (EVecContainerVT.getVectorMinNumElements() <
4567 ContainerVT.getVectorMinNumElements())
4568 EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
4569 else
4570 EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
4571
4572 // Reverse the elements as we're going to slide up from the last element.
4573 std::reverse(Operands.begin(), Operands.end());
4574 }
4575
4576 SDValue Vec;
4577 UndefCount = 0;
4578 for (SDValue V : Operands) {
4579 if (V.isUndef()) {
4580 UndefCount++;
4581 continue;
4582 }
4583
4584 // Start our sequence with either a TA splat or extract source in the
4585 // hopes that hardware is able to recognize there's no dependency on the
4586 // prior value of our temporary register.
4587 if (!Vec) {
4588 if (SlideUp) {
4589 Vec = EVec;
4590 } else {
4591 Vec = DAG.getSplatVector(VT, DL, V);
4592 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4593 }
4594
4595 UndefCount = 0;
4596 continue;
4597 }
4598
4599 if (UndefCount) {
4600 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4601 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4602 VL);
4603 UndefCount = 0;
4604 }
4605
4606 unsigned Opcode;
4607 if (VT.isFloatingPoint())
4608 Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
4609 else
4610 Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
4611
4612 if (!VT.isFloatingPoint())
4613 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4614 Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4615 V, Mask, VL);
4616 }
4617 if (UndefCount) {
4618 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4619 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4620 VL);
4621 }
4622 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4623}
4624
4625static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4626 SDValue Lo, SDValue Hi, SDValue VL,
4627 SelectionDAG &DAG) {
4628 if (!Passthru)
4629 Passthru = DAG.getUNDEF(VT);
4630 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4631 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4632 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4633 // If the Hi constant is all copies of Lo's sign bit (i.e. Lo sign-extended),
4634 // lower this as a custom node to try and match RVV vector/scalar instructions.
4635 if ((LoC >> 31) == HiC)
4636 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4637
4638 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
4639 // VL. This can temporarily increase VL if VL is less than VLMAX.
4640 if (LoC == HiC) {
4641 SDValue NewVL;
4642 if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4643 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4644 else
4645 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4646 MVT InterVT =
4647 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4648 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4649 DAG.getUNDEF(InterVT), Lo, NewVL);
4650 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4651 }
4652 }
4653
4654 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4655 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4656 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4657 Hi.getConstantOperandVal(1) == 31)
4658 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4659
4660 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4661 // even if it might be sign extended.
4662 if (Hi.isUndef())
4663 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4664
4665 // Fall back to a stack store and stride x0 vector load.
4666 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4667 Hi, VL);
4668}
4669
4670// Called by type legalization to handle splat of i64 on RV32.
4671// FIXME: We can optimize this when the type has sign or zero bits in one
4672// of the halves.
4673static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4674 SDValue Scalar, SDValue VL,
4675 SelectionDAG &DAG) {
4676 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4677 SDValue Lo, Hi;
4678 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4679 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4680}
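// Example on RV32 (constants chosen for illustration): splatting the i64
// value -1 splits into Lo = Hi = 0xFFFFFFFF, and since Hi equals Lo's sign
// bits a single vmv.v.x of Lo suffices; splatting 0x0000000100000002 matches
// none of the special cases above and falls back to
// SPLAT_VECTOR_SPLIT_I64_VL, i.e. a stack store plus a stride-x0 vector load.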
4681
4682// This function lowers a splat of a scalar operand Scalar with the vector
4683// length VL. It ensures the final sequence is type legal, which is useful when
4684// lowering a splat after type legalization.
4685static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4686 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4687 const RISCVSubtarget &Subtarget) {
4688 bool HasPassthru = Passthru && !Passthru.isUndef();
4689 if (!HasPassthru && !Passthru)
4690 Passthru = DAG.getUNDEF(VT);
4691
4692 MVT EltVT = VT.getVectorElementType();
4693 MVT XLenVT = Subtarget.getXLenVT();
4694
4695 if (VT.isFloatingPoint()) {
4696 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4697 EltVT == MVT::bf16) {
4698 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4699 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4700 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4701 else
4702 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4703 MVT IVT = VT.changeVectorElementType(MVT::i16);
4704 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4705 SDValue Splat =
4706 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4707 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4708 }
4709 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4710 }
4711
4712 // Simplest case is that the operand needs to be promoted to XLenVT.
4713 if (Scalar.getValueType().bitsLE(XLenVT)) {
4714 // If the operand is a constant, sign extend to increase our chances
4715 // of being able to use a .vi instruction. ANY_EXTEND would become a
4716 // zero extend and the simm5 check in isel would fail.
4717 // FIXME: Should we ignore the upper bits in isel instead?
4718 unsigned ExtOpc =
4719 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4720 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4721 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4722 }
4723
4724 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4725 "Unexpected scalar for splat lowering!");
4726
4727 if (isOneConstant(VL) && isNullConstant(Scalar))
4728 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4729 DAG.getConstant(0, DL, XLenVT), VL);
4730
4731 // Otherwise use the more complicated splatting algorithm.
4732 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4733}
4734
4735// This function lowers an insert of a scalar operand Scalar into lane
4736// 0 of the vector regardless of the value of VL. The contents of the
4737// remaining lanes of the result vector are unspecified. VL is assumed
4738// to be non-zero.
4739static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4740 const SDLoc &DL, SelectionDAG &DAG,
4741 const RISCVSubtarget &Subtarget) {
4742 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4743
4744 const MVT XLenVT = Subtarget.getXLenVT();
4745 SDValue Passthru = DAG.getUNDEF(VT);
4746
4747 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4748 isNullConstant(Scalar.getOperand(1))) {
4749 SDValue ExtractedVal = Scalar.getOperand(0);
4750 // The element types must be the same.
4751 if (ExtractedVal.getValueType().getVectorElementType() ==
4752 VT.getVectorElementType()) {
4753 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4754 MVT ExtractedContainerVT = ExtractedVT;
4755 if (ExtractedContainerVT.isFixedLengthVector()) {
4756 ExtractedContainerVT = getContainerForFixedLengthVector(
4757 DAG, ExtractedContainerVT, Subtarget);
4758 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4759 ExtractedVal, DAG, Subtarget);
4760 }
4761 if (ExtractedContainerVT.bitsLE(VT))
4762 return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
4763 return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
4764 }
4765 }
4766
4767 if (VT.isFloatingPoint())
4768 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4769 VL);
4770
4771 // Avoid the tricky legalization cases by falling back to using the
4772 // splat code which already handles it gracefully.
4773 if (!Scalar.getValueType().bitsLE(XLenVT))
4774 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4775 DAG.getConstant(1, DL, XLenVT),
4776 VT, DL, DAG, Subtarget);
4777
4778 // If the operand is a constant, sign extend to increase our chances
4779 // of being able to use a .vi instruction. ANY_EXTEND would become a
4780 // zero extend and the simm5 check in isel would fail.
4781 // FIXME: Should we ignore the upper bits in isel instead?
4782 unsigned ExtOpc =
4783 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4784 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4785 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4786 VL);
4787}
4788
4789/// If concat_vector(V1,V2) could be folded away to some existing
4790/// vector source, return it. Note that the source may be larger
4791 /// than the requested concat_vector (i.e. an extract_subvector
4792 /// might be required).
4793static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4794 EVT VT = V1.getValueType();
4795 assert(VT == V2.getValueType() && "argument types must match");
4796 // Both input must be extracts.
4797 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4798 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4799 return SDValue();
4800
4801 // Extracting from the same source.
4802 SDValue Src = V1.getOperand(0);
4803 if (Src != V2.getOperand(0) ||
4804 VT.isScalableVector() != Src.getValueType().isScalableVector())
4805 return SDValue();
4806
4807 // The extracts must extract the two halves of the source.
4808 if (V1.getConstantOperandVal(1) != 0 ||
4809 V2.getConstantOperandVal(1) != VT.getVectorMinNumElements())
4810 return SDValue();
4811
4812 return Src;
4813}
4814
4815// Can this shuffle be performed on exactly one (possibly larger) input?
4816static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
4817
4818 if (V2.isUndef())
4819 return V1;
4820
4821 unsigned NumElts = VT.getVectorNumElements();
4822 // Src needs to have twice the number of elements.
4823 // TODO: Update shuffle lowering to add the extract subvector
4824 if (SDValue Src = foldConcatVector(V1, V2);
4825 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4826 return Src;
4827
4828 return SDValue();
4829}
4830
4831/// Is this shuffle interleaving contiguous elements from one vector into the
4832/// even elements and contiguous elements from another vector into the odd
4833/// elements. \p EvenSrc will contain the element that should be in the first
4834/// even element. \p OddSrc will contain the element that should be in the first
4835/// odd element. These can be the first element in a source or the element half
4836/// way through the source.
4837static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4838 int &OddSrc, const RISCVSubtarget &Subtarget) {
4839 // We need to be able to widen elements to the next larger integer type or
4840 // use the zip2a instruction at e64.
4841 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
4842 !Subtarget.hasVendorXRivosVizip())
4843 return false;
4844
4845 int Size = Mask.size();
4846 int NumElts = VT.getVectorNumElements();
4847 assert(Size == (int)NumElts && "Unexpected mask size");
4848
4849 SmallVector<unsigned, 2> StartIndexes;
4850 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4851 return false;
4852
4853 EvenSrc = StartIndexes[0];
4854 OddSrc = StartIndexes[1];
4855
4856 // One source should be the low half of the first vector.
4857 if (EvenSrc != 0 && OddSrc != 0)
4858 return false;
4859
4860 // Subvectors will be subtracted from either at the start of the two input
4861 // vectors, or at the start and middle of the first vector if it's a unary
4862 // interleave.
4863 // In both cases, HalfNumElts will be extracted.
4864 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4865 // we'll create an illegal extract_subvector.
4866 // FIXME: We could support other values using a slidedown first.
4867 int HalfNumElts = NumElts / 2;
4868 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4869}
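// Example: for a v8i8 shuffle of two v8i8 sources, the mask
// <0, 8, 1, 9, 2, 10, 3, 11> is recognized with EvenSrc = 0 and OddSrc = 8,
// i.e. the low half of the first source interleaved with the low half of the
// second, while <0, 4, 1, 5, 2, 6, 3, 7> with an undef second operand is the
// unary form (EvenSrc = 0, OddSrc = 4).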
4870
4871/// Is this mask representing a masked combination of two slides?
4872static bool isMaskedSlidePair(ArrayRef<int> Mask,
4873 std::array<std::pair<int, int>, 2> &SrcInfo) {
4874 if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
4875 return false;
4876
4877 // Avoid matching vselect idioms
4878 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
4879 return false;
4880 // Prefer vslideup as the second instruction, and identity
4881 // only as the initial instruction.
4882 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
4883 SrcInfo[1].second == 0)
4884 std::swap(SrcInfo[0], SrcInfo[1]);
4885 assert(SrcInfo[0].first != -1 && "Must find one slide");
4886 return true;
4887}
4888
4889// Exactly matches the semantics of a previously existing custom matcher
4890// to allow migration to new matcher without changing output.
4891static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
4892 unsigned NumElts) {
4893 if (SrcInfo[1].first == -1)
4894 return true;
4895 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
4896 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
4897}
4898
4899static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
4900 ArrayRef<int> Mask, unsigned Factor,
4901 bool RequiredPolarity) {
4902 int NumElts = Mask.size();
4903 for (const auto &[Idx, M] : enumerate(Mask)) {
4904 if (M < 0)
4905 continue;
4906 int Src = M >= NumElts;
4907 int Diff = (int)Idx - (M % NumElts);
4908 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
4909 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
4910 "Must match exactly one of the two slides");
4911 if (RequiredPolarity != (C == (Idx / Factor) % 2))
4912 return false;
4913 }
4914 return true;
4915}
4916
4917/// Given a shuffle which can be represented as a pair of two slides,
4918/// see if it is a zipeven idiom. Zipeven is:
4919/// vs2: a0 a1 a2 a3
4920/// vs1: b0 b1 b2 b3
4921/// vd: a0 b0 a2 b2
4922static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
4923 ArrayRef<int> Mask, unsigned &Factor) {
4924 Factor = SrcInfo[1].second;
4925 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4926 Mask.size() % Factor == 0 &&
4927 isAlternating(SrcInfo, Mask, Factor, true);
4928}
4929
4930/// Given a shuffle which can be represented as a pair of two slides,
4931/// see if it is a zipodd idiom. Zipodd is:
4932/// vs2: a0 a1 a2 a3
4933/// vs1: b0 b1 b2 b3
4934/// vd: a1 b1 a3 b3
4935/// Note that the operand order is swapped due to the way we canonicalize
4936 /// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
4937static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
4938 ArrayRef<int> Mask, unsigned &Factor) {
4939 Factor = -SrcInfo[1].second;
4940 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4941 Mask.size() % Factor == 0 &&
4942 isAlternating(SrcInfo, Mask, Factor, false);
4943}
4944
4945// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4946 // 2, 4, or 8, and the integer type Factor times larger than VT's
4947// element type must be a legal element type.
4948// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4949// -> [p, q, r, s] (Factor=2, Index=1)
4950static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4951 SDValue Src, unsigned Factor,
4952 unsigned Index, SelectionDAG &DAG) {
4953 unsigned EltBits = VT.getScalarSizeInBits();
4954 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4955 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4956 SrcEC.divideCoefficientBy(Factor));
4957 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4958 SrcEC.divideCoefficientBy(Factor));
4959 Src = DAG.getBitcast(WideSrcVT, Src);
4960
4961 unsigned Shift = Index * EltBits;
4962 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4963 DAG.getConstant(Shift, DL, WideSrcVT));
4964 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4965 MVT CastVT = ResVT.changeVectorElementType(VT.getVectorElementType());
4966 Res = DAG.getBitcast(CastVT, Res);
4967 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
4968}
4969
4970/// Match a single source shuffle which is an identity except that some
4971/// particular element is repeated. This can be lowered as a masked
4972/// vrgather.vi/vx. Note that the two source form of this is handled
4973/// by the recursive splitting logic and doesn't need special handling.
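/// For example, the single source mask <0, 3, 2, 3> is an identity except that
/// element 3 is repeated into lane 1; it becomes a splat of element 3
/// vselect'ed into V1 under the select mask <0, 1, 0, 1>.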
4974static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN,
4975 const RISCVSubtarget &Subtarget,
4976 SelectionDAG &DAG) {
4977
4978 SDLoc DL(SVN);
4979 MVT VT = SVN->getSimpleValueType(0);
4980 SDValue V1 = SVN->getOperand(0);
4981 assert(SVN->getOperand(1).isUndef());
4982 ArrayRef<int> Mask = SVN->getMask();
4983 const unsigned NumElts = VT.getVectorNumElements();
4984 MVT XLenVT = Subtarget.getXLenVT();
4985
4986 std::optional<int> SplatIdx;
4987 for (auto [I, M] : enumerate(Mask)) {
4988 if (M == -1 || I == (unsigned)M)
4989 continue;
4990 if (SplatIdx && *SplatIdx != M)
4991 return SDValue();
4992 SplatIdx = M;
4993 }
4994
4995 if (!SplatIdx)
4996 return SDValue();
4997
4998 SmallVector<SDValue> MaskVals;
4999 for (int MaskIndex : Mask) {
5000 bool SelectMaskVal = MaskIndex == *SplatIdx;
5001 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5002 }
5003 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5004 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5005 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5006 SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
5007 SmallVector<int>(NumElts, *SplatIdx));
5008 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
5009}
5010
5011// Lower the following shuffle to vslidedown.
5012// a)
5013// t49: v8i8 = extract_subvector t13, Constant:i64<0>
5014// t109: v8i8 = extract_subvector t13, Constant:i64<8>
5015// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
5016// b)
5017// t69: v16i16 = extract_subvector t68, Constant:i64<0>
5018// t23: v8i16 = extract_subvector t69, Constant:i64<0>
5019// t29: v4i16 = extract_subvector t23, Constant:i64<4>
5020// t26: v8i16 = extract_subvector t69, Constant:i64<8>
5021// t30: v4i16 = extract_subvector t26, Constant:i64<0>
5022// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
5023static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
5024 SDValue V1, SDValue V2,
5025 ArrayRef<int> Mask,
5026 const RISCVSubtarget &Subtarget,
5027 SelectionDAG &DAG) {
5028 auto findNonEXTRACT_SUBVECTORParent =
5029 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
5030 uint64_t Offset = 0;
5031 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5032 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
5033 // a scalable vector. But we don't want to match the case.
5034 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
5035 Offset += Parent.getConstantOperandVal(1);
5036 Parent = Parent.getOperand(0);
5037 }
5038 return std::make_pair(Parent, Offset);
5039 };
5040
5041 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
5042 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
5043
5044 // Extracting from the same source.
5045 SDValue Src = V1Src;
5046 if (Src != V2Src)
5047 return SDValue();
5048
5049 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
5050 SmallVector<int, 16> NewMask(Mask);
5051 for (size_t i = 0; i != NewMask.size(); ++i) {
5052 if (NewMask[i] == -1)
5053 continue;
5054
5055 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
5056 NewMask[i] = NewMask[i] + V1IndexOffset;
5057 } else {
5058 // Minus NewMask.size() is needed. Otherwise, the b case would be
5059 // <5,6,7,12> instead of <5,6,7,8>.
5060 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
5061 }
5062 }
5063
5064 // First index must be known and non-zero. It will be used as the slidedown
5065 // amount.
5066 if (NewMask[0] <= 0)
5067 return SDValue();
5068
5069 // NewMask must also be contiguous.
5070 for (unsigned i = 1; i != NewMask.size(); ++i)
5071 if (NewMask[i - 1] + 1 != NewMask[i])
5072 return SDValue();
5073
5074 MVT XLenVT = Subtarget.getXLenVT();
5075 MVT SrcVT = Src.getSimpleValueType();
5076 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
5077 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
5078 SDValue Slidedown =
5079 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5080 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
5081 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
5082 return DAG.getExtractSubvector(
5083 DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
5084}
5085
5086// Because vslideup leaves the destination elements at the start intact, we can
5087// use it to perform shuffles that insert subvectors:
5088//
5089// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5090// ->
5091// vsetvli zero, 8, e8, mf2, ta, ma
5092// vslideup.vi v8, v9, 4
5093//
5094// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5095// ->
5096// vsetvli zero, 5, e8, mf2, tu, ma
5097// vslideup.vi v8, v9, 2
5098static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
5099 SDValue V1, SDValue V2,
5100 ArrayRef<int> Mask,
5101 const RISCVSubtarget &Subtarget,
5102 SelectionDAG &DAG) {
5103 unsigned NumElts = VT.getVectorNumElements();
5104 int NumSubElts, Index;
5105 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
5106 Index))
5107 return SDValue();
5108
5109 bool OpsSwapped = Mask[Index] < (int)NumElts;
5110 SDValue InPlace = OpsSwapped ? V2 : V1;
5111 SDValue ToInsert = OpsSwapped ? V1 : V2;
5112
5113 MVT XLenVT = Subtarget.getXLenVT();
5114 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5115 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
5116 // We slide up by the index that the subvector is being inserted at, and set
5117 // VL to the index + the number of elements being inserted.
5118 unsigned Policy =
5119 RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
5120 // If we're adding a suffix to the in-place vector, i.e. inserting right
5121 // up to the very end of it, then we don't actually care about the tail.
5122 if (NumSubElts + Index >= (int)NumElts)
5123 Policy |= RISCVVType::TAIL_AGNOSTIC;
5124
5125 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
5126 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
5127 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
5128
5129 SDValue Res;
5130 // If we're inserting into the lowest elements, use a tail undisturbed
5131 // vmv.v.v.
5132 if (Index == 0)
5133 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
5134 VL);
5135 else
5136 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
5137 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
5138 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5139}
5140
5141/// Match v(f)slide1up/down idioms. These operations involve sliding
5142/// N-1 elements to make room for an inserted scalar at one end.
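/// For example, <5, 6, 7, 0> where operand 0 is a splat build_vector becomes a
/// vslide1down of operand 1 with the splatted scalar inserted into the last
/// lane.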
5143static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
5144 SDValue V1, SDValue V2,
5145 ArrayRef<int> Mask,
5146 const RISCVSubtarget &Subtarget,
5147 SelectionDAG &DAG) {
5148 bool OpsSwapped = false;
5149 if (!isa<BuildVectorSDNode>(V1)) {
5150 if (!isa<BuildVectorSDNode>(V2))
5151 return SDValue();
5152 std::swap(V1, V2);
5153 OpsSwapped = true;
5154 }
5155 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
5156 if (!Splat)
5157 return SDValue();
5158
5159 // Return true if the mask could describe a slide of Mask.size() - 1
5160 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
5161 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5162 const unsigned S = (Offset > 0) ? 0 : -Offset;
5163 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5164 for (unsigned i = S; i != E; ++i)
5165 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5166 return false;
5167 return true;
5168 };
5169
5170 const unsigned NumElts = VT.getVectorNumElements();
5171 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5172 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5173 return SDValue();
5174
5175 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
5176 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
5177 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5178 return SDValue();
5179
5180 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5181 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5182
5183 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5184 // vslide1{down,up}.vx instead.
5185 if (VT.getVectorElementType() == MVT::bf16 ||
5186 (VT.getVectorElementType() == MVT::f16 &&
5187 !Subtarget.hasVInstructionsF16())) {
5188 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5189 Splat =
5190 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
5191 V2 = DAG.getBitcast(
5192 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
5193 SDValue Vec = DAG.getNode(
5194 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5195 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
5196 Vec = DAG.getBitcast(ContainerVT, Vec);
5197 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5198 }
5199
5200 auto OpCode = IsVSlidedown ?
5201 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5202 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5203 if (!VT.isFloatingPoint())
5204 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
5205 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
5206 DAG.getUNDEF(ContainerVT),
5207 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
5208 Splat, TrueMask, VL);
5209 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5210}
5211
5212/// Match a mask which "spreads" the leading elements of a vector evenly
5213/// across the result. Factor is the spread amount, and Index is the
5214/// offset applied (on success, Index < Factor). This is the inverse
5215/// of a deinterleave with the same Factor and Index. This is analogous
5216/// to an interleave, except that all but one lane is undef.
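/// For example, with Factor=2 and 8 result elements, Index=1 matches the mask
/// <-1, 0, -1, 1, -1, 2, -1, 3>.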
5217bool RISCVTargetLowering::isSpreadMask(ArrayRef<int> Mask, unsigned Factor,
5218 unsigned &Index) {
5219 SmallVector<bool> LaneIsUndef(Factor, true);
5220 for (unsigned i = 0; i < Mask.size(); i++)
5221 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5222
5223 bool Found = false;
5224 for (unsigned i = 0; i < Factor; i++) {
5225 if (LaneIsUndef[i])
5226 continue;
5227 if (Found)
5228 return false;
5229 Index = i;
5230 Found = true;
5231 }
5232 if (!Found)
5233 return false;
5234
5235 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5236 unsigned j = i * Factor + Index;
5237 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5238 return false;
5239 }
5240 return true;
5241}
5242
5243static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5244 const SDLoc &DL, SelectionDAG &DAG,
5245 const RISCVSubtarget &Subtarget) {
5246 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5247 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5248 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5250
5251 MVT VT = Op0.getSimpleValueType();
5252 MVT IntVT = VT.changeVectorElementTypeToInteger();
5253 Op0 = DAG.getBitcast(IntVT, Op0);
5254 Op1 = DAG.getBitcast(IntVT, Op1);
5255
5256 MVT ContainerVT = IntVT;
5257 if (VT.isFixedLengthVector()) {
5258 ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
5259 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
5260 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5261 }
5262
5263 MVT InnerVT = ContainerVT;
5264 auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
5265 if (Op1.isUndef() &&
5266 ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
5267 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5268 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5269 VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
5270 Subtarget.getXLenVT());
5271 Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
5272 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5273 Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
5274 Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
5275 }
5276
5277 SDValue Passthru = DAG.getUNDEF(InnerVT);
5278 SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
5279 if (InnerVT.bitsLT(ContainerVT))
5280 Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
5281 if (IntVT.isFixedLengthVector())
5282 Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
5283 Res = DAG.getBitcast(VT, Res);
5284 return Res;
5285}
5286
5287// Given a vector a, b, c, d return a vector Factor times longer
5288// with Factor-1 undef's between elements. Ex:
5289// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5290// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
5291static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5292 const SDLoc &DL, SelectionDAG &DAG) {
5293
5294 MVT VT = V.getSimpleValueType();
5295 unsigned EltBits = VT.getScalarSizeInBits();
5296 ElementCount EC = VT.getVectorElementCount();
5297 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
5298
5299 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
5300
5301 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
5302 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5303 // allow the SHL to fold away if Index is 0.
5304 if (Index != 0)
5305 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
5306 DAG.getConstant(EltBits * Index, DL, WideVT));
5307 // Make sure to use original element type
5308 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
5309 EC.multiplyCoefficientBy(Factor));
5310 return DAG.getBitcast(ResultVT, Result);
5311}
5312
5313// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5314// to create an interleaved vector of <[vscale x] n*2 x ty>.
5315// This requires that the size of ty is less than the subtarget's maximum ELEN.
5316static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
5317 const SDLoc &DL, SelectionDAG &DAG,
5318 const RISCVSubtarget &Subtarget) {
5319
5320 // FIXME: Not only does this optimize the code, it fixes some correctness
5321 // issues because MIR does not have freeze.
5322 if (EvenV.isUndef())
5323 return getWideningSpread(OddV, 2, 1, DL, DAG);
5324 if (OddV.isUndef())
5325 return getWideningSpread(EvenV, 2, 0, DL, DAG);
5326
5327 MVT VecVT = EvenV.getSimpleValueType();
5328 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5329 // Convert fixed vectors to scalable if needed
5330 if (VecContainerVT.isFixedLengthVector()) {
5331 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
5332 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
5333 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
5334 }
5335
5336 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5337
5338 // We're working with a vector of the same size as the resulting
5339 // interleaved vector, but with half the number of elements and
5340 // twice the SEW (Hence the restriction on not using the maximum
5341 // ELEN)
5342 MVT WideVT =
5343 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
5344 VecVT.getVectorElementCount());
5345 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5346 if (WideContainerVT.isFixedLengthVector())
5347 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
5348
5349 // Bitcast the input vectors to integers in case they are FP
5350 VecContainerVT = VecContainerVT.changeTypeToInteger();
5351 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
5352 OddV = DAG.getBitcast(VecContainerVT, OddV);
5353
5354 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
5355 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
5356
5357 SDValue Interleaved;
5358 if (Subtarget.hasStdExtZvbb()) {
5359 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5360 SDValue OffsetVec =
5361 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5362 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5363 OffsetVec, Passthru, Mask, VL);
5364 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5365 Interleaved, EvenV, Passthru, Mask, VL);
5366 } else {
5367 // FIXME: We should freeze the odd vector here. We already handled the case
5368 // of provably undef/poison above.
5369
5370 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5371 // vwaddu.vv
5372 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5373 OddV, Passthru, Mask, VL);
5374
5375 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all ones.
5376 SDValue AllOnesVec = DAG.getSplatVector(
5377 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5378 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5379 OddV, AllOnesVec, Passthru, Mask, VL);
5380
5381 // Add the two together so we get
5382 // (OddV * 0xff...ff) + (OddV + EvenV)
5383 // = (OddV * 0x100...00) + EvenV
5384 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5385 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
5386 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5387 Interleaved, OddsMul, Passthru, Mask, VL);
5388 }
5389
5390 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
5391 MVT ResultContainerVT = MVT::getVectorVT(
5392 VecVT.getVectorElementType(), // Make sure to use original type
5393 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5394 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5395
5396 // Convert back to a fixed vector if needed
5397 MVT ResultVT =
5398 MVT::getVectorVT(VecVT.getVectorElementType(),
5399 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5400 if (ResultVT.isFixedLengthVector())
5401 Interleaved =
5402 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5403
5404 return Interleaved;
5405}
5406
5407// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5408// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5409static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5410 SelectionDAG &DAG,
5411 const RISCVSubtarget &Subtarget) {
5412 SDLoc DL(SVN);
5413 MVT VT = SVN->getSimpleValueType(0);
5414 SDValue V = SVN->getOperand(0);
5415 unsigned NumElts = VT.getVectorNumElements();
5416
5417 assert(VT.getVectorElementType() == MVT::i1);
5418
5419 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5420 SVN->getMask().size()) ||
5421 !SVN->getOperand(1).isUndef())
5422 return SDValue();
5423
5424 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5425 EVT ViaVT = EVT::getVectorVT(
5426 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5427 EVT ViaBitVT =
5428 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5429
5430 // If we don't have zvbb or the larger element type > ELEN, the operation will
5431 // be illegal.
5432 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5433 ViaVT) ||
5434 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5435 return SDValue();
5436
5437 // If the bit vector doesn't fit exactly into the larger element type, we need
5438 // to insert it into the larger vector and then shift up the reversed bits
5439 // afterwards to get rid of the gap introduced.
5440 if (ViaEltSize > NumElts)
5441 V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);
5442
5443 SDValue Res =
5444 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5445
5446 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5447 // element type.
5448 if (ViaEltSize > NumElts)
5449 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5450 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5451
5452 Res = DAG.getBitcast(ViaBitVT, Res);
5453
5454 if (ViaEltSize > NumElts)
5455 Res = DAG.getExtractSubvector(DL, VT, Res, 0);
5456 return Res;
5457}
5458
5459static bool isLegalBitRotate(ArrayRef<int> Mask, EVT VT,
5460 const RISCVSubtarget &Subtarget,
5461 MVT &RotateVT, unsigned &RotateAmt) {
5462 unsigned NumElts = VT.getVectorNumElements();
5463 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5464 unsigned NumSubElts;
5465 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
5466 NumElts, NumSubElts, RotateAmt))
5467 return false;
5468 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5469 NumElts / NumSubElts);
5470
5471 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5472 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5473}
5474
5475// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5476// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5477// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5479 SelectionDAG &DAG,
5480 const RISCVSubtarget &Subtarget) {
5481 SDLoc DL(SVN);
5482
5483 EVT VT = SVN->getValueType(0);
5484 unsigned RotateAmt;
5485 MVT RotateVT;
5486 if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5487 return SDValue();
5488
5489 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5490
5491 SDValue Rotate;
5492 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5493 // so canonicalize to vrev8.
5494 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5495 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5496 else
5497 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5498 DAG.getConstant(RotateAmt, DL, RotateVT));
5499
5500 return DAG.getBitcast(VT, Rotate);
5501}
5502
5503// If compiling with an exactly known VLEN, see if we can split a
5504// shuffle on m2 or larger into a small number of m1 sized shuffles
5505// which write each destination register exactly once.
5506static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5507 SelectionDAG &DAG,
5508 const RISCVSubtarget &Subtarget) {
5509 SDLoc DL(SVN);
5510 MVT VT = SVN->getSimpleValueType(0);
5511 SDValue V1 = SVN->getOperand(0);
5512 SDValue V2 = SVN->getOperand(1);
5513 ArrayRef<int> Mask = SVN->getMask();
5514
5515 // If we don't know exact data layout, not much we can do. If this
5516 // is already m1 or smaller, no point in splitting further.
5517 const auto VLen = Subtarget.getRealVLen();
5518 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5519 return SDValue();
5520
5521 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5522 // expansion for.
5523 unsigned RotateAmt;
5524 MVT RotateVT;
5525 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5526 return SDValue();
5527
5528 MVT ElemVT = VT.getVectorElementType();
5529 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5530
5531 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5532 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5533 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5534 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5535 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5536 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5537 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5538 unsigned NumOfDestRegs = NumElts / NumOpElts;
5539 // The following semantically builds up a fixed length concat_vector
5540 // of the component shuffle_vectors. We eagerly lower to scalable here
5541 // to avoid DAG combining it back to a large shuffle_vector again.
5542 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5542 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5543
5544 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5545 Operands;
5546 processShuffleMasks(
5547 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5548 [&]() { Operands.emplace_back(); },
5549 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5550 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
5551 SmallVector<int>(SrcSubMask));
5552 },
5553 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5554 if (NewReg)
5555 Operands.emplace_back();
5556 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
5557 });
5558 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5559 // Note: check that we do not emit too many shuffles here to prevent code
5560 // size explosion.
5561 // TODO: investigate if it can be improved by extra analysis of the masks to
5562 // check if the code is more profitable.
5563 unsigned NumShuffles = std::accumulate(
5564 Operands.begin(), Operands.end(), 0u,
5565 [&](unsigned N,
5566 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5567 if (Data.empty())
5568 return N;
5569 N += Data.size();
5570 for (const auto &P : Data) {
5571 unsigned Idx2 = std::get<1>(P);
5572 ArrayRef<int> Mask = std::get<2>(P);
5573 if (Idx2 != UINT_MAX)
5574 ++N;
5575 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5576 --N;
5577 }
5578 return N;
5579 });
5580 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5581 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5582 return SDValue();
5583 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5584 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
5585 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5586 return SubVec;
5587 };
5588 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5589 ArrayRef<int> Mask) {
5590 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5591 return SubVec;
5592 };
5593 SDValue Vec = DAG.getUNDEF(ContainerVT);
5594 for (auto [I, Data] : enumerate(Operands)) {
5595 if (Data.empty())
5596 continue;
5597 SmallDenseMap<unsigned, SDValue, 4> Values;
5598 for (unsigned I : seq<unsigned>(Data.size())) {
5599 const auto &[Idx1, Idx2, _] = Data[I];
5600 // If the shuffle contains a permutation of an odd number of elements,
5601 // Idx1 might be used already in the first iteration.
5602 //
5603 // Idx1 = shuffle Idx1, Idx2
5604 // Idx1 = shuffle Idx1, Idx3
5605 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
5606 if (!V)
5607 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5608 (Idx1 % NumOfSrcRegs) * NumOpElts);
5609 if (Idx2 != UINT_MAX) {
5610 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
5611 if (!V)
5612 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5613 (Idx2 % NumOfSrcRegs) * NumOpElts);
5614 }
5615 }
5616 SDValue V;
5617 for (const auto &[Idx1, Idx2, Mask] : Data) {
5618 SDValue V1 = Values.at(Idx1);
5619 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5620 V = PerformShuffle(V1, V2, Mask);
5621 Values[Idx1] = V;
5622 }
5623
5624 unsigned InsertIdx = I * NumOpElts;
5625 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5626 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
5627 }
5628 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5629}
5630
5631// Matches a subset of compress masks with a contiguous prefix of output
5632// elements. This could be extended to allow gaps by deciding which
5633// source elements to spuriously demand.
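// For example, <0, 2, 3, -1> is accepted: source elements 0, 2 and 3 are
// packed into a contiguous prefix, which maps onto a vcompress with the
// element mask <1, 0, 1, 1>.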
5634static bool isCompressMask(ArrayRef<int> Mask) {
5635 int Last = -1;
5636 bool SawUndef = false;
5637 for (const auto &[Idx, M] : enumerate(Mask)) {
5638 if (M == -1) {
5639 SawUndef = true;
5640 continue;
5641 }
5642 if (SawUndef)
5643 return false;
5644 if (Idx > (unsigned)M)
5645 return false;
5646 if (M <= Last)
5647 return false;
5648 Last = M;
5649 }
5650 return true;
5651}
5652
5653/// Given a shuffle where the indices are disjoint between the two sources,
5654/// e.g.:
5655///
5656/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5657///
5658/// Merge the two sources into one and do a single source shuffle:
5659///
5660/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5661/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5662///
5663/// A vselect will either be merged into a masked instruction or be lowered as a
5664/// vmerge.vvm, which is cheaper than a vrgather.vv.
5665static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5666 SelectionDAG &DAG,
5667 const RISCVSubtarget &Subtarget) {
5668 MVT VT = SVN->getSimpleValueType(0);
5669 MVT XLenVT = Subtarget.getXLenVT();
5670 SDLoc DL(SVN);
5671
5672 const ArrayRef<int> Mask = SVN->getMask();
5673
5674 // Work out which source each lane will come from.
5675 SmallVector<int, 16> Srcs(Mask.size(), -1);
5676
5677 for (int Idx : Mask) {
5678 if (Idx == -1)
5679 continue;
5680 unsigned SrcIdx = Idx % Mask.size();
5681 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5682 if (Srcs[SrcIdx] == -1)
5683 // Mark this source as using this lane.
5684 Srcs[SrcIdx] = Src;
5685 else if (Srcs[SrcIdx] != Src)
5686 // The other source is using this lane: not disjoint.
5687 return SDValue();
5688 }
5689
5690 SmallVector<SDValue> SelectMaskVals;
5691 for (int Lane : Srcs) {
5692 if (Lane == -1)
5693 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5694 else
5695 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5696 }
5697 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5698 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5699 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5700 SVN->getOperand(0), SVN->getOperand(1));
5701
5702 // Move all indices relative to the first source.
5703 SmallVector<int> NewMask(Mask.size());
5704 for (unsigned I = 0; I < Mask.size(); I++) {
5705 if (Mask[I] == -1)
5706 NewMask[I] = -1;
5707 else
5708 NewMask[I] = Mask[I] % Mask.size();
5709 }
5710
5711 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5712}
5713
5714/// Is this mask local (i.e. elements only move within their local span), and
5715/// repeating (that is, the same rearrangement is being done within each span)?
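/// For example, with Span=4 the mask <1, 0, 3, 2, 5, 4, 7, 6> is local and
/// repeating: each span of 4 applies the same swap of adjacent elements.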
5716static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5717 // Require a prefix from the original mask until the consumer code
5718 // is adjusted to rewrite the mask instead of just taking a prefix.
5719 for (auto [I, M] : enumerate(Mask)) {
5720 if (M == -1)
5721 continue;
5722 if ((M / Span) != (int)(I / Span))
5723 return false;
5724 int SpanIdx = I % Span;
5725 int Expected = M % Span;
5726 if (Mask[SpanIdx] != Expected)
5727 return false;
5728 }
5729 return true;
5730}
5731
5732/// Is this mask only using elements from the first span of the input?
5733static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
5734 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
5735}
5736
5737/// Return true for a mask which performs an arbitrary shuffle within the first
5738/// span, and then repeats that same result across all remaining spans. Note
5739/// that this doesn't check if all the inputs come from a single span!
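/// For example, with Span=4 the mask <3, 3, 0, 1, 3, 3, 0, 1> repeats the
/// first span's rearrangement in every span.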
5740static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
5741 // Require a prefix from the original mask until the consumer code
5742 // is adjusted to rewrite the mask instead of just taking a prefix.
5743 for (auto [I, M] : enumerate(Mask)) {
5744 if (M == -1)
5745 continue;
5746 int SpanIdx = I % Span;
5747 if (Mask[SpanIdx] != M)
5748 return false;
5749 }
5750 return true;
5751}
5752
5753/// Try to widen element type to get a new mask value for a better permutation
5754/// sequence. This doesn't try to inspect the widened mask for profitability;
5755/// we speculate the widened form is equal or better. This has the effect of
5756/// reducing mask constant sizes - allowing cheaper materialization sequences
5757/// - and index sequence sizes - reducing register pressure and materialization
5758/// cost, at the cost of (possibly) an extra VTYPE toggle.
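/// For example, a v8i8 shuffle with mask <2, 3, 0, 1, 6, 7, 4, 5> widens to a
/// v4i16 shuffle with mask <1, 0, 3, 2>.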
5759static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5760 SDLoc DL(Op);
5761 MVT VT = Op.getSimpleValueType();
5762 MVT ScalarVT = VT.getVectorElementType();
5763 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5764 SDValue V0 = Op.getOperand(0);
5765 SDValue V1 = Op.getOperand(1);
5766 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5767
5768 // Avoid wasted work leading to isTypeLegal check failing below
5769 if (ElementSize > 32)
5770 return SDValue();
5771
5772 SmallVector<int, 8> NewMask;
5773 if (!widenShuffleMaskElts(Mask, NewMask))
5774 return SDValue();
5775
5776 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5777 : MVT::getIntegerVT(ElementSize * 2);
5778 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5779 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5780 return SDValue();
5781 V0 = DAG.getBitcast(NewVT, V0);
5782 V1 = DAG.getBitcast(NewVT, V1);
5783 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5784}
5785
5786static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5787 const RISCVSubtarget &Subtarget) {
5788 SDValue V1 = Op.getOperand(0);
5789 SDValue V2 = Op.getOperand(1);
5790 SDLoc DL(Op);
5791 MVT XLenVT = Subtarget.getXLenVT();
5792 MVT VT = Op.getSimpleValueType();
5793 unsigned NumElts = VT.getVectorNumElements();
5794 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5795
5796 if (VT.getVectorElementType() == MVT::i1) {
5797 // Lower to a vror.vi of a larger element type if possible before we promote
5798 // i1s to i8s.
5799 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5800 return V;
5801 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5802 return V;
5803
5804 // Promote i1 shuffle to i8 shuffle.
5805 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5806 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5807 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5808 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5809 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5810 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5811 ISD::SETNE);
5812 }
5813
5814 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5815
5816 // Store the return value in a single variable instead of structured bindings
5817 // so that we can pass it to GetSlide below, which cannot capture structured
5818 // bindings until C++20.
5819 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5820 auto [TrueMask, VL] = TrueMaskVL;
5821
5822 if (SVN->isSplat()) {
5823 const int Lane = SVN->getSplatIndex();
5824 if (Lane >= 0) {
5825 MVT SVT = VT.getVectorElementType();
5826
5827 // Turn splatted vector load into a strided load with an X0 stride.
5828 SDValue V = V1;
5829 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5830 // with undef.
5831 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5832 int Offset = Lane;
5833 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5834 int OpElements =
5835 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5836 V = V.getOperand(Offset / OpElements);
5837 Offset %= OpElements;
5838 }
5839
5840 // We need to ensure the load isn't atomic or volatile.
5841 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5842 auto *Ld = cast<LoadSDNode>(V);
5843 Offset *= SVT.getStoreSize();
5844 SDValue NewAddr = DAG.getMemBasePlusOffset(
5845 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5846
5847 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5848 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5849 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5850 SDValue IntID =
5851 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5852 SDValue Ops[] = {Ld->getChain(),
5853 IntID,
5854 DAG.getUNDEF(ContainerVT),
5855 NewAddr,
5856 DAG.getRegister(RISCV::X0, XLenVT),
5857 VL};
5858 SDValue NewLoad = DAG.getMemIntrinsicNode(
5859 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5860 DAG.getMachineFunction().getMachineMemOperand(
5861 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5862 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5863 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5864 }
5865
5866 MVT SplatVT = ContainerVT;
5867
5868 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5869 if (SVT == MVT::bf16 ||
5870 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5871 SVT = MVT::i16;
5872 SplatVT = ContainerVT.changeVectorElementType(SVT);
5873 }
5874
5875 // Otherwise use a scalar load and splat. This will give the best
5876 // opportunity to fold a splat into the operation. ISel can turn it into
5877 // the x0 strided load if we aren't able to fold away the select.
5878 if (SVT.isFloatingPoint())
5879 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5880 Ld->getPointerInfo().getWithOffset(Offset),
5881 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
5882 else
5883 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5884 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5885 Ld->getBaseAlign(),
5886 Ld->getMemOperand()->getFlags());
5887 DAG.makeEquivalentMemoryOrdering(Ld, V);
5888
5889 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5890 : RISCVISD::VMV_V_X_VL;
5891 SDValue Splat =
5892 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5893 Splat = DAG.getBitcast(ContainerVT, Splat);
5894 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5895 }
5896
5897 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5898 assert(Lane < (int)NumElts && "Unexpected lane!");
5899 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5900 V1, DAG.getConstant(Lane, DL, XLenVT),
5901 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5902 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5903 }
5904 }
5905
5906 // For exact VLEN m2 or greater, try to split to m1 operations if we
5907 // can split cleanly.
5908 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5909 return V;
5910
5911 ArrayRef<int> Mask = SVN->getMask();
5912
5913 if (SDValue V =
5914 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5915 return V;
5916
5917 if (SDValue V =
5918 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5919 return V;
5920
5921 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5922 // available.
5923 if (Subtarget.hasStdExtZvkb())
5924 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5925 return V;
5926
5927 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
5928 NumElts != 2)
5929 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5930
5931 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5932 // use shift and truncate to perform the shuffle.
5933 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5934 // shift-and-trunc reducing total cost for everything except an mf8 result.
5935 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5936 // to do the entire operation.
5937 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5938 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5939 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5940 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5941 unsigned Index = 0;
5942 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5943 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5944 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
5945 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5946 if (1 < count_if(Mask,
5947 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5948 1 < count_if(Mask, [&Mask](int Idx) {
5949 return Idx >= (int)Mask.size();
5950 })) {
5951 // Narrow each source and concatenate them.
5952 // FIXME: For small LMUL it is better to concatenate first.
5953 MVT EltVT = VT.getVectorElementType();
5954 auto EltCnt = VT.getVectorElementCount();
5955 MVT SubVT =
5956 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
5957
5958 SDValue Lo =
5959 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
5960 SDValue Hi =
5961 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
5962
5963 SDValue Concat =
5964 DAG.getNode(ISD::CONCAT_VECTORS, DL,
5965 SubVT.getDoubleNumVectorElementsVT(), Lo, Hi);
5966 if (Factor == 2)
5967 return Concat;
5968
5969 SDValue Vec = DAG.getUNDEF(VT);
5970 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
5971 }
5972 }
5973 }
5974 }
5975
5976 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
5977 // e64 which can't match above.
5978 unsigned Index = 0;
5979 if (Subtarget.hasVendorXRivosVizip() &&
5980 ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
5981 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5982 unsigned Opc =
5983 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
5984 if (V2.isUndef())
5985 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
5986 if (auto VLEN = Subtarget.getRealVLen();
5987 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
5988 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
5989 if (SDValue Src = foldConcatVector(V1, V2)) {
5990 EVT NewVT = VT.getDoubleNumVectorElementsVT();
5991 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
5992 SDValue Res =
5993 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
5994 return DAG.getExtractSubvector(DL, VT, Res, 0);
5995 }
5996 // Deinterleave each source and concatenate them, or concat first, then
5997 // deinterleave.
5998 if (1 < count_if(Mask,
5999 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6000 1 < count_if(Mask,
6001 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
6002
6003 const unsigned EltSize = VT.getScalarSizeInBits();
6004 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
6005 if (NumElts < MinVLMAX) {
6006 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
6007 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
6008 SDValue Res =
6009 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
6010 return DAG.getExtractSubvector(DL, VT, Res, 0);
6011 }
6012
6013 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6014 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6015
6016 MVT SubVT = VT.getHalfNumVectorElementsVT();
6017 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
6018 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
6019 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
6020 }
6021 }
6022
6023 if (SDValue V =
6024 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
6025 return V;
6026
6027 // Detect an interleave shuffle and lower to
6028 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
6029 int EvenSrc, OddSrc;
6030 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
6031 !(NumElts == 2 &&
6032 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
6033 // Extract the halves of the vectors.
6034 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6035
6036 // Recognize if one half is actually undef; the matching above will
6037 // otherwise reuse the even stream for the undef one. This improves
6038 // spread(2) shuffles.
6039 bool LaneIsUndef[2] = { true, true};
6040 for (const auto &[Idx, M] : enumerate(Mask))
6041 LaneIsUndef[Idx % 2] &= (M == -1);
6042
6043 int Size = Mask.size();
6044 SDValue EvenV, OddV;
6045 if (LaneIsUndef[0]) {
6046 EvenV = DAG.getUNDEF(HalfVT);
6047 } else {
6048 assert(EvenSrc >= 0 && "Undef source?");
6049 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
6050 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
6051 }
6052
6053 if (LaneIsUndef[1]) {
6054 OddV = DAG.getUNDEF(HalfVT);
6055 } else {
6056 assert(OddSrc >= 0 && "Undef source?");
6057 OddV = (OddSrc / Size) == 0 ? V1 : V2;
6058 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
6059 }
6060
6061 // Prefer vzip2a if available.
6062 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
6063 if (Subtarget.hasVendorXRivosVizip()) {
6064 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6065 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6066 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6067 }
6068 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6069 }
6070
6071 // Recognize a pattern which can be handled via a pair of vslideup/vslidedown
6072 // instructions (in any combination) with masking on the second instruction.
6073 // Also handles masked slides into an identity source, and single slides
6074 // without masking. Avoid matching bit rotates (which are not also element
6075 // rotates) as slide pairs. This is a performance heuristic, not a
6076 // functional check.
6077 std::array<std::pair<int, int>, 2> SrcInfo;
6078 unsigned RotateAmt;
6079 MVT RotateVT;
6080 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6081 (isElementRotate(SrcInfo, NumElts) ||
6082 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6083 SDValue Sources[2];
6084 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6085 int SrcIdx = Info.first;
6086 assert(SrcIdx == 0 || SrcIdx == 1);
6087 SDValue &Src = Sources[SrcIdx];
6088 if (!Src) {
6089 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6090 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6091 }
6092 return Src;
6093 };
6094 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6095 SDValue Passthru) {
6096 auto [TrueMask, VL] = TrueMaskVL;
6097 SDValue SrcV = GetSourceFor(Src);
6098 int SlideAmt = Src.second;
6099 if (SlideAmt == 0) {
6100 // Should never be the second operation
6101 assert(Mask == TrueMask);
6102 return SrcV;
6103 }
6104 if (SlideAmt < 0)
6105 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6106 DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6107 RISCVVType::TAIL_AGNOSTIC);
6108 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6109 DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6110 RISCVVType::TAIL_AGNOSTIC);
6111 };
6112
6113 if (SrcInfo[1].first == -1) {
6114 SDValue Res = DAG.getUNDEF(ContainerVT);
6115 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6116 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6117 }
6118
6119 if (Subtarget.hasVendorXRivosVizip()) {
6120 bool TryWiden = false;
6121 unsigned Factor;
6122 if (isZipEven(SrcInfo, Mask, Factor)) {
6123 if (Factor == 1) {
6124 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6125 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6126 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6127 Subtarget);
6128 }
6129 TryWiden = true;
6130 }
6131 if (isZipOdd(SrcInfo, Mask, Factor)) {
6132 if (Factor == 1) {
6133 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6134 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6135 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6136 Subtarget);
6137 }
6138 TryWiden = true;
6139 }
6140 // If we found a widening opportunity which would let us form a
6141 // zipeven or zipodd, use the generic code to widen the shuffle
6142 // and recurse through this logic.
6143 if (TryWiden)
6144 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6145 return V;
6146 }
6147
6148 // Build the mask. Note that vslideup unconditionally preserves elements
6149 // below the slide amount in the destination, and thus those elements are
6150 // undefined in the mask. If the mask ends up all true (or undef), it
6151 // will be folded away by general logic.
6152 SmallVector<SDValue> MaskVals;
6153 for (const auto &[Idx, M] : enumerate(Mask)) {
6154 if (M < 0 ||
6155 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6156 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6157 continue;
6158 }
6159 int Src = M >= (int)NumElts;
6160 int Diff = (int)Idx - (M % NumElts);
6161 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6162 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6163 "Must match exactly one of the two slides");
6164 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6165 }
6166 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6167 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6168 SDValue SelectMask = convertToScalableVector(
6169 ContainerVT.changeVectorElementType(MVT::i1),
6170 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6171
6172 SDValue Res = DAG.getUNDEF(ContainerVT);
6173 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6174 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6175 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6176 }
6177
6178 // Handle any remaining single source shuffles
6179 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6180 if (V2.isUndef()) {
6181 // We might be able to express the shuffle as a bitrotate. But even if we
6182 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6183 // shifts and a vor will have a higher throughput than a vrgather.
6184 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6185 return V;
6186
6187 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6188 return V;
6189
6190 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6191 // is fully covered in interleave(2) above, so it is ignored here.
6192 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6193 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6194 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6195 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6196 unsigned Index;
6197 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6198 MVT NarrowVT =
6199 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6200 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6201 return getWideningSpread(Src, Factor, Index, DL, DAG);
6202 }
6203 }
6204 }
6205
6206 // If only a prefix of the source elements influence a prefix of the
6207 // destination elements, try to see if we can reduce the required LMUL
6208 unsigned MinVLen = Subtarget.getRealMinVLen();
6209 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6210 if (NumElts > MinVLMAX) {
6211 unsigned MaxIdx = 0;
6212 for (auto [I, M] : enumerate(Mask)) {
6213 if (M == -1)
6214 continue;
6215 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6216 }
6217 unsigned NewNumElts =
6218 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6219 if (NewNumElts != NumElts) {
6220 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6221 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6222 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6223 Mask.take_front(NewNumElts));
6224 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6225 }
6226 }
6227
6228 // Before hitting generic lowering fallbacks, try to widen the mask
6229 // to a wider SEW.
6230 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6231 return V;
6232
6233 // Can we generate a vcompress instead of a vrgather? These scale better
6234 // at high LMUL, at the cost of not being able to fold a following select
6235 // into them. The mask constants are also smaller than the index vector
6236 // constants, and thus easier to materialize.
6237 if (isCompressMask(Mask)) {
6238 SmallVector<SDValue> MaskVals(NumElts,
6239 DAG.getConstant(false, DL, XLenVT));
6240 for (auto Idx : Mask) {
6241 if (Idx == -1)
6242 break;
6243 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6244 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6245 }
6246 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6247 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6248 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6249 DAG.getUNDEF(VT));
6250 }
6251
6252 if (VT.getScalarSizeInBits() == 8 &&
6253 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6254 // On such a vector we're unable to use i8 as the index type.
6255 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6256 // may involve vector splitting if we're already at LMUL=8, or our
6257 // user-supplied maximum fixed-length LMUL.
6258 return SDValue();
6259 }
6260
6261 // Base case for the two operand recursion below - handle the worst case
6262 // single source shuffle.
6263 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6264 MVT IndexVT = VT.changeTypeToInteger();
6265 // Since we can't introduce illegal index types at this stage, use i16 and
6266 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6267 // than XLenVT.
6268 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6269 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6270 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6271 }
6272
6273 // If the mask allows, we can do all the index computation in 16 bits. This
6274 // requires less work and less register pressure at high LMUL, and creates
6275 // smaller constants which may be cheaper to materialize.
6276 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6277 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6278 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6279 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6280 }
6281
6282 MVT IndexContainerVT =
6283 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6284
6285 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6286 SmallVector<SDValue> GatherIndicesLHS;
6287 for (int MaskIndex : Mask) {
6288 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6289 GatherIndicesLHS.push_back(IsLHSIndex
6290 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6291 : DAG.getUNDEF(XLenVT));
6292 }
6293 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6294 LHSIndices =
6295 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6296 // At m1 and less, there's no point trying any of the high LMUL splitting
6297 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6298 if (NumElts <= MinVLMAX) {
6299 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6300 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6301 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6302 }
6303
6304 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6305 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6306 auto [InnerTrueMask, InnerVL] =
6307 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6308 int N =
6309 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6310 assert(isPowerOf2_32(N) && N <= 8);
6311
6312 // If we have a locally repeating mask, then we can reuse the first
6313 // register in the index register group for all registers within the
6314 // source register group. TODO: This generalizes to m2, and m4.
6315 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6316 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6317 SDValue Gather = DAG.getUNDEF(ContainerVT);
6318 for (int i = 0; i < N; i++) {
6319 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6320 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6321 SDValue SubVec =
6322 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6323 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6324 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6325 }
6326 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6327 }
6328
6329 // If we have a shuffle which only uses the first register in our source
6330 // register group, and repeats the same index across all spans, we can
6331 // use a single vrgather (and possibly some register moves).
6332 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6333 // which we can do a linear number of shuffles to form an m1 which
6334 // contains all the output elements.
6335 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6336 isSpanSplatShuffle(Mask, MinVLMAX)) {
6337 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6338 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6339 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6340 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6341 SDValue Gather = DAG.getUNDEF(ContainerVT);
6342 for (int i = 0; i < N; i++)
6343 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6344 M1VT.getVectorMinNumElements() * i);
6345 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6346 }
6347
6348 // If we have a shuffle which only uses the first register in our
6349 // source register group, we can do a linear number of m1 vrgathers
6350 // reusing the same source register (but with different indices)
6351 // TODO: This can be generalized for m2 or m4, or for any shuffle
6352 // for which we can do a vslidedown followed by this expansion.
6353 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6354 SDValue SlideAmt =
6355 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6356 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6357 SDValue Gather = DAG.getUNDEF(ContainerVT);
6358 for (int i = 0; i < N; i++) {
6359 if (i != 0)
6360 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6361 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6362 SlideAmt, TrueMask, VL);
6363 SDValue SubIndex =
6364 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6365 SDValue SubVec =
6366 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6367 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6368 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6369 M1VT.getVectorMinNumElements() * i);
6370 }
6371 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6372 }
6373
6374 // Fallback to generic vrgather if we can't find anything better.
6375 // On many machines, this will be O(LMUL^2).
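  // (A whole-register vrgather.vv at LMUL=N allows every destination register
  // to read from any of the N source registers, which is why many
  // implementations take on the order of N^2 register-sized steps.)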
6376 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6377 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6378 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6379 }
6380
6381 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6382 // merged with a second vrgather.
6383 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6384
6385 // Now construct the masks used by the blended vrgather operation: one set
6386 // of indices into each source vector.
6387 for (int MaskIndex : Mask) {
6388 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6389 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6390 ? MaskIndex : -1);
6391 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6392 }
6393
6394 // If the mask indices are disjoint between the two sources, we can lower it
6395 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6396 // operands may end up being lowered to something cheaper than a vrgather.vv.
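  // For example, with 4 elements the mask <2,7,1,4> reads lanes {1,2} of the
  // first source and lanes {0,3} (modulo 4) of the second; those lane sets are
  // disjoint, so the sources can be merged lane-wise with a vselect and the
  // result handled as a single-source vrgather.vv.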
6397 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6398 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6399 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6400 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6401 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6402 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6403 return V;
6404
6405 // Before hitting generic lowering fallbacks, try to widen the mask
6406 // to a wider SEW.
6407 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6408 return V;
6409
6410 // Try to pick a profitable operand order.
6411 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6412 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6413
6414 // Recursively invoke lowering for each operand if we had two
6415 // independent single source shuffles, and then combine the result via a
6416 // vselect. Note that the vselect will likely be folded back into the
6417 // second permute (vrgather, or other) by the post-isel combine.
6418 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6419 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6420
6421 SmallVector<SDValue> MaskVals;
6422 for (int MaskIndex : Mask) {
6423 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6424 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6425 }
6426
6427 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6428 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6429 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6430
6431 if (SwapOps)
6432 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6433 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6434}
6435
6436 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6437 // Only support legal VTs for other shuffles for now.
6438 if (!isTypeLegal(VT))
6439 return false;
6440
6441 // Support splats for any type. These should type legalize well.
6442 if (ShuffleVectorSDNode::isSplatMask(M))
6443 return true;
6444
6445 const unsigned NumElts = M.size();
6446 MVT SVT = VT.getSimpleVT();
6447
6448 // Not for i1 vectors.
6449 if (SVT.getScalarType() == MVT::i1)
6450 return false;
6451
6452 std::array<std::pair<int, int>, 2> SrcInfo;
6453 int Dummy1, Dummy2;
6454 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6455 (::isMaskedSlidePair(M, SrcInfo) &&
6456 isElementRotate(SrcInfo, NumElts)) ||
6457 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6458}
6459
6460// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
6461// the exponent.
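// For example, for an i32 element with value 8, the unsigned-to-float
// conversion yields a biased f32 exponent of 130 (127 + 3); shifting the
// exponent down to the LSB and removing the bias recovers log2(8) = 3, from
// which the trailing- and leading-zero counts are derived below.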
6462SDValue
6463RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
6464 SelectionDAG &DAG) const {
6465 MVT VT = Op.getSimpleValueType();
6466 unsigned EltSize = VT.getScalarSizeInBits();
6467 SDValue Src = Op.getOperand(0);
6468 SDLoc DL(Op);
6469 MVT ContainerVT = VT;
6470
6471 SDValue Mask, VL;
6472 if (Op->isVPOpcode()) {
6473 Mask = Op.getOperand(1);
6474 if (VT.isFixedLengthVector())
6475 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6476 Subtarget);
6477 VL = Op.getOperand(2);
6478 }
6479
6480 // We choose an FP type that can represent the value exactly if possible.
6481 // Otherwise, we use a round-towards-zero conversion to keep the exponent of the result correct.
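  // e.g. round-to-nearest would convert the i64 value 0x7FFFFFFFFFFFFFFF up to
  // 2^63, giving exponent 63 instead of floor(log2(Src)) = 62 and thus a wrong
  // count; round-towards-zero avoids this.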
6482 // TODO: Use f16 for i8 when possible?
6483 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
6484 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
6485 FloatEltVT = MVT::f32;
6486 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
6487
6488 // Legal types should have been checked in the RISCVTargetLowering
6489 // constructor.
6490 // TODO: Splitting may make sense in some cases.
6491 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
6492 "Expected legal float type!");
6493
6494 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
6495 // The trailing zero count is equal to log2 of this single bit value.
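  // e.g. Src = 0b01101000: Src & -Src = 0b00001000, and log2 of that single
  // bit (3) is exactly the trailing zero count of Src.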
6496 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
6497 SDValue Neg = DAG.getNegative(Src, DL, VT);
6498 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
6499 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
6500 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
6501 Src, Mask, VL);
6502 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
6503 }
6504
6505 // We have a legal FP type, convert to it.
6506 SDValue FloatVal;
6507 if (FloatVT.bitsGT(VT)) {
6508 if (Op->isVPOpcode())
6509 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
6510 else
6511 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
6512 } else {
6513 // Use RTZ to avoid rounding influencing exponent of FloatVal.
6514 if (VT.isFixedLengthVector()) {
6515 ContainerVT = getContainerForFixedLengthVector(VT);
6516 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6517 }
6518 if (!Op->isVPOpcode())
6519 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6520 SDValue RTZRM =
6521 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
6522 MVT ContainerFloatVT =
6523 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
6524 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
6525 Src, Mask, RTZRM, VL);
6526 if (VT.isFixedLengthVector())
6527 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
6528 }
6529 // Bitcast to integer and shift the exponent to the LSB.
6530 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
6531 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
6532 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
6533
6534 SDValue Exp;
6535 // Restore back to original type. Truncation after SRL is to generate vnsrl.
6536 if (Op->isVPOpcode()) {
6537 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
6538 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
6539 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
6540 } else {
6541 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
6542 DAG.getConstant(ShiftAmt, DL, IntVT));
6543 if (IntVT.bitsLT(VT))
6544 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
6545 else if (IntVT.bitsGT(VT))
6546 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
6547 }
6548
6549 // The exponent contains log2 of the value in biased form.
6550 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
6551 // For trailing zeros, we just need to subtract the bias.
6552 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
6553 return DAG.getNode(ISD::SUB, DL, VT, Exp,
6554 DAG.getConstant(ExponentBias, DL, VT));
6555 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
6556 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
6557 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
6558
6559 // For leading zeros, we need to remove the bias and convert from log2 to
6560 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
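  // e.g. for i32 elements converted via f32: ctlz(x) = 31 - floor(log2(x))
  //    = (127 + 31) - BiasedExp, i.e. Adjust - Exp with Adjust computed below.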
6561 unsigned Adjust = ExponentBias + (EltSize - 1);
6562 SDValue Res;
6563 if (Op->isVPOpcode())
6564 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
6565 Mask, VL);
6566 else
6567 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
6568
6569 // For a zero input, the result above equals Adjust, which is greater than
6570 // EltSize. Hence, we can take min(Res, EltSize) to implement CTLZ.
6571 if (Op.getOpcode() == ISD::CTLZ)
6572 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
6573 else if (Op.getOpcode() == ISD::VP_CTLZ)
6574 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
6575 DAG.getConstant(EltSize, DL, VT), Mask, VL);
6576 return Res;
6577}
6578
6579SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6580 SelectionDAG &DAG) const {
6581 SDLoc DL(Op);
6582 MVT XLenVT = Subtarget.getXLenVT();
6583 SDValue Source = Op->getOperand(0);
6584 MVT SrcVT = Source.getSimpleValueType();
6585 SDValue Mask = Op->getOperand(1);
6586 SDValue EVL = Op->getOperand(2);
6587
6588 if (SrcVT.isFixedLengthVector()) {
6589 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
6590 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
6591 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6592 Subtarget);
6593 SrcVT = ContainerVT;
6594 }
6595
6596 // Convert to boolean vector.
6597 if (SrcVT.getScalarType() != MVT::i1) {
6598 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
6599 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
6600 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
6601 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
6602 DAG.getUNDEF(SrcVT), Mask, EVL});
6603 }
6604
6605 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
6606 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6607 // In this case, we can interpret poison as -1, so there is nothing further to do.
6608 return Res;
6609
6610 // Convert -1 to VL.
6611 SDValue SetCC =
6612 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
6613 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
6614 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
6615}
6616
6617// While RVV has alignment restrictions, we should always be able to load as a
6618// legal equivalently-sized byte-typed vector instead. This method is
6619// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
6620// the load is already correctly-aligned, it returns SDValue().
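// For example, a <vscale x 4 x i16> load with 1-byte alignment is re-expressed
// as an equally-sized <vscale x 8 x i8> load followed by a bitcast back to the
// original type.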
6621SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6622 SelectionDAG &DAG) const {
6623 auto *Load = cast<LoadSDNode>(Op);
6624 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6625
6626 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6627 Load->getMemoryVT(),
6628 *Load->getMemOperand()))
6629 return SDValue();
6630
6631 SDLoc DL(Op);
6632 MVT VT = Op.getSimpleValueType();
6633 unsigned EltSizeBits = VT.getScalarSizeInBits();
6634 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6635 "Unexpected unaligned RVV load type");
6636 MVT NewVT =
6637 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6638 assert(NewVT.isValid() &&
6639 "Expecting equally-sized RVV vector types to be legal");
6640 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6641 Load->getPointerInfo(), Load->getBaseAlign(),
6642 Load->getMemOperand()->getFlags());
6643 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6644}
6645
6646// While RVV has alignment restrictions, we should always be able to store as a
6647// legal equivalently-sized byte-typed vector instead. This method is
6648// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
6649// returns SDValue() if the store is already correctly aligned.
6650SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6651 SelectionDAG &DAG) const {
6652 auto *Store = cast<StoreSDNode>(Op);
6653 assert(Store && Store->getValue().getValueType().isVector() &&
6654 "Expected vector store");
6655
6656 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6657 Store->getMemoryVT(),
6658 *Store->getMemOperand()))
6659 return SDValue();
6660
6661 SDLoc DL(Op);
6662 SDValue StoredVal = Store->getValue();
6663 MVT VT = StoredVal.getSimpleValueType();
6664 unsigned EltSizeBits = VT.getScalarSizeInBits();
6665 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6666 "Unexpected unaligned RVV store type");
6667 MVT NewVT =
6668 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6669 assert(NewVT.isValid() &&
6670 "Expecting equally-sized RVV vector types to be legal");
6671 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6672 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
6673 Store->getPointerInfo(), Store->getBaseAlign(),
6674 Store->getMemOperand()->getFlags());
6675}
6676
6677 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
6678 const RISCVSubtarget &Subtarget) {
6679 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
6680
6681 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
6682
6683 // All simm32 constants should be handled by isel.
6684 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
6685 // this check redundant, but small immediates are common, so checking them
6686 // here first gives better compile time.
6687 if (isInt<32>(Imm))
6688 return Op;
6689
6690 // We only need to cost the immediate if constant pool lowering is enabled.
6691 if (!Subtarget.useConstantPoolForLargeInts())
6692 return Op;
6693
6694 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
6695 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6696 return Op;
6697
6698 // Optimizations below are disabled for opt size. If we're optimizing for
6699 // size, use a constant pool.
6700 if (DAG.shouldOptForSize())
6701 return SDValue();
6702
6703 // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
6704 // do that if it avoids a constant pool, even though it requires an extra
6705 // temporary register.
6706 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6707 // the low and high 32 bits are the same and bits 31 and 63 are set.
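  // For example (illustrative values only), 0x0003000300030003 = (X << 32) + X
  // with X = 0x00030003; building X takes LUI+ADDI, so the whole constant costs
  // four instructions plus a temporary register instead of a constant pool load.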
6708 unsigned ShiftAmt, AddOpc;
6709 RISCVMatInt::InstSeq SeqLo =
6710 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6711 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6712 return Op;
6713
6714 return SDValue();
6715}
6716
6717SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6718 SelectionDAG &DAG) const {
6719 MVT VT = Op.getSimpleValueType();
6720 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6721
6722 // Can this constant be selected by a Zfa FLI instruction?
6723 bool Negate = false;
6724 int Index = getLegalZfaFPImm(Imm, VT);
6725
6726 // If the constant is negative, try negating.
6727 if (Index < 0 && Imm.isNegative()) {
6728 Index = getLegalZfaFPImm(-Imm, VT);
6729 Negate = true;
6730 }
6731
6732 // If we couldn't find a FLI lowering, fall back to generic code.
6733 if (Index < 0)
6734 return SDValue();
6735
6736 // Emit an FLI+FNEG. We use a custom node to hide it from constant folding.
6737 SDLoc DL(Op);
6738 SDValue Const =
6739 DAG.getNode(RISCVISD::FLI, DL, VT,
6740 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6741 if (!Negate)
6742 return Const;
6743
6744 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6745}
6746
6747 static SDValue LowerPREFETCH(SDValue Op, const RISCVSubtarget &Subtarget,
6748 SelectionDAG &DAG) {
6749
6750 unsigned IsData = Op.getConstantOperandVal(4);
6751
6752 // The MIPS P8700 (XMIPSCBOP) only supports data prefetches for now, so drop any other prefetch.
6753 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
6754 return Op.getOperand(0);
6755 return Op;
6756}
6757
6758 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6759 const RISCVSubtarget &Subtarget) {
6760 SDLoc dl(Op);
6761 AtomicOrdering FenceOrdering =
6762 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6763 SyncScope::ID FenceSSID =
6764 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6765
6766 if (Subtarget.hasStdExtZtso()) {
6767 // The only fence that needs an instruction is a sequentially-consistent
6768 // cross-thread fence.
6769 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6770 FenceSSID == SyncScope::System)
6771 return Op;
6772
6773 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6774 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6775 }
6776
6777 // singlethread fences only synchronize with signal handlers on the same
6778 // thread and thus only need to preserve instruction order, not actually
6779 // enforce memory ordering.
6780 if (FenceSSID == SyncScope::SingleThread)
6781 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6782 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6783
6784 return Op;
6785}
6786
6787SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6788 SelectionDAG &DAG) const {
6789 SDLoc DL(Op);
6790 MVT VT = Op.getSimpleValueType();
6791 MVT XLenVT = Subtarget.getXLenVT();
6792 unsigned Check = Op.getConstantOperandVal(1);
6793 unsigned TDCMask = 0;
6794 if (Check & fcSNan)
6795 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6796 if (Check & fcQNan)
6797 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6798 if (Check & fcPosInf)
6799 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6800 if (Check & fcNegInf)
6801 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6802 if (Check & fcPosNormal)
6803 TDCMask |= RISCV::FPMASK_Positive_Normal;
6804 if (Check & fcNegNormal)
6805 TDCMask |= RISCV::FPMASK_Negative_Normal;
6806 if (Check & fcPosSubnormal)
6807 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6808 if (Check & fcNegSubnormal)
6809 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6810 if (Check & fcPosZero)
6811 TDCMask |= RISCV::FPMASK_Positive_Zero;
6812 if (Check & fcNegZero)
6813 TDCMask |= RISCV::FPMASK_Negative_Zero;
6814
6815 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6816
6817 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6818
6819 if (VT.isVector()) {
6820 SDValue Op0 = Op.getOperand(0);
6821 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6822
6823 if (VT.isScalableVector()) {
6824 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6825 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6826 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6827 Mask = Op.getOperand(2);
6828 VL = Op.getOperand(3);
6829 }
6830 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6831 VL, Op->getFlags());
6832 if (IsOneBitMask)
6833 return DAG.getSetCC(DL, VT, FPCLASS,
6834 DAG.getConstant(TDCMask, DL, DstVT),
6835 ISD::SETEQ);
6836 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6837 DAG.getConstant(TDCMask, DL, DstVT));
6838 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6839 ISD::SETNE);
6840 }
6841
6842 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6843 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6844 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6845 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6846 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6847 Mask = Op.getOperand(2);
6848 MVT MaskContainerVT =
6849 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6850 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6851 VL = Op.getOperand(3);
6852 }
6853 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6854
6855 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6856 Mask, VL, Op->getFlags());
6857
6858 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6859 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6860 if (IsOneBitMask) {
6861 SDValue VMSEQ =
6862 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6863 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6864 DAG.getUNDEF(ContainerVT), Mask, VL});
6865 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6866 }
6867 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6868 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6869
6870 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6871 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6872 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6873
6874 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6875 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6876 DAG.getUNDEF(ContainerVT), Mask, VL});
6877 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6878 }
6879
6880 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6881 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6882 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6883 ISD::SETNE);
6884 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6885}
6886
6887// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6888// operations propagate nans.
6889 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6890 const RISCVSubtarget &Subtarget) {
6891 SDLoc DL(Op);
6892 MVT VT = Op.getSimpleValueType();
6893
6894 SDValue X = Op.getOperand(0);
6895 SDValue Y = Op.getOperand(1);
6896
6897 if (!VT.isVector()) {
6898 MVT XLenVT = Subtarget.getXLenVT();
6899
6900 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6901 // ensures that when one input is a nan, the other will also be a nan,
6902 // allowing the nan to propagate. If both inputs are nan, this will swap the
6903 // inputs, which is harmless.
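    // e.g. X = NaN, Y = 2.0: NewY becomes X, so the fmax/fmin below sees
    // (NaN, NaN) and returns NaN as fmaximum/fminimum require; if neither
    // input is a nan, NewX and NewY are simply X and Y.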
6904
6905 SDValue NewY = Y;
6906 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6907 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6908 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6909 }
6910
6911 SDValue NewX = X;
6912 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6913 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6914 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6915 }
6916
6917 unsigned Opc =
6918 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6919 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6920 }
6921
6922 // Check whether the inputs are known never to be NaN before converting fixed-length vectors to scalable form.
6923 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6924 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6925
6926 MVT ContainerVT = VT;
6927 if (VT.isFixedLengthVector()) {
6928 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6929 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6930 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6931 }
6932
6933 SDValue Mask, VL;
6934 if (Op->isVPOpcode()) {
6935 Mask = Op.getOperand(2);
6936 if (VT.isFixedLengthVector())
6937 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6938 Subtarget);
6939 VL = Op.getOperand(3);
6940 } else {
6941 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6942 }
6943
6944 SDValue NewY = Y;
6945 if (!XIsNeverNan) {
6946 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6947 {X, X, DAG.getCondCode(ISD::SETOEQ),
6948 DAG.getUNDEF(ContainerVT), Mask, VL});
6949 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6950 DAG.getUNDEF(ContainerVT), VL);
6951 }
6952
6953 SDValue NewX = X;
6954 if (!YIsNeverNan) {
6955 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6956 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6957 DAG.getUNDEF(ContainerVT), Mask, VL});
6958 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6959 DAG.getUNDEF(ContainerVT), VL);
6960 }
6961
6962 unsigned Opc =
6963 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6964 ? RISCVISD::VFMAX_VL
6965 : RISCVISD::VFMIN_VL;
6966 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6967 DAG.getUNDEF(ContainerVT), Mask, VL);
6968 if (VT.isFixedLengthVector())
6969 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6970 return Res;
6971}
6972
6973 static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6974 const RISCVSubtarget &Subtarget) {
6975 bool IsFABS = Op.getOpcode() == ISD::FABS;
6976 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6977 "Wrong opcode for lowering FABS or FNEG.");
6978
6979 MVT XLenVT = Subtarget.getXLenVT();
6980 MVT VT = Op.getSimpleValueType();
6981 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6982
6983 SDLoc DL(Op);
6984 SDValue Fmv =
6985 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6986
6987 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6988 Mask = Mask.sext(Subtarget.getXLen());
6989
6990 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6991 SDValue Logic =
6992 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6993 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6994}
6995
6996 static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
6997 const RISCVSubtarget &Subtarget) {
6998 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6999
7000 MVT XLenVT = Subtarget.getXLenVT();
7001 MVT VT = Op.getSimpleValueType();
7002 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7003
7004 SDValue Mag = Op.getOperand(0);
7005 SDValue Sign = Op.getOperand(1);
7006
7007 SDLoc DL(Op);
7008
7009 // Get sign bit into an integer value.
7010 unsigned SignSize = Sign.getValueSizeInBits();
7011 SDValue SignAsInt = [&]() {
7012 if (SignSize == Subtarget.getXLen())
7013 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
7014 switch (SignSize) {
7015 case 16:
7016 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
7017 case 32:
7018 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
7019 case 64: {
7020 assert(XLenVT == MVT::i32 && "Unexpected type");
7021 // Copy the upper word to integer.
7022 SignSize = 32;
7023 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
7024 .getValue(1);
7025 }
7026 default:
7027 llvm_unreachable("Unexpected sign size");
7028 }
7029 }();
7030
7031 // Get the signbit at the right position for MagAsInt.
7032 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
7033 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
7034 SignAsInt,
7035 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
7036
7037 // Mask the sign bit and any bits above it. The extra bits will be dropped
7038 // when we convert back to FP.
7039 SDValue SignMask = DAG.getConstant(
7040 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
7041 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
7042
7043 // Transform Mag value to integer, and clear the sign bit.
7044 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
7045 SDValue ClearSignMask = DAG.getConstant(
7046 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
7047 SDValue ClearedSign =
7048 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
7049
7050 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
7051 SDNodeFlags::Disjoint);
7052
7053 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
7054}
7055
7056/// Get a RISC-V target specified VL op for a given SDNode.
7057static unsigned getRISCVVLOp(SDValue Op) {
7058#define OP_CASE(NODE) \
7059 case ISD::NODE: \
7060 return RISCVISD::NODE##_VL;
7061#define VP_CASE(NODE) \
7062 case ISD::VP_##NODE: \
7063 return RISCVISD::NODE##_VL;
7064 // clang-format off
7065 switch (Op.getOpcode()) {
7066 default:
7067 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7068 OP_CASE(ADD)
7069 OP_CASE(SUB)
7070 OP_CASE(MUL)
7071 OP_CASE(MULHS)
7072 OP_CASE(MULHU)
7073 OP_CASE(SDIV)
7074 OP_CASE(SREM)
7075 OP_CASE(UDIV)
7076 OP_CASE(UREM)
7077 OP_CASE(SHL)
7078 OP_CASE(SRA)
7079 OP_CASE(SRL)
7080 OP_CASE(ROTL)
7081 OP_CASE(ROTR)
7082 OP_CASE(BSWAP)
7083 OP_CASE(CTTZ)
7084 OP_CASE(CTLZ)
7085 OP_CASE(CTPOP)
7086 OP_CASE(BITREVERSE)
7087 OP_CASE(SADDSAT)
7088 OP_CASE(UADDSAT)
7089 OP_CASE(SSUBSAT)
7090 OP_CASE(USUBSAT)
7091 OP_CASE(AVGFLOORS)
7092 OP_CASE(AVGFLOORU)
7093 OP_CASE(AVGCEILS)
7094 OP_CASE(AVGCEILU)
7095 OP_CASE(FADD)
7096 OP_CASE(FSUB)
7097 OP_CASE(FMUL)
7098 OP_CASE(FDIV)
7099 OP_CASE(FNEG)
7100 OP_CASE(FABS)
7101 OP_CASE(FCOPYSIGN)
7102 OP_CASE(FSQRT)
7103 OP_CASE(SMIN)
7104 OP_CASE(SMAX)
7105 OP_CASE(UMIN)
7106 OP_CASE(UMAX)
7107 OP_CASE(STRICT_FADD)
7108 OP_CASE(STRICT_FSUB)
7109 OP_CASE(STRICT_FMUL)
7110 OP_CASE(STRICT_FDIV)
7111 OP_CASE(STRICT_FSQRT)
7112 VP_CASE(ADD) // VP_ADD
7113 VP_CASE(SUB) // VP_SUB
7114 VP_CASE(MUL) // VP_MUL
7115 VP_CASE(SDIV) // VP_SDIV
7116 VP_CASE(SREM) // VP_SREM
7117 VP_CASE(UDIV) // VP_UDIV
7118 VP_CASE(UREM) // VP_UREM
7119 VP_CASE(SHL) // VP_SHL
7120 VP_CASE(FADD) // VP_FADD
7121 VP_CASE(FSUB) // VP_FSUB
7122 VP_CASE(FMUL) // VP_FMUL
7123 VP_CASE(FDIV) // VP_FDIV
7124 VP_CASE(FNEG) // VP_FNEG
7125 VP_CASE(FABS) // VP_FABS
7126 VP_CASE(SMIN) // VP_SMIN
7127 VP_CASE(SMAX) // VP_SMAX
7128 VP_CASE(UMIN) // VP_UMIN
7129 VP_CASE(UMAX) // VP_UMAX
7130 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7131 VP_CASE(SETCC) // VP_SETCC
7132 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7133 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7134 VP_CASE(BITREVERSE) // VP_BITREVERSE
7135 VP_CASE(SADDSAT) // VP_SADDSAT
7136 VP_CASE(UADDSAT) // VP_UADDSAT
7137 VP_CASE(SSUBSAT) // VP_SSUBSAT
7138 VP_CASE(USUBSAT) // VP_USUBSAT
7139 VP_CASE(BSWAP) // VP_BSWAP
7140 VP_CASE(CTLZ) // VP_CTLZ
7141 VP_CASE(CTTZ) // VP_CTTZ
7142 VP_CASE(CTPOP) // VP_CTPOP
7143 case ISD::CTLZ_ZERO_UNDEF:
7144 case ISD::VP_CTLZ_ZERO_UNDEF:
7145 return RISCVISD::CTLZ_VL;
7146 case ISD::CTTZ_ZERO_UNDEF:
7147 case ISD::VP_CTTZ_ZERO_UNDEF:
7148 return RISCVISD::CTTZ_VL;
7149 case ISD::FMA:
7150 case ISD::VP_FMA:
7151 return RISCVISD::VFMADD_VL;
7152 case ISD::STRICT_FMA:
7153 return RISCVISD::STRICT_VFMADD_VL;
7154 case ISD::AND:
7155 case ISD::VP_AND:
7156 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7157 return RISCVISD::VMAND_VL;
7158 return RISCVISD::AND_VL;
7159 case ISD::OR:
7160 case ISD::VP_OR:
7161 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7162 return RISCVISD::VMOR_VL;
7163 return RISCVISD::OR_VL;
7164 case ISD::XOR:
7165 case ISD::VP_XOR:
7166 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7167 return RISCVISD::VMXOR_VL;
7168 return RISCVISD::XOR_VL;
7169 case ISD::ANY_EXTEND:
7170 case ISD::ZERO_EXTEND:
7171 return RISCVISD::VZEXT_VL;
7172 case ISD::SIGN_EXTEND:
7173 return RISCVISD::VSEXT_VL;
7174 case ISD::SETCC:
7175 return RISCVISD::SETCC_VL;
7176 case ISD::VSELECT:
7177 return RISCVISD::VMERGE_VL;
7178 case ISD::VP_SELECT:
7179 case ISD::VP_MERGE:
7180 return RISCVISD::VMERGE_VL;
7181 case ISD::VP_SRA:
7182 return RISCVISD::SRA_VL;
7183 case ISD::VP_SRL:
7184 return RISCVISD::SRL_VL;
7185 case ISD::VP_SQRT:
7186 return RISCVISD::FSQRT_VL;
7187 case ISD::VP_SIGN_EXTEND:
7188 return RISCVISD::VSEXT_VL;
7189 case ISD::VP_ZERO_EXTEND:
7190 return RISCVISD::VZEXT_VL;
7191 case ISD::VP_FP_TO_SINT:
7192 return RISCVISD::VFCVT_RTZ_X_F_VL;
7193 case ISD::VP_FP_TO_UINT:
7194 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7195 case ISD::FMINNUM:
7196 case ISD::FMINIMUMNUM:
7197 case ISD::VP_FMINNUM:
7198 return RISCVISD::VFMIN_VL;
7199 case ISD::FMAXNUM:
7200 case ISD::FMAXIMUMNUM:
7201 case ISD::VP_FMAXNUM:
7202 return RISCVISD::VFMAX_VL;
7203 case ISD::LRINT:
7204 case ISD::VP_LRINT:
7205 case ISD::LLRINT:
7206 case ISD::VP_LLRINT:
7207 return RISCVISD::VFCVT_RM_X_F_VL;
7208 }
7209 // clang-format on
7210#undef OP_CASE
7211#undef VP_CASE
7212}
7213
7214 static bool isPromotedOpNeedingSplit(SDValue Op,
7215 const RISCVSubtarget &Subtarget) {
7216 return (Op.getValueType() == MVT::nxv32f16 &&
7217 (Subtarget.hasVInstructionsF16Minimal() &&
7218 !Subtarget.hasVInstructionsF16())) ||
7219 Op.getValueType() == MVT::nxv32bf16;
7220}
7221
7222 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
7223 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7224 SDLoc DL(Op);
7225
7226 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7227 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7228
7229 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7230 if (!Op.getOperand(j).getValueType().isVector()) {
7231 LoOperands[j] = Op.getOperand(j);
7232 HiOperands[j] = Op.getOperand(j);
7233 continue;
7234 }
7235 std::tie(LoOperands[j], HiOperands[j]) =
7236 DAG.SplitVector(Op.getOperand(j), DL);
7237 }
7238
7239 SDValue LoRes =
7240 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7241 SDValue HiRes =
7242 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7243
7244 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7245}
7246
7247 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
7248 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7249 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7250 SDLoc DL(Op);
7251
7252 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7253 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7254
7255 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7256 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7257 std::tie(LoOperands[j], HiOperands[j]) =
7258 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7259 continue;
7260 }
7261 if (!Op.getOperand(j).getValueType().isVector()) {
7262 LoOperands[j] = Op.getOperand(j);
7263 HiOperands[j] = Op.getOperand(j);
7264 continue;
7265 }
7266 std::tie(LoOperands[j], HiOperands[j]) =
7267 DAG.SplitVector(Op.getOperand(j), DL);
7268 }
7269
7270 SDValue LoRes =
7271 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7272 SDValue HiRes =
7273 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7274
7275 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7276}
7277
7278 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
7279 SDLoc DL(Op);
7280
7281 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7282 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7283 auto [EVLLo, EVLHi] =
7284 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7285
7286 SDValue ResLo =
7287 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7288 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
7289 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7290 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7291}
7292
7293 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
7294
7295 assert(Op->isStrictFPOpcode());
7296
7297 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7298
7299 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7300 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7301
7302 SDLoc DL(Op);
7303
7304 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7305 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7306
7307 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7308 if (!Op.getOperand(j).getValueType().isVector()) {
7309 LoOperands[j] = Op.getOperand(j);
7310 HiOperands[j] = Op.getOperand(j);
7311 continue;
7312 }
7313 std::tie(LoOperands[j], HiOperands[j]) =
7314 DAG.SplitVector(Op.getOperand(j), DL);
7315 }
7316
7317 SDValue LoRes =
7318 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
7319 HiOperands[0] = LoRes.getValue(1);
7320 SDValue HiRes =
7321 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7322
7323 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7324 LoRes.getValue(0), HiRes.getValue(0));
7325 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7326}
7327
7328SDValue
7329RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7330 SelectionDAG &DAG) const {
7331 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7332 "Unexpected bfloat16 load lowering");
7333
7334 SDLoc DL(Op);
7335 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7336 EVT MemVT = LD->getMemoryVT();
7337 SDValue Load = DAG.getExtLoad(
7338 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7339 LD->getBasePtr(),
7340 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
7341 LD->getMemOperand());
7342 // Use a mask to keep the bf16 value validly nan-boxed when we don't have
7343 // the flh instruction. -65536 only has its upper bits set, so it can be
7344 // materialized directly with a single lui.
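  // Illustration: -65536 == 0xFFFF...FFFF0000, so the OR below leaves the
  // loaded bf16 payload in bits 15..0 and sets every higher bit to 1, which is
  // the NaN-boxed form the FP register file expects.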
7345 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7346 SDValue OrSixteenOne =
7347 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7348 SDValue ConvertedResult =
7349 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7350 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7351}
7352
7353SDValue
7354RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7355 SelectionDAG &DAG) const {
7356 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7357 "Unexpected bfloat16 store lowering");
7358
7359 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7360 SDLoc DL(Op);
7361 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7362 Subtarget.getXLenVT(), ST->getValue());
7363 return DAG.getTruncStore(
7364 ST->getChain(), DL, FMV, ST->getBasePtr(),
7365 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7366 ST->getMemOperand());
7367}
7368
7369 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
7370 SelectionDAG &DAG) const {
7371 switch (Op.getOpcode()) {
7372 default:
7374 "Unimplemented RISCVTargetLowering::LowerOperation Case");
7375 case ISD::PREFETCH:
7376 return LowerPREFETCH(Op, Subtarget, DAG);
7377 case ISD::ATOMIC_FENCE:
7378 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7379 case ISD::GlobalAddress:
7380 return lowerGlobalAddress(Op, DAG);
7381 case ISD::BlockAddress:
7382 return lowerBlockAddress(Op, DAG);
7383 case ISD::ConstantPool:
7384 return lowerConstantPool(Op, DAG);
7385 case ISD::JumpTable:
7386 return lowerJumpTable(Op, DAG);
7387 case ISD::GlobalTLSAddress:
7388 return lowerGlobalTLSAddress(Op, DAG);
7389 case ISD::Constant:
7390 return lowerConstant(Op, DAG, Subtarget);
7391 case ISD::ConstantFP:
7392 return lowerConstantFP(Op, DAG);
7393 case ISD::SELECT:
7394 return lowerSELECT(Op, DAG);
7395 case ISD::BRCOND:
7396 return lowerBRCOND(Op, DAG);
7397 case ISD::VASTART:
7398 return lowerVASTART(Op, DAG);
7399 case ISD::FRAMEADDR:
7400 return lowerFRAMEADDR(Op, DAG);
7401 case ISD::RETURNADDR:
7402 return lowerRETURNADDR(Op, DAG);
7403 case ISD::SHL_PARTS:
7404 return lowerShiftLeftParts(Op, DAG);
7405 case ISD::SRA_PARTS:
7406 return lowerShiftRightParts(Op, DAG, true);
7407 case ISD::SRL_PARTS:
7408 return lowerShiftRightParts(Op, DAG, false);
7409 case ISD::ROTL:
7410 case ISD::ROTR:
7411 if (Op.getValueType().isFixedLengthVector()) {
7412 assert(Subtarget.hasStdExtZvkb());
7413 return lowerToScalableOp(Op, DAG);
7414 }
7415 assert(Subtarget.hasVendorXTHeadBb() &&
7416 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7417 "Unexpected custom legalization");
7418 // XTHeadBb only supports rotate by constant.
7419 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7420 return SDValue();
7421 return Op;
7422 case ISD::BITCAST: {
7423 SDLoc DL(Op);
7424 EVT VT = Op.getValueType();
7425 SDValue Op0 = Op.getOperand(0);
7426 EVT Op0VT = Op0.getValueType();
7427 MVT XLenVT = Subtarget.getXLenVT();
7428 if (Op0VT == MVT::i16 &&
7429 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7430 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7431 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7432 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7433 }
7434 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7435 Subtarget.hasStdExtFOrZfinx()) {
7436 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7437 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7438 }
7439 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7440 Subtarget.hasStdExtDOrZdinx()) {
7441 SDValue Lo, Hi;
7442 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7443 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7444 }
7445
7446 // Consider other scalar<->scalar casts as legal if the types are legal.
7447 // Otherwise expand them.
7448 if (!VT.isVector() && !Op0VT.isVector()) {
7449 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7450 return Op;
7451 return SDValue();
7452 }
7453
7454 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7455 "Unexpected types");
7456
7457 if (VT.isFixedLengthVector()) {
7458 // We can handle fixed length vector bitcasts with a simple replacement
7459 // in isel.
7460 if (Op0VT.isFixedLengthVector())
7461 return Op;
7462 // When bitcasting from scalar to fixed-length vector, insert the scalar
7463 // into a one-element vector of the result type, and perform a vector
7464 // bitcast.
7465 if (!Op0VT.isVector()) {
7466 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7467 if (!isTypeLegal(BVT))
7468 return SDValue();
7469 return DAG.getBitcast(
7470 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7471 }
7472 return SDValue();
7473 }
7474 // Custom-legalize bitcasts from fixed-length vector types to scalar types
7475 // thus: bitcast the vector to a one-element vector type whose element type
7476 // is the same as the result type, and extract the first element.
7477 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
7478 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
7479 if (!isTypeLegal(BVT))
7480 return SDValue();
7481 SDValue BVec = DAG.getBitcast(BVT, Op0);
7482 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
7483 }
7484 return SDValue();
7485 }
7486 case ISD::INTRINSIC_WO_CHAIN:
7487 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7488 case ISD::INTRINSIC_W_CHAIN:
7489 return LowerINTRINSIC_W_CHAIN(Op, DAG);
7490 case ISD::INTRINSIC_VOID:
7491 return LowerINTRINSIC_VOID(Op, DAG);
7492 case ISD::IS_FPCLASS:
7493 return LowerIS_FPCLASS(Op, DAG);
7494 case ISD::BITREVERSE: {
7495 MVT VT = Op.getSimpleValueType();
7496 if (VT.isFixedLengthVector()) {
7497 assert(Subtarget.hasStdExtZvbb());
7498 return lowerToScalableOp(Op, DAG);
7499 }
7500 SDLoc DL(Op);
7501 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
7502 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
7503 // Expand bitreverse to a bswap(rev8) followed by brev8.
7504 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
7505 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
7506 }
7507 case ISD::TRUNCATE:
7508 case ISD::TRUNCATE_SSAT_S:
7509 case ISD::TRUNCATE_USAT_U:
7510 // Only custom-lower vector truncates
7511 if (!Op.getSimpleValueType().isVector())
7512 return Op;
7513 return lowerVectorTruncLike(Op, DAG);
7514 case ISD::ANY_EXTEND:
7515 case ISD::ZERO_EXTEND:
7516 if (Op.getOperand(0).getValueType().isVector() &&
7517 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7518 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
7519 if (Op.getValueType().isScalableVector())
7520 return Op;
7521 return lowerToScalableOp(Op, DAG);
7522 case ISD::SIGN_EXTEND:
7523 if (Op.getOperand(0).getValueType().isVector() &&
7524 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7525 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
7526 if (Op.getValueType().isScalableVector())
7527 return Op;
7528 return lowerToScalableOp(Op, DAG);
7529 case ISD::SPLAT_VECTOR_PARTS:
7530 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
7531 case ISD::INSERT_VECTOR_ELT:
7532 return lowerINSERT_VECTOR_ELT(Op, DAG);
7533 case ISD::EXTRACT_VECTOR_ELT:
7534 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7535 case ISD::SCALAR_TO_VECTOR: {
7536 MVT VT = Op.getSimpleValueType();
7537 SDLoc DL(Op);
7538 SDValue Scalar = Op.getOperand(0);
7539 if (VT.getVectorElementType() == MVT::i1) {
7540 MVT WideVT = VT.changeVectorElementType(MVT::i8);
7541 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
7542 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
7543 }
7544 MVT ContainerVT = VT;
7545 if (VT.isFixedLengthVector())
7546 ContainerVT = getContainerForFixedLengthVector(VT);
7547 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7548
7549 SDValue V;
7550 if (VT.isFloatingPoint()) {
7551 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
7552 DAG.getUNDEF(ContainerVT), Scalar, VL);
7553 } else {
7554 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
7555 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
7556 DAG.getUNDEF(ContainerVT), Scalar, VL);
7557 }
7558 if (VT.isFixedLengthVector())
7559 V = convertFromScalableVector(VT, V, DAG, Subtarget);
7560 return V;
7561 }
7562 case ISD::VSCALE: {
7563 MVT XLenVT = Subtarget.getXLenVT();
7564 MVT VT = Op.getSimpleValueType();
7565 SDLoc DL(Op);
7566 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7567 // We define our scalable vector types for lmul=1 to use a 64 bit known
7568 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
7569 // vscale as VLENB / 8.
7570 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
7571 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
7572 reportFatalInternalError("Support for VLEN==32 is incomplete.");
7573 // We assume VLENB is a multiple of 8. We manually choose the best shift
7574 // here because SimplifyDemandedBits isn't always able to simplify it.
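    // e.g. for VSCALE * 4: Log2 == 2 < 3, so the result is VLENB >> 1 (with
    // VLEN = 128, VLENB = 16 and vscale = 2, giving 8); for VSCALE * 24 the
    // multiplier is a multiple of 8 and the result becomes VLENB * 3.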
7575 uint64_t Val = Op.getConstantOperandVal(0);
7576 if (isPowerOf2_64(Val)) {
7577 uint64_t Log2 = Log2_64(Val);
7578 if (Log2 < 3) {
7579 SDNodeFlags Flags;
7580 Flags.setExact(true);
7581 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7582 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
7583 } else if (Log2 > 3) {
7584 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
7585 DAG.getConstant(Log2 - 3, DL, XLenVT));
7586 }
7587 } else if ((Val % 8) == 0) {
7588 // If the multiplier is a multiple of 8, scale it down to avoid needing
7589 // to shift the VLENB value.
7590 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
7591 DAG.getConstant(Val / 8, DL, XLenVT));
7592 } else {
7593 SDNodeFlags Flags;
7594 Flags.setExact(true);
7595 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7596 DAG.getConstant(3, DL, XLenVT), Flags);
7597 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
7598 DAG.getConstant(Val, DL, XLenVT));
7599 }
7600 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7601 }
7602 case ISD::FPOWI: {
7603 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
7604 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
7605 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
7606 Op.getOperand(1).getValueType() == MVT::i32) {
7607 SDLoc DL(Op);
7608 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7609 SDValue Powi =
7610 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
7611 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
7612 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7613 }
7614 return SDValue();
7615 }
7616 case ISD::FMAXIMUM:
7617 case ISD::FMINIMUM:
7618 if (isPromotedOpNeedingSplit(Op, Subtarget))
7619 return SplitVectorOp(Op, DAG);
7620 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7621 case ISD::FP_EXTEND:
7622 case ISD::FP_ROUND:
7623 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7624 case ISD::STRICT_FP_ROUND:
7625 case ISD::STRICT_FP_EXTEND:
7626 return lowerStrictFPExtendOrRoundLike(Op, DAG);
7627 case ISD::SINT_TO_FP:
7628 case ISD::UINT_TO_FP:
7629 if (Op.getValueType().isVector() &&
7630 ((Op.getValueType().getScalarType() == MVT::f16 &&
7631 (Subtarget.hasVInstructionsF16Minimal() &&
7632 !Subtarget.hasVInstructionsF16())) ||
7633 Op.getValueType().getScalarType() == MVT::bf16)) {
7634 if (isPromotedOpNeedingSplit(Op, Subtarget))
7635 return SplitVectorOp(Op, DAG);
7636 // int -> f32
7637 SDLoc DL(Op);
7638 MVT NVT =
7639 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7640 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7641 // f32 -> [b]f16
7642 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7643 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7644 }
7645 [[fallthrough]];
7646 case ISD::FP_TO_SINT:
7647 case ISD::FP_TO_UINT:
7648 if (SDValue Op1 = Op.getOperand(0);
7649 Op1.getValueType().isVector() &&
7650 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7651 (Subtarget.hasVInstructionsF16Minimal() &&
7652 !Subtarget.hasVInstructionsF16())) ||
7653 Op1.getValueType().getScalarType() == MVT::bf16)) {
7654 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7655 return SplitVectorOp(Op, DAG);
7656 // [b]f16 -> f32
7657 SDLoc DL(Op);
7658 MVT NVT = MVT::getVectorVT(MVT::f32,
7659 Op1.getValueType().getVectorElementCount());
7660 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7661 // f32 -> int
7662 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
7663 }
7664 [[fallthrough]];
7665 case ISD::STRICT_FP_TO_SINT:
7666 case ISD::STRICT_FP_TO_UINT:
7667 case ISD::STRICT_SINT_TO_FP:
7668 case ISD::STRICT_UINT_TO_FP: {
7669 // RVV can only do fp<->int conversions to types half or double the size of
7670 // the source. We custom-lower any conversion that would otherwise need two
7671 // hops into a sequence of single-hop conversions.
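    // e.g. an i16 -> f64 element conversion is not a single RVV widening step;
    // it is lowered here as a sign/zero extension of i16 to i32 followed by an
    // i32 -> f64 convert, each of which doubles the element size once.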
7672 MVT VT = Op.getSimpleValueType();
7673 if (VT.isScalarInteger())
7674 return lowerFP_TO_INT(Op, DAG, Subtarget);
7675 bool IsStrict = Op->isStrictFPOpcode();
7676 SDValue Src = Op.getOperand(0 + IsStrict);
7677 MVT SrcVT = Src.getSimpleValueType();
7678 if (SrcVT.isScalarInteger())
7679 return lowerINT_TO_FP(Op, DAG, Subtarget);
7680 if (!VT.isVector())
7681 return Op;
7682 SDLoc DL(Op);
7683 MVT EltVT = VT.getVectorElementType();
7684 MVT SrcEltVT = SrcVT.getVectorElementType();
7685 unsigned EltSize = EltVT.getSizeInBits();
7686 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
7687 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
7688 "Unexpected vector element types");
7689
7690 bool IsInt2FP = SrcEltVT.isInteger();
7691 // Widening conversions
7692 if (EltSize > (2 * SrcEltSize)) {
7693 if (IsInt2FP) {
7694 // Do a regular integer sign/zero extension then convert to float.
7695 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
7696 VT.getVectorElementCount());
7697 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
7698 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
7699 ? ISD::ZERO_EXTEND
7700 : ISD::SIGN_EXTEND;
7701 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
7702 if (IsStrict)
7703 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
7704 Op.getOperand(0), Ext);
7705 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
7706 }
7707 // FP2Int
7708 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
7709 // Do one doubling fp_extend then complete the operation by converting
7710 // to int.
7711 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7712 if (IsStrict) {
7713 auto [FExt, Chain] =
7714 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
7715 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
7716 }
7717 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7718 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7719 }
7720
7721 // Narrowing conversions
7722 if (SrcEltSize > (2 * EltSize)) {
7723 if (IsInt2FP) {
7724 // One narrowing int_to_fp, then an fp_round.
7725 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7726 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7727 if (IsStrict) {
7728 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7729 DAG.getVTList(InterimFVT, MVT::Other),
7730 Op.getOperand(0), Src);
7731 SDValue Chain = Int2FP.getValue(1);
7732 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7733 }
7734 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7735 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7736 }
7737 // FP2Int
7738 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7739 // representable by the integer, the result is poison.
7740 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7741 VT.getVectorElementCount());
7742 if (IsStrict) {
7743 SDValue FP2Int =
7744 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7745 Op.getOperand(0), Src);
7746 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7747 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7748 }
7749 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7750 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7751 }
7752
7753 // Scalable vectors can exit here: isel patterns handle conversions where
7754 // the element size stays the same or is exactly halved or doubled.
7755 if (!VT.isFixedLengthVector())
7756 return Op;
7757
7758 // For fixed-length vectors we lower to a custom "VL" node.
7759 unsigned RVVOpc = 0;
7760 switch (Op.getOpcode()) {
7761 default:
7762 llvm_unreachable("Impossible opcode");
7763 case ISD::FP_TO_SINT:
7764 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7765 break;
7766 case ISD::FP_TO_UINT:
7767 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7768 break;
7769 case ISD::SINT_TO_FP:
7770 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7771 break;
7772 case ISD::UINT_TO_FP:
7773 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7774 break;
7775 case ISD::STRICT_FP_TO_SINT:
7776 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7777 break;
7778 case ISD::STRICT_FP_TO_UINT:
7779 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7780 break;
7781 case ISD::STRICT_SINT_TO_FP:
7782 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7783 break;
7784 case ISD::STRICT_UINT_TO_FP:
7785 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7786 break;
7787 }
7788
7789 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7790 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7791 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7792 "Expected same element count");
7793
7794 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7795
7796 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7797 if (IsStrict) {
7798 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7799 Op.getOperand(0), Src, Mask, VL);
7800 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7801 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7802 }
7803 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7804 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7805 }
7806 case ISD::FP_TO_SINT_SAT:
7807 case ISD::FP_TO_UINT_SAT:
7808 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7809 case ISD::FP_TO_BF16: {
7810 // Custom lower to ensure the libcall return is passed in an FPR on hard
7811 // float ABIs.
7812 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7813 SDLoc DL(Op);
7814 MakeLibCallOptions CallOptions;
7815 RTLIB::Libcall LC =
7816 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7817 SDValue Res =
7818 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7819 if (Subtarget.is64Bit())
7820 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7821 return DAG.getBitcast(MVT::i32, Res);
7822 }
7823 case ISD::BF16_TO_FP: {
7824 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7825 MVT VT = Op.getSimpleValueType();
7826 SDLoc DL(Op);
7827 Op = DAG.getNode(
7828 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7829 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7830 SDValue Res = Subtarget.is64Bit()
7831 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7832 : DAG.getBitcast(MVT::f32, Op);
7833 // fp_extend if the target VT is bigger than f32.
7834 if (VT != MVT::f32)
7835 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7836 return Res;
7837 }
7838 case ISD::STRICT_FP_TO_FP16:
7839 case ISD::FP_TO_FP16: {
7840 // Custom lower to ensure the libcall return is passed in an FPR on hard
7841 // float ABIs.
7842 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7843 SDLoc DL(Op);
7844 MakeLibCallOptions CallOptions;
7845 bool IsStrict = Op->isStrictFPOpcode();
7846 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7847 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7848 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7849 SDValue Res;
7850 std::tie(Res, Chain) =
7851 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7852 if (Subtarget.is64Bit())
7853 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7854 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7855 if (IsStrict)
7856 return DAG.getMergeValues({Result, Chain}, DL);
7857 return Result;
7858 }
7859 case ISD::STRICT_FP16_TO_FP:
7860 case ISD::FP16_TO_FP: {
7861 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7862 // float ABIs.
7863 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7864 SDLoc DL(Op);
7865 MakeLibCallOptions CallOptions;
7866 bool IsStrict = Op->isStrictFPOpcode();
7867 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7868 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7869 SDValue Arg = Subtarget.is64Bit()
7870 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7871 : DAG.getBitcast(MVT::f32, Op0);
7872 SDValue Res;
7873 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7874 CallOptions, DL, Chain);
7875 if (IsStrict)
7876 return DAG.getMergeValues({Res, Chain}, DL);
7877 return Res;
7878 }
7879 case ISD::FTRUNC:
7880 case ISD::FCEIL:
7881 case ISD::FFLOOR:
7882 case ISD::FNEARBYINT:
7883 case ISD::FRINT:
7884 case ISD::FROUND:
7885 case ISD::FROUNDEVEN:
7886 if (isPromotedOpNeedingSplit(Op, Subtarget))
7887 return SplitVectorOp(Op, DAG);
7888 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7889 case ISD::LRINT:
7890 case ISD::LLRINT:
7891 case ISD::LROUND:
7892 case ISD::LLROUND: {
7893 if (Op.getValueType().isVector())
7894 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
7895 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7896 "Unexpected custom legalisation");
7897 SDLoc DL(Op);
7898 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7899 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7900 }
7901 case ISD::STRICT_LRINT:
7902 case ISD::STRICT_LLRINT:
7903 case ISD::STRICT_LROUND:
7904 case ISD::STRICT_LLROUND: {
7905 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7906 "Unexpected custom legalisation");
7907 SDLoc DL(Op);
7908 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7909 {Op.getOperand(0), Op.getOperand(1)});
7910 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7911 {Ext.getValue(1), Ext.getValue(0)});
7912 }
7913 case ISD::VECREDUCE_ADD:
7914 case ISD::VECREDUCE_UMAX:
7915 case ISD::VECREDUCE_SMAX:
7916 case ISD::VECREDUCE_UMIN:
7917 case ISD::VECREDUCE_SMIN:
7918 return lowerVECREDUCE(Op, DAG);
7919 case ISD::VECREDUCE_AND:
7920 case ISD::VECREDUCE_OR:
7921 case ISD::VECREDUCE_XOR:
7922 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7923 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7924 return lowerVECREDUCE(Op, DAG);
7925 case ISD::VECREDUCE_FADD:
7926 case ISD::VECREDUCE_SEQ_FADD:
7927 case ISD::VECREDUCE_FMIN:
7928 case ISD::VECREDUCE_FMAX:
7929 case ISD::VECREDUCE_FMAXIMUM:
7930 case ISD::VECREDUCE_FMINIMUM:
7931 return lowerFPVECREDUCE(Op, DAG);
7932 case ISD::VP_REDUCE_ADD:
7933 case ISD::VP_REDUCE_UMAX:
7934 case ISD::VP_REDUCE_SMAX:
7935 case ISD::VP_REDUCE_UMIN:
7936 case ISD::VP_REDUCE_SMIN:
7937 case ISD::VP_REDUCE_FADD:
7938 case ISD::VP_REDUCE_SEQ_FADD:
7939 case ISD::VP_REDUCE_FMIN:
7940 case ISD::VP_REDUCE_FMAX:
7941 case ISD::VP_REDUCE_FMINIMUM:
7942 case ISD::VP_REDUCE_FMAXIMUM:
7943 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7944 return SplitVectorReductionOp(Op, DAG);
7945 return lowerVPREDUCE(Op, DAG);
7946 case ISD::VP_REDUCE_AND:
7947 case ISD::VP_REDUCE_OR:
7948 case ISD::VP_REDUCE_XOR:
7949 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7950 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7951 return lowerVPREDUCE(Op, DAG);
7952 case ISD::VP_CTTZ_ELTS:
7953 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7954 return lowerVPCttzElements(Op, DAG);
7955 case ISD::UNDEF: {
7956 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7957 return convertFromScalableVector(Op.getSimpleValueType(),
7958 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7959 }
7960 case ISD::INSERT_SUBVECTOR:
7961 return lowerINSERT_SUBVECTOR(Op, DAG);
7962 case ISD::EXTRACT_SUBVECTOR:
7963 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7964 case ISD::VECTOR_DEINTERLEAVE:
7965 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7966 case ISD::VECTOR_INTERLEAVE:
7967 return lowerVECTOR_INTERLEAVE(Op, DAG);
7968 case ISD::STEP_VECTOR:
7969 return lowerSTEP_VECTOR(Op, DAG);
7970 case ISD::VECTOR_REVERSE:
7971 return lowerVECTOR_REVERSE(Op, DAG);
7972 case ISD::VECTOR_SPLICE:
7973 return lowerVECTOR_SPLICE(Op, DAG);
7974 case ISD::BUILD_VECTOR: {
7975 MVT VT = Op.getSimpleValueType();
7976 MVT EltVT = VT.getVectorElementType();
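// On RV32 with i64 elements, only the vid.v-based lowering is attempted here;
// if it does not apply, the generic expansion handles the node instead.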
7977 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
7978 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
7979 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7980 }
7981 case ISD::SPLAT_VECTOR: {
7982 MVT VT = Op.getSimpleValueType();
7983 MVT EltVT = VT.getVectorElementType();
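// Without native f16/bf16 vector splat support, splat the scalar's 16-bit
// pattern as an i16 vector and bitcast the result back to the FP type.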
7984 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7985 EltVT == MVT::bf16) {
7986 SDLoc DL(Op);
7987 SDValue Elt;
7988 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7989 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7990 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7991 Op.getOperand(0));
7992 else
7993 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7994 MVT IVT = VT.changeVectorElementType(MVT::i16);
7995 return DAG.getNode(ISD::BITCAST, DL, VT,
7996 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7997 }
7998
7999 if (EltVT == MVT::i1)
8000 return lowerVectorMaskSplat(Op, DAG);
8001 return SDValue();
8002 }
8003 case ISD::VECTOR_SHUFFLE:
8004 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8005 case ISD::CONCAT_VECTORS: {
8006 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
8007 // better than going through the stack, as the default expansion does.
8008 SDLoc DL(Op);
8009 MVT VT = Op.getSimpleValueType();
8010 MVT ContainerVT = VT;
8011 if (VT.isFixedLengthVector())
8012 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
8013
8014 // Recursively split concat_vectors with more than 2 operands:
8015 //
8016 // concat_vector op1, op2, op3, op4
8017 // ->
8018 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
8019 //
8020 // This reduces the length of the chain of vslideups and allows us to
8021 // perform the vslideups at a smaller LMUL, limited to MF2.
8022 if (Op.getNumOperands() > 2 &&
8023 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
8024 MVT HalfVT = VT.getHalfNumVectorElementsVT();
8025 assert(isPowerOf2_32(Op.getNumOperands()));
8026 size_t HalfNumOps = Op.getNumOperands() / 2;
8027 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8028 Op->ops().take_front(HalfNumOps));
8029 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8030 Op->ops().drop_front(HalfNumOps));
8031 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8032 }
8033
8034 unsigned NumOpElts =
8035 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
8036 SDValue Vec = DAG.getUNDEF(VT);
8037 for (const auto &OpIdx : enumerate(Op->ops())) {
8038 SDValue SubVec = OpIdx.value();
8039 // Don't insert undef subvectors.
8040 if (SubVec.isUndef())
8041 continue;
8042 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
8043 }
8044 return Vec;
8045 }
8046 case ISD::LOAD: {
8047 auto *Load = cast<LoadSDNode>(Op);
8048 EVT VT = Load->getValueType(0);
8049 if (VT == MVT::f64) {
8050 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8051 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8052
8053 // Replace a double precision load with two i32 loads and a BuildPairF64.
8054 SDLoc DL(Op);
8055 SDValue BasePtr = Load->getBasePtr();
8056 SDValue Chain = Load->getChain();
8057
8058 SDValue Lo =
8059 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
8060 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8061 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8062 SDValue Hi = DAG.getLoad(
8063 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
8064 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8065 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8066 Hi.getValue(1));
8067
8068 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8069 return DAG.getMergeValues({Pair, Chain}, DL);
8070 }
8071
8072 if (VT == MVT::bf16)
8073 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8074
8075 // Handle normal vector tuple load.
8076 if (VT.isRISCVVectorTuple()) {
8077 SDLoc DL(Op);
8078 MVT XLenVT = Subtarget.getXLenVT();
8079 unsigned NF = VT.getRISCVVectorTupleNumFields();
8080 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8081 unsigned NumElts = Sz / (NF * 8);
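// Each tuple field is a scalable vector of NumElts i8 elements. A single
// vector register holds 8 such elements per vscale unit, so NumElts / 8 is
// the register-group size (LMUL) and Log2LMUL its base-2 logarithm.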
8082 int Log2LMUL = Log2_64(NumElts) - 3;
8083
8084 auto Flag = SDNodeFlags();
8085 Flag.setNoUnsignedWrap(true);
8086 SDValue Ret = DAG.getUNDEF(VT);
8087 SDValue BasePtr = Load->getBasePtr();
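// The byte stride between consecutive fields is one register group: VLENB
// scaled by LMUL, clamped to a whole register for fractional LMUL.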
8088 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8089 VROffset =
8090 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8091 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8092 SmallVector<SDValue, 8> OutChains;
8093
8094 // Load NF vector registers and combine them to a vector tuple.
8095 for (unsigned i = 0; i < NF; ++i) {
8096 SDValue LoadVal = DAG.getLoad(
8097 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8098 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8099 OutChains.push_back(LoadVal.getValue(1));
8100 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8101 DAG.getTargetConstant(i, DL, MVT::i32));
8102 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8103 }
8104 return DAG.getMergeValues(
8105 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8106 }
8107
8108 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8109 return V;
8110 if (Op.getValueType().isFixedLengthVector())
8111 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8112 return Op;
8113 }
8114 case ISD::STORE: {
8115 auto *Store = cast<StoreSDNode>(Op);
8116 SDValue StoredVal = Store->getValue();
8117 EVT VT = StoredVal.getValueType();
8118 if (VT == MVT::f64) {
8119 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8120 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8121
8122 // Replace a double precision store with a SplitF64 and i32 stores.
8123 SDLoc DL(Op);
8124 SDValue BasePtr = Store->getBasePtr();
8125 SDValue Chain = Store->getChain();
8126 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8127 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8128
8129 SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
8130 Store->getPointerInfo(), Store->getBaseAlign(),
8131 Store->getMemOperand()->getFlags());
8132 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8133 SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
8134 Store->getPointerInfo().getWithOffset(4),
8135 Store->getBaseAlign(),
8136 Store->getMemOperand()->getFlags());
8137 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8138 }
8139 if (VT == MVT::i64) {
8140 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8141 "Unexpected custom legalisation");
8142 if (Store->isTruncatingStore())
8143 return SDValue();
8144
8145 if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
8146 return SDValue();
8147
8148 SDLoc DL(Op);
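// Split the i64 value into its lo/hi halves and emit a single Zilsd SD
// (paired store) memory intrinsic.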
8149 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8150 DAG.getTargetConstant(0, DL, MVT::i32));
8151 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8152 DAG.getTargetConstant(1, DL, MVT::i32));
8153
8154 return DAG.getMemIntrinsicNode(
8155 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8156 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8157 Store->getMemOperand());
8158 }
8159
8160 if (VT == MVT::bf16)
8161 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8162
8163 // Handle normal vector tuple store.
8164 if (VT.isRISCVVectorTuple()) {
8165 SDLoc DL(Op);
8166 MVT XLenVT = Subtarget.getXLenVT();
8167 unsigned NF = VT.getRISCVVectorTupleNumFields();
8168 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8169 unsigned NumElts = Sz / (NF * 8);
8170 int Log2LMUL = Log2_64(NumElts) - 3;
8171
8172 auto Flag = SDNodeFlags();
8173 Flag.setNoUnsignedWrap(true);
8174 SDValue Ret;
8175 SDValue Chain = Store->getChain();
8176 SDValue BasePtr = Store->getBasePtr();
8177 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8178 VROffset =
8179 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8180 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8181
8182 // Extract subregisters in a vector tuple and store them individually.
8183 for (unsigned i = 0; i < NF; ++i) {
8184 auto Extract =
8185 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8186 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8187 DAG.getTargetConstant(i, DL, MVT::i32));
8188 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8189 MachinePointerInfo(Store->getAddressSpace()),
8190 Store->getBaseAlign(),
8191 Store->getMemOperand()->getFlags());
8192 Chain = Ret.getValue(0);
8193 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8194 }
8195 return Ret;
8196 }
8197
8198 if (auto V = expandUnalignedRVVStore(Op, DAG))
8199 return V;
8200 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8201 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8202 return Op;
8203 }
8204 case ISD::MLOAD:
8205 case ISD::VP_LOAD:
8206 return lowerMaskedLoad(Op, DAG);
8207 case ISD::VP_LOAD_FF:
8208 return lowerLoadFF(Op, DAG);
8209 case ISD::MSTORE:
8210 case ISD::VP_STORE:
8211 return lowerMaskedStore(Op, DAG);
8212 case ISD::VECTOR_COMPRESS:
8213 return lowerVectorCompress(Op, DAG);
8214 case ISD::SELECT_CC: {
8215 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8216 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8217 // into separate SETCC+SELECT just like LegalizeDAG.
8218 SDValue Tmp1 = Op.getOperand(0);
8219 SDValue Tmp2 = Op.getOperand(1);
8220 SDValue True = Op.getOperand(2);
8221 SDValue False = Op.getOperand(3);
8222 EVT VT = Op.getValueType();
8223 SDValue CC = Op.getOperand(4);
8224 EVT CmpVT = Tmp1.getValueType();
8225 EVT CCVT =
8226 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8227 SDLoc DL(Op);
8228 SDValue Cond =
8229 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8230 return DAG.getSelect(DL, VT, Cond, True, False);
8231 }
8232 case ISD::SETCC: {
8233 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8234 if (OpVT.isScalarInteger()) {
8235 MVT VT = Op.getSimpleValueType();
8236 SDValue LHS = Op.getOperand(0);
8237 SDValue RHS = Op.getOperand(1);
8238 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8239 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8240 "Unexpected CondCode");
8241
8242 SDLoc DL(Op);
8243
8244 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8245 // convert this to the equivalent of (set(u)ge X, C+1) by using
8246 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8247 // in a register.
8248 if (isa<ConstantSDNode>(RHS)) {
8249 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8250 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8251 // If this is an unsigned compare and the constant is -1, incrementing
8252 // the constant would change behavior. The result should be false.
8253 if (CCVal == ISD::SETUGT && Imm == -1)
8254 return DAG.getConstant(0, DL, VT);
8255 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8256 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8257 SDValue SetCC = DAG.getSetCC(
8258 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8259 return DAG.getLogicalNOT(DL, SetCC, VT);
8260 }
8261 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
8262 if (CCVal == ISD::SETUGT && Imm == 2047) {
8263 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8264 DAG.getShiftAmountConstant(11, OpVT, DL));
8265 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8266 ISD::SETNE);
8267 }
8268 }
8269
8270 // Not a constant we could handle, swap the operands and condition code to
8271 // SETLT/SETULT.
8272 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8273 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8274 }
8275
8276 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8277 return SplitVectorOp(Op, DAG);
8278
8279 return lowerToScalableOp(Op, DAG);
8280 }
8281 case ISD::ADD:
8282 case ISD::SUB:
8283 case ISD::MUL:
8284 case ISD::MULHS:
8285 case ISD::MULHU:
8286 case ISD::AND:
8287 case ISD::OR:
8288 case ISD::XOR:
8289 case ISD::SDIV:
8290 case ISD::SREM:
8291 case ISD::UDIV:
8292 case ISD::UREM:
8293 case ISD::BSWAP:
8294 case ISD::CTPOP:
8295 case ISD::VSELECT:
8296 return lowerToScalableOp(Op, DAG);
8297 case ISD::SHL:
8298 case ISD::SRA:
8299 case ISD::SRL:
8300 if (Op.getSimpleValueType().isFixedLengthVector())
8301 return lowerToScalableOp(Op, DAG);
8302 // This can be called for an i32 shift amount that needs to be promoted.
8303 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8304 "Unexpected custom legalisation");
8305 return SDValue();
8306 case ISD::FABS:
8307 case ISD::FNEG:
8308 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8309 return lowerFABSorFNEG(Op, DAG, Subtarget);
8310 [[fallthrough]];
8311 case ISD::FADD:
8312 case ISD::FSUB:
8313 case ISD::FMUL:
8314 case ISD::FDIV:
8315 case ISD::FSQRT:
8316 case ISD::FMA:
8317 case ISD::FMINNUM:
8318 case ISD::FMAXNUM:
8319 case ISD::FMINIMUMNUM:
8320 case ISD::FMAXIMUMNUM:
8321 if (isPromotedOpNeedingSplit(Op, Subtarget))
8322 return SplitVectorOp(Op, DAG);
8323 [[fallthrough]];
8324 case ISD::AVGFLOORS:
8325 case ISD::AVGFLOORU:
8326 case ISD::AVGCEILS:
8327 case ISD::AVGCEILU:
8328 case ISD::SMIN:
8329 case ISD::SMAX:
8330 case ISD::UMIN:
8331 case ISD::UMAX:
8332 case ISD::UADDSAT:
8333 case ISD::USUBSAT:
8334 case ISD::SADDSAT:
8335 case ISD::SSUBSAT:
8336 return lowerToScalableOp(Op, DAG);
8337 case ISD::ABDS:
8338 case ISD::ABDU: {
8339 SDLoc dl(Op);
8340 EVT VT = Op->getValueType(0);
8341 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8342 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8343 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8344
8345 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8346 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
8347 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8348 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8349 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8350 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8351 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8352 }
8353 case ISD::ABS:
8354 case ISD::VP_ABS:
8355 return lowerABS(Op, DAG);
8356 case ISD::CTLZ:
8357 case ISD::CTLZ_ZERO_UNDEF:
8358 case ISD::CTTZ:
8359 case ISD::CTTZ_ZERO_UNDEF:
8360 if (Subtarget.hasStdExtZvbb())
8361 return lowerToScalableOp(Op, DAG);
8362 assert(Op.getOpcode() != ISD::CTTZ);
8363 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8364 case ISD::FCOPYSIGN:
8365 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8366 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8367 if (isPromotedOpNeedingSplit(Op, Subtarget))
8368 return SplitVectorOp(Op, DAG);
8369 return lowerToScalableOp(Op, DAG);
8370 case ISD::STRICT_FADD:
8371 case ISD::STRICT_FSUB:
8372 case ISD::STRICT_FMUL:
8373 case ISD::STRICT_FDIV:
8374 case ISD::STRICT_FSQRT:
8375 case ISD::STRICT_FMA:
8376 if (isPromotedOpNeedingSplit(Op, Subtarget))
8377 return SplitStrictFPVectorOp(Op, DAG);
8378 return lowerToScalableOp(Op, DAG);
8379 case ISD::STRICT_FSETCC:
8380 case ISD::STRICT_FSETCCS:
8381 return lowerVectorStrictFSetcc(Op, DAG);
8382 case ISD::STRICT_FCEIL:
8383 case ISD::STRICT_FRINT:
8384 case ISD::STRICT_FFLOOR:
8385 case ISD::STRICT_FTRUNC:
8386 case ISD::STRICT_FNEARBYINT:
8387 case ISD::STRICT_FROUND:
8388 case ISD::STRICT_FROUNDEVEN:
8389 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8390 case ISD::MGATHER:
8391 case ISD::VP_GATHER:
8392 return lowerMaskedGather(Op, DAG);
8393 case ISD::MSCATTER:
8394 case ISD::VP_SCATTER:
8395 return lowerMaskedScatter(Op, DAG);
8396 case ISD::GET_ROUNDING:
8397 return lowerGET_ROUNDING(Op, DAG);
8398 case ISD::SET_ROUNDING:
8399 return lowerSET_ROUNDING(Op, DAG);
8400 case ISD::GET_FPENV:
8401 return lowerGET_FPENV(Op, DAG);
8402 case ISD::SET_FPENV:
8403 return lowerSET_FPENV(Op, DAG);
8404 case ISD::RESET_FPENV:
8405 return lowerRESET_FPENV(Op, DAG);
8406 case ISD::GET_FPMODE:
8407 return lowerGET_FPMODE(Op, DAG);
8408 case ISD::SET_FPMODE:
8409 return lowerSET_FPMODE(Op, DAG);
8410 case ISD::RESET_FPMODE:
8411 return lowerRESET_FPMODE(Op, DAG);
8412 case ISD::EH_DWARF_CFA:
8413 return lowerEH_DWARF_CFA(Op, DAG);
8414 case ISD::VP_MERGE:
8415 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
8416 return lowerVPMergeMask(Op, DAG);
8417 [[fallthrough]];
8418 case ISD::VP_SELECT:
8419 case ISD::VP_ADD:
8420 case ISD::VP_SUB:
8421 case ISD::VP_MUL:
8422 case ISD::VP_SDIV:
8423 case ISD::VP_UDIV:
8424 case ISD::VP_SREM:
8425 case ISD::VP_UREM:
8426 case ISD::VP_UADDSAT:
8427 case ISD::VP_USUBSAT:
8428 case ISD::VP_SADDSAT:
8429 case ISD::VP_SSUBSAT:
8430 case ISD::VP_LRINT:
8431 case ISD::VP_LLRINT:
8432 return lowerVPOp(Op, DAG);
8433 case ISD::VP_AND:
8434 case ISD::VP_OR:
8435 case ISD::VP_XOR:
8436 return lowerLogicVPOp(Op, DAG);
8437 case ISD::VP_FADD:
8438 case ISD::VP_FSUB:
8439 case ISD::VP_FMUL:
8440 case ISD::VP_FDIV:
8441 case ISD::VP_FNEG:
8442 case ISD::VP_FABS:
8443 case ISD::VP_SQRT:
8444 case ISD::VP_FMA:
8445 case ISD::VP_FMINNUM:
8446 case ISD::VP_FMAXNUM:
8447 case ISD::VP_FCOPYSIGN:
8448 if (isPromotedOpNeedingSplit(Op, Subtarget))
8449 return SplitVPOp(Op, DAG);
8450 [[fallthrough]];
8451 case ISD::VP_SRA:
8452 case ISD::VP_SRL:
8453 case ISD::VP_SHL:
8454 return lowerVPOp(Op, DAG);
8455 case ISD::VP_IS_FPCLASS:
8456 return LowerIS_FPCLASS(Op, DAG);
8457 case ISD::VP_SIGN_EXTEND:
8458 case ISD::VP_ZERO_EXTEND:
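// Extending from an i1 mask vector cannot use a plain widening convert; it
// is lowered as a select between splatted constants instead.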
8459 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8460 return lowerVPExtMaskOp(Op, DAG);
8461 return lowerVPOp(Op, DAG);
8462 case ISD::VP_TRUNCATE:
8463 return lowerVectorTruncLike(Op, DAG);
8464 case ISD::VP_FP_EXTEND:
8465 case ISD::VP_FP_ROUND:
8466 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8467 case ISD::VP_SINT_TO_FP:
8468 case ISD::VP_UINT_TO_FP:
8469 if (Op.getValueType().isVector() &&
8470 ((Op.getValueType().getScalarType() == MVT::f16 &&
8471 (Subtarget.hasVInstructionsF16Minimal() &&
8472 !Subtarget.hasVInstructionsF16())) ||
8473 Op.getValueType().getScalarType() == MVT::bf16)) {
8474 if (isPromotedOpNeedingSplit(Op, Subtarget))
8475 return SplitVectorOp(Op, DAG);
8476 // int -> f32
8477 SDLoc DL(Op);
8478 MVT NVT =
8479 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8480 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8481 // f32 -> [b]f16
8482 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8483 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8484 }
8485 [[fallthrough]];
8486 case ISD::VP_FP_TO_SINT:
8487 case ISD::VP_FP_TO_UINT:
8488 if (SDValue Op1 = Op.getOperand(0);
8489 Op1.getValueType().isVector() &&
8490 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8491 (Subtarget.hasVInstructionsF16Minimal() &&
8492 !Subtarget.hasVInstructionsF16())) ||
8493 Op1.getValueType().getScalarType() == MVT::bf16)) {
8494 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8495 return SplitVectorOp(Op, DAG);
8496 // [b]f16 -> f32
8497 SDLoc DL(Op);
8498 MVT NVT = MVT::getVectorVT(MVT::f32,
8499 Op1.getValueType().getVectorElementCount());
8500 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8501 // f32 -> int
8502 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
8503 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
8504 }
8505 return lowerVPFPIntConvOp(Op, DAG);
8506 case ISD::VP_SETCC:
8507 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8508 return SplitVPOp(Op, DAG);
8509 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8510 return lowerVPSetCCMaskOp(Op, DAG);
8511 [[fallthrough]];
8512 case ISD::VP_SMIN:
8513 case ISD::VP_SMAX:
8514 case ISD::VP_UMIN:
8515 case ISD::VP_UMAX:
8516 case ISD::VP_BITREVERSE:
8517 case ISD::VP_BSWAP:
8518 return lowerVPOp(Op, DAG);
8519 case ISD::VP_CTLZ:
8520 case ISD::VP_CTLZ_ZERO_UNDEF:
8521 if (Subtarget.hasStdExtZvbb())
8522 return lowerVPOp(Op, DAG);
8523 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8524 case ISD::VP_CTTZ:
8525 case ISD::VP_CTTZ_ZERO_UNDEF:
8526 if (Subtarget.hasStdExtZvbb())
8527 return lowerVPOp(Op, DAG);
8528 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8529 case ISD::VP_CTPOP:
8530 return lowerVPOp(Op, DAG);
8531 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8532 return lowerVPStridedLoad(Op, DAG);
8533 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8534 return lowerVPStridedStore(Op, DAG);
8535 case ISD::VP_FCEIL:
8536 case ISD::VP_FFLOOR:
8537 case ISD::VP_FRINT:
8538 case ISD::VP_FNEARBYINT:
8539 case ISD::VP_FROUND:
8540 case ISD::VP_FROUNDEVEN:
8541 case ISD::VP_FROUNDTOZERO:
8542 if (isPromotedOpNeedingSplit(Op, Subtarget))
8543 return SplitVPOp(Op, DAG);
8544 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8545 case ISD::VP_FMAXIMUM:
8546 case ISD::VP_FMINIMUM:
8547 if (isPromotedOpNeedingSplit(Op, Subtarget))
8548 return SplitVPOp(Op, DAG);
8549 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8550 case ISD::EXPERIMENTAL_VP_SPLICE:
8551 return lowerVPSpliceExperimental(Op, DAG);
8552 case ISD::EXPERIMENTAL_VP_REVERSE:
8553 return lowerVPReverseExperimental(Op, DAG);
8554 case ISD::EXPERIMENTAL_VP_SPLAT:
8555 return lowerVPSplatExperimental(Op, DAG);
8556 case ISD::CLEAR_CACHE: {
8557 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
8558 "llvm.clear_cache only needs custom lower on Linux targets");
8559 SDLoc DL(Op);
8560 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8561 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
8562 Op.getOperand(2), Flags, DL);
8563 }
8564 case ISD::DYNAMIC_STACKALLOC:
8565 return lowerDYNAMIC_STACKALLOC(Op, DAG);
8566 case ISD::INIT_TRAMPOLINE:
8567 return lowerINIT_TRAMPOLINE(Op, DAG);
8568 case ISD::ADJUST_TRAMPOLINE:
8569 return lowerADJUST_TRAMPOLINE(Op, DAG);
8570 case ISD::PARTIAL_REDUCE_UMLA:
8571 case ISD::PARTIAL_REDUCE_SMLA:
8572 case ISD::PARTIAL_REDUCE_SUMLA:
8573 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
8574 }
8575}
8576
8577SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
8578 SDValue Start, SDValue End,
8579 SDValue Flags, SDLoc DL) const {
8580 MakeLibCallOptions CallOptions;
8581 std::pair<SDValue, SDValue> CallResult =
8582 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
8583 {Start, End, Flags}, CallOptions, DL, InChain);
8584
8585 // This function returns void so only the out chain matters.
8586 return CallResult.second;
8587}
8588
8589SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
8590 SelectionDAG &DAG) const {
8591 if (!Subtarget.is64Bit())
8592 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8593
8594 // Create an MCCodeEmitter to encode instructions.
8595 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
8596 assert(TLO);
8597 MCContext &MCCtx = TLO->getContext();
8598
8599 std::unique_ptr<MCCodeEmitter> CodeEmitter(
8600 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
8601
8602 SDValue Root = Op.getOperand(0);
8603 SDValue Trmp = Op.getOperand(1); // trampoline
8604 SDLoc dl(Op);
8605
8606 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
8607
8608 // We store in the trampoline buffer the following instructions and data.
8609 // Offset:
8610 // 0: auipc t2, 0
8611 // 4: ld t0, 24(t2)
8612 // 8: ld t2, 16(t2)
8613 // 12: jalr t0
8614 // 16: <StaticChainOffset>
8615 // 24: <FunctionAddressOffset>
8616 // 32:
8617 // Offset with branch control flow protection enabled:
8618 // 0: lpad <imm20>
8619 // 4: auipc t3, 0
8620 // 8: ld t2, 28(t3)
8621 // 12: ld t3, 20(t3)
8622 // 16: jalr t2
8623 // 20: <StaticChainOffset>
8624 // 28: <FunctionAddressOffset>
8625 // 36:
8626
8627 const bool HasCFBranch =
8628 Subtarget.hasStdExtZicfilp() &&
8629 DAG.getMachineFunction().getFunction().getParent()->getModuleFlag(
8630 "cf-protection-branch");
8631 const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
8632 const unsigned StaticChainOffset = StaticChainIdx * 4;
8633 const unsigned FunctionAddressOffset = StaticChainOffset + 8;
8634
8635 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
8636 assert(STI);
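// Helper to encode a single instruction and read back its 32-bit
// little-endian encoding.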
8637 auto GetEncoding = [&](const MCInst &MC) {
8638 SmallVector<char, 32> CB;
8639 SmallVector<MCFixup> Fixups;
8640 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
8641 uint32_t Encoding = support::endian::read32le(CB.data());
8642 return Encoding;
8643 };
8644
8645 SmallVector<SDValue> OutChains;
8646
8647 SmallVector<uint32_t> Encodings;
8648 if (!HasCFBranch) {
8649 Encodings.append(
8650 {// auipc t2, 0
8651 // Loads the current PC into t2.
8652 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
8653 // ld t0, 24(t2)
8654 // Loads the function address into t0. Note that we are using offsets
8655 // pc-relative to the first instruction of the trampoline.
8656 GetEncoding(MCInstBuilder(RISCV::LD)
8657 .addReg(RISCV::X5)
8658 .addReg(RISCV::X7)
8659 .addImm(FunctionAddressOffset)),
8660 // ld t2, 16(t2)
8661 // Load the value of the static chain.
8662 GetEncoding(MCInstBuilder(RISCV::LD)
8663 .addReg(RISCV::X7)
8664 .addReg(RISCV::X7)
8665 .addImm(StaticChainOffset)),
8666 // jalr t0
8667 // Jump to the function.
8668 GetEncoding(MCInstBuilder(RISCV::JALR)
8669 .addReg(RISCV::X0)
8670 .addReg(RISCV::X5)
8671 .addImm(0))});
8672 } else {
8673 Encodings.append(
8674 {// auipc x0, <imm20> (lpad <imm20>)
8675 // Landing pad.
8676 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
8677 // auipc t3, 0
8678 // Loads the current PC into t3.
8679 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
8680 // ld t2, (FunctionAddressOffset - 4)(t3)
8681 // Loads the function address into t2. Note that we are using offsets
8682 // pc-relative to the SECOND instruction of the trampoline.
8683 GetEncoding(MCInstBuilder(RISCV::LD)
8684 .addReg(RISCV::X7)
8685 .addReg(RISCV::X28)
8686 .addImm(FunctionAddressOffset - 4)),
8687 // ld t3, (StaticChainOffset - 4)(t3)
8688 // Load the value of the static chain.
8689 GetEncoding(MCInstBuilder(RISCV::LD)
8690 .addReg(RISCV::X28)
8691 .addReg(RISCV::X28)
8692 .addImm(StaticChainOffset - 4)),
8693 // jalr t2
8694 // Software-guarded jump to the function.
8695 GetEncoding(MCInstBuilder(RISCV::JALR)
8696 .addReg(RISCV::X0)
8697 .addReg(RISCV::X7)
8698 .addImm(0))});
8699 }
8700
8701 // Store encoded instructions.
8702 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
8703 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8704 DAG.getConstant(Idx * 4, dl, MVT::i64))
8705 : Trmp;
8706 OutChains.push_back(DAG.getTruncStore(
8707 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
8708 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
8709 }
8710
8711 // Now store the variable part of the trampoline.
8712 SDValue FunctionAddress = Op.getOperand(2);
8713 SDValue StaticChain = Op.getOperand(3);
8714
8715 // Store the given static chain and function pointer in the trampoline buffer.
8716 struct OffsetValuePair {
8717 const unsigned Offset;
8718 const SDValue Value;
8719 SDValue Addr = SDValue(); // Used to cache the address.
8720 } OffsetValues[] = {
8721 {StaticChainOffset, StaticChain},
8722 {FunctionAddressOffset, FunctionAddress},
8723 };
8724 for (auto &OffsetValue : OffsetValues) {
8725 SDValue Addr =
8726 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8727 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
8728 OffsetValue.Addr = Addr;
8729 OutChains.push_back(
8730 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
8731 MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
8732 }
8733
8734 assert(OutChains.size() == StaticChainIdx + 2 &&
8735 "Size of OutChains mismatch");
8736 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
8737
8738 // The end of the trampoline's instructions coincides with the static chain
8739 // address that we computed earlier.
8740 SDValue EndOfTrmp = OffsetValues[0].Addr;
8741
8742 // Call clear cache on the trampoline instructions.
8743 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
8744 Trmp, EndOfTrmp);
8745
8746 return Chain;
8747}
8748
8749SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
8750 SelectionDAG &DAG) const {
8751 if (!Subtarget.is64Bit())
8752 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8753
8754 return Op.getOperand(0);
8755}
8756
8757SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
8758 SelectionDAG &DAG) const {
8759 // Currently, only the vqdot and vqdotu case (from zvqdotq) should be legal.
8760 // TODO: There are many other sub-cases we could potentially lower; are
8761 // any of them worthwhile? E.g., via vredsum, vwredsum, vwwmaccu, etc.
8762 SDLoc DL(Op);
8763 MVT VT = Op.getSimpleValueType();
8764 SDValue Accum = Op.getOperand(0);
8765 assert(Accum.getSimpleValueType() == VT &&
8766 VT.getVectorElementType() == MVT::i32);
8767 SDValue A = Op.getOperand(1);
8768 SDValue B = Op.getOperand(2);
8769 MVT ArgVT = A.getSimpleValueType();
8770 assert(ArgVT == B.getSimpleValueType() &&
8771 ArgVT.getVectorElementType() == MVT::i8);
8772 (void)ArgVT;
8773
8774 // The zvqdotq pseudos are defined with sources and destination both
8775 // being i32. This cast is needed for correctness to avoid incorrect
8776 // .vx matching of i8 splats.
8777 A = DAG.getBitcast(VT, A);
8778 B = DAG.getBitcast(VT, B);
8779
8780 MVT ContainerVT = VT;
8781 if (VT.isFixedLengthVector()) {
8782 ContainerVT = getContainerForFixedLengthVector(VT);
8783 Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
8784 A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
8785 B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
8786 }
8787
8788 unsigned Opc;
8789 switch (Op.getOpcode()) {
8790 case ISD::PARTIAL_REDUCE_SMLA:
8791 Opc = RISCVISD::VQDOT_VL;
8792 break;
8793 case ISD::PARTIAL_REDUCE_UMLA:
8794 Opc = RISCVISD::VQDOTU_VL;
8795 break;
8796 case ISD::PARTIAL_REDUCE_SUMLA:
8797 Opc = RISCVISD::VQDOTSU_VL;
8798 break;
8799 default:
8800 llvm_unreachable("Unexpected opcode");
8801 }
8802 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8803 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
8804 if (VT.isFixedLengthVector())
8805 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
8806 return Res;
8807}
8808
8809 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
8810 SelectionDAG &DAG, unsigned Flags) {
8811 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
8812}
8813
8814 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
8815 SelectionDAG &DAG, unsigned Flags) {
8816 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
8817 Flags);
8818}
8819
8820 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
8821 SelectionDAG &DAG, unsigned Flags) {
8822 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
8823 N->getOffset(), Flags);
8824}
8825
8826 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
8827 SelectionDAG &DAG, unsigned Flags) {
8828 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
8829}
8830
8831 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
8832 EVT Ty, SelectionDAG &DAG) {
8833 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
8834 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8835 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8836 return DAG.getLoad(
8837 Ty, DL, DAG.getEntryNode(), LC,
8838 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8839}
8840
8841 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
8842 EVT Ty, SelectionDAG &DAG) {
8843 RISCVConstantPoolValue *CPV =
8844 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
8845 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8846 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8847 return DAG.getLoad(
8848 Ty, DL, DAG.getEntryNode(), LC,
8849 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8850}
8851
8852template <class NodeTy>
8853SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
8854 bool IsLocal, bool IsExternWeak) const {
8855 SDLoc DL(N);
8856 EVT Ty = getPointerTy(DAG.getDataLayout());
8857
8858 // When HWASAN is used and tagging of global variables is enabled,
8859 // they should be accessed via the GOT, since the tagged address of a global
8860 // is incompatible with existing code models. This also applies to non-pic
8861 // mode.
8862 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
8863 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8864 if (IsLocal && !Subtarget.allowTaggedGlobals())
8865 // Use PC-relative addressing to access the symbol. This generates the
8866 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
8867 // %pcrel_lo(auipc)).
8868 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8869
8870 // Use PC-relative addressing to access the GOT for this symbol, then load
8871 // the address from the GOT. This generates the pattern (PseudoLGA sym),
8872 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8873 SDValue Load =
8874 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8875 MachineFunction &MF = DAG.getMachineFunction();
8876 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8877 MachinePointerInfo::getGOT(MF),
8878 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8879 MachineMemOperand::MOInvariant,
8880 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8881 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8882 return Load;
8883 }
8884
8885 switch (getTargetMachine().getCodeModel()) {
8886 default:
8887 reportFatalUsageError("Unsupported code model for lowering");
8888 case CodeModel::Small: {
8889 // Generate a sequence for accessing addresses within the first 2 GiB of
8890 // address space.
8891 if (Subtarget.hasVendorXqcili()) {
8892 // Use QC.E.LI to generate the address, as this is easier to relax than
8893 // LUI/ADDI.
8894 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8895 return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
8896 }
8897
8898 // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
8899 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
8900 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
8901 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8902 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
8903 }
8904 case CodeModel::Medium: {
8905 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8906 if (IsExternWeak) {
8907 // An extern weak symbol may be undefined, i.e. have value 0, which may
8908 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
8909 // symbol. This generates the pattern (PseudoLGA sym), which expands to
8910 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8911 SDValue Load =
8912 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8913 MachineFunction &MF = DAG.getMachineFunction();
8914 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8915 MachinePointerInfo::getGOT(MF),
8916 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8917 MachineMemOperand::MOInvariant,
8918 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8919 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8920 return Load;
8921 }
8922
8923 // Generate a sequence for accessing addresses within any 2GiB range within
8924 // the address space. This generates the pattern (PseudoLLA sym), which
8925 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
8926 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8927 }
8928 case CodeModel::Large: {
8929 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8930 return getLargeGlobalAddress(G, DL, Ty, DAG);
8931
8932 // Use PC-relative addressing for other node types.
8933 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8934 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8935 }
8936 }
8937}
8938
8939SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8940 SelectionDAG &DAG) const {
8941 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8942 assert(N->getOffset() == 0 && "unexpected offset in global node");
8943 const GlobalValue *GV = N->getGlobal();
8944 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8945}
8946
8947SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8948 SelectionDAG &DAG) const {
8949 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8950
8951 return getAddr(N, DAG);
8952}
8953
8954SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8955 SelectionDAG &DAG) const {
8956 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8957
8958 return getAddr(N, DAG);
8959}
8960
8961SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8962 SelectionDAG &DAG) const {
8963 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8964
8965 return getAddr(N, DAG);
8966}
8967
8968SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8969 SelectionDAG &DAG,
8970 bool UseGOT) const {
8971 SDLoc DL(N);
8972 EVT Ty = getPointerTy(DAG.getDataLayout());
8973 const GlobalValue *GV = N->getGlobal();
8974 MVT XLenVT = Subtarget.getXLenVT();
8975
8976 if (UseGOT) {
8977 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8978 // load the address from the GOT and add the thread pointer. This generates
8979 // the pattern (PseudoLA_TLS_IE sym), which expands to
8980 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
8981 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8982 SDValue Load =
8983 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
8984 MachineFunction &MF = DAG.getMachineFunction();
8985 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8986 MachinePointerInfo::getGOT(MF),
8987 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8988 MachineMemOperand::MOInvariant,
8989 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8990 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8991
8992 // Add the thread pointer.
8993 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8994 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
8995 }
8996
8997 // Generate a sequence for accessing the address relative to the thread
8998 // pointer, with the appropriate adjustment for the thread pointer offset.
8999 // This generates the pattern
9000 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
9001 SDValue AddrHi =
9002 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
9003 SDValue AddrAdd =
9004 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
9005 SDValue AddrLo =
9006 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
9007
9008 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9009 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9010 SDValue MNAdd =
9011 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
9012 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
9013}
9014
9015SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
9016 SelectionDAG &DAG) const {
9017 SDLoc DL(N);
9018 EVT Ty = getPointerTy(DAG.getDataLayout());
9019 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
9020 const GlobalValue *GV = N->getGlobal();
9021
9022 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9023 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
9024 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
9025 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9026 SDValue Load =
9027 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
9028
9029 // Prepare argument list to generate call.
9030 ArgListTy Args;
9031 Args.emplace_back(Load, CallTy);
9032
9033 // Setup call to __tls_get_addr.
9034 TargetLowering::CallLoweringInfo CLI(DAG);
9035 CLI.setDebugLoc(DL)
9036 .setChain(DAG.getEntryNode())
9037 .setLibCallee(CallingConv::C, CallTy,
9038 DAG.getExternalSymbol("__tls_get_addr", Ty),
9039 std::move(Args));
9040
9041 return LowerCallTo(CLI).first;
9042}
9043
9044SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
9045 SelectionDAG &DAG) const {
9046 SDLoc DL(N);
9047 EVT Ty = getPointerTy(DAG.getDataLayout());
9048 const GlobalValue *GV = N->getGlobal();
9049
9050 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9051 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
9052 //
9053 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
9054 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
9055 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
9056 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
9057 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9058 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
9059}
9060
9061SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
9062 SelectionDAG &DAG) const {
9063 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9064 assert(N->getOffset() == 0 && "unexpected offset in global node");
9065
9066 if (DAG.getTarget().useEmulatedTLS())
9067 return LowerToTLSEmulatedModel(N, DAG);
9068
9069 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
9070
9071 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
9072 CallingConv::GHC)
9073 reportFatalUsageError("In GHC calling convention TLS is not supported");
9074
9075 SDValue Addr;
9076 switch (Model) {
9077 case TLSModel::LocalExec:
9078 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9079 break;
9080 case TLSModel::InitialExec:
9081 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9082 break;
9083 case TLSModel::LocalDynamic:
9084 case TLSModel::GeneralDynamic:
9085 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9086 : getDynamicTLSAddr(N, DAG);
9087 break;
9088 }
9089
9090 return Addr;
9091}
9092
9093// Return true if Val is equal to (setcc LHS, RHS, CC).
9094// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9095// Otherwise, return std::nullopt.
9096static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9097 ISD::CondCode CC, SDValue Val) {
9098 assert(Val->getOpcode() == ISD::SETCC);
9099 SDValue LHS2 = Val.getOperand(0);
9100 SDValue RHS2 = Val.getOperand(1);
9101 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9102
9103 if (LHS == LHS2 && RHS == RHS2) {
9104 if (CC == CC2)
9105 return true;
9106 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9107 return false;
9108 } else if (LHS == RHS2 && RHS == LHS2) {
9110 if (CC == CC2)
9111 return true;
9112 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9113 return false;
9114 }
9115
9116 return std::nullopt;
9117}
9118
9119 static bool isSimm12Constant(SDValue V) {
9120 return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
9121}
9122
9123 static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
9124 const RISCVSubtarget &Subtarget) {
9125 SDValue CondV = N->getOperand(0);
9126 SDValue TrueV = N->getOperand(1);
9127 SDValue FalseV = N->getOperand(2);
9128 MVT VT = N->getSimpleValueType(0);
9129 SDLoc DL(N);
9130
9131 if (!Subtarget.hasConditionalMoveFusion()) {
9132 // (select c, -1, y) -> -c | y
9133 if (isAllOnesConstant(TrueV)) {
9134 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9135 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9136 }
9137 // (select c, y, -1) -> (c-1) | y
9138 if (isAllOnesConstant(FalseV)) {
9139 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9140 DAG.getAllOnesConstant(DL, VT));
9141 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9142 }
9143
9144 const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
9145
9146 // (select c, 0, y) -> (c-1) & y
9147 if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
9148 SDValue Neg =
9149 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
9150 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9151 }
9152 // (select c, y, 0) -> -c & y
9153 if (isNullConstant(FalseV) && (!HasCZero || isSimm12Constant(TrueV))) {
9154 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9155 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9156 }
9157 }
9158
9159 // select c, ~x, x --> xor -c, x
9160 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9161 const APInt &TrueVal = TrueV->getAsAPIntVal();
9162 const APInt &FalseVal = FalseV->getAsAPIntVal();
9163 if (~TrueVal == FalseVal) {
9164 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9165 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9166 }
9167 }
9168
9169 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9170 // when both truev and falsev are also setcc.
9171 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9172 FalseV.getOpcode() == ISD::SETCC) {
9173 SDValue LHS = CondV.getOperand(0);
9174 SDValue RHS = CondV.getOperand(1);
9175 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9176
9177 // (select x, x, y) -> x | y
9178 // (select !x, x, y) -> x & y
9179 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9180 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9181 DAG.getFreeze(FalseV));
9182 }
9183 // (select x, y, x) -> x & y
9184 // (select !x, y, x) -> x | y
9185 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9186 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9187 DAG.getFreeze(TrueV), FalseV);
9188 }
9189 }
9190
9191 return SDValue();
9192}
9193
9194// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9195// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9196 // For now we only consider the transformation profitable if `binOp(c0, c1)` ends up
9197// being `0` or `-1`. In such cases we can replace `select` with `and`.
9198// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9199// than `c0`?
9200static SDValue
9201 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
9202 const RISCVSubtarget &Subtarget) {
9203 if (Subtarget.hasShortForwardBranchOpt())
9204 return SDValue();
9205
9206 unsigned SelOpNo = 0;
9207 SDValue Sel = BO->getOperand(0);
9208 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9209 SelOpNo = 1;
9210 Sel = BO->getOperand(1);
9211 }
9212
9213 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9214 return SDValue();
9215
9216 unsigned ConstSelOpNo = 1;
9217 unsigned OtherSelOpNo = 2;
9218 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9219 ConstSelOpNo = 2;
9220 OtherSelOpNo = 1;
9221 }
9222 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9223 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9224 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9225 return SDValue();
9226
9227 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9228 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9229 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9230 return SDValue();
9231
9232 SDLoc DL(Sel);
9233 EVT VT = BO->getValueType(0);
9234
9235 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9236 if (SelOpNo == 1)
9237 std::swap(NewConstOps[0], NewConstOps[1]);
9238
9239 SDValue NewConstOp =
9240 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9241 if (!NewConstOp)
9242 return SDValue();
9243
9244 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9245 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9246 return SDValue();
9247
9248 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9249 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9250 if (SelOpNo == 1)
9251 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9252 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9253
9254 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9255 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9256 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9257}
9258
9259SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9260 SDValue CondV = Op.getOperand(0);
9261 SDValue TrueV = Op.getOperand(1);
9262 SDValue FalseV = Op.getOperand(2);
9263 SDLoc DL(Op);
9264 MVT VT = Op.getSimpleValueType();
9265 MVT XLenVT = Subtarget.getXLenVT();
9266
9267 // Lower vector SELECTs to VSELECTs by splatting the condition.
9268 if (VT.isVector()) {
9269 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9270 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9271 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9272 }
9273
9274 // Try some other optimizations before falling back to generic lowering.
9275 if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
9276 return V;
9277
9278 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9279 // nodes to implement the SELECT. Performing the lowering here allows for
9280 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9281 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9282 if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
9283
9284 // (select c, t, 0) -> (czero_eqz t, c)
9285 if (isNullConstant(FalseV))
9286 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9287 // (select c, 0, f) -> (czero_nez f, c)
9288 if (isNullConstant(TrueV))
9289 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9290
9291 // Check to see if a given operation is a 'NOT'; if so, return the negated
9292 // operand.
9293 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9294 using namespace llvm::SDPatternMatch;
9295 SDValue Xor;
9296 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9297 return Xor;
9298 }
9299 return std::nullopt;
9300 };
9301 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9302 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9303 if (TrueV.getOpcode() == ISD::AND &&
9304 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9305 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9306 ? getNotOperand(TrueV.getOperand(1))
9307 : getNotOperand(TrueV.getOperand(0));
9308 if (NotOperand) {
9309 SDValue CMOV =
9310 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9311 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9312 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
9313 }
9314 return DAG.getNode(
9315 ISD::OR, DL, VT, TrueV,
9316 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
9317 }
9318
9319 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
9320 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
9321 if (FalseV.getOpcode() == ISD::AND &&
9322 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
9323 auto NotOperand = (FalseV.getOperand(0) == TrueV)
9324 ? getNotOperand(FalseV.getOperand(1))
9325 : getNotOperand(FalseV.getOperand(0));
9326 if (NotOperand) {
9327 SDValue CMOV =
9328 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
9329 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9330 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
9331 }
9332 return DAG.getNode(
9333 ISD::OR, DL, VT, FalseV,
9334 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
9335 }
9336
9337 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
9338 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
9339 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9340 const APInt &TrueVal = TrueV->getAsAPIntVal();
9341 const APInt &FalseVal = FalseV->getAsAPIntVal();
9342
9343 // Prefer these over Zicond to avoid materializing an immediate:
9344 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
9345 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
9346 if (CondV.getOpcode() == ISD::SETCC &&
9347 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
9348 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9349 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
9350 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
9351 int64_t TrueImm = TrueVal.getSExtValue();
9352 int64_t FalseImm = FalseVal.getSExtValue();
9353 if (CCVal == ISD::SETGT)
9354 std::swap(TrueImm, FalseImm);
9355 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
9356 isInt<12>(TrueImm - FalseImm)) {
9357 SDValue SRA =
9358 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
9359 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
9360 SDValue AND =
9361 DAG.getNode(ISD::AND, DL, VT, SRA,
9362 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
9363 return DAG.getNode(ISD::ADD, DL, VT, AND,
9364 DAG.getSignedConstant(FalseImm, DL, VT));
9365 }
9366 }
9367 }
9368
9369 // Use SHL/ADDI (and possibly XORI) to avoid having to materialize
9370 // a constant in a register.
9371 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
9372 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
9373 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9374 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
9375 }
9376 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
9377 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
9378 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
9379 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9380 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
9381 }
9382
9383 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
9384 const int DeltaCost = RISCVMatInt::getIntMatCost(
9385 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9386 // Does the addend fold into an ADDI?
9387 if (Addend.isSignedIntN(12))
9388 return DeltaCost;
9389 const int AddendCost = RISCVMatInt::getIntMatCost(
9390 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9391 return AddendCost + DeltaCost;
9392 };
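// Pick whichever CZERO form needs the cheaper constant materialization for
// its delta (and addend, when the addend does not fold into an ADDI).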
9393 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
9394 getCost(TrueVal - FalseVal, FalseVal);
9395 SDValue LHSVal = DAG.getConstant(
9396 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
9397 SDValue CMOV =
9398 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9399 DL, VT, LHSVal, CondV);
9400 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
9401 }
9402
9403 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
9404 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
9405 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
9406 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
9407 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
9408 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
9409 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
9410 // Fall back to XORI if Const == -0x800
9411 if (RawConstVal == -0x800) {
9412 SDValue XorOp = DAG.getNode(ISD::XOR, DL, VT, RegV, ConstVal);
9413 SDValue CMOV =
9414 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9415 DL, VT, XorOp, CondV);
9416 return DAG.getNode(ISD::XOR, DL, VT, CMOV, ConstVal);
9417 }
9418 // This is efficient only if the constant and its negation fit into an ADDI.
9419 // Prefer ADD/SUB over XOR since they can be compressed for small immediates.
9420 if (isInt<12>(RawConstVal)) {
9421 SDValue SubOp = DAG.getNode(ISD::SUB, DL, VT, RegV, ConstVal);
9422 SDValue CMOV =
9423 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9424 DL, VT, SubOp, CondV);
9425 return DAG.getNode(ISD::ADD, DL, VT, CMOV, ConstVal);
9426 }
9427 }
9428
9429 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
9430 // Unless we have the short forward branch optimization.
9431 if (!Subtarget.hasConditionalMoveFusion())
9432 return DAG.getNode(
9433 ISD::OR, DL, VT,
9434 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
9435 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
9436 SDNodeFlags::Disjoint);
9437 }
9438
9439 if (Op.hasOneUse()) {
9440 unsigned UseOpc = Op->user_begin()->getOpcode();
9441 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
9442 SDNode *BinOp = *Op->user_begin();
9443 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
9444 DAG, Subtarget)) {
9445 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
9446 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
9447 // may return a constant node and cause a crash in lowerSELECT.
9448 if (NewSel.getOpcode() == ISD::SELECT)
9449 return lowerSELECT(NewSel, DAG);
9450 return NewSel;
9451 }
9452 }
9453 }
9454
9455 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
9456 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
9457 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
9458 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
9459 if (FPTV && FPFV) {
9460 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
9461 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
9462 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
9463 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
9464 DAG.getConstant(1, DL, XLenVT));
9465 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
9466 }
9467 }
9468
9469 // If the condition is not an integer SETCC which operates on XLenVT, we need
9470 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
9471 // (select condv, truev, falsev)
9472 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
9473 if (CondV.getOpcode() != ISD::SETCC ||
9474 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
9475 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9476 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
9477
9478 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
9479
9480 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9481 }
9482
9483 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
9484 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
9485 // advantage of the integer compare+branch instructions. i.e.:
9486 // (select (setcc lhs, rhs, cc), truev, falsev)
9487 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
9488 SDValue LHS = CondV.getOperand(0);
9489 SDValue RHS = CondV.getOperand(1);
9490 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9491
9492 // Special case for a select of 2 constants that have a difference of 1.
9493 // Normally this is done by DAGCombine, but if the select is introduced by
9494 // type legalization or op legalization, we miss it. Restricting to SETLT
9495 // case for now because that is what signed saturating add/sub need.
9496 // FIXME: We don't need the condition to be SETLT or even a SETCC,
9497 // but we would probably want to swap the true/false values if the condition
9498 // is SETGE/SETLE to avoid an XORI.
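// For example (illustrative only): (select (setlt x, y), 6, 5) becomes
// (add (setlt x, y), 5), and (select (setlt x, y), -1, 0) becomes
// (sub 0, (setlt x, y)).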
9499 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
9500 CCVal == ISD::SETLT) {
9501 const APInt &TrueVal = TrueV->getAsAPIntVal();
9502 const APInt &FalseVal = FalseV->getAsAPIntVal();
9503 if (TrueVal - 1 == FalseVal)
9504 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
9505 if (TrueVal + 1 == FalseVal)
9506 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
9507 }
9508
9509 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9510 // 1 < x ? x : 1 -> 0 < x ? x : 1
9511 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
9512 RHS == TrueV && LHS == FalseV) {
9513 LHS = DAG.getConstant(0, DL, VT);
9514 // 0 <u x is the same as x != 0.
9515 if (CCVal == ISD::SETULT) {
9516 std::swap(LHS, RHS);
9517 CCVal = ISD::SETNE;
9518 }
9519 }
9520
9521 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
9522 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
9523 RHS == FalseV) {
9524 RHS = DAG.getConstant(0, DL, VT);
9525 }
9526
9527 SDValue TargetCC = DAG.getCondCode(CCVal);
9528
9529 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
9530 // (select (setcc lhs, rhs, CC), constant, falsev)
9531 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
9532 std::swap(TrueV, FalseV);
9533 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
9534 }
9535
9536 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
9537 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9538}
9539
9540SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
9541 SDValue CondV = Op.getOperand(1);
9542 SDLoc DL(Op);
9543 MVT XLenVT = Subtarget.getXLenVT();
9544
9545 if (CondV.getOpcode() == ISD::SETCC &&
9546 CondV.getOperand(0).getValueType() == XLenVT) {
9547 SDValue LHS = CondV.getOperand(0);
9548 SDValue RHS = CondV.getOperand(1);
9549 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9550
9551 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9552
9553 SDValue TargetCC = DAG.getCondCode(CCVal);
9554 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9555 LHS, RHS, TargetCC, Op.getOperand(2));
9556 }
9557
9558 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9559 CondV, DAG.getConstant(0, DL, XLenVT),
9560 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
9561}
9562
9563SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
9564 MachineFunction &MF = DAG.getMachineFunction();
9565 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
9566
9567 SDLoc DL(Op);
9568 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
9569 getPointerTy(MF.getDataLayout()));
9570
9571 // vastart just stores the address of the VarArgsFrameIndex slot into the
9572 // memory location argument.
9573 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
9574 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
9575 MachinePointerInfo(SV));
9576}
9577
9578SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
9579 SelectionDAG &DAG) const {
9580 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9581 MachineFunction &MF = DAG.getMachineFunction();
9582 MachineFrameInfo &MFI = MF.getFrameInfo();
9583 MFI.setFrameAddressIsTaken(true);
9584 Register FrameReg = RI.getFrameRegister(MF);
9585 int XLenInBytes = Subtarget.getXLen() / 8;
9586
9587 EVT VT = Op.getValueType();
9588 SDLoc DL(Op);
9589 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
9590 unsigned Depth = Op.getConstantOperandVal(0);
9591 while (Depth--) {
9592 int Offset = -(XLenInBytes * 2);
9593 SDValue Ptr = DAG.getNode(
9594 ISD::ADD, DL, VT, FrameAddr,
9595 DAG.getSignedConstant(Offset, DL, VT));
9596 FrameAddr =
9597 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
9598 }
9599 return FrameAddr;
9600}
9601
9602SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
9603 SelectionDAG &DAG) const {
9604 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9605 MachineFunction &MF = DAG.getMachineFunction();
9606 MachineFrameInfo &MFI = MF.getFrameInfo();
9607 MFI.setReturnAddressIsTaken(true);
9608 MVT XLenVT = Subtarget.getXLenVT();
9609 int XLenInBytes = Subtarget.getXLen() / 8;
9610
9611 EVT VT = Op.getValueType();
9612 SDLoc DL(Op);
9613 unsigned Depth = Op.getConstantOperandVal(0);
9614 if (Depth) {
9615 int Off = -XLenInBytes;
9616 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
9617 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
9618 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
9619 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
9620 MachinePointerInfo());
9621 }
9622
9623 // Return the value of the return address register, marking it an implicit
9624 // live-in.
9625 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
9626 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
9627}
9628
9629SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
9630 SelectionDAG &DAG) const {
9631 SDLoc DL(Op);
9632 SDValue Lo = Op.getOperand(0);
9633 SDValue Hi = Op.getOperand(1);
9634 SDValue Shamt = Op.getOperand(2);
9635 EVT VT = Lo.getValueType();
9636
9637 // if Shamt-XLEN < 0: // Shamt < XLEN
9638 // Lo = Lo << Shamt
9639 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
9640 // else:
9641 // Lo = 0
9642 // Hi = Lo << (Shamt-XLEN)
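// For example (illustrative only), on RV32 a 64-bit shift left of the pair
// (Lo, Hi) by 40 takes the 'else' branch (Shamt-XLEN = 8 >= 0), giving
// Lo = 0 and Hi = Lo << 8.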
9643
9644 SDValue Zero = DAG.getConstant(0, DL, VT);
9645 SDValue One = DAG.getConstant(1, DL, VT);
9646 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9647 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9648 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9649 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9650
9651 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
9652 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
9653 SDValue ShiftRightLo =
9654 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
9655 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
9656 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
9657 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
9658
9659 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9660
9661 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
9662 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9663
9664 SDValue Parts[2] = {Lo, Hi};
9665 return DAG.getMergeValues(Parts, DL);
9666}
9667
9668SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
9669 bool IsSRA) const {
9670 SDLoc DL(Op);
9671 SDValue Lo = Op.getOperand(0);
9672 SDValue Hi = Op.getOperand(1);
9673 SDValue Shamt = Op.getOperand(2);
9674 EVT VT = Lo.getValueType();
9675
9676 // SRA expansion:
9677 // if Shamt-XLEN < 0: // Shamt < XLEN
9678 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9679 // Hi = Hi >>s Shamt
9680 // else:
9681 // Lo = Hi >>s (Shamt-XLEN);
9682 // Hi = Hi >>s (XLEN-1)
9683 //
9684 // SRL expansion:
9685 // if Shamt-XLEN < 0: // Shamt < XLEN
9686 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9687 // Hi = Hi >>u Shamt
9688 // else:
9689 // Lo = Hi >>u (Shamt-XLEN);
9690 // Hi = 0;
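// For example (illustrative only), on RV32 a 64-bit SRL of the pair (Lo, Hi)
// by 40 takes the 'else' branch, giving Lo = Hi >>u 8 and Hi = 0; for SRA
// the high part becomes Hi >>s 31 instead.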
9691
9692 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
9693
9694 SDValue Zero = DAG.getConstant(0, DL, VT);
9695 SDValue One = DAG.getConstant(1, DL, VT);
9696 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9697 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9698 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9699 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9700
9701 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
9702 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
9703 SDValue ShiftLeftHi =
9704 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
9705 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
9706 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
9707 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
9708 SDValue HiFalse =
9709 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
9710
9711 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9712
9713 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
9714 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9715
9716 SDValue Parts[2] = {Lo, Hi};
9717 return DAG.getMergeValues(Parts, DL);
9718}
9719
9720// Lower splats of i1 types to SETCC. For each mask vector type, we have a
9721// legal equivalently-sized i8 type, so we can use that as a go-between.
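// For example (illustrative only), splatting a non-constant i1 %c to nxv4i1
// becomes (setcc (splat_vector (and %c, 1)):nxv4i8, (splat_vector 0), ne).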
9722SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
9723 SelectionDAG &DAG) const {
9724 SDLoc DL(Op);
9725 MVT VT = Op.getSimpleValueType();
9726 SDValue SplatVal = Op.getOperand(0);
9727 // All-zeros or all-ones splats are handled specially.
9728 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
9729 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9730 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
9731 }
9732 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
9733 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9734 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
9735 }
9736 MVT InterVT = VT.changeVectorElementType(MVT::i8);
9737 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
9738 DAG.getConstant(1, DL, SplatVal.getValueType()));
9739 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
9740 SDValue Zero = DAG.getConstant(0, DL, InterVT);
9741 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
9742}
9743
9744// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
9745// illegal (currently only vXi64 RV32).
9746// FIXME: We could also catch non-constant sign-extended i32 values and lower
9747// them to VMV_V_X_VL.
9748SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
9749 SelectionDAG &DAG) const {
9750 SDLoc DL(Op);
9751 MVT VecVT = Op.getSimpleValueType();
9752 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
9753 "Unexpected SPLAT_VECTOR_PARTS lowering");
9754
9755 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
9756 SDValue Lo = Op.getOperand(0);
9757 SDValue Hi = Op.getOperand(1);
9758
9759 MVT ContainerVT = VecVT;
9760 if (VecVT.isFixedLengthVector())
9761 ContainerVT = getContainerForFixedLengthVector(VecVT);
9762
9763 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9764
9765 SDValue Res =
9766 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
9767
9768 if (VecVT.isFixedLengthVector())
9769 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
9770
9771 return Res;
9772}
9773
9774// Custom-lower extensions from mask vectors by using a vselect either with 1
9775// for zero/any-extension or -1 for sign-extension:
9776// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
9777// Note that any-extension is lowered identically to zero-extension.
9778SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
9779 int64_t ExtTrueVal) const {
9780 SDLoc DL(Op);
9781 MVT VecVT = Op.getSimpleValueType();
9782 SDValue Src = Op.getOperand(0);
9783 // Only custom-lower extensions from mask types
9784 assert(Src.getValueType().isVector() &&
9785 Src.getValueType().getVectorElementType() == MVT::i1);
9786
9787 if (VecVT.isScalableVector()) {
9788 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
9789 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
9790 if (Src.getOpcode() == ISD::XOR &&
9791 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
9792 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
9793 SplatTrueVal);
9794 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
9795 }
9796
9797 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
9798 MVT I1ContainerVT =
9799 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
9800
9801 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
9802
9803 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9804
9805 MVT XLenVT = Subtarget.getXLenVT();
9806 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
9807 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
9808
9809 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9810 SDValue Xor = Src.getOperand(0);
9811 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
9812 SDValue ScalableOnes = Xor.getOperand(1);
9813 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
9814 ScalableOnes.getOperand(0).isUndef() &&
9815 ISD::isConstantSplatVectorAllOnes(
9816 ScalableOnes.getOperand(1).getNode())) {
9817 CC = Xor.getOperand(0);
9818 std::swap(SplatZero, SplatTrueVal);
9819 }
9820 }
9821 }
9822
9823 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9824 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9825 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9826 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
9827 SDValue Select =
9828 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
9829 SplatZero, DAG.getUNDEF(ContainerVT), VL);
9830
9831 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
9832}
9833
9834// Custom-lower truncations from vectors to mask vectors by using a mask and a
9835// setcc operation:
9836// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
9837SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
9838 SelectionDAG &DAG) const {
9839 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
9840 SDLoc DL(Op);
9841 EVT MaskVT = Op.getValueType();
9842 // Only expect to custom-lower truncations to mask types
9843 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
9844 "Unexpected type for vector mask lowering");
9845 SDValue Src = Op.getOperand(0);
9846 MVT VecVT = Src.getSimpleValueType();
9847 SDValue Mask, VL;
9848 if (IsVPTrunc) {
9849 Mask = Op.getOperand(1);
9850 VL = Op.getOperand(2);
9851 }
9852 // If this is a fixed vector, we need to convert it to a scalable vector.
9853 MVT ContainerVT = VecVT;
9854
9855 if (VecVT.isFixedLengthVector()) {
9856 ContainerVT = getContainerForFixedLengthVector(VecVT);
9857 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9858 if (IsVPTrunc) {
9859 MVT MaskContainerVT =
9860 getContainerForFixedLengthVector(Mask.getSimpleValueType());
9861 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
9862 }
9863 }
9864
9865 if (!IsVPTrunc) {
9866 std::tie(Mask, VL) =
9867 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9868 }
9869
9870 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
9871 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
9872
9873 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9874 DAG.getUNDEF(ContainerVT), SplatOne, VL);
9875 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9876 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9877
9878 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
9879 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
9880 DAG.getUNDEF(ContainerVT), Mask, VL);
9881 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
9882 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
9883 DAG.getUNDEF(MaskContainerVT), Mask, VL});
9884 if (MaskVT.isFixedLengthVector())
9885 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
9886 return Trunc;
9887}
9888
9889SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
9890 SelectionDAG &DAG) const {
9891 unsigned Opc = Op.getOpcode();
9892 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
9893 SDLoc DL(Op);
9894
9895 MVT VT = Op.getSimpleValueType();
9896 // Only custom-lower vector truncates
9897 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9898
9899 // Truncates to mask types are handled differently
9900 if (VT.getVectorElementType() == MVT::i1)
9901 return lowerVectorMaskTruncLike(Op, DAG);
9902
9903 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
9904 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
9905 // truncate by one power of two at a time.
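// For example (illustrative only), a vXi64 -> vXi8 truncate is emitted as
// three steps, i64 -> i32 -> i16 -> i8, each of which typically selects to a
// vnsrl.wi with a shift amount of 0.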
9906 MVT DstEltVT = VT.getVectorElementType();
9907
9908 SDValue Src = Op.getOperand(0);
9909 MVT SrcVT = Src.getSimpleValueType();
9910 MVT SrcEltVT = SrcVT.getVectorElementType();
9911
9912 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
9913 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
9914 "Unexpected vector truncate lowering");
9915
9916 MVT ContainerVT = SrcVT;
9917 SDValue Mask, VL;
9918 if (IsVPTrunc) {
9919 Mask = Op.getOperand(1);
9920 VL = Op.getOperand(2);
9921 }
9922 if (SrcVT.isFixedLengthVector()) {
9923 ContainerVT = getContainerForFixedLengthVector(SrcVT);
9924 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9925 if (IsVPTrunc) {
9926 MVT MaskVT = getMaskTypeFor(ContainerVT);
9927 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9928 }
9929 }
9930
9931 SDValue Result = Src;
9932 if (!IsVPTrunc) {
9933 std::tie(Mask, VL) =
9934 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9935 }
9936
9937 unsigned NewOpc;
9938 if (Opc == ISD::TRUNCATE_SSAT_S)
9939 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
9940 else if (Opc == ISD::TRUNCATE_USAT_U)
9941 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
9942 else
9943 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
9944
9945 do {
9946 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
9947 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
9948 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
9949 } while (SrcEltVT != DstEltVT);
9950
9951 if (SrcVT.isFixedLengthVector())
9952 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9953
9954 return Result;
9955}
9956
9957SDValue
9958RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
9959 SelectionDAG &DAG) const {
9960 SDLoc DL(Op);
9961 SDValue Chain = Op.getOperand(0);
9962 SDValue Src = Op.getOperand(1);
9963 MVT VT = Op.getSimpleValueType();
9964 MVT SrcVT = Src.getSimpleValueType();
9965 MVT ContainerVT = VT;
9966 if (VT.isFixedLengthVector()) {
9967 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9968 ContainerVT =
9969 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9970 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9971 }
9972
9973 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9974
9975 // RVV can only widen/truncate fp to types double/half the size of the source.
9976 if ((VT.getVectorElementType() == MVT::f64 &&
9977 (SrcVT.getVectorElementType() == MVT::f16 ||
9978 SrcVT.getVectorElementType() == MVT::bf16)) ||
9979 ((VT.getVectorElementType() == MVT::f16 ||
9980 VT.getVectorElementType() == MVT::bf16) &&
9981 SrcVT.getVectorElementType() == MVT::f64)) {
9982 // For double rounding, the intermediate rounding should be round-to-odd.
9983 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
9984 ? RISCVISD::STRICT_FP_EXTEND_VL
9985 : RISCVISD::STRICT_VFNCVT_ROD_VL;
9986 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9987 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
9988 Chain, Src, Mask, VL);
9989 Chain = Src.getValue(1);
9990 }
9991
9992 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
9993 ? RISCVISD::STRICT_FP_EXTEND_VL
9994 : RISCVISD::STRICT_FP_ROUND_VL;
9995 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
9996 Chain, Src, Mask, VL);
9997 if (VT.isFixedLengthVector()) {
9998 // StrictFP operations have two result values. Their lowered result should
9999 // have the same result count.
10000 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10001 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10002 }
10003 return Res;
10004}
10005
10006SDValue
10007RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
10008 SelectionDAG &DAG) const {
10009 bool IsVP =
10010 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
10011 bool IsExtend =
10012 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
10013 // RVV can only truncate fp to types half the size of the source. We
10014 // custom-lower f64->f16 rounds via RVV's round-to-odd float
10015 // conversion instruction.
10016 SDLoc DL(Op);
10017 MVT VT = Op.getSimpleValueType();
10018
10019 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10020
10021 SDValue Src = Op.getOperand(0);
10022 MVT SrcVT = Src.getSimpleValueType();
10023
10024 bool IsDirectExtend =
10025 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
10026 (SrcVT.getVectorElementType() != MVT::f16 &&
10027 SrcVT.getVectorElementType() != MVT::bf16));
10028 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
10029 VT.getVectorElementType() != MVT::bf16) ||
10030 SrcVT.getVectorElementType() != MVT::f64);
10031
10032 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
10033
10034 // We have regular SD node patterns for direct non-VL extends.
10035 if (VT.isScalableVector() && IsDirectConv && !IsVP)
10036 return Op;
10037
10038 // Prepare any fixed-length vector operands.
10039 MVT ContainerVT = VT;
10040 SDValue Mask, VL;
10041 if (IsVP) {
10042 Mask = Op.getOperand(1);
10043 VL = Op.getOperand(2);
10044 }
10045 if (VT.isFixedLengthVector()) {
10046 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
10047 ContainerVT =
10048 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10049 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10050 if (IsVP) {
10051 MVT MaskVT = getMaskTypeFor(ContainerVT);
10052 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10053 }
10054 }
10055
10056 if (!IsVP)
10057 std::tie(Mask, VL) =
10058 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10059
10060 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
10061
10062 if (IsDirectConv) {
10063 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
10064 if (VT.isFixedLengthVector())
10065 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
10066 return Src;
10067 }
10068
10069 unsigned InterConvOpc =
10070 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
10071
10072 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10073 SDValue IntermediateConv =
10074 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
10075 SDValue Result =
10076 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
10077 if (VT.isFixedLengthVector())
10078 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10079 return Result;
10080}
10081
10082// Given a scalable vector type and an index into it, returns the type for the
10083// smallest subvector that the index fits in. This can be used to reduce LMUL
10084// for operations like vslidedown.
10085//
10086// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10087static std::optional<MVT>
10088getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10089 const RISCVSubtarget &Subtarget) {
10090 assert(VecVT.isScalableVector());
10091 const unsigned EltSize = VecVT.getScalarSizeInBits();
10092 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10093 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10094 MVT SmallerVT;
10095 if (MaxIdx < MinVLMAX)
10096 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10097 else if (MaxIdx < MinVLMAX * 2)
10098 SmallerVT =
10099 RISCVTargetLowering::getM1VT(VecVT).getDoubleNumVectorElementsVT();
10100 else if (MaxIdx < MinVLMAX * 4)
10101 SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10102 .getDoubleNumVectorElementsVT()
10103 .getDoubleNumVectorElementsVT();
10104 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10105 return std::nullopt;
10106 return SmallerVT;
10107}
10108
10109 static bool isValidVisniInsertExtractIndex(SDValue Idx) {
10110 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10111 if (!IdxC || isNullConstant(Idx))
10112 return false;
10113 return isUInt<5>(IdxC->getZExtValue());
10114}
10115
10116// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10117// first position of a vector, and that vector is slid up to the insert index.
10118// By limiting the active vector length to index+1 and merging with the
10119// original vector (with an undisturbed tail policy for elements >= VL), we
10120// achieve the desired result of leaving all elements untouched except the one
10121// at VL-1, which is replaced with the desired value.
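// For example (illustrative only), inserting a scalar at index 2 of a v4i32
// typically selects to something like:
//   vsetivli zero, 3, e32, m1, tu, ma   ; VL = index + 1, tail undisturbed
//   vmv.s.x     v9, a0                  ; scalar into element 0 of a temp
//   vslideup.vi v8, v9, 2               ; slide it up into position 2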
10122SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10123 SelectionDAG &DAG) const {
10124 SDLoc DL(Op);
10125 MVT VecVT = Op.getSimpleValueType();
10126 MVT XLenVT = Subtarget.getXLenVT();
10127 SDValue Vec = Op.getOperand(0);
10128 SDValue Val = Op.getOperand(1);
10129 MVT ValVT = Val.getSimpleValueType();
10130 SDValue Idx = Op.getOperand(2);
10131
10132 if (VecVT.getVectorElementType() == MVT::i1) {
10133 // FIXME: For now we just promote to an i8 vector and insert into that,
10134 // but this is probably not optimal.
10135 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10136 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10137 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10138 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10139 }
10140
10141 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10142 ValVT == MVT::bf16) {
10143 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10144 MVT IntVT = VecVT.changeTypeToInteger();
10145 SDValue IntInsert = DAG.getNode(
10146 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10147 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10148 return DAG.getBitcast(VecVT, IntInsert);
10149 }
10150
10151 MVT ContainerVT = VecVT;
10152 // If the operand is a fixed-length vector, convert to a scalable one.
10153 if (VecVT.isFixedLengthVector()) {
10154 ContainerVT = getContainerForFixedLengthVector(VecVT);
10155 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10156 }
10157
10158 // If we know the index we're going to insert at, we can shrink Vec so that
10159 // we're performing the scalar inserts and slideup on a smaller LMUL.
10160 SDValue OrigVec = Vec;
10161 std::optional<unsigned> AlignedIdx;
10162 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10163 const unsigned OrigIdx = IdxC->getZExtValue();
10164 // Do we know an upper bound on LMUL?
10165 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10166 DL, DAG, Subtarget)) {
10167 ContainerVT = *ShrunkVT;
10168 AlignedIdx = 0;
10169 }
10170
10171 // If we're compiling for an exact VLEN value, we can always perform
10172 // the insert in m1 as we can determine the register corresponding to
10173 // the index in the register group.
10174 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10175 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10176 EVT ElemVT = VecVT.getVectorElementType();
10177 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10178 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10179 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10180 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10181 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10182 ContainerVT = M1VT;
10183 }
10184
10185 if (AlignedIdx)
10186 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
10187 }
10188
10189 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
10190 // Even i64-element vectors on RV32 can be lowered without scalar
10191 // legalization if the most-significant 32 bits of the value are not affected
10192 // by the sign-extension of the lower 32 bits.
10193 // TODO: We could also catch sign extensions of a 32-bit value.
10194 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
10195 const auto *CVal = cast<ConstantSDNode>(Val);
10196 if (isInt<32>(CVal->getSExtValue())) {
10197 IsLegalInsert = true;
10198 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
10199 }
10200 }
10201
10202 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10203
10204 SDValue ValInVec;
10205
10206 if (IsLegalInsert) {
10207 unsigned Opc =
10208 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
10209 if (isNullConstant(Idx)) {
10210 if (!VecVT.isFloatingPoint())
10211 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10212 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
10213
10214 if (AlignedIdx)
10215 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10216 if (!VecVT.isFixedLengthVector())
10217 return Vec;
10218 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10219 }
10220
10221 // Use ri.vinsert.v.x if available.
10222 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
10223 isValidVisniInsertExtractIndex(Idx)) {
10224 // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
10225 SDValue PolicyOp =
10226 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
10227 Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
10228 VL, PolicyOp);
10229 if (AlignedIdx)
10230 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10231 if (!VecVT.isFixedLengthVector())
10232 return Vec;
10233 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10234 }
10235
10236 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
10237 } else {
10238 // On RV32, i64-element vectors must be specially handled to place the
10239 // value at element 0, by using two vslide1down instructions in sequence on
10240 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
10241 // this.
10242 SDValue ValLo, ValHi;
10243 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
10244 MVT I32ContainerVT =
10245 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
10246 SDValue I32Mask =
10247 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
10248 // Limit the active VL to two.
10249 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
10250 // If the Idx is 0 we can insert directly into the vector.
10251 if (isNullConstant(Idx)) {
10252 // First slide in the lo value, then the hi value above it. We use slide1down
10253 // to avoid the register group overlap constraint of vslide1up.
10254 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10255 Vec, Vec, ValLo, I32Mask, InsertI64VL);
10256 // If the source vector is undef don't pass along the tail elements from
10257 // the previous slide1down.
10258 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
10259 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10260 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
10261 // Bitcast back to the right container type.
10262 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10263
10264 if (AlignedIdx)
10265 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
10266 if (!VecVT.isFixedLengthVector())
10267 return ValInVec;
10268 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
10269 }
10270
10271 // First slide in the lo value, then the hi value above it. We use slide1down
10272 // to avoid the register group overlap constraint of vslide1up.
10273 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10274 DAG.getUNDEF(I32ContainerVT),
10275 DAG.getUNDEF(I32ContainerVT), ValLo,
10276 I32Mask, InsertI64VL);
10277 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10278 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
10279 I32Mask, InsertI64VL);
10280 // Bitcast back to the right container type.
10281 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10282 }
10283
10284 // Now that the value is in a vector, slide it into position.
10285 SDValue InsertVL =
10286 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
10287
10288 // Use tail agnostic policy if Idx is the last index of Vec.
10289 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10290 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
10291 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
10292 Policy = RISCVVType::TAIL_AGNOSTIC;
10293 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
10294 Idx, Mask, InsertVL, Policy);
10295
10296 if (AlignedIdx)
10297 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
10298 if (!VecVT.isFixedLengthVector())
10299 return Slideup;
10300 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
10301}
10302
10303// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
10304// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
10305// types this is done using VMV_X_S to allow us to glean information about the
10306// sign bits of the result.
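// For example (illustrative only), extracting element 2 of a v4i32 typically
// selects to:
//   vslidedown.vi v8, v8, 2
//   vmv.x.s       a0, v8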
10307SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
10308 SelectionDAG &DAG) const {
10309 SDLoc DL(Op);
10310 SDValue Idx = Op.getOperand(1);
10311 SDValue Vec = Op.getOperand(0);
10312 EVT EltVT = Op.getValueType();
10313 MVT VecVT = Vec.getSimpleValueType();
10314 MVT XLenVT = Subtarget.getXLenVT();
10315
10316 if (VecVT.getVectorElementType() == MVT::i1) {
10317 // Use vfirst.m to extract the first bit.
10318 if (isNullConstant(Idx)) {
10319 MVT ContainerVT = VecVT;
10320 if (VecVT.isFixedLengthVector()) {
10321 ContainerVT = getContainerForFixedLengthVector(VecVT);
10322 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10323 }
10324 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10325 SDValue Vfirst =
10326 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
10327 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
10328 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10329 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10330 }
10331 if (VecVT.isFixedLengthVector()) {
10332 unsigned NumElts = VecVT.getVectorNumElements();
10333 if (NumElts >= 8) {
10334 MVT WideEltVT;
10335 unsigned WidenVecLen;
10336 SDValue ExtractElementIdx;
10337 SDValue ExtractBitIdx;
10338 unsigned MaxEEW = Subtarget.getELen();
10339 MVT LargestEltVT = MVT::getIntegerVT(
10340 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
10341 if (NumElts <= LargestEltVT.getSizeInBits()) {
10342 assert(isPowerOf2_32(NumElts) &&
10343 "the number of elements should be power of 2");
10344 WideEltVT = MVT::getIntegerVT(NumElts);
10345 WidenVecLen = 1;
10346 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
10347 ExtractBitIdx = Idx;
10348 } else {
10349 WideEltVT = LargestEltVT;
10350 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
10351 // extract element index = index / element width
10352 ExtractElementIdx = DAG.getNode(
10353 ISD::SRL, DL, XLenVT, Idx,
10354 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
10355 // mask bit index = index % element width
10356 ExtractBitIdx = DAG.getNode(
10357 ISD::AND, DL, XLenVT, Idx,
10358 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
10359 }
10360 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
10361 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
10362 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
10363 Vec, ExtractElementIdx);
10364 // Extract the bit from GPR.
10365 SDValue ShiftRight =
10366 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
10367 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
10368 DAG.getConstant(1, DL, XLenVT));
10369 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10370 }
10371 }
10372 // Otherwise, promote to an i8 vector and extract from that.
10373 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10374 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10375 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
10376 }
10377
10378 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10379 EltVT == MVT::bf16) {
10380 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
10381 MVT IntVT = VecVT.changeTypeToInteger();
10382 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
10383 SDValue IntExtract =
10384 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
10385 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
10386 }
10387
10388 // If this is a fixed vector, we need to convert it to a scalable vector.
10389 MVT ContainerVT = VecVT;
10390 if (VecVT.isFixedLengthVector()) {
10391 ContainerVT = getContainerForFixedLengthVector(VecVT);
10392 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10393 }
10394
10395 // If we're compiling for an exact VLEN value and we have a known
10396 // constant index, we can always perform the extract in m1 (or
10397 // smaller) as we can determine the register corresponding to
10398 // the index in the register group.
10399 const auto VLen = Subtarget.getRealVLen();
10400 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10401 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
10402 MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10403 unsigned OrigIdx = IdxC->getZExtValue();
10404 EVT ElemVT = VecVT.getVectorElementType();
10405 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
10406 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10407 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10408 unsigned ExtractIdx =
10409 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10410 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
10411 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10412 ContainerVT = M1VT;
10413 }
10414
10415 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
10416 // contains our index.
10417 std::optional<uint64_t> MaxIdx;
10418 if (VecVT.isFixedLengthVector())
10419 MaxIdx = VecVT.getVectorNumElements() - 1;
10420 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
10421 MaxIdx = IdxC->getZExtValue();
10422 if (MaxIdx) {
10423 if (auto SmallerVT =
10424 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
10425 ContainerVT = *SmallerVT;
10426 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
10427 }
10428 }
10429
10430 // Use ri.vextract.x.v if available.
10431 // TODO: Avoid index 0 and just use the vmv.x.s
10432 if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
10433 isValidVisniInsertExtractIndex(Idx)) {
10434 SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
10435 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
10436 }
10437
10438 // If after narrowing, the required slide is still greater than LMUL2,
10439 // fall back to generic expansion and go through the stack. This is done
10440 // for a subtle reason: extracting *all* elements out of a vector is
10441 // widely expected to be linear in vector size, but because vslidedown
10442 // is linear in LMUL, performing N extracts using vslidedown becomes
10443 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
10444 // seems to have the same problem (the store is linear in LMUL), but the
10445 // generic expansion *memoizes* the store, and thus for many extracts of
10446 // the same vector we end up with one store and a bunch of loads.
10447 // TODO: We don't have the same code for insert_vector_elt because we
10448 // have BUILD_VECTOR and handle the degenerate case there. Should we
10449 // consider adding an inverse BUILD_VECTOR node?
10450 MVT LMUL2VT =
10451 RISCVTargetLowering::getM1VT(ContainerVT).getDoubleNumVectorElementsVT();
10452 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
10453 return SDValue();
10454
10455 // If the index is 0, the vector is already in the right position.
10456 if (!isNullConstant(Idx)) {
10457 // Use a VL of 1 to avoid processing more elements than we need.
10458 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10459 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10460 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10461 }
10462
10463 if (!EltVT.isInteger()) {
10464 // Floating-point extracts are handled in TableGen.
10465 return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
10466 }
10467
10468 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10469 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
10470}
10471
10472// Some RVV intrinsics may claim that they want an integer operand to be
10473// promoted or expanded.
10474 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
10475 const RISCVSubtarget &Subtarget) {
10476 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
10477 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
10478 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
10479 "Unexpected opcode");
10480
10481 if (!Subtarget.hasVInstructions())
10482 return SDValue();
10483
10484 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10485 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10486 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10487
10488 SDLoc DL(Op);
10489
10490 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10491 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10492 if (!II || !II->hasScalarOperand())
10493 return SDValue();
10494
10495 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
10496 assert(SplatOp < Op.getNumOperands());
10497
10498 SmallVector<SDValue, 8> Operands(Op->ops());
10499 SDValue &ScalarOp = Operands[SplatOp];
10500 MVT OpVT = ScalarOp.getSimpleValueType();
10501 MVT XLenVT = Subtarget.getXLenVT();
10502
10503 // If this isn't a scalar, or its type is XLenVT we're done.
10504 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10505 return SDValue();
10506
10507 // Simplest case is that the operand needs to be promoted to XLenVT.
10508 if (OpVT.bitsLT(XLenVT)) {
10509 // If the operand is a constant, sign extend to increase our chances
10510 // of being able to use a .vi instruction. ANY_EXTEND would become a
10511 // zero extend and the simm5 check in isel would fail.
10512 // FIXME: Should we ignore the upper bits in isel instead?
10513 unsigned ExtOpc =
10514 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10515 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10516 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10517 }
10518
10519 // Use the previous operand to get the vXi64 VT. The result might be a mask
10520 // VT for compares. Using the previous operand assumes that the previous
10521 // operand will never have a smaller element size than a scalar operand and
10522 // that a widening operation never uses SEW=64.
10523 // NOTE: If this fails the below assert, we can probably just find the
10524 // element count from any operand or result and use it to construct the VT.
10525 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
10526 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
10527
10528 // The more complex case is when the scalar is larger than XLenVT.
10529 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
10530 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
10531
10532 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
10533 // instruction to sign-extend since SEW>XLEN.
10534 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
10535 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
10536 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10537 }
10538
10539 switch (IntNo) {
10540 case Intrinsic::riscv_vslide1up:
10541 case Intrinsic::riscv_vslide1down:
10542 case Intrinsic::riscv_vslide1up_mask:
10543 case Intrinsic::riscv_vslide1down_mask: {
10544 // We need to special case these when the scalar is larger than XLen.
10545 unsigned NumOps = Op.getNumOperands();
10546 bool IsMasked = NumOps == 7;
10547
10548 // Convert the vector source to the equivalent nxvXi32 vector.
10549 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
10550 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
10551 SDValue ScalarLo, ScalarHi;
10552 std::tie(ScalarLo, ScalarHi) =
10553 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
10554
10555 // Double the VL since we halved SEW.
10556 SDValue AVL = getVLOperand(Op);
10557 SDValue I32VL;
10558
10559 // Optimize for constant AVL
10560 if (isa<ConstantSDNode>(AVL)) {
10561 const auto [MinVLMAX, MaxVLMAX] =
10562 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
10563
10564 uint64_t AVLInt = AVL->getAsZExtVal();
10565 if (AVLInt <= MinVLMAX) {
10566 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
10567 } else if (AVLInt >= 2 * MaxVLMAX) {
10568 // Just set vl to VLMAX in this situation
10569 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
10570 } else {
10571 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
10572 // is related to the hardware implementation.
10573 // So let the following code handle it.
10574 }
10575 }
10576 if (!I32VL) {
10577 RISCVVType::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
10578 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
10579 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
10580 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
10581 SDValue SETVL =
10582 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
10583 // Use a vsetvli instruction to get the actually used length, which is
10584 // related to the hardware implementation.
10585 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
10586 SEW, LMUL);
10587 I32VL =
10588 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
10589 }
10590
10591 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
10592
10593 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
10594 // instructions.
10595 SDValue Passthru;
10596 if (IsMasked)
10597 Passthru = DAG.getUNDEF(I32VT);
10598 else
10599 Passthru = DAG.getBitcast(I32VT, Operands[1]);
10600
10601 if (IntNo == Intrinsic::riscv_vslide1up ||
10602 IntNo == Intrinsic::riscv_vslide1up_mask) {
10603 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10604 ScalarHi, I32Mask, I32VL);
10605 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10606 ScalarLo, I32Mask, I32VL);
10607 } else {
10608 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10609 ScalarLo, I32Mask, I32VL);
10610 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10611 ScalarHi, I32Mask, I32VL);
10612 }
10613
10614 // Convert back to nxvXi64.
10615 Vec = DAG.getBitcast(VT, Vec);
10616
10617 if (!IsMasked)
10618 return Vec;
10619 // Apply mask after the operation.
10620 SDValue Mask = Operands[NumOps - 3];
10621 SDValue MaskedOff = Operands[1];
10622 // Assume Policy operand is the last operand.
10623 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
10624 // We don't need to select maskedoff if it's undef.
10625 if (MaskedOff.isUndef())
10626 return Vec;
10627 // TAMU
10628 if (Policy == RISCVVType::TAIL_AGNOSTIC)
10629 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10630 DAG.getUNDEF(VT), AVL);
10631 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
10632 // It's fine because vmerge does not care about mask policy.
10633 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10634 MaskedOff, AVL);
10635 }
10636 }
10637
10638 // We need to convert the scalar to a splat vector.
10639 SDValue VL = getVLOperand(Op);
10640 assert(VL.getValueType() == XLenVT);
10641 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
10642 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10643}
10644
10645// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
10646// scalable vector llvm.get.vector.length for now.
10647//
10648// We need to convert from a scalable VF to a vsetvli with VLMax equal to
10649// (vscale * VF). The vscale and VF are independent of element width. We use
10650// SEW=8 for the vsetvli because it is the only element width that supports all
10651// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
10652// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
10653// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
10654// SEW and LMUL are better for the surrounding vector instructions.
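// For example (illustrative only): with RVVBitsPerBlock = 64, a scalable VF
// of 4 maps to SEW=8 with LMUL=1/2, since VLMax(e8, mf2) = VLEN/16 =
// vscale * 4.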
10655 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
10656 const RISCVSubtarget &Subtarget) {
10657 MVT XLenVT = Subtarget.getXLenVT();
10658
10659 // The smallest LMUL is only valid for the smallest element width.
10660 const unsigned ElementWidth = 8;
10661
10662 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
10663 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
10664 // We don't support VF==1 with ELEN==32.
10665 [[maybe_unused]] unsigned MinVF =
10666 RISCV::RVVBitsPerBlock / Subtarget.getELen();
10667
10668 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
10669 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
10670 "Unexpected VF");
10671
10672 bool Fractional = VF < LMul1VF;
10673 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
10674 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
10675 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
10676
10677 SDLoc DL(N);
10678
10679 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
10680 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
10681
10682 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
10683
10684 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
10685 SDValue Res =
10686 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
10687 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
10688}
10689
10690 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
10691 const RISCVSubtarget &Subtarget) {
10692 SDValue Op0 = N->getOperand(1);
10693 MVT OpVT = Op0.getSimpleValueType();
10694 MVT ContainerVT = OpVT;
10695 if (OpVT.isFixedLengthVector()) {
10696 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
10697 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
10698 }
10699 MVT XLenVT = Subtarget.getXLenVT();
10700 SDLoc DL(N);
10701 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
10702 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
10703 if (isOneConstant(N->getOperand(2)))
10704 return Res;
10705
10706 // Convert -1 to VL.
10707 SDValue Setcc =
10708 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
10709 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
10710 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
10711}
10712
10713static inline void promoteVCIXScalar(const SDValue &Op,
10714 SmallVectorImpl<SDValue> &Operands,
10715 SelectionDAG &DAG) {
10716 const RISCVSubtarget &Subtarget =
10717 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10718
10719 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10720 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10721 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10722 SDLoc DL(Op);
10723
10724 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10725 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10726 if (!II || !II->hasScalarOperand())
10727 return;
10728
10729 unsigned SplatOp = II->ScalarOperand + 1;
10730 assert(SplatOp < Op.getNumOperands());
10731
10732 SDValue &ScalarOp = Operands[SplatOp];
10733 MVT OpVT = ScalarOp.getSimpleValueType();
10734 MVT XLenVT = Subtarget.getXLenVT();
10735
10736 // The code below is partially copied from lowerVectorIntrinsicScalars.
10737 // If this isn't a scalar, or its type is XLenVT we're done.
10738 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10739 return;
10740
10741 // Manually emit promote operation for scalar operation.
10742 if (OpVT.bitsLT(XLenVT)) {
10743 unsigned ExtOpc =
10744 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10745 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10746 }
10747}
10748
10749static void processVCIXOperands(SDValue &OrigOp,
10750 SmallVectorImpl<SDValue> &Operands,
10751 SelectionDAG &DAG) {
10752 promoteVCIXScalar(OrigOp, Operands, DAG);
10753 const RISCVSubtarget &Subtarget =
10754 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10755 for (SDValue &V : Operands) {
10756 EVT ValType = V.getValueType();
10757 if (ValType.isVector() && ValType.isFloatingPoint()) {
10758 MVT InterimIVT =
10759 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
10760 ValType.getVectorElementCount());
10761 V = DAG.getBitcast(InterimIVT, V);
10762 }
10763 if (ValType.isFixedLengthVector()) {
10764 MVT OpContainerVT = getContainerForFixedLengthVector(
10765 DAG, V.getSimpleValueType(), Subtarget);
10766 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
10767 }
10768 }
10769}
10770
10771// LMUL * VLEN should be greater than or equal to EGS * SEW
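// For example (illustrative only), a 4 x SEW=32 element group (EGW = 128) is
// legal for nxv4i32 (LMUL=2) whenever VLEN >= 64, but for nxv1i32 (LMUL=1/2)
// only when VLEN >= 256.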
10772static inline bool isValidEGW(int EGS, EVT VT,
10773 const RISCVSubtarget &Subtarget) {
10774 return (Subtarget.getRealMinVLen() *
10775 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
10776 EGS * VT.getScalarSizeInBits();
10777}
10778
10779SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10780 SelectionDAG &DAG) const {
10781 unsigned IntNo = Op.getConstantOperandVal(0);
10782 SDLoc DL(Op);
10783 MVT XLenVT = Subtarget.getXLenVT();
10784
10785 switch (IntNo) {
10786 default:
10787 break; // Don't custom lower most intrinsics.
10788 case Intrinsic::riscv_tuple_insert: {
10789 SDValue Vec = Op.getOperand(1);
10790 SDValue SubVec = Op.getOperand(2);
10791 SDValue Index = Op.getOperand(3);
10792
10793 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
10794 SubVec, Index);
10795 }
10796 case Intrinsic::riscv_tuple_extract: {
10797 SDValue Vec = Op.getOperand(1);
10798 SDValue Index = Op.getOperand(2);
10799
10800 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
10801 Index);
10802 }
10803 case Intrinsic::thread_pointer: {
10804 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10805 return DAG.getRegister(RISCV::X4, PtrVT);
10806 }
10807 case Intrinsic::riscv_orc_b:
10808 case Intrinsic::riscv_brev8:
10809 case Intrinsic::riscv_sha256sig0:
10810 case Intrinsic::riscv_sha256sig1:
10811 case Intrinsic::riscv_sha256sum0:
10812 case Intrinsic::riscv_sha256sum1:
10813 case Intrinsic::riscv_sm3p0:
10814 case Intrinsic::riscv_sm3p1: {
10815 unsigned Opc;
10816 switch (IntNo) {
10817 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
10818 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
10819 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
10820 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
10821 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
10822 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
10823 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
10824 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
10825 }
10826
10827 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10828 }
10829 case Intrinsic::riscv_sm4ks:
10830 case Intrinsic::riscv_sm4ed: {
10831 unsigned Opc =
10832 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
10833
10834 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
10835 Op.getOperand(3));
10836 }
10837 case Intrinsic::riscv_zip:
10838 case Intrinsic::riscv_unzip: {
10839 unsigned Opc =
10840 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
10841 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10842 }
10843 case Intrinsic::riscv_mopr:
10844 return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
10845 Op.getOperand(2));
10846
10847 case Intrinsic::riscv_moprr: {
10848 return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
10849 Op.getOperand(2), Op.getOperand(3));
10850 }
10851 case Intrinsic::riscv_clmul:
10852 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
10853 Op.getOperand(2));
10854 case Intrinsic::riscv_clmulh:
10855 case Intrinsic::riscv_clmulr: {
10856 unsigned Opc =
10857 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
10858 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
10859 }
10860 case Intrinsic::experimental_get_vector_length:
10861 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
10862 case Intrinsic::experimental_cttz_elts:
10863 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
10864 case Intrinsic::riscv_vmv_x_s: {
10865 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
10866 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
10867 }
10868 case Intrinsic::riscv_vfmv_f_s:
10869 return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
10870 case Intrinsic::riscv_vmv_v_x:
10871 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
10872 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
10873 Subtarget);
10874 case Intrinsic::riscv_vfmv_v_f:
10875 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
10876 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10877 case Intrinsic::riscv_vmv_s_x: {
10878 SDValue Scalar = Op.getOperand(2);
10879
10880 if (Scalar.getValueType().bitsLE(XLenVT)) {
10881 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
10882 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
10883 Op.getOperand(1), Scalar, Op.getOperand(3));
10884 }
10885
10886 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
10887
10888 // This is an i64 value that lives in two scalar registers. We have to
10889 // insert this in a convoluted way. First we build a vXi64 splat containing
10890 // the two values that we assemble using some bit math. Next we'll use
10891 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
10892 // to merge element 0 from our splat into the source vector.
10893 // FIXME: This is probably not the best way to do this, but it is
10894 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
10895 // point.
10896 // sw lo, (a0)
10897 // sw hi, 4(a0)
10898 // vlse vX, (a0)
10899 //
10900 // vid.v vVid
10901 // vmseq.vx mMask, vVid, 0
10902 // vmerge.vvm vDest, vSrc, vVal, mMask
10903 MVT VT = Op.getSimpleValueType();
10904 SDValue Vec = Op.getOperand(1);
10905 SDValue VL = getVLOperand(Op);
10906
10907 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
10908 if (Op.getOperand(1).isUndef())
10909 return SplattedVal;
10910 SDValue SplattedIdx =
10911 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10912 DAG.getConstant(0, DL, MVT::i32), VL);
10913
10914 MVT MaskVT = getMaskTypeFor(VT);
10915 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
10916 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10917 SDValue SelectCond =
10918 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10919 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
10920 DAG.getUNDEF(MaskVT), Mask, VL});
10921 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
10922 Vec, DAG.getUNDEF(VT), VL);
10923 }
10924 case Intrinsic::riscv_vfmv_s_f:
10925 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
10926 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10927 // EGS * EEW >= 128 bits
10928 case Intrinsic::riscv_vaesdf_vv:
10929 case Intrinsic::riscv_vaesdf_vs:
10930 case Intrinsic::riscv_vaesdm_vv:
10931 case Intrinsic::riscv_vaesdm_vs:
10932 case Intrinsic::riscv_vaesef_vv:
10933 case Intrinsic::riscv_vaesef_vs:
10934 case Intrinsic::riscv_vaesem_vv:
10935 case Intrinsic::riscv_vaesem_vs:
10936 case Intrinsic::riscv_vaeskf1:
10937 case Intrinsic::riscv_vaeskf2:
10938 case Intrinsic::riscv_vaesz_vs:
10939 case Intrinsic::riscv_vsm4k:
10940 case Intrinsic::riscv_vsm4r_vv:
10941 case Intrinsic::riscv_vsm4r_vs: {
10942 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10943 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10944 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10945 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10946 return Op;
10947 }
10948 // EGS * EEW >= 256 bits
10949 case Intrinsic::riscv_vsm3c:
10950 case Intrinsic::riscv_vsm3me: {
10951 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
10952 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
10953 reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
10954 return Op;
10955 }
10956 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
10957 case Intrinsic::riscv_vsha2ch:
10958 case Intrinsic::riscv_vsha2cl:
10959 case Intrinsic::riscv_vsha2ms: {
10960 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
10961 !Subtarget.hasStdExtZvknhb())
10962 reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
10963 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10964 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10965 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10966 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10967 return Op;
10968 }
10969 case Intrinsic::riscv_sf_vc_v_x:
10970 case Intrinsic::riscv_sf_vc_v_i:
10971 case Intrinsic::riscv_sf_vc_v_xv:
10972 case Intrinsic::riscv_sf_vc_v_iv:
10973 case Intrinsic::riscv_sf_vc_v_vv:
10974 case Intrinsic::riscv_sf_vc_v_fv:
10975 case Intrinsic::riscv_sf_vc_v_xvv:
10976 case Intrinsic::riscv_sf_vc_v_ivv:
10977 case Intrinsic::riscv_sf_vc_v_vvv:
10978 case Intrinsic::riscv_sf_vc_v_fvv:
10979 case Intrinsic::riscv_sf_vc_v_xvw:
10980 case Intrinsic::riscv_sf_vc_v_ivw:
10981 case Intrinsic::riscv_sf_vc_v_vvw:
10982 case Intrinsic::riscv_sf_vc_v_fvw: {
10983 MVT VT = Op.getSimpleValueType();
10984
10985 SmallVector<SDValue> Operands{Op->op_values()};
10986 processVCIXOperands(Op, Operands, DAG);
10987
10988 MVT RetVT = VT;
10989 if (VT.isFixedLengthVector())
10990 RetVT = getContainerForFixedLengthVector(VT);
10991 else if (VT.isFloatingPoint())
10992 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
10993 VT.getVectorElementCount());
10994
10995 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
10996
10997 if (VT.isFixedLengthVector())
10998 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
10999 else if (VT.isFloatingPoint())
11000 NewNode = DAG.getBitcast(VT, NewNode);
11001
11002 if (Op == NewNode)
11003 break;
11004
11005 return NewNode;
11006 }
11007 }
11008
11009 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11010}
11011
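// Lower a chained SiFive VCIX intrinsic (riscv_sf_vc_v_*_se) to the given
// RISCVISD VCIX node. The result is produced in a scalable integer container
// type and converted/bitcast back to the original value type, with the chain
// merged back into the result.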
11012 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
11013 unsigned Type) {
11014 SDLoc DL(Op);
11015 SmallVector<SDValue> Operands{Op->op_values()};
11016 Operands.erase(Operands.begin() + 1);
11017
11018 const RISCVSubtarget &Subtarget =
11019 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
11020 MVT VT = Op.getSimpleValueType();
11021 MVT RetVT = VT;
11022 MVT FloatVT = VT;
11023
11024 if (VT.isFloatingPoint()) {
11025 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11026 VT.getVectorElementCount());
11027 FloatVT = RetVT;
11028 }
11029 if (VT.isFixedLengthVector())
11030 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
11031 Subtarget);
11032
11033 processVCIXOperands(Op, Operands, DAG);
11034
11035 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
11036 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
11037 SDValue Chain = NewNode.getValue(1);
11038
11039 if (VT.isFixedLengthVector())
11040 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
11041 if (VT.isFloatingPoint())
11042 NewNode = DAG.getBitcast(VT, NewNode);
11043
11044 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
11045
11046 return NewNode;
11047}
11048
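// Lower a void SiFive VCIX intrinsic (riscv_sf_vc_*_se) to the given RISCVISD
// VCIX node after dropping the intrinsic ID operand and normalizing the
// remaining operands.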
11049 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
11050 unsigned Type) {
11051 SmallVector<SDValue> Operands{Op->op_values()};
11052 Operands.erase(Operands.begin() + 1);
11053 processVCIXOperands(Op, Operands, DAG);
11054
11055 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
11056}
11057
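// Lower fixed-length segment load intrinsics (riscv_seg*_load_mask and the
// strided riscv_sseg*_load_mask variants) by emitting the corresponding masked
// vlseg/vlsseg intrinsic on a scalable vector tuple type and then extracting
// and converting each field back to the fixed-length result type.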
11058static SDValue
11059 lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op,
11060 const RISCVSubtarget &Subtarget,
11061 SelectionDAG &DAG) {
11062 bool IsStrided;
11063 switch (IntNo) {
11064 case Intrinsic::riscv_seg2_load_mask:
11065 case Intrinsic::riscv_seg3_load_mask:
11066 case Intrinsic::riscv_seg4_load_mask:
11067 case Intrinsic::riscv_seg5_load_mask:
11068 case Intrinsic::riscv_seg6_load_mask:
11069 case Intrinsic::riscv_seg7_load_mask:
11070 case Intrinsic::riscv_seg8_load_mask:
11071 IsStrided = false;
11072 break;
11073 case Intrinsic::riscv_sseg2_load_mask:
11074 case Intrinsic::riscv_sseg3_load_mask:
11075 case Intrinsic::riscv_sseg4_load_mask:
11076 case Intrinsic::riscv_sseg5_load_mask:
11077 case Intrinsic::riscv_sseg6_load_mask:
11078 case Intrinsic::riscv_sseg7_load_mask:
11079 case Intrinsic::riscv_sseg8_load_mask:
11080 IsStrided = true;
11081 break;
11082 default:
11083 llvm_unreachable("unexpected intrinsic ID");
11084 };
11085
11086 static const Intrinsic::ID VlsegInts[7] = {
11087 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11088 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11089 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11090 Intrinsic::riscv_vlseg8_mask};
11091 static const Intrinsic::ID VlssegInts[7] = {
11092 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11093 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11094 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11095 Intrinsic::riscv_vlsseg8_mask};
11096
11097 SDLoc DL(Op);
11098 unsigned NF = Op->getNumValues() - 1;
11099 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11100 MVT XLenVT = Subtarget.getXLenVT();
11101 MVT VT = Op->getSimpleValueType(0);
11102 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11103 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11104 ContainerVT.getScalarSizeInBits();
11105 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11106
11107 // Operands: (chain, int_id, pointer, mask, vl) or
11108 // (chain, int_id, pointer, offset, mask, vl)
11109 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11110 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11111 MVT MaskVT = Mask.getSimpleValueType();
11112 MVT MaskContainerVT =
11113 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11114 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11115
11116 SDValue IntID = DAG.getTargetConstant(
11117 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11118 auto *Load = cast<MemIntrinsicSDNode>(Op);
11119
11120 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11121 SmallVector<SDValue, 9> Ops = {
11122 Load->getChain(),
11123 IntID,
11124 DAG.getUNDEF(VecTupTy),
11125 Op.getOperand(2),
11126 Mask,
11127 VL,
11128 DAG.getTargetConstant(
11129 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
11130 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11131 // Insert the stride operand.
11132 if (IsStrided)
11133 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11134
11135 SDValue Result =
11136 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11137 Load->getMemoryVT(), Load->getMemOperand());
11138 SmallVector<SDValue, 9> Results;
11139 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11140 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11141 Result.getValue(0),
11142 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11143 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11144 }
11145 Results.push_back(Result.getValue(1));
11146 return DAG.getMergeValues(Results, DL);
11147}
11148
11149SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11150 SelectionDAG &DAG) const {
11151 unsigned IntNo = Op.getConstantOperandVal(1);
11152 switch (IntNo) {
11153 default:
11154 break;
11155 case Intrinsic::riscv_seg2_load_mask:
11156 case Intrinsic::riscv_seg3_load_mask:
11157 case Intrinsic::riscv_seg4_load_mask:
11158 case Intrinsic::riscv_seg5_load_mask:
11159 case Intrinsic::riscv_seg6_load_mask:
11160 case Intrinsic::riscv_seg7_load_mask:
11161 case Intrinsic::riscv_seg8_load_mask:
11162 case Intrinsic::riscv_sseg2_load_mask:
11163 case Intrinsic::riscv_sseg3_load_mask:
11164 case Intrinsic::riscv_sseg4_load_mask:
11165 case Intrinsic::riscv_sseg5_load_mask:
11166 case Intrinsic::riscv_sseg6_load_mask:
11167 case Intrinsic::riscv_sseg7_load_mask:
11168 case Intrinsic::riscv_sseg8_load_mask:
11169 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
11170
11171 case Intrinsic::riscv_sf_vc_v_x_se:
11172 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
11173 case Intrinsic::riscv_sf_vc_v_i_se:
11174 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
11175 case Intrinsic::riscv_sf_vc_v_xv_se:
11176 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
11177 case Intrinsic::riscv_sf_vc_v_iv_se:
11178 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
11179 case Intrinsic::riscv_sf_vc_v_vv_se:
11180 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
11181 case Intrinsic::riscv_sf_vc_v_fv_se:
11182 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
11183 case Intrinsic::riscv_sf_vc_v_xvv_se:
11184 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
11185 case Intrinsic::riscv_sf_vc_v_ivv_se:
11186 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
11187 case Intrinsic::riscv_sf_vc_v_vvv_se:
11188 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
11189 case Intrinsic::riscv_sf_vc_v_fvv_se:
11190 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
11191 case Intrinsic::riscv_sf_vc_v_xvw_se:
11192 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
11193 case Intrinsic::riscv_sf_vc_v_ivw_se:
11194 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
11195 case Intrinsic::riscv_sf_vc_v_vvw_se:
11196 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
11197 case Intrinsic::riscv_sf_vc_v_fvw_se:
11198 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
11199 }
11200
11201 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11202}
11203
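// Lower fixed-length segment store intrinsics (riscv_seg*_store_mask and the
// strided riscv_sseg*_store_mask variants) by packing the fixed-length
// operands into a scalable vector tuple and emitting the corresponding masked
// vsseg/vssseg intrinsic.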
11204static SDValue
11205 lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
11206 const RISCVSubtarget &Subtarget,
11207 SelectionDAG &DAG) {
11208 bool IsStrided;
11209 switch (IntNo) {
11210 case Intrinsic::riscv_seg2_store_mask:
11211 case Intrinsic::riscv_seg3_store_mask:
11212 case Intrinsic::riscv_seg4_store_mask:
11213 case Intrinsic::riscv_seg5_store_mask:
11214 case Intrinsic::riscv_seg6_store_mask:
11215 case Intrinsic::riscv_seg7_store_mask:
11216 case Intrinsic::riscv_seg8_store_mask:
11217 IsStrided = false;
11218 break;
11219 case Intrinsic::riscv_sseg2_store_mask:
11220 case Intrinsic::riscv_sseg3_store_mask:
11221 case Intrinsic::riscv_sseg4_store_mask:
11222 case Intrinsic::riscv_sseg5_store_mask:
11223 case Intrinsic::riscv_sseg6_store_mask:
11224 case Intrinsic::riscv_sseg7_store_mask:
11225 case Intrinsic::riscv_sseg8_store_mask:
11226 IsStrided = true;
11227 break;
11228 default:
11229 llvm_unreachable("unexpected intrinsic ID");
11230 }
11231
11232 SDLoc DL(Op);
11233 static const Intrinsic::ID VssegInts[] = {
11234 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
11235 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
11236 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
11237 Intrinsic::riscv_vsseg8_mask};
11238 static const Intrinsic::ID VsssegInts[] = {
11239 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
11240 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
11241 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
11242 Intrinsic::riscv_vssseg8_mask};
11243
11244 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
11245 // (chain, int_id, vec*, ptr, stride, mask, vl)
11246 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
11247 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11248 MVT XLenVT = Subtarget.getXLenVT();
11249 MVT VT = Op->getOperand(2).getSimpleValueType();
11250 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11251 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11252 ContainerVT.getScalarSizeInBits();
11253 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11254
11255 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11256 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11257 MVT MaskVT = Mask.getSimpleValueType();
11258 MVT MaskContainerVT =
11259 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11260 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11261
11262 SDValue IntID = DAG.getTargetConstant(
11263 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
11264 SDValue Ptr = Op->getOperand(NF + 2);
11265
11266 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
11267
11268 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
11269 for (unsigned i = 0; i < NF; i++)
11270 StoredVal = DAG.getNode(
11271 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
11272 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
11273 DAG, Subtarget),
11274 DAG.getTargetConstant(i, DL, MVT::i32));
11275
11276 SmallVector<SDValue, 8> Ops = {
11277 FixedIntrinsic->getChain(),
11278 IntID,
11279 StoredVal,
11280 Ptr,
11281 Mask,
11282 VL,
11283 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11284 // Insert the stride operand.
11285 if (IsStrided)
11286 Ops.insert(std::next(Ops.begin(), 4),
11287 Op.getOperand(Op.getNumOperands() - 3));
11288
11289 return DAG.getMemIntrinsicNode(
11290 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
11291 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
11292}
11293
11294SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11295 SelectionDAG &DAG) const {
11296 unsigned IntNo = Op.getConstantOperandVal(1);
11297 switch (IntNo) {
11298 default:
11299 break;
11300 case Intrinsic::riscv_seg2_store_mask:
11301 case Intrinsic::riscv_seg3_store_mask:
11302 case Intrinsic::riscv_seg4_store_mask:
11303 case Intrinsic::riscv_seg5_store_mask:
11304 case Intrinsic::riscv_seg6_store_mask:
11305 case Intrinsic::riscv_seg7_store_mask:
11306 case Intrinsic::riscv_seg8_store_mask:
11307 case Intrinsic::riscv_sseg2_store_mask:
11308 case Intrinsic::riscv_sseg3_store_mask:
11309 case Intrinsic::riscv_sseg4_store_mask:
11310 case Intrinsic::riscv_sseg5_store_mask:
11311 case Intrinsic::riscv_sseg6_store_mask:
11312 case Intrinsic::riscv_sseg7_store_mask:
11313 case Intrinsic::riscv_sseg8_store_mask:
11314 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
11315
11316 case Intrinsic::riscv_sf_vc_xv_se:
11317 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
11318 case Intrinsic::riscv_sf_vc_iv_se:
11319 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
11320 case Intrinsic::riscv_sf_vc_vv_se:
11321 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
11322 case Intrinsic::riscv_sf_vc_fv_se:
11323 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
11324 case Intrinsic::riscv_sf_vc_xvv_se:
11325 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
11326 case Intrinsic::riscv_sf_vc_ivv_se:
11327 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
11328 case Intrinsic::riscv_sf_vc_vvv_se:
11329 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
11330 case Intrinsic::riscv_sf_vc_fvv_se:
11331 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
11332 case Intrinsic::riscv_sf_vc_xvw_se:
11333 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
11334 case Intrinsic::riscv_sf_vc_ivw_se:
11335 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
11336 case Intrinsic::riscv_sf_vc_vvw_se:
11337 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
11338 case Intrinsic::riscv_sf_vc_fvw_se:
11339 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
11340 }
11341
11342 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11343}
11344
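// Map an ISD::VECREDUCE_* or ISD::VP_REDUCE_* opcode to the corresponding
// RISCVISD::VECREDUCE_*_VL opcode.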
11345static unsigned getRVVReductionOp(unsigned ISDOpcode) {
11346 switch (ISDOpcode) {
11347 default:
11348 llvm_unreachable("Unhandled reduction");
11349 case ISD::VP_REDUCE_ADD:
11350 case ISD::VECREDUCE_ADD:
11351 return RISCVISD::VECREDUCE_ADD_VL;
11352 case ISD::VP_REDUCE_UMAX:
11353 case ISD::VECREDUCE_UMAX:
11354 return RISCVISD::VECREDUCE_UMAX_VL;
11355 case ISD::VP_REDUCE_SMAX:
11356 case ISD::VECREDUCE_SMAX:
11357 return RISCVISD::VECREDUCE_SMAX_VL;
11358 case ISD::VP_REDUCE_UMIN:
11359 case ISD::VECREDUCE_UMIN:
11360 return RISCVISD::VECREDUCE_UMIN_VL;
11361 case ISD::VP_REDUCE_SMIN:
11362 case ISD::VECREDUCE_SMIN:
11363 return RISCVISD::VECREDUCE_SMIN_VL;
11364 case ISD::VP_REDUCE_AND:
11365 case ISD::VECREDUCE_AND:
11366 return RISCVISD::VECREDUCE_AND_VL;
11367 case ISD::VP_REDUCE_OR:
11368 case ISD::VECREDUCE_OR:
11369 return RISCVISD::VECREDUCE_OR_VL;
11370 case ISD::VP_REDUCE_XOR:
11371 case ISD::VECREDUCE_XOR:
11372 return RISCVISD::VECREDUCE_XOR_VL;
11373 case ISD::VP_REDUCE_FADD:
11374 return RISCVISD::VECREDUCE_FADD_VL;
11375 case ISD::VP_REDUCE_SEQ_FADD:
11376 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
11377 case ISD::VP_REDUCE_FMAX:
11378 case ISD::VP_REDUCE_FMAXIMUM:
11379 return RISCVISD::VECREDUCE_FMAX_VL;
11380 case ISD::VP_REDUCE_FMIN:
11381 case ISD::VP_REDUCE_FMINIMUM:
11382 return RISCVISD::VECREDUCE_FMIN_VL;
11383 }
11384
11385}
11386
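// Lower a reduction over an i1 mask vector to a vcpop.m based sequence:
// AND becomes "vcpop(~x) == 0", OR becomes "vcpop(x) != 0", and XOR becomes
// "(vcpop(x) & 1) != 0". For VP reductions the start value is then folded in
// with the scalar base operation.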
11387SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
11388 SelectionDAG &DAG,
11389 bool IsVP) const {
11390 SDLoc DL(Op);
11391 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
11392 MVT VecVT = Vec.getSimpleValueType();
11393 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
11394 Op.getOpcode() == ISD::VECREDUCE_OR ||
11395 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11396 Op.getOpcode() == ISD::VP_REDUCE_AND ||
11397 Op.getOpcode() == ISD::VP_REDUCE_OR ||
11398 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
11399 "Unexpected reduction lowering");
11400
11401 MVT XLenVT = Subtarget.getXLenVT();
11402
11403 MVT ContainerVT = VecVT;
11404 if (VecVT.isFixedLengthVector()) {
11405 ContainerVT = getContainerForFixedLengthVector(VecVT);
11406 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11407 }
11408
11409 SDValue Mask, VL;
11410 if (IsVP) {
11411 Mask = Op.getOperand(2);
11412 VL = Op.getOperand(3);
11413 } else {
11414 std::tie(Mask, VL) =
11415 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11416 }
11417
11418 ISD::CondCode CC;
11419 switch (Op.getOpcode()) {
11420 default:
11421 llvm_unreachable("Unhandled reduction");
11422 case ISD::VECREDUCE_AND:
11423 case ISD::VP_REDUCE_AND: {
11424 // vcpop ~x == 0
11425 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11426 if (IsVP || VecVT.isFixedLengthVector())
11427 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
11428 else
11429 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
11430 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11431 CC = ISD::SETEQ;
11432 break;
11433 }
11434 case ISD::VECREDUCE_OR:
11435 case ISD::VP_REDUCE_OR:
11436 // vcpop x != 0
11437 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11438 CC = ISD::SETNE;
11439 break;
11440 case ISD::VECREDUCE_XOR:
11441 case ISD::VP_REDUCE_XOR: {
11442 // ((vcpop x) & 1) != 0
11443 SDValue One = DAG.getConstant(1, DL, XLenVT);
11444 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11445 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
11446 CC = ISD::SETNE;
11447 break;
11448 }
11449 }
11450
11451 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11452 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
11453 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
11454
11455 if (!IsVP)
11456 return SetCC;
11457
11458 // Now include the start value in the operation.
11459 // Note that we must return the start value when no elements are operated
11460 // upon. The vcpop instructions we've emitted in each case above will return
11461 // 0 for an inactive vector, and so we've already received the neutral value:
11462 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
11463 // can simply include the start value.
11464 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11465 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
11466}
11467
11468static bool isNonZeroAVL(SDValue AVL) {
11469 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
11470 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
11471 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
11472 (ImmAVL && ImmAVL->getZExtValue() >= 1);
11473}
11474
11475/// Helper to lower a reduction sequence of the form:
11476/// scalar = reduce_op vec, scalar_start
11477static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
11478 SDValue StartValue, SDValue Vec, SDValue Mask,
11479 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
11480 const RISCVSubtarget &Subtarget) {
11481 const MVT VecVT = Vec.getSimpleValueType();
11482 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
11483 const MVT XLenVT = Subtarget.getXLenVT();
11484 const bool NonZeroAVL = isNonZeroAVL(VL);
11485
11486 // The reduction needs an LMUL1 input; do the splat at either LMUL1
11487 // or the original VT if fractional.
11488 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
11489 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
11490 // prove it is non-zero. For the AVL=0 case, we need the scalar to
11491 // be the result of the reduction operation.
11492 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
11493 SDValue InitialValue =
11494 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
11495 if (M1VT != InnerVT)
11496 InitialValue =
11497 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
11498 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
11499 SDValue Policy = DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
11500 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
11501 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
11502 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
11503}
11504
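// Lower an integer ISD::VECREDUCE_* node. Vector types that need splitting are
// split manually first; the remaining reduction is lowered through
// lowerReductionSeq with the matching RISCVISD::VECREDUCE_*_VL opcode.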
11505SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
11506 SelectionDAG &DAG) const {
11507 SDLoc DL(Op);
11508 SDValue Vec = Op.getOperand(0);
11509 EVT VecEVT = Vec.getValueType();
11510
11511 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11512
11513 // Due to ordering in legalize types we may have a vector type that needs to
11514 // be split. Do that manually so we can get down to a legal type.
11515 while (getTypeAction(*DAG.getContext(), VecEVT) ==
11516 TargetLowering::TypeSplitVector) {
11517 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11518 VecEVT = Lo.getValueType();
11519 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
11520 }
11521
11522 // TODO: The type may need to be widened rather than split. Or widened before
11523 // it can be split.
11524 if (!isTypeLegal(VecEVT))
11525 return SDValue();
11526
11527 MVT VecVT = VecEVT.getSimpleVT();
11528 MVT VecEltVT = VecVT.getVectorElementType();
11529 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
11530
11531 MVT ContainerVT = VecVT;
11532 if (VecVT.isFixedLengthVector()) {
11533 ContainerVT = getContainerForFixedLengthVector(VecVT);
11534 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11535 }
11536
11537 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11538
11539 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
11540 switch (BaseOpc) {
11541 case ISD::AND:
11542 case ISD::OR:
11543 case ISD::UMAX:
11544 case ISD::UMIN:
11545 case ISD::SMAX:
11546 case ISD::SMIN:
11547 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
11548 }
11549 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
11550 Mask, VL, DL, DAG, Subtarget);
11551}
11552
11553// Given a reduction op, this function returns the matching reduction opcode,
11554// the vector SDValue and the scalar SDValue required to lower this to a
11555// RISCVISD node.
11556static std::tuple<unsigned, SDValue, SDValue>
11558 const RISCVSubtarget &Subtarget) {
11559 SDLoc DL(Op);
11560 auto Flags = Op->getFlags();
11561 unsigned Opcode = Op.getOpcode();
11562 switch (Opcode) {
11563 default:
11564 llvm_unreachable("Unhandled reduction");
11565 case ISD::VECREDUCE_FADD: {
11566 // Use positive zero if we can. It is cheaper to materialize.
11567 SDValue Zero =
11568 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
11569 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
11570 }
11571 case ISD::VECREDUCE_SEQ_FADD:
11572 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
11573 Op.getOperand(0));
11574 case ISD::VECREDUCE_FMINIMUM:
11575 case ISD::VECREDUCE_FMAXIMUM:
11576 case ISD::VECREDUCE_FMIN:
11577 case ISD::VECREDUCE_FMAX: {
11578 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
11579 unsigned RVVOpc =
11580 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
11581 ? RISCVISD::VECREDUCE_FMIN_VL
11582 : RISCVISD::VECREDUCE_FMAX_VL;
11583 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
11584 }
11585 }
11586}
11587
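// Lower a floating-point ISD::VECREDUCE_* node via lowerReductionSeq. For
// FMINIMUM/FMAXIMUM without the nnan flag, the result is forced to NaN
// whenever any input element is NaN (detected by comparing the vector against
// itself and counting the mismatches with vcpop).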
11588SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
11589 SelectionDAG &DAG) const {
11590 SDLoc DL(Op);
11591 MVT VecEltVT = Op.getSimpleValueType();
11592
11593 unsigned RVVOpcode;
11594 SDValue VectorVal, ScalarVal;
11595 std::tie(RVVOpcode, VectorVal, ScalarVal) =
11596 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
11597 MVT VecVT = VectorVal.getSimpleValueType();
11598
11599 MVT ContainerVT = VecVT;
11600 if (VecVT.isFixedLengthVector()) {
11601 ContainerVT = getContainerForFixedLengthVector(VecVT);
11602 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
11603 }
11604
11605 MVT ResVT = Op.getSimpleValueType();
11606 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11607 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
11608 VL, DL, DAG, Subtarget);
11609 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
11610 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
11611 return Res;
11612
11613 if (Op->getFlags().hasNoNaNs())
11614 return Res;
11615
11616 // Force output to NaN if any element is NaN.
11617 SDValue IsNan =
11618 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
11619 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
11620 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
11621 MVT XLenVT = Subtarget.getXLenVT();
11622 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
11623 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
11624 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11625 return DAG.getSelect(
11626 DL, ResVT, NoNaNs, Res,
11627 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11628}
11629
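// Lower an ISD::VP_REDUCE_* node via lowerReductionSeq. For FMINIMUM/FMAXIMUM
// without the nnan flag, NaNs are propagated from both the vector elements and
// the start value.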
11630SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
11631 SelectionDAG &DAG) const {
11632 SDLoc DL(Op);
11633 unsigned Opc = Op.getOpcode();
11634 SDValue Start = Op.getOperand(0);
11635 SDValue Vec = Op.getOperand(1);
11636 EVT VecEVT = Vec.getValueType();
11637 MVT XLenVT = Subtarget.getXLenVT();
11638
11639 // TODO: The type may need to be widened rather than split. Or widened before
11640 // it can be split.
11641 if (!isTypeLegal(VecEVT))
11642 return SDValue();
11643
11644 MVT VecVT = VecEVT.getSimpleVT();
11645 unsigned RVVOpcode = getRVVReductionOp(Opc);
11646
11647 if (VecVT.isFixedLengthVector()) {
11648 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
11649 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11650 }
11651
11652 SDValue VL = Op.getOperand(3);
11653 SDValue Mask = Op.getOperand(2);
11654 SDValue Res =
11655 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
11656 Vec, Mask, VL, DL, DAG, Subtarget);
11657 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
11658 Op->getFlags().hasNoNaNs())
11659 return Res;
11660
11661 // Propagate NaNs.
11662 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
11663 // Check if any of the elements in Vec is NaN.
11664 SDValue IsNaN = DAG.getNode(
11665 RISCVISD::SETCC_VL, DL, PredVT,
11666 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
11667 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
11668 // Check if the start value is NaN.
11669 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
11670 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
11671 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
11672 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11673 MVT ResVT = Res.getSimpleValueType();
11674 return DAG.getSelect(
11675 DL, ResVT, NoNaNs, Res,
11676 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11677}
11678
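// Lower INSERT_SUBVECTOR. Inserts that align to a vector register boundary are
// handled with subregister manipulation; otherwise the subvector is placed into
// the nearest LMUL=1 register of the destination with a vslideup (or a tail
// undisturbed vmv.v.v for offset 0). i1 subvectors are either bitcast to
// equivalent i8 vectors or widened to i8 and truncated back.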
11679SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
11680 SelectionDAG &DAG) const {
11681 SDValue Vec = Op.getOperand(0);
11682 SDValue SubVec = Op.getOperand(1);
11683 MVT VecVT = Vec.getSimpleValueType();
11684 MVT SubVecVT = SubVec.getSimpleValueType();
11685
11686 SDLoc DL(Op);
11687 MVT XLenVT = Subtarget.getXLenVT();
11688 unsigned OrigIdx = Op.getConstantOperandVal(2);
11689 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11690
11691 if (OrigIdx == 0 && Vec.isUndef())
11692 return Op;
11693
11694 // We don't have the ability to slide mask vectors up indexed by their i1
11695 // elements; the smallest we can do is i8. Often we are able to bitcast to
11696 // equivalent i8 vectors. Note that when inserting a fixed-length vector
11697 // into a scalable one, we might not necessarily have enough scalable
11698 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
11699 if (SubVecVT.getVectorElementType() == MVT::i1) {
11700 if (VecVT.getVectorMinNumElements() >= 8 &&
11701 SubVecVT.getVectorMinNumElements() >= 8) {
11702 assert(OrigIdx % 8 == 0 && "Invalid index");
11703 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11704 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11705 "Unexpected mask vector lowering");
11706 OrigIdx /= 8;
11707 SubVecVT =
11708 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11709 SubVecVT.isScalableVector());
11710 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11711 VecVT.isScalableVector());
11712 Vec = DAG.getBitcast(VecVT, Vec);
11713 SubVec = DAG.getBitcast(SubVecVT, SubVec);
11714 } else {
11715 // We can't slide this mask vector up indexed by its i1 elements.
11716 // This poses a problem when we wish to insert a scalable vector which
11717 // can't be re-expressed as a larger type. Just choose the slow path and
11718 // extend to a larger type, then truncate back down.
11719 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11720 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11721 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11722 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
11723 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
11724 Op.getOperand(2));
11725 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
11726 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
11727 }
11728 }
11729
11730 // If the subvector is a fixed-length type and we don't know VLEN
11731 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11732 // don't know which register of a LMUL group contains the specific subvector
11733 // as we only know the minimum register size. Therefore we must slide the
11734 // vector group up the full amount.
11735 const auto VLen = Subtarget.getRealVLen();
11736 if (SubVecVT.isFixedLengthVector() && !VLen) {
11737 MVT ContainerVT = VecVT;
11738 if (VecVT.isFixedLengthVector()) {
11739 ContainerVT = getContainerForFixedLengthVector(VecVT);
11740 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11741 }
11742
11743 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
11744
11745 SDValue Mask =
11746 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11747 // Set the vector length to only the number of elements we care about. Note
11748 // that for slideup this includes the offset.
11749 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
11750 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
11751
11752 // Use tail agnostic policy if we're inserting over Vec's tail.
11753 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11754 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
11755 Policy = RISCVVType::TAIL_AGNOSTIC;
11756
11757 // If we're inserting into the lowest elements, use a tail undisturbed
11758 // vmv.v.v.
11759 if (OrigIdx == 0) {
11760 SubVec =
11761 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
11762 } else {
11763 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11764 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
11765 SlideupAmt, Mask, VL, Policy);
11766 }
11767
11768 if (VecVT.isFixedLengthVector())
11769 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11770 return DAG.getBitcast(Op.getValueType(), SubVec);
11771 }
11772
11773 MVT ContainerVecVT = VecVT;
11774 if (VecVT.isFixedLengthVector()) {
11775 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
11776 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
11777 }
11778
11779 MVT ContainerSubVecVT = SubVecVT;
11780 if (SubVecVT.isFixedLengthVector()) {
11781 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
11782 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
11783 }
11784
11785 unsigned SubRegIdx;
11786 ElementCount RemIdx;
11787 // insert_subvector scales the index by vscale if the subvector is scalable,
11788 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11789 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11790 if (SubVecVT.isFixedLengthVector()) {
11791 assert(VLen);
11792 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11793 auto Decompose =
11794 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11795 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
11796 SubRegIdx = Decompose.first;
11797 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
11798 (OrigIdx % Vscale));
11799 } else {
11800 auto Decompose =
11801 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11802 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
11803 SubRegIdx = Decompose.first;
11804 RemIdx = ElementCount::getScalable(Decompose.second);
11805 }
11806
11807 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
11808 assert(isPowerOf2_64(
11809 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
11810 bool ExactlyVecRegSized =
11811 Subtarget.expandVScale(SubVecVT.getSizeInBits())
11812 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
11813
11814 // 1. If the Idx has been completely eliminated and this subvector's size is
11815 // a vector register or a multiple thereof, or the surrounding elements are
11816 // undef, then this is a subvector insert which naturally aligns to a vector
11817 // register. These can easily be handled using subregister manipulation.
11818 // 2. If the subvector isn't an exact multiple of a valid register group size,
11819 // then the insertion must preserve the undisturbed elements of the register.
11820 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
11821 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
11822 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
11823 // of that LMUL=1 type back into the larger vector (resolving to another
11824 // subregister operation). See below for how our VSLIDEUP works. We go via a
11825 // LMUL=1 type to avoid allocating a large register group to hold our
11826 // subvector.
11827 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
11828 if (SubVecVT.isFixedLengthVector()) {
11829 // We may get NoSubRegister if inserting at index 0 and the subvec
11830 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
11831 if (SubRegIdx == RISCV::NoSubRegister) {
11832 assert(OrigIdx == 0);
11833 return Op;
11834 }
11835
11836 // Use an insert_subvector that will resolve to an insert subreg.
11837 assert(VLen);
11838 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11839 SDValue Insert =
11840 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
11841 if (VecVT.isFixedLengthVector())
11842 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
11843 return Insert;
11844 }
11845 return Op;
11846 }
11847
11848 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
11849 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
11850 // (in our case undisturbed). This means we can set up a subvector insertion
11851 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
11852 // size of the subvector.
11853 MVT InterSubVT = ContainerVecVT;
11854 SDValue AlignedExtract = Vec;
11855 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
11856 if (SubVecVT.isFixedLengthVector()) {
11857 assert(VLen);
11858 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
11859 }
11860 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
11861 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
11862 // Extract a subvector equal to the nearest full vector register type. This
11863 // should resolve to an EXTRACT_SUBREG instruction.
11864 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
11865 }
11866
11867 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
11868
11869 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
11870
11871 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
11872 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
11873
11874 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
11875 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11876 if (Subtarget.expandVScale(EndIndex) ==
11877 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
11878 Policy = RISCVVType::TAIL_AGNOSTIC;
11879
11880 // If we're inserting into the lowest elements, use a tail undisturbed
11881 // vmv.v.v.
11882 if (RemIdx.isZero()) {
11883 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
11884 SubVec, VL);
11885 } else {
11886 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
11887
11888 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
11889 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
11890
11891 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
11892 SlideupAmt, Mask, VL, Policy);
11893 }
11894
11895 // If required, insert this subvector back into the correct vector register.
11896 // This should resolve to an INSERT_SUBREG instruction.
11897 if (ContainerVecVT.bitsGT(InterSubVT))
11898 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
11899
11900 if (VecVT.isFixedLengthVector())
11901 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11902
11903 // We might have bitcast from a mask type: cast back to the original type if
11904 // required.
11905 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
11906}
11907
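// Lower EXTRACT_SUBVECTOR. Extracts that align to a vector register boundary
// resolve to subregister copies; otherwise the enclosing LMUL=1 register is
// slid down by the remaining element offset and the result is taken from
// element 0. i1 subvectors are either bitcast to equivalent i8 vectors or
// widened to i8 and truncated back.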
11908SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
11909 SelectionDAG &DAG) const {
11910 SDValue Vec = Op.getOperand(0);
11911 MVT SubVecVT = Op.getSimpleValueType();
11912 MVT VecVT = Vec.getSimpleValueType();
11913
11914 SDLoc DL(Op);
11915 MVT XLenVT = Subtarget.getXLenVT();
11916 unsigned OrigIdx = Op.getConstantOperandVal(1);
11917 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11918
11919 // With an index of 0 this is a cast-like subvector, which can be performed
11920 // with subregister operations.
11921 if (OrigIdx == 0)
11922 return Op;
11923
11924 // We don't have the ability to slide mask vectors down indexed by their i1
11925 // elements; the smallest we can do is i8. Often we are able to bitcast to
11926 // equivalent i8 vectors. Note that when extracting a fixed-length vector
11927 // from a scalable one, we might not necessarily have enough scalable
11928 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
11929 if (SubVecVT.getVectorElementType() == MVT::i1) {
11930 if (VecVT.getVectorMinNumElements() >= 8 &&
11931 SubVecVT.getVectorMinNumElements() >= 8) {
11932 assert(OrigIdx % 8 == 0 && "Invalid index");
11933 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11934 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11935 "Unexpected mask vector lowering");
11936 OrigIdx /= 8;
11937 SubVecVT =
11938 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11939 SubVecVT.isScalableVector());
11940 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11941 VecVT.isScalableVector());
11942 Vec = DAG.getBitcast(VecVT, Vec);
11943 } else {
11944 // We can't slide this mask vector down, indexed by its i1 elements.
11945 // This poses a problem when we wish to extract a scalable vector which
11946 // can't be re-expressed as a larger type. Just choose the slow path and
11947 // extend to a larger type, then truncate back down.
11948 // TODO: We could probably improve this when extracting certain fixed-length
11949 // vectors from fixed-length vectors, where we can extract as i8 and shift the
11950 // correct element right to reach the desired subvector.
11951 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11952 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11953 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11954 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
11955 Op.getOperand(1));
11956 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
11957 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
11958 }
11959 }
11960
11961 const auto VLen = Subtarget.getRealVLen();
11962
11963 // If the subvector is a fixed-length type and we don't know VLEN
11964 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11965 // don't know which register of a LMUL group contains the specific subvector
11966 // as we only know the minimum register size. Therefore we must slide the
11967 // vector group down the full amount.
11968 if (SubVecVT.isFixedLengthVector() && !VLen) {
11969 MVT ContainerVT = VecVT;
11970 if (VecVT.isFixedLengthVector()) {
11971 ContainerVT = getContainerForFixedLengthVector(VecVT);
11972 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11973 }
11974
11975 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
11976 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
11977 if (auto ShrunkVT =
11978 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
11979 ContainerVT = *ShrunkVT;
11980 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
11981 }
11982
11983 SDValue Mask =
11984 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11985 // Set the vector length to only the number of elements we care about. This
11986 // avoids sliding down elements we're going to discard straight away.
11987 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
11988 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11989 SDValue Slidedown =
11990 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11991 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
11992 // Now we can use a cast-like subvector extract to get the result.
11993 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
11994 return DAG.getBitcast(Op.getValueType(), Slidedown);
11995 }
11996
11997 if (VecVT.isFixedLengthVector()) {
11998 VecVT = getContainerForFixedLengthVector(VecVT);
11999 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
12000 }
12001
12002 MVT ContainerSubVecVT = SubVecVT;
12003 if (SubVecVT.isFixedLengthVector())
12004 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12005
12006 unsigned SubRegIdx;
12007 ElementCount RemIdx;
12008 // extract_subvector scales the index by vscale if the subvector is scalable,
12009 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12010 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12011 if (SubVecVT.isFixedLengthVector()) {
12012 assert(VLen);
12013 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12014 auto Decompose =
12015 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12016 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12017 SubRegIdx = Decompose.first;
12018 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12019 (OrigIdx % Vscale));
12020 } else {
12021 auto Decompose =
12022 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12023 VecVT, ContainerSubVecVT, OrigIdx, TRI);
12024 SubRegIdx = Decompose.first;
12025 RemIdx = ElementCount::getScalable(Decompose.second);
12026 }
12027
12028 // If the Idx has been completely eliminated then this is a subvector extract
12029 // which naturally aligns to a vector register. These can easily be handled
12030 // using subregister manipulation. We use an extract_subvector that will
12031 // resolve to an extract subreg.
12032 if (RemIdx.isZero()) {
12033 if (SubVecVT.isFixedLengthVector()) {
12034 assert(VLen);
12035 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12036 Vec =
12037 DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
12038 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
12039 }
12040 return Op;
12041 }
12042
12043 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
12044 // was > M1 then the index would need to be a multiple of VLMAX, and so would
12045 // divide exactly.
12046 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
12047 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
12048
12049 // If the vector type is an LMUL-group type, extract a subvector equal to the
12050 // nearest full vector register type.
12051 MVT InterSubVT = VecVT;
12052 if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
12053 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
12054 // we should have successfully decomposed the extract into a subregister.
12055 // We use an extract_subvector that will resolve to a subreg extract.
12056 assert(SubRegIdx != RISCV::NoSubRegister);
12057 (void)SubRegIdx;
12058 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
12059 if (SubVecVT.isFixedLengthVector()) {
12060 assert(VLen);
12061 Idx /= *VLen / RISCV::RVVBitsPerBlock;
12062 }
12063 InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
12064 Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
12065 }
12066
12067 // Slide this vector register down by the desired number of elements in order
12068 // to place the desired subvector starting at element 0.
12069 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12070 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
12071 if (SubVecVT.isFixedLengthVector())
12072 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12073 SDValue Slidedown =
12074 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
12075 Vec, SlidedownAmt, Mask, VL);
12076
12077 // Now that the vector is in the right position, extract our final subvector. This
12078 // should resolve to a COPY.
12079 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12080
12081 // We might have bitcast from a mask type: cast back to the original type if
12082 // required.
12083 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12084}
12085
12086 // Widen a vector operation's operands to i8, then truncate its results back to
12087 // the original type, typically i1. All operand and result types must be the same.
12088 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
12089 SelectionDAG &DAG) {
12090 MVT VT = N.getSimpleValueType();
12091 MVT WideVT = VT.changeVectorElementType(MVT::i8);
12092 SmallVector<SDValue, 4> WideOps;
12093 for (SDValue Op : N->ops()) {
12094 assert(Op.getSimpleValueType() == VT &&
12095 "Operands and result must be same type");
12096 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12097 }
12098
12099 unsigned NumVals = N->getNumValues();
12100
12101 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
12102 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
12103 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12104 SmallVector<SDValue, 4> TruncVals;
12105 for (unsigned I = 0; I < NumVals; I++) {
12106 TruncVals.push_back(
12107 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12108 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12109 }
12110
12111 if (TruncVals.size() > 1)
12112 return DAG.getMergeValues(TruncVals, DL);
12113 return TruncVals.front();
12114}
12115
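// Lower VECTOR_DEINTERLEAVE. Factor-2 deinterleaves are handled with
// ri.vunzip2a/b (XRivosVizip), vnsrl, or a vcompress based sequence; larger
// factors are lowered by spilling the concatenated operands to a stack slot
// and reloading them with a segment load.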
12116SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12117 SelectionDAG &DAG) const {
12118 SDLoc DL(Op);
12119 MVT VecVT = Op.getSimpleValueType();
12120
12121 const unsigned Factor = Op->getNumValues();
12122 assert(Factor <= 8);
12123
12124 // 1 bit element vectors need to be widened to e8
12125 if (VecVT.getVectorElementType() == MVT::i1)
12126 return widenVectorOpsToi8(Op, DL, DAG);
12127
12128 // Convert to scalable vectors first.
12129 if (VecVT.isFixedLengthVector()) {
12130 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12131 SmallVector<SDValue, 8> Ops(Factor);
12132 for (unsigned i = 0U; i < Factor; ++i)
12133 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12134 Subtarget);
12135
12136 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12137 SDValue NewDeinterleave =
12138 DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs), Ops);
12139
12140 SmallVector<SDValue, 8> Res(Factor);
12141 for (unsigned i = 0U; i < Factor; ++i)
12142 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12143 DAG, Subtarget);
12144 return DAG.getMergeValues(Res, DL);
12145 }
12146
12147 // If concatenating would exceed LMUL=8, we need to split.
12148 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12149 (8 * RISCV::RVVBitsPerBlock)) {
12150 SmallVector<SDValue, 8> Ops(Factor * 2);
12151 for (unsigned i = 0; i != Factor; ++i) {
12152 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12153 Ops[i * 2] = OpLo;
12154 Ops[i * 2 + 1] = OpHi;
12155 }
12156
12157 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12158
12159 SDValue Lo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs),
12160 ArrayRef(Ops).slice(0, Factor));
12161 SDValue Hi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs),
12162 ArrayRef(Ops).slice(Factor, Factor));
12163
12164 SmallVector<SDValue, 8> Res(Factor);
12165 for (unsigned i = 0; i != Factor; ++i)
12166 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
12167 Hi.getValue(i));
12168
12169 return DAG.getMergeValues(Res, DL);
12170 }
12171
12172 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
12173 MVT VT = Op->getSimpleValueType(0);
12174 SDValue V1 = Op->getOperand(0);
12175 SDValue V2 = Op->getOperand(1);
12176
12177 // For fractional LMUL, check if we can use a higher LMUL
12178 // instruction to avoid a vslidedown.
12179 if (SDValue Src = foldConcatVector(V1, V2);
12180 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
12181 EVT NewVT = VT.getDoubleNumVectorElementsVT();
12182 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
12183 // Freeze the source so we can increase its use count.
12184 Src = DAG.getFreeze(Src);
12185 SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
12186 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12187 SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
12188 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12189 Even = DAG.getExtractSubvector(DL, VT, Even, 0);
12190 Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
12191 return DAG.getMergeValues({Even, Odd}, DL);
12192 }
12193
12194 // Freeze the sources so we can increase their use count.
12195 V1 = DAG.getFreeze(V1);
12196 V2 = DAG.getFreeze(V2);
12197 SDValue Even =
12198 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12199 SDValue Odd =
12200 lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12201 return DAG.getMergeValues({Even, Odd}, DL);
12202 }
12203
12204 SmallVector<SDValue, 8> Ops(Op->op_values());
12205
12206 // Concatenate the vectors as one vector to deinterleave
12207 MVT ConcatVT =
12208 MVT::getVectorVT(VecVT.getVectorElementType(),
12209 VecVT.getVectorElementCount().multiplyCoefficientBy(
12210 PowerOf2Ceil(Factor)));
12211 if (Ops.size() < PowerOf2Ceil(Factor))
12212 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
12213 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
12214
12215 if (Factor == 2) {
12216 // We can deinterleave through vnsrl.wi if the element type is smaller than
12217 // ELEN
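    // A rough sketch of the trick (illustrative, SEW=8, Factor=2): the
    // concatenated data a0 b0 a1 b1 ... can be viewed as 16-bit elements
    // (b0:a0), (b1:a1), ...; a narrowing shift right by 0 bits truncates to
    // the even lanes a0 a1 ... and a shift by 8 bits yields the odd lanes
    // b0 b1 ..., which is what getDeinterleaveShiftAndTrunc builds below.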
12218 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12219 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
12220 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
12221 return DAG.getMergeValues({Even, Odd}, DL);
12222 }
12223
12224 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
12225    // possible mask vector, then extract the required subvector. Doing this
12226 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
12227 // creation to be rematerialized during register allocation to reduce
12228 // register pressure if needed.
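    // For illustration: splatting the i8 constant 0b01010101 and bitcasting
    // the bytes to i1 lanes yields the mask 1,0,1,0,... (even lanes active),
    // and 0b10101010 yields 0,1,0,1,... (odd lanes active); extracting the
    // MaskVT-sized prefix then gives the masks fed to VECTOR_COMPRESS below.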
12229
12230 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
12231
12232 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
12233 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
12234 SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
12235
12236 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
12237 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
12238 SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
12239
12240 // vcompress the even and odd elements into two separate vectors
12241 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12242 EvenMask, DAG.getUNDEF(ConcatVT));
12243 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12244 OddMask, DAG.getUNDEF(ConcatVT));
12245
12246    // Extract the low half of the compressed result for even and odd
12247 SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
12248 SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
12249
12250 return DAG.getMergeValues({Even, Odd}, DL);
12251 }
12252
12253  // Store with a unit-stride store and load it back with a segmented load.
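  // Rough sketch of the layout (illustrative, Factor = 3): the concatenated
  // operands hold the interleaved sequence a0 b0 c0 a1 b1 c1 ...; storing it
  // contiguously and reloading with a vlseg3 segment load yields the three
  // deinterleaved fields a0 a1 ..., b0 b1 ... and c0 c1 ....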
12254 MVT XLenVT = Subtarget.getXLenVT();
12255 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12256 SDValue Passthru = DAG.getUNDEF(ConcatVT);
12257
12258 // Allocate a stack slot.
12259 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12260  SDValue StackPtr =
12261      DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
12262 auto &MF = DAG.getMachineFunction();
12263 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12264 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12265
12266 SDValue StoreOps[] = {DAG.getEntryNode(),
12267 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
12268 Concat, StackPtr, VL};
12269
12270 SDValue Chain = DAG.getMemIntrinsicNode(
12271 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
12272 ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12273      MachineMemOperand::MOStore);
12274
12275 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
12276 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
12277 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
12278 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
12279 Intrinsic::riscv_vlseg8_mask};
12280
12281 SDValue LoadOps[] = {
12282 Chain,
12283 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
12284 Passthru,
12285 StackPtr,
12286 Mask,
12287 VL,
12288      DAG.getTargetConstant(
12289          RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
12290      DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
12291
12292 unsigned Sz =
12293 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12294 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12295
12296  SDValue Load = DAG.getMemIntrinsicNode(
12297      ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
12298 LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12299      MachineMemOperand::MOLoad);
12300
12301 SmallVector<SDValue, 8> Res(Factor);
12302
12303 for (unsigned i = 0U; i < Factor; ++i)
12304 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
12305 DAG.getTargetConstant(i, DL, MVT::i32));
12306
12307 return DAG.getMergeValues(Res, DL);
12308}
12309
12310SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
12311 SelectionDAG &DAG) const {
12312 SDLoc DL(Op);
12313 MVT VecVT = Op.getSimpleValueType();
12314
12315 const unsigned Factor = Op.getNumOperands();
12316 assert(Factor <= 8);
12317
12318 // i1 vectors need to be widened to i8
12319 if (VecVT.getVectorElementType() == MVT::i1)
12320 return widenVectorOpsToi8(Op, DL, DAG);
12321
12322 // Convert to scalable vectors first.
12323 if (VecVT.isFixedLengthVector()) {
12324 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12325    SmallVector<SDValue, 8> Ops(Factor);
12326    for (unsigned i = 0U; i < Factor; ++i)
12327 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12328 Subtarget);
12329
12330 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12331 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
12332
12333 SmallVector<SDValue, 8> Res(Factor);
12334 for (unsigned i = 0U; i < Factor; ++i)
12335 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
12336 Subtarget);
12337 return DAG.getMergeValues(Res, DL);
12338 }
12339
12340 MVT XLenVT = Subtarget.getXLenVT();
12341 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12342
12343 // If the VT is larger than LMUL=8, we need to split and reassemble.
12344 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12345 (8 * RISCV::RVVBitsPerBlock)) {
12346 SmallVector<SDValue, 8> Ops(Factor * 2);
12347 for (unsigned i = 0; i != Factor; ++i) {
12348 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12349 Ops[i] = OpLo;
12350 Ops[i + Factor] = OpHi;
12351 }
12352
12353 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12354
12355 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12356 ArrayRef(Ops).take_front(Factor)),
12357                     DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12358                                 ArrayRef(Ops).drop_front(Factor))};
12359
12360 SmallVector<SDValue, 8> Concats(Factor);
12361 for (unsigned i = 0; i != Factor; ++i) {
12362 unsigned IdxLo = 2 * i;
12363 unsigned IdxHi = 2 * i + 1;
12364 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
12365 Res[IdxLo / Factor].getValue(IdxLo % Factor),
12366 Res[IdxHi / Factor].getValue(IdxHi % Factor));
12367 }
12368
12369 return DAG.getMergeValues(Concats, DL);
12370 }
12371
12372 SDValue Interleaved;
12373
12374 // Spill to the stack using a segment store for simplicity.
12375 if (Factor != 2) {
12376 EVT MemVT =
12377        EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(),
12378                         VecVT.getVectorElementCount() * Factor);
12379
12380 // Allocate a stack slot.
12381 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12382    SDValue StackPtr =
12383        DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12384 EVT PtrVT = StackPtr.getValueType();
12385 auto &MF = DAG.getMachineFunction();
12386 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12387 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12388
12389 static const Intrinsic::ID IntrIds[] = {
12390 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12391 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12392 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12393 Intrinsic::riscv_vsseg8_mask,
12394 };
12395
12396 unsigned Sz =
12397 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12398 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12399
12400 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12401 for (unsigned i = 0; i < Factor; i++)
12402 StoredVal =
12403 DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12404 Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
12405
12406 SDValue Ops[] = {DAG.getEntryNode(),
12407 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
12408 StoredVal,
12409 StackPtr,
12410 Mask,
12411 VL,
12412                     DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()),
12413                                           DL, XLenVT)};
12414
12415 SDValue Chain = DAG.getMemIntrinsicNode(
12416 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12417 VecVT.getVectorElementType(), PtrInfo, Alignment,
12418        MachineMemOperand::MOStore);
12419
12420 SmallVector<SDValue, 8> Loads(Factor);
12421
12422    SDValue Increment =
12423        DAG.getVScale(DL, PtrVT,
12424 APInt(PtrVT.getFixedSizeInBits(),
12425 VecVT.getStoreSize().getKnownMinValue()));
12426 for (unsigned i = 0; i != Factor; ++i) {
12427 if (i != 0)
12428 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
12429
12430 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12431 }
12432
12433 return DAG.getMergeValues(Loads, DL);
12434 }
12435
12436 // Use ri.vzip2{a,b} if available
12437 // TODO: Figure out the best lowering for the spread variants
12438 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
12439 !Op.getOperand(1).isUndef()) {
12440 // Freeze the sources so we can increase their use count.
12441 SDValue V1 = DAG.getFreeze(Op->getOperand(0));
12442 SDValue V2 = DAG.getFreeze(Op->getOperand(1));
12443 SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12444 SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12445 return DAG.getMergeValues({Lo, Hi}, DL);
12446 }
12447
12448 // If the element type is smaller than ELEN, then we can interleave with
12449 // vwaddu.vv and vwmaccu.vx
12450 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12451 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
12452 DAG, Subtarget);
12453 } else {
12454    // Otherwise, fall back to using vrgatherei16.vv
12455 MVT ConcatVT =
12456        MVT::getVectorVT(VecVT.getVectorElementType(),
12457                         VecVT.getVectorElementCount().multiplyCoefficientBy(2));
12458    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
12459 Op.getOperand(0), Op.getOperand(1));
12460
12461 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
12462
12463 // 0 1 2 3 4 5 6 7 ...
12464 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
12465
12466 // 1 1 1 1 1 1 1 1 ...
12467 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
12468
12469 // 1 0 1 0 1 0 1 0 ...
12470 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
12471 OddMask = DAG.getSetCC(
12472 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
12473 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
12474        ISD::SETNE);
12475
12476 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
12477
12478 // Build up the index vector for interleaving the concatenated vector
12479 // 0 0 1 1 2 2 3 3 ...
12480 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
12481 // 0 n 1 n+1 2 n+2 3 n+3 ...
12482 Idx =
12483 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
12484
12485 // Then perform the interleave
12486 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
12487 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
12488 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
12489 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
12490 }
12491
12492 // Extract the two halves from the interleaved result
12493 SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
12494 SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
12495 VecVT.getVectorMinNumElements());
12496
12497 return DAG.getMergeValues({Lo, Hi}, DL);
12498}
12499
12500// Lower step_vector to the vid instruction. Any non-identity step value must
12501// be accounted for by manual expansion.
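// For example (illustrative): a step of 8 becomes vid.v followed by a shift
// left by 3 (0, 8, 16, ...), while a non-power-of-two step such as 6 becomes
// vid.v followed by a multiply with the splatted constant (0, 6, 12, ...).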
12502SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
12503 SelectionDAG &DAG) const {
12504 SDLoc DL(Op);
12505 MVT VT = Op.getSimpleValueType();
12506 assert(VT.isScalableVector() && "Expected scalable vector");
12507 MVT XLenVT = Subtarget.getXLenVT();
12508 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
12509 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
12510 uint64_t StepValImm = Op.getConstantOperandVal(0);
12511 if (StepValImm != 1) {
12512 if (isPowerOf2_64(StepValImm)) {
12513 SDValue StepVal =
12514 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
12515 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
12516 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
12517 } else {
12518 SDValue StepVal = lowerScalarSplat(
12519 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
12520 VL, VT, DL, DAG, Subtarget);
12521 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
12522 }
12523 }
12524 return StepVec;
12525}
12526
12527// Implement vector_reverse using vrgather.vv with indices determined by
12528// subtracting the id of each element from (VLMAX-1). This will convert
12529// the indices like so:
12530// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
12531// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12532SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
12533 SelectionDAG &DAG) const {
12534 SDLoc DL(Op);
12535 MVT VecVT = Op.getSimpleValueType();
12536 if (VecVT.getVectorElementType() == MVT::i1) {
12537 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
12538 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
12539 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
12540 return DAG.getSetCC(DL, VecVT, Op2,
12541 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
12542 }
12543
12544 MVT ContainerVT = VecVT;
12545 SDValue Vec = Op.getOperand(0);
12546 if (VecVT.isFixedLengthVector()) {
12547 ContainerVT = getContainerForFixedLengthVector(VecVT);
12548 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12549 }
12550
12551 MVT XLenVT = Subtarget.getXLenVT();
12552 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12553
12554 // On some uarchs vrgather.vv will read from every input register for each
12555  // output register, regardless of the indices. However, to reverse a vector,
12556 // each output register only needs to read from one register. So decompose it
12557 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
12558 // O(LMUL^2).
12559 //
12560 // vsetvli a1, zero, e64, m4, ta, ma
12561 // vrgatherei16.vv v12, v8, v16
12562 // ->
12563 // vsetvli a1, zero, e64, m1, ta, ma
12564 // vrgather.vv v15, v8, v16
12565 // vrgather.vv v14, v9, v16
12566 // vrgather.vv v13, v10, v16
12567 // vrgather.vv v12, v11, v16
12568 if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
12569 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
12570 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
12571 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
12572 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
12573 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
12574
12575 // Fixed length vectors might not fit exactly into their container, and so
12576 // leave a gap in the front of the vector after being reversed. Slide this
12577 // away.
12578 //
12579 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
12580 // 0 1 2 3 x x x x <- reverse
12581 // x x x x 0 1 2 3 <- vslidedown.vx
12582 if (VecVT.isFixedLengthVector()) {
12583 SDValue Offset = DAG.getNode(
12584 ISD::SUB, DL, XLenVT,
12585 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
12586 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
12587 Concat =
12588 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12589 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
12590 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
12591 }
12592 return Concat;
12593 }
12594
12595 unsigned EltSize = ContainerVT.getScalarSizeInBits();
12596 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
12597 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12598 unsigned MaxVLMAX =
12599 VecVT.isFixedLengthVector()
12600 ? VecVT.getVectorNumElements()
12601 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12602
12603 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12604 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
12605
12606 // If this is SEW=8 and VLMAX is potentially more than 256, we need
12607 // to use vrgatherei16.vv.
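  // Worked example (illustrative): with VLEN=2048, LMUL=8 and SEW=8 there are
  // up to 2048 elements, so i8 gather indices (0..255) cannot address every
  // source element; promoting the indices to i16 via vrgatherei16 keeps the
  // reverse correct at the cost of a larger index register group.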
12608 if (MaxVLMAX > 256 && EltSize == 8) {
12609    // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
12610 // Reverse each half, then reassemble them in reverse order.
12611    // NOTE: It's also possible that after splitting, VLMAX no longer
12612 // requires vrgatherei16.vv.
12613 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12614 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
12615 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
12616 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12617 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12618 // Reassemble the low and high pieces reversed.
12619 // FIXME: This is a CONCAT_VECTORS.
12620 SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0);
12621 return DAG.getInsertSubvector(DL, Res, Lo,
12622 LoVT.getVectorMinNumElements());
12623 }
12624
12625 // Just promote the int type to i16 which will double the LMUL.
12626 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
12627 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12628 }
12629
12630 // At LMUL > 1, do the index computation in 16 bits to reduce register
12631 // pressure.
12632 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
12633 IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
12634 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
12635 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12636 IntVT = IntVT.changeVectorElementType(MVT::i16);
12637 }
12638
12639 // Calculate VLMAX-1 for the desired SEW.
12640 SDValue VLMinus1 = DAG.getNode(
12641 ISD::SUB, DL, XLenVT,
12642 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
12643 DAG.getConstant(1, DL, XLenVT));
12644
12645 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
12646 bool IsRV32E64 =
12647 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
12648 SDValue SplatVL;
12649 if (!IsRV32E64)
12650 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
12651 else
12652 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
12653 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
12654
12655 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
12656 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
12657 DAG.getUNDEF(IntVT), Mask, VL);
12658
12659 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
12660 DAG.getUNDEF(ContainerVT), Mask, VL);
12661 if (VecVT.isFixedLengthVector())
12662 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
12663 return Gather;
12664}
12665
12666SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
12667 SelectionDAG &DAG) const {
12668 SDLoc DL(Op);
12669 SDValue V1 = Op.getOperand(0);
12670 SDValue V2 = Op.getOperand(1);
12671 MVT XLenVT = Subtarget.getXLenVT();
12672 MVT VecVT = Op.getSimpleValueType();
12673
12674 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
12675
12676 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
12677 SDValue DownOffset, UpOffset;
12678 if (ImmValue >= 0) {
12679    // The operand is a TargetConstant, so we need to rebuild it as a regular
12680 // constant.
12681 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12682 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
12683 } else {
12684    // The operand is a TargetConstant, so we need to rebuild it as a regular
12685 // constant rather than negating the original operand.
12686 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12687 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
12688 }
12689
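  // Illustrative example, assuming VLMAX = 8 and an immediate of 2: the
  // slidedown moves V1 elements 2..7 into positions 0..5 (DownOffset = 2),
  // and the slideup then writes V2 elements 0..1 into positions 6..7
  // (UpOffset = VLMAX - 2 = 6), forming the spliced result.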
12690 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
12691
12692 SDValue SlideDown = getVSlidedown(
12693 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
12694 Subtarget.hasVLDependentLatency() ? UpOffset
12695 : DAG.getRegister(RISCV::X0, XLenVT));
12696 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
12697 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
12698                     RISCVVType::TAIL_AGNOSTIC);
12699}
12700
12701SDValue
12702RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
12703 SelectionDAG &DAG) const {
12704 SDLoc DL(Op);
12705 auto *Load = cast<LoadSDNode>(Op);
12706
12707  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12708                                        Load->getMemoryVT(),
12709 *Load->getMemOperand()) &&
12710 "Expecting a correctly-aligned load");
12711
12712 MVT VT = Op.getSimpleValueType();
12713 MVT XLenVT = Subtarget.getXLenVT();
12714 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12715
12716 // If we know the exact VLEN and our fixed length vector completely fills
12717 // the container, use a whole register load instead.
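  // For example (illustrative): a v8i32 load on a subtarget whose exact VLEN
  // is known to be 256 has MinVLMAX == MaxVLMAX == 8 for the m1 container, so
  // the fixed vector fills the register exactly and a whole register load
  // (e.g. vl1re32.v) can be used without setting up VL.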
12718 const auto [MinVLMAX, MaxVLMAX] =
12719 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12720 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12721 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12722 MachineMemOperand *MMO = Load->getMemOperand();
12723 SDValue NewLoad =
12724 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
12725 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
12726 MMO->getAAInfo(), MMO->getRanges());
12727 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12728 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12729 }
12730
12731 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12732
12733 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12734 SDValue IntID = DAG.getTargetConstant(
12735 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
12736 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
12737 if (!IsMaskOp)
12738 Ops.push_back(DAG.getUNDEF(ContainerVT));
12739 Ops.push_back(Load->getBasePtr());
12740 Ops.push_back(VL);
12741 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12742 SDValue NewLoad =
12743      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12744                              Load->getMemoryVT(), Load->getMemOperand());
12745
12746 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12747 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12748}
12749
12750SDValue
12751RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
12752 SelectionDAG &DAG) const {
12753 SDLoc DL(Op);
12754 auto *Store = cast<StoreSDNode>(Op);
12755
12756  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12757                                        Store->getMemoryVT(),
12758 *Store->getMemOperand()) &&
12759 "Expecting a correctly-aligned store");
12760
12761 SDValue StoreVal = Store->getValue();
12762 MVT VT = StoreVal.getSimpleValueType();
12763 MVT XLenVT = Subtarget.getXLenVT();
12764
12765  // If the size is less than a byte, we need to pad with zeros to make a byte.
12766 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
12767 VT = MVT::v8i1;
12768 StoreVal =
12769 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
12770 }
12771
12772 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12773
12774 SDValue NewValue =
12775 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12776
12777 // If we know the exact VLEN and our fixed length vector completely fills
12778 // the container, use a whole register store instead.
12779 const auto [MinVLMAX, MaxVLMAX] =
12780 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12781 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12782 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12783 MachineMemOperand *MMO = Store->getMemOperand();
12784 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
12785 MMO->getPointerInfo(), MMO->getBaseAlign(),
12786 MMO->getFlags(), MMO->getAAInfo());
12787 }
12788
12789 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12790
12791 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12792 SDValue IntID = DAG.getTargetConstant(
12793 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
12794 return DAG.getMemIntrinsicNode(
12795 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
12796 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
12797 Store->getMemoryVT(), Store->getMemOperand());
12798}
12799
12800SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
12801 SelectionDAG &DAG) const {
12802 SDLoc DL(Op);
12803 MVT VT = Op.getSimpleValueType();
12804
12805 const auto *MemSD = cast<MemSDNode>(Op);
12806 EVT MemVT = MemSD->getMemoryVT();
12807 MachineMemOperand *MMO = MemSD->getMemOperand();
12808 SDValue Chain = MemSD->getChain();
12809 SDValue BasePtr = MemSD->getBasePtr();
12810
12811 SDValue Mask, PassThru, VL;
12812 bool IsExpandingLoad = false;
12813 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
12814 Mask = VPLoad->getMask();
12815 PassThru = DAG.getUNDEF(VT);
12816 VL = VPLoad->getVectorLength();
12817 } else {
12818 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
12819 Mask = MLoad->getMask();
12820 PassThru = MLoad->getPassThru();
12821 IsExpandingLoad = MLoad->isExpandingLoad();
12822 }
12823
12824 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12825
12826 MVT XLenVT = Subtarget.getXLenVT();
12827
12828 MVT ContainerVT = VT;
12829 if (VT.isFixedLengthVector()) {
12830 ContainerVT = getContainerForFixedLengthVector(VT);
12831 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12832 if (!IsUnmasked) {
12833 MVT MaskVT = getMaskTypeFor(ContainerVT);
12834 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12835 }
12836 }
12837
12838 if (!VL)
12839 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12840
12841 SDValue ExpandingVL;
12842 if (!IsUnmasked && IsExpandingLoad) {
12843 ExpandingVL = VL;
12844 VL =
12845 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12846 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12847 }
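  // Sketch of the expanding-load lowering (illustrative): with a mask of
  // 1,0,1,1 the unit-stride vle reads vcpop(mask) = 3 contiguous elements
  // a,b,c; viota.m assigns each lane its rank among the active lanes
  // (0,1,1,2), and the masked vrgather below then places a in lane 0, b in
  // lane 2 and c in lane 3, taking inactive lanes from the passthru.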
12848
12849 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
12850 : Intrinsic::riscv_vle_mask;
12851 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12852 if (IntID == Intrinsic::riscv_vle)
12853 Ops.push_back(DAG.getUNDEF(ContainerVT));
12854 else
12855 Ops.push_back(PassThru);
12856 Ops.push_back(BasePtr);
12857 if (IntID == Intrinsic::riscv_vle_mask)
12858 Ops.push_back(Mask);
12859 Ops.push_back(VL);
12860 if (IntID == Intrinsic::riscv_vle_mask)
12861 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
12862
12863 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12864
12865 SDValue Result =
12866 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12867 Chain = Result.getValue(1);
12868 if (ExpandingVL) {
12869 MVT IndexVT = ContainerVT;
12870 if (ContainerVT.isFloatingPoint())
12871 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
12872
12873 MVT IndexEltVT = IndexVT.getVectorElementType();
12874 bool UseVRGATHEREI16 = false;
12875    // If the index vector is an i8 vector and the element count exceeds 256,
12876    // we should change the element type of the index vector to i16 to avoid
12877 // overflow.
12878 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
12879 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
12880 assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
12881 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
12882 UseVRGATHEREI16 = true;
12883 }
12884
12885 SDValue Iota =
12886 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
12887 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
12888 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
12889 Result =
12890 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
12891 : RISCVISD::VRGATHER_VV_VL,
12892 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
12893 }
12894
12895 if (VT.isFixedLengthVector())
12896 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12897
12898 return DAG.getMergeValues({Result, Chain}, DL);
12899}
12900
12901SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
12902 SDLoc DL(Op);
12903 MVT VT = Op->getSimpleValueType(0);
12904
12905 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
12906 EVT MemVT = VPLoadFF->getMemoryVT();
12907 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
12908 SDValue Chain = VPLoadFF->getChain();
12909 SDValue BasePtr = VPLoadFF->getBasePtr();
12910
12911 SDValue Mask = VPLoadFF->getMask();
12912 SDValue VL = VPLoadFF->getVectorLength();
12913
12914 MVT XLenVT = Subtarget.getXLenVT();
12915
12916 MVT ContainerVT = VT;
12917 if (VT.isFixedLengthVector()) {
12918 ContainerVT = getContainerForFixedLengthVector(VT);
12919 MVT MaskVT = getMaskTypeFor(ContainerVT);
12920 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12921 }
12922
12923 unsigned IntID = Intrinsic::riscv_vleff_mask;
12924 SDValue Ops[] = {
12925 Chain,
12926 DAG.getTargetConstant(IntID, DL, XLenVT),
12927 DAG.getUNDEF(ContainerVT),
12928 BasePtr,
12929 Mask,
12930 VL,
12931      DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)};
12932
12933 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
12934
12935 SDValue Result =
12936 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12937 SDValue OutVL = Result.getValue(1);
12938 Chain = Result.getValue(2);
12939
12940 if (VT.isFixedLengthVector())
12941 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12942
12943 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
12944}
12945
12946SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
12947 SelectionDAG &DAG) const {
12948 SDLoc DL(Op);
12949
12950 const auto *MemSD = cast<MemSDNode>(Op);
12951 EVT MemVT = MemSD->getMemoryVT();
12952 MachineMemOperand *MMO = MemSD->getMemOperand();
12953 SDValue Chain = MemSD->getChain();
12954 SDValue BasePtr = MemSD->getBasePtr();
12955 SDValue Val, Mask, VL;
12956
12957 bool IsCompressingStore = false;
12958 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
12959 Val = VPStore->getValue();
12960 Mask = VPStore->getMask();
12961 VL = VPStore->getVectorLength();
12962 } else {
12963 const auto *MStore = cast<MaskedStoreSDNode>(Op);
12964 Val = MStore->getValue();
12965 Mask = MStore->getMask();
12966 IsCompressingStore = MStore->isCompressingStore();
12967 }
12968
12969 bool IsUnmasked =
12970 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
12971
12972 MVT VT = Val.getSimpleValueType();
12973 MVT XLenVT = Subtarget.getXLenVT();
12974
12975 MVT ContainerVT = VT;
12976 if (VT.isFixedLengthVector()) {
12977 ContainerVT = getContainerForFixedLengthVector(VT);
12978
12979 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12980 if (!IsUnmasked || IsCompressingStore) {
12981 MVT MaskVT = getMaskTypeFor(ContainerVT);
12982 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12983 }
12984 }
12985
12986 if (!VL)
12987 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12988
12989 if (IsCompressingStore) {
12990 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
12991 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
12992 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
12993 VL =
12994 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12995 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12996 }
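  // For illustration: a compressing store of a,b,c,d under the mask 1,0,1,1
  // first vcompresses the value to a,c,d and then stores vcpop(mask) = 3
  // elements with a plain unit-stride vse.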
12997
12998 unsigned IntID =
12999 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
13000 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13001 Ops.push_back(Val);
13002 Ops.push_back(BasePtr);
13003 if (!IsUnmasked)
13004 Ops.push_back(Mask);
13005 Ops.push_back(VL);
13006
13007  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
13008                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
13009}
13010
13011SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
13012 SelectionDAG &DAG) const {
13013 SDLoc DL(Op);
13014 SDValue Val = Op.getOperand(0);
13015 SDValue Mask = Op.getOperand(1);
13016 SDValue Passthru = Op.getOperand(2);
13017
13018 MVT VT = Val.getSimpleValueType();
13019 MVT XLenVT = Subtarget.getXLenVT();
13020 MVT ContainerVT = VT;
13021 if (VT.isFixedLengthVector()) {
13022 ContainerVT = getContainerForFixedLengthVector(VT);
13023 MVT MaskVT = getMaskTypeFor(ContainerVT);
13024 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13025 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13026 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
13027 }
13028
13029 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13030 SDValue Res =
13031 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13032 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13033 Passthru, Val, Mask, VL);
13034
13035 if (VT.isFixedLengthVector())
13036 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
13037
13038 return Res;
13039}
13040
13041SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
13042 SelectionDAG &DAG) const {
13043 unsigned Opc = Op.getOpcode();
13044 SDLoc DL(Op);
13045 SDValue Chain = Op.getOperand(0);
13046 SDValue Op1 = Op.getOperand(1);
13047 SDValue Op2 = Op.getOperand(2);
13048 SDValue CC = Op.getOperand(3);
13049 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
13050 MVT VT = Op.getSimpleValueType();
13051 MVT InVT = Op1.getSimpleValueType();
13052
13053  // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with the OEQ/UNE
13054  // condition codes.
13055 if (Opc == ISD::STRICT_FSETCCS) {
13056    // Expand strict_fsetccs(x, y, oeq) to
13057    // (and strict_fsetccs(x, y, ole), strict_fsetccs(y, x, ole))
13058 SDVTList VTList = Op->getVTList();
13059 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
13060 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
13061 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13062 Op2, OLECCVal);
13063 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
13064 Op1, OLECCVal);
13065 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13066 Tmp1.getValue(1), Tmp2.getValue(1));
13067 // Tmp1 and Tmp2 might be the same node.
13068 if (Tmp1 != Tmp2)
13069 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
13070 return DAG.getMergeValues({Tmp1, OutChain}, DL);
13071 }
13072
13073 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
13074 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
13075 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
13076 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13077 Op2, OEQCCVal);
13078 SDValue Res = DAG.getNOT(DL, OEQ, VT);
13079 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
13080 }
13081 }
13082
13083 MVT ContainerInVT = InVT;
13084 if (InVT.isFixedLengthVector()) {
13085 ContainerInVT = getContainerForFixedLengthVector(InVT);
13086 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
13087 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
13088 }
13089 MVT MaskVT = getMaskTypeFor(ContainerInVT);
13090
13091 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
13092
13093 SDValue Res;
13094 if (Opc == ISD::STRICT_FSETCC &&
13095 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
13096 CCVal == ISD::SETOLE)) {
13097    // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN inputs. Generate a
13098    // mask that is only active when both input elements are ordered.
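    // For example (illustrative): if some lane of Op2 holds a qNaN, the
    // ordered self-compare Op2 == Op2 is false in that lane, so the combined
    // mask bit is 0 and the quiet NaN never reaches the signaling compare,
    // avoiding a spurious invalid-operation exception.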
13099 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
13100 SDValue OrderMask1 = DAG.getNode(
13101 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13102 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13103 True, VL});
13104 SDValue OrderMask2 = DAG.getNode(
13105 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13106 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13107 True, VL});
13108 Mask =
13109 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
13110 // Use Mask as the passthru operand to let the result be 0 if either of the
13111 // inputs is unordered.
13112 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
13113 DAG.getVTList(MaskVT, MVT::Other),
13114 {Chain, Op1, Op2, CC, Mask, Mask, VL});
13115 } else {
13116 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
13117 : RISCVISD::STRICT_FSETCCS_VL;
13118 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
13119 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
13120 }
13121
13122 if (VT.isFixedLengthVector()) {
13123 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
13124 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
13125 }
13126 return Res;
13127}
13128
13129// Lower vector ABS to smax(X, sub(0, X)).
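// For example (illustrative, i8 lanes): X = -5 gives sub(0, X) = 5 and
// smax(-5, 5) = 5, while X = -128 wraps to sub(0, -128) = -128 and
// smax(-128, -128) = -128, matching the two's-complement behaviour of
// ISD::ABS.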
13130SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13131 SDLoc DL(Op);
13132 MVT VT = Op.getSimpleValueType();
13133 SDValue X = Op.getOperand(0);
13134
13135 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13136 "Unexpected type for ISD::ABS");
13137
13138 MVT ContainerVT = VT;
13139 if (VT.isFixedLengthVector()) {
13140 ContainerVT = getContainerForFixedLengthVector(VT);
13141 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13142 }
13143
13144 SDValue Mask, VL;
13145 if (Op->getOpcode() == ISD::VP_ABS) {
13146 Mask = Op->getOperand(1);
13147 if (VT.isFixedLengthVector())
13148 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13149 Subtarget);
13150 VL = Op->getOperand(2);
13151 } else
13152 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13153
13154 SDValue SplatZero = DAG.getNode(
13155 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13156 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13157 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13158 DAG.getUNDEF(ContainerVT), Mask, VL);
13159 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13160 DAG.getUNDEF(ContainerVT), Mask, VL);
13161
13162 if (VT.isFixedLengthVector())
13163 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
13164 return Max;
13165}
13166
13167SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
13168 SelectionDAG &DAG) const {
13169 const auto &TSInfo =
13170 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13171
13172 unsigned NewOpc = getRISCVVLOp(Op);
13173 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
13174 bool HasMask = TSInfo.hasMaskOp(NewOpc);
13175
13176 MVT VT = Op.getSimpleValueType();
13177 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13178
13179 // Create list of operands by converting existing ones to scalable types.
13180  SmallVector<SDValue, 6> Ops;
13181  for (const SDValue &V : Op->op_values()) {
13182 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13183
13184 // Pass through non-vector operands.
13185 if (!V.getValueType().isVector()) {
13186 Ops.push_back(V);
13187 continue;
13188 }
13189
13190 // "cast" fixed length vector to a scalable vector.
13191 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
13192 "Only fixed length vectors are supported!");
13193 MVT VContainerVT = ContainerVT.changeVectorElementType(
13194 V.getSimpleValueType().getVectorElementType());
13195 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
13196 }
13197
13198 SDLoc DL(Op);
13199 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13200 if (HasPassthruOp)
13201 Ops.push_back(DAG.getUNDEF(ContainerVT));
13202 if (HasMask)
13203 Ops.push_back(Mask);
13204 Ops.push_back(VL);
13205
13206 // StrictFP operations have two result values. Their lowered result should
13207  // have the same result count.
13208 if (Op->isStrictFPOpcode()) {
13209 SDValue ScalableRes =
13210 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
13211 Op->getFlags());
13212 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13213 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
13214 }
13215
13216 SDValue ScalableRes =
13217 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
13218 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13219}
13220
13221// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
13222// * Operands of each node are assumed to be in the same order.
13223// * The EVL operand is promoted from i32 to i64 on RV64.
13224// * Fixed-length vectors are converted to their scalable-vector container
13225// types.
13226SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
13227 const auto &TSInfo =
13228 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13229
13230 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13231 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
13232
13233 SDLoc DL(Op);
13234 MVT VT = Op.getSimpleValueType();
13235  SmallVector<SDValue, 16> Ops;
13236
13237 MVT ContainerVT = VT;
13238 if (VT.isFixedLengthVector())
13239 ContainerVT = getContainerForFixedLengthVector(VT);
13240
13241 for (const auto &OpIdx : enumerate(Op->ops())) {
13242 SDValue V = OpIdx.value();
13243 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13244    // Add a dummy passthru value before the mask, or if there isn't a mask,
13245    // before the EVL.
13246 if (HasPassthruOp) {
13247 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
13248 if (MaskIdx) {
13249 if (*MaskIdx == OpIdx.index())
13250 Ops.push_back(DAG.getUNDEF(ContainerVT));
13251 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
13252 OpIdx.index()) {
13253 if (Op.getOpcode() == ISD::VP_MERGE) {
13254 // For VP_MERGE, copy the false operand instead of an undef value.
13255 Ops.push_back(Ops.back());
13256 } else {
13257 assert(Op.getOpcode() == ISD::VP_SELECT);
13258 // For VP_SELECT, add an undef value.
13259 Ops.push_back(DAG.getUNDEF(ContainerVT));
13260 }
13261 }
13262 }
13263 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
13264 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
13265 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
13266      Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
13267                                          Subtarget.getXLenVT()));
13268 // Pass through operands which aren't fixed-length vectors.
13269 if (!V.getValueType().isFixedLengthVector()) {
13270 Ops.push_back(V);
13271 continue;
13272 }
13273 // "cast" fixed length vector to a scalable vector.
13274 MVT OpVT = V.getSimpleValueType();
13275 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
13276 assert(useRVVForFixedLengthVectorVT(OpVT) &&
13277 "Only fixed length vectors are supported!");
13278 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
13279 }
13280
13281 if (!VT.isFixedLengthVector())
13282 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
13283
13284 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
13285
13286 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
13287}
13288
13289SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
13290 SelectionDAG &DAG) const {
13291 SDLoc DL(Op);
13292 MVT VT = Op.getSimpleValueType();
13293
13294 SDValue Src = Op.getOperand(0);
13295 // NOTE: Mask is dropped.
13296 SDValue VL = Op.getOperand(2);
13297
13298 MVT ContainerVT = VT;
13299 if (VT.isFixedLengthVector()) {
13300 ContainerVT = getContainerForFixedLengthVector(VT);
13301 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
13302 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13303 }
13304
13305 MVT XLenVT = Subtarget.getXLenVT();
13306 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13307 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13308 DAG.getUNDEF(ContainerVT), Zero, VL);
13309
13310 SDValue SplatValue = DAG.getSignedConstant(
13311 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
13312 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13313 DAG.getUNDEF(ContainerVT), SplatValue, VL);
13314
13315 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
13316 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
13317 if (!VT.isFixedLengthVector())
13318 return Result;
13319 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13320}
13321
13322SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
13323 SelectionDAG &DAG) const {
13324 SDLoc DL(Op);
13325 MVT VT = Op.getSimpleValueType();
13326
13327 SDValue Op1 = Op.getOperand(0);
13328 SDValue Op2 = Op.getOperand(1);
13329 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13330 // NOTE: Mask is dropped.
13331 SDValue VL = Op.getOperand(4);
13332
13333 MVT ContainerVT = VT;
13334 if (VT.isFixedLengthVector()) {
13335 ContainerVT = getContainerForFixedLengthVector(VT);
13336 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13337 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13338 }
13339
13341 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
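  // Illustrative check of one identity below: for i1 lanes SETGT is a signed
  // compare in which 1 is interpreted as -1, so X >s Y only holds for
  // (X, Y) = (0, 1); that is exactly ~X & Y, which the SETGT/SETULT case
  // builds from a vmxor with the all-ones mask followed by a vmand.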
13342
13343 switch (Condition) {
13344 default:
13345 break;
13346 // X != Y --> (X^Y)
13347 case ISD::SETNE:
13348 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13349 break;
13350 // X == Y --> ~(X^Y)
13351 case ISD::SETEQ: {
13352 SDValue Temp =
13353 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13354 Result =
13355 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
13356 break;
13357 }
13358 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13359 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13360 case ISD::SETGT:
13361 case ISD::SETULT: {
13362 SDValue Temp =
13363 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13364 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
13365 break;
13366 }
13367 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13368 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13369 case ISD::SETLT:
13370 case ISD::SETUGT: {
13371 SDValue Temp =
13372 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13373 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
13374 break;
13375 }
13376 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13377 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13378 case ISD::SETGE:
13379 case ISD::SETULE: {
13380 SDValue Temp =
13381 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13382 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
13383 break;
13384 }
13385 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13386 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13387 case ISD::SETLE:
13388 case ISD::SETUGE: {
13389 SDValue Temp =
13390 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13391 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
13392 break;
13393 }
13394 }
13395
13396 if (!VT.isFixedLengthVector())
13397 return Result;
13398 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13399}
13400
13401// Lower Floating-Point/Integer Type-Convert VP SDNodes
13402SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
13403 SelectionDAG &DAG) const {
13404 SDLoc DL(Op);
13405
13406 SDValue Src = Op.getOperand(0);
13407 SDValue Mask = Op.getOperand(1);
13408 SDValue VL = Op.getOperand(2);
13409 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13410
13411 MVT DstVT = Op.getSimpleValueType();
13412 MVT SrcVT = Src.getSimpleValueType();
13413 if (DstVT.isFixedLengthVector()) {
13414 DstVT = getContainerForFixedLengthVector(DstVT);
13415 SrcVT = getContainerForFixedLengthVector(SrcVT);
13416 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13417 MVT MaskVT = getMaskTypeFor(DstVT);
13418 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13419 }
13420
13421 unsigned DstEltSize = DstVT.getScalarSizeInBits();
13422 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13423
13424  SDValue Result;
13425  if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
13426 if (SrcVT.isInteger()) {
13427 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13428
13429 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
13430 ? RISCVISD::VSEXT_VL
13431 : RISCVISD::VZEXT_VL;
13432
13433 // Do we need to do any pre-widening before converting?
13434 if (SrcEltSize == 1) {
13435 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
13436 MVT XLenVT = Subtarget.getXLenVT();
13437 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13438 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13439 DAG.getUNDEF(IntVT), Zero, VL);
13440 SDValue One = DAG.getSignedConstant(
13441 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
13442 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13443 DAG.getUNDEF(IntVT), One, VL);
13444 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
13445 ZeroSplat, DAG.getUNDEF(IntVT), VL);
13446 } else if (DstEltSize > (2 * SrcEltSize)) {
13447 // Widen before converting.
13448 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
13449 DstVT.getVectorElementCount());
13450 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
13451 }
13452
13453 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13454 } else {
13455 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13456 "Wrong input/output vector types");
13457
13458 // Convert f16 to f32 then convert f32 to i64.
13459 if (DstEltSize > (2 * SrcEltSize)) {
13460 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13461 MVT InterimFVT =
13462 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13463 Src =
13464 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
13465 }
13466
13467 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13468 }
13469 } else { // Narrowing + Conversion
13470 if (SrcVT.isInteger()) {
13471 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13472      // First do a narrowing conversion to an FP type half the size, then
13473      // round the FP type to a smaller FP type if needed.
13474
13475 MVT InterimFVT = DstVT;
13476 if (SrcEltSize > (2 * DstEltSize)) {
13477 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
13478 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13479 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13480 }
13481
13482 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
13483
13484 if (InterimFVT != DstVT) {
13485 Src = Result;
13486 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
13487 }
13488 } else {
13489 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13490 "Wrong input/output vector types");
13491 // First do a narrowing conversion to an integer half the size, then
13492 // truncate if needed.
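      // For example (illustrative): converting an f64 vector to i8 first
      // emits a narrowing convert to i32 and then two truncating steps
      // (i32 -> i16 -> i8) through TRUNCATE_VECTOR_VL in the loop below.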
13493
13494 if (DstEltSize == 1) {
13495 // First convert to the same size integer, then convert to mask using
13496 // setcc.
13497 assert(SrcEltSize >= 16 && "Unexpected FP type!");
13498 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
13499 DstVT.getVectorElementCount());
13500 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13501
13502 // Compare the integer result to 0. The integer should be 0 or 1/-1,
13503 // otherwise the conversion was undefined.
13504 MVT XLenVT = Subtarget.getXLenVT();
13505 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
13506 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
13507 DAG.getUNDEF(InterimIVT), SplatZero, VL);
13508 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
13509 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
13510 DAG.getUNDEF(DstVT), Mask, VL});
13511 } else {
13512 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13513 DstVT.getVectorElementCount());
13514
13515 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13516
13517 while (InterimIVT != DstVT) {
13518 SrcEltSize /= 2;
13519 Src = Result;
13520 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13521 DstVT.getVectorElementCount());
13522 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
13523 Src, Mask, VL);
13524 }
13525 }
13526 }
13527 }
13528
13529 MVT VT = Op.getSimpleValueType();
13530 if (!VT.isFixedLengthVector())
13531 return Result;
13532 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13533}
13534
13535SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
13536 SelectionDAG &DAG) const {
13537 SDLoc DL(Op);
13538 MVT VT = Op.getSimpleValueType();
13539 MVT XLenVT = Subtarget.getXLenVT();
13540
13541 SDValue Mask = Op.getOperand(0);
13542 SDValue TrueVal = Op.getOperand(1);
13543 SDValue FalseVal = Op.getOperand(2);
13544 SDValue VL = Op.getOperand(3);
13545
13546 // Use default legalization if a vector of EVL type would be legal.
13547 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
13548                                  VT.getVectorElementCount());
13549  if (isTypeLegal(EVLVecVT))
13550 return SDValue();
13551
13552 MVT ContainerVT = VT;
13553 if (VT.isFixedLengthVector()) {
13554 ContainerVT = getContainerForFixedLengthVector(VT);
13555 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
13556 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
13557 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
13558 }
13559
13560 // Promote to a vector of i8.
13561 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
13562
13563 // Promote TrueVal and FalseVal using VLMax.
13564 // FIXME: Is there a better way to do this?
13565 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
13566 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13567 DAG.getUNDEF(PromotedVT),
13568 DAG.getConstant(1, DL, XLenVT), VLMax);
13569 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13570 DAG.getUNDEF(PromotedVT),
13571 DAG.getConstant(0, DL, XLenVT), VLMax);
13572 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
13573 SplatZero, DAG.getUNDEF(PromotedVT), VL);
13574 // Any element past VL uses FalseVal, so use VLMax
13575 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
13576 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
13577
13578 // VP_MERGE the two promoted values.
13579 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
13580 TrueVal, FalseVal, FalseVal, VL);
13581
13582 // Convert back to mask.
13583 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13584 SDValue Result = DAG.getNode(
13585 RISCVISD::SETCC_VL, DL, ContainerVT,
13586 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
13587 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
13588
13589 if (VT.isFixedLengthVector())
13590 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13591 return Result;
13592}
13593
13594SDValue
13595RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
13596 SelectionDAG &DAG) const {
13597 using namespace SDPatternMatch;
13598
13599 SDLoc DL(Op);
13600
13601 SDValue Op1 = Op.getOperand(0);
13602 SDValue Op2 = Op.getOperand(1);
13603 SDValue Offset = Op.getOperand(2);
13604 SDValue Mask = Op.getOperand(3);
13605 SDValue EVL1 = Op.getOperand(4);
13606 SDValue EVL2 = Op.getOperand(5);
13607
13608 const MVT XLenVT = Subtarget.getXLenVT();
13609 MVT VT = Op.getSimpleValueType();
13610 MVT ContainerVT = VT;
13611 if (VT.isFixedLengthVector()) {
13612 ContainerVT = getContainerForFixedLengthVector(VT);
13613 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13614 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13615 MVT MaskVT = getMaskTypeFor(ContainerVT);
13616 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13617 }
13618
13619 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
13620 if (IsMaskVector) {
13621 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
13622
13623 // Expand input operands
13624 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13625 DAG.getUNDEF(ContainerVT),
13626 DAG.getConstant(1, DL, XLenVT), EVL1);
13627 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13628 DAG.getUNDEF(ContainerVT),
13629 DAG.getConstant(0, DL, XLenVT), EVL1);
13630 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
13631 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
13632
13633 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13634 DAG.getUNDEF(ContainerVT),
13635 DAG.getConstant(1, DL, XLenVT), EVL2);
13636 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13637 DAG.getUNDEF(ContainerVT),
13638 DAG.getConstant(0, DL, XLenVT), EVL2);
13639 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
13640 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
13641 }
13642
13643 auto getVectorFirstEle = [](SDValue Vec) {
13644 SDValue FirstEle;
13645 if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
13646 return FirstEle;
13647
13648 if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
13650 return Vec.getOperand(0);
13651
13652 return SDValue();
13653 };
13654
13655 if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
13656 if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
13657 MVT EltVT = ContainerVT.getVectorElementType();
13658 SDValue Result;
13659 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
13660 EltVT == MVT::bf16) {
13661 EltVT = EltVT.changeTypeToInteger();
13662 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
13663 Op2 = DAG.getBitcast(ContainerVT, Op2);
13664 FirstEle =
13665 DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
13666 }
13667 Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13668 : RISCVISD::VSLIDE1UP_VL,
13669 DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
13670 FirstEle, Mask, EVL2);
13671 Result = DAG.getBitcast(
13672 ContainerVT.changeVectorElementType(VT.getVectorElementType()),
13673 Result);
13674 return VT.isFixedLengthVector()
13675 ? convertFromScalableVector(VT, Result, DAG, Subtarget)
13676 : Result;
13677 }
13678
13679 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
13680 SDValue DownOffset, UpOffset;
13681 if (ImmValue >= 0) {
13682 // The operand is a TargetConstant; we need to rebuild it as a regular
13683 // constant.
13684 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
13685 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
13686 } else {
13687 // The operand is a TargetConstant; we need to rebuild it as a regular
13688 // constant rather than negating the original operand.
13689 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
13690 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
13691 }
13692
13693 if (ImmValue != 0)
13694 Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13695 DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
13696 Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
13697 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
13698 UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
13699
13700 if (IsMaskVector) {
13701 // Truncate Result back to a mask vector (Result has same EVL as Op2)
13702 Result = DAG.getNode(
13703 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
13704 {Result, DAG.getConstant(0, DL, ContainerVT),
13705 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
13706 Mask, EVL2});
13707 }
13708
13709 if (!VT.isFixedLengthVector())
13710 return Result;
13711 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13712}
13713
13714SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
13715 SelectionDAG &DAG) const {
13716 SDLoc DL(Op);
13717 SDValue Val = Op.getOperand(0);
13718 SDValue Mask = Op.getOperand(1);
13719 SDValue VL = Op.getOperand(2);
13720 MVT VT = Op.getSimpleValueType();
13721
13722 MVT ContainerVT = VT;
13723 if (VT.isFixedLengthVector()) {
13724 ContainerVT = getContainerForFixedLengthVector(VT);
13725 MVT MaskVT = getMaskTypeFor(ContainerVT);
13726 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13727 }
13728
13729 SDValue Result;
13730 if (VT.getScalarType() == MVT::i1) {
13731 if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
13732 Result =
13733 DAG.getNode(C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
13734 ContainerVT, VL);
13735 } else {
13736 MVT WidenVT = ContainerVT.changeVectorElementType(MVT::i8);
13737 SDValue LHS =
13738 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, WidenVT, DAG.getUNDEF(WidenVT),
13739 DAG.getZExtOrTrunc(Val, DL, Subtarget.getXLenVT()), VL);
13740 SDValue RHS = DAG.getConstant(0, DL, WidenVT);
13741 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13742 {LHS, RHS, DAG.getCondCode(ISD::SETNE),
13743 DAG.getUNDEF(ContainerVT), Mask, VL});
13744 }
13745 } else {
13746 Result =
13747 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
13748 }
13749
13750 if (!VT.isFixedLengthVector())
13751 return Result;
13752 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13753}
13754
13755SDValue
13756RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
13757 SelectionDAG &DAG) const {
13758 SDLoc DL(Op);
13759 MVT VT = Op.getSimpleValueType();
13760 MVT XLenVT = Subtarget.getXLenVT();
13761
13762 SDValue Op1 = Op.getOperand(0);
13763 SDValue Mask = Op.getOperand(1);
13764 SDValue EVL = Op.getOperand(2);
13765
13766 MVT ContainerVT = VT;
13767 if (VT.isFixedLengthVector()) {
13768 ContainerVT = getContainerForFixedLengthVector(VT);
13769 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13770 MVT MaskVT = getMaskTypeFor(ContainerVT);
13771 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13772 }
13773
13774 MVT GatherVT = ContainerVT;
13775 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
13776 // Check if we are working with mask vectors
13777 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
13778 if (IsMaskVector) {
13779 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
13780
13781 // Expand input operand
13782 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13783 DAG.getUNDEF(IndicesVT),
13784 DAG.getConstant(1, DL, XLenVT), EVL);
13785 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13786 DAG.getUNDEF(IndicesVT),
13787 DAG.getConstant(0, DL, XLenVT), EVL);
13788 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
13789 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
13790 }
13791
13792 unsigned EltSize = GatherVT.getScalarSizeInBits();
13793 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
13794 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
13795 unsigned MaxVLMAX =
13796 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
13797
13798 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
13799 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
13800 // to use vrgatherei16.vv.
13801 // TODO: It's also possible to use vrgatherei16.vv for other types to
13802 // decrease register width for the index calculation.
13803 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
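// For example, with VLEN=512, SEW=8 and LMUL=8 we get VLMAX = 512*8/8 = 512,
// so an i8 index (at most 255) cannot address every element and 16-bit
// indices are required.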
13804 if (MaxVLMAX > 256 && EltSize == 8) {
13805 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
13806 // Split the vector in half and reverse each half using a full register
13807 // reverse.
13808 // Swap the halves and concatenate them.
13809 // Slide the concatenated result by (VLMax - VL).
13810 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
13811 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
13812 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
13813
13814 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
13815 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
13816
13817 // Reassemble the low and high pieces reversed.
13818 // NOTE: this Result is unmasked (because we do not need masks for
13819 // shuffles). If in the future this has to change, we can use a SELECT_VL
13820 // between Result and UNDEF using the mask originally passed to VP_REVERSE
13821 SDValue Result =
13822 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
13823
13824 // Slide off any elements from past EVL that were reversed into the low
13825 // elements.
13826 unsigned MinElts = GatherVT.getVectorMinNumElements();
13827 SDValue VLMax =
13828 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
13829 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
13830
13831 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
13832 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
13833
13834 if (IsMaskVector) {
13835 // Truncate Result back to a mask vector
13836 Result =
13837 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13838 {Result, DAG.getConstant(0, DL, GatherVT),
13839 DAG.getCondCode(ISD::SETNE),
13840 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13841 }
13842
13843 if (!VT.isFixedLengthVector())
13844 return Result;
13845 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13846 }
13847
13848 // Just promote the int type to i16 which will double the LMUL.
13849 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
13850 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13851 }
13852
13853 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
13854 SDValue VecLen =
13855 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
13856 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13857 DAG.getUNDEF(IndicesVT), VecLen, EVL);
13858 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
13859 DAG.getUNDEF(IndicesVT), Mask, EVL);
13860 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
13861 DAG.getUNDEF(GatherVT), Mask, EVL);
13862
13863 if (IsMaskVector) {
13864 // Truncate Result back to a mask vector
13865 Result = DAG.getNode(
13866 RISCVISD::SETCC_VL, DL, ContainerVT,
13867 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
13868 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13869 }
13870
13871 if (!VT.isFixedLengthVector())
13872 return Result;
13873 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13874}
13875
13876SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
13877 SelectionDAG &DAG) const {
13878 MVT VT = Op.getSimpleValueType();
13879 if (VT.getVectorElementType() != MVT::i1)
13880 return lowerVPOp(Op, DAG);
13881
13882 // It is safe to drop mask parameter as masked-off elements are undef.
13883 SDValue Op1 = Op->getOperand(0);
13884 SDValue Op2 = Op->getOperand(1);
13885 SDValue VL = Op->getOperand(3);
13886
13887 MVT ContainerVT = VT;
13888 const bool IsFixed = VT.isFixedLengthVector();
13889 if (IsFixed) {
13890 ContainerVT = getContainerForFixedLengthVector(VT);
13891 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13892 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13893 }
13894
13895 SDLoc DL(Op);
13896 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
13897 if (!IsFixed)
13898 return Val;
13899 return convertFromScalableVector(VT, Val, DAG, Subtarget);
13900}
13901
13902SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
13903 SelectionDAG &DAG) const {
13904 SDLoc DL(Op);
13905 MVT XLenVT = Subtarget.getXLenVT();
13906 MVT VT = Op.getSimpleValueType();
13907 MVT ContainerVT = VT;
13908 if (VT.isFixedLengthVector())
13909 ContainerVT = getContainerForFixedLengthVector(VT);
13910
13911 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13912
13913 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
13914 // Check if the mask is known to be all ones
13915 SDValue Mask = VPNode->getMask();
13916 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13917
13918 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
13919 : Intrinsic::riscv_vlse_mask,
13920 DL, XLenVT);
13921 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
13922 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
13923 VPNode->getStride()};
13924 if (!IsUnmasked) {
13925 if (VT.isFixedLengthVector()) {
13926 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13927 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13928 }
13929 Ops.push_back(Mask);
13930 }
13931 Ops.push_back(VPNode->getVectorLength());
13932 if (!IsUnmasked) {
13933 SDValue Policy =
13934 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
13935 Ops.push_back(Policy);
13936 }
13937
13938 SDValue Result =
13939 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
13940 VPNode->getMemoryVT(), VPNode->getMemOperand());
13941 SDValue Chain = Result.getValue(1);
13942
13943 if (VT.isFixedLengthVector())
13944 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13945
13946 return DAG.getMergeValues({Result, Chain}, DL);
13947}
13948
13949SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
13950 SelectionDAG &DAG) const {
13951 SDLoc DL(Op);
13952 MVT XLenVT = Subtarget.getXLenVT();
13953
13954 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
13955 SDValue StoreVal = VPNode->getValue();
13956 MVT VT = StoreVal.getSimpleValueType();
13957 MVT ContainerVT = VT;
13958 if (VT.isFixedLengthVector()) {
13959 ContainerVT = getContainerForFixedLengthVector(VT);
13960 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
13961 }
13962
13963 // Check if the mask is known to be all ones
13964 SDValue Mask = VPNode->getMask();
13965 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13966
13967 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
13968 : Intrinsic::riscv_vsse_mask,
13969 DL, XLenVT);
13970 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
13971 VPNode->getBasePtr(), VPNode->getStride()};
13972 if (!IsUnmasked) {
13973 if (VT.isFixedLengthVector()) {
13974 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13975 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13976 }
13977 Ops.push_back(Mask);
13978 }
13979 Ops.push_back(VPNode->getVectorLength());
13980
13981 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
13982 Ops, VPNode->getMemoryVT(),
13983 VPNode->getMemOperand());
13984}
13985
13986// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
13987// matched to a RVV indexed load. The RVV indexed load instructions only
13988// support the "unsigned unscaled" addressing mode; indices are implicitly
13989// zero-extended or truncated to XLEN and are treated as byte offsets. Any
13990// signed or scaled indexing is extended to the XLEN value type and scaled
13991// accordingly.
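// For example, on RV32 an index vector with i64 elements has its indices
// truncated to i32 below, since only the low XLEN bits of each index can
// contribute to the byte offset.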
13992SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
13993 SelectionDAG &DAG) const {
13994 SDLoc DL(Op);
13995 MVT VT = Op.getSimpleValueType();
13996
13997 const auto *MemSD = cast<MemSDNode>(Op.getNode());
13998 EVT MemVT = MemSD->getMemoryVT();
13999 MachineMemOperand *MMO = MemSD->getMemOperand();
14000 SDValue Chain = MemSD->getChain();
14001 SDValue BasePtr = MemSD->getBasePtr();
14002
14003 [[maybe_unused]] ISD::LoadExtType LoadExtType;
14004 SDValue Index, Mask, PassThru, VL;
14005
14006 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
14007 Index = VPGN->getIndex();
14008 Mask = VPGN->getMask();
14009 PassThru = DAG.getUNDEF(VT);
14010 VL = VPGN->getVectorLength();
14011 // VP doesn't support extending loads.
14012 LoadExtType = ISD::NON_EXTLOAD;
14013 } else {
14014 // Else it must be a MGATHER.
14015 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
14016 Index = MGN->getIndex();
14017 Mask = MGN->getMask();
14018 PassThru = MGN->getPassThru();
14019 LoadExtType = MGN->getExtensionType();
14020 }
14021
14022 MVT IndexVT = Index.getSimpleValueType();
14023 MVT XLenVT = Subtarget.getXLenVT();
14024
14025 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
14026 "Unexpected VTs!");
14027 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14028 // Targets have to explicitly opt-in for extending vector loads.
14029 assert(LoadExtType == ISD::NON_EXTLOAD &&
14030 "Unexpected extending MGATHER/VP_GATHER");
14031
14032 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14033 // the selection of the masked intrinsics doesn't do this for us.
14034 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14035
14036 MVT ContainerVT = VT;
14037 if (VT.isFixedLengthVector()) {
14038 ContainerVT = getContainerForFixedLengthVector(VT);
14039 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14040 ContainerVT.getVectorElementCount());
14041
14042 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14043
14044 if (!IsUnmasked) {
14045 MVT MaskVT = getMaskTypeFor(ContainerVT);
14046 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14047 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
14048 }
14049 }
14050
14051 if (!VL)
14052 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14053
14054 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14055 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14056 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14057 }
14058
14059 unsigned IntID =
14060 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
14061 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14062 if (IsUnmasked)
14063 Ops.push_back(DAG.getUNDEF(ContainerVT));
14064 else
14065 Ops.push_back(PassThru);
14066 Ops.push_back(BasePtr);
14067 Ops.push_back(Index);
14068 if (!IsUnmasked)
14069 Ops.push_back(Mask);
14070 Ops.push_back(VL);
14071 if (!IsUnmasked)
14072 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
14073
14074 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14075 SDValue Result =
14076 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14077 Chain = Result.getValue(1);
14078
14079 if (VT.isFixedLengthVector())
14080 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14081
14082 return DAG.getMergeValues({Result, Chain}, DL);
14083}
14084
14085// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14086// matched to a RVV indexed store. The RVV indexed store instructions only
14087// support the "unsigned unscaled" addressing mode; indices are implicitly
14088// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14089// signed or scaled indexing is extended to the XLEN value type and scaled
14090// accordingly.
14091SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14092 SelectionDAG &DAG) const {
14093 SDLoc DL(Op);
14094 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14095 EVT MemVT = MemSD->getMemoryVT();
14096 MachineMemOperand *MMO = MemSD->getMemOperand();
14097 SDValue Chain = MemSD->getChain();
14098 SDValue BasePtr = MemSD->getBasePtr();
14099
14100 [[maybe_unused]] bool IsTruncatingStore = false;
14101 SDValue Index, Mask, Val, VL;
14102
14103 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14104 Index = VPSN->getIndex();
14105 Mask = VPSN->getMask();
14106 Val = VPSN->getValue();
14107 VL = VPSN->getVectorLength();
14108 // VP doesn't support truncating stores.
14109 IsTruncatingStore = false;
14110 } else {
14111 // Else it must be a MSCATTER.
14112 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14113 Index = MSN->getIndex();
14114 Mask = MSN->getMask();
14115 Val = MSN->getValue();
14116 IsTruncatingStore = MSN->isTruncatingStore();
14117 }
14118
14119 MVT VT = Val.getSimpleValueType();
14120 MVT IndexVT = Index.getSimpleValueType();
14121 MVT XLenVT = Subtarget.getXLenVT();
14122
14123 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
14124 "Unexpected VTs!");
14125 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14126 // Targets have to explicitly opt-in for extending vector loads and
14127 // truncating vector stores.
14128 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14129
14130 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14131 // the selection of the masked intrinsics doesn't do this for us.
14132 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14133
14134 MVT ContainerVT = VT;
14135 if (VT.isFixedLengthVector()) {
14136 ContainerVT = getContainerForFixedLengthVector(VT);
14137 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14138 ContainerVT.getVectorElementCount());
14139
14140 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14141 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14142
14143 if (!IsUnmasked) {
14144 MVT MaskVT = getMaskTypeFor(ContainerVT);
14145 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14146 }
14147 }
14148
14149 if (!VL)
14150 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14151
14152 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14153 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14154 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14155 }
14156
14157 unsigned IntID =
14158 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14159 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14160 Ops.push_back(Val);
14161 Ops.push_back(BasePtr);
14162 Ops.push_back(Index);
14163 if (!IsUnmasked)
14164 Ops.push_back(Mask);
14165 Ops.push_back(VL);
14166
14167 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
14168 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14169}
14170
14171SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14172 SelectionDAG &DAG) const {
14173 const MVT XLenVT = Subtarget.getXLenVT();
14174 SDLoc DL(Op);
14175 SDValue Chain = Op->getOperand(0);
14176 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14177 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14178 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14179
14180 // The encoding used for the rounding mode in RISC-V differs from that used by
14181 // FLT_ROUNDS. To convert between them, the RISC-V rounding mode is used as an
14182 // index into a table, which consists of a sequence of 4-bit fields, each
14183 // holding the corresponding FLT_ROUNDS mode.
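// For example, frm == RTZ (1) makes the shift below select the 4-bit field at
// bit position 4, which holds RoundingMode::TowardZero (0), the FLT_ROUNDS
// value for round-toward-zero.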
14184 static const int Table =
14185 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
14186 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
14187 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
14188 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
14189 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
14190
14191 SDValue Shift =
14192 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14193 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14194 DAG.getConstant(Table, DL, XLenVT), Shift);
14195 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14196 DAG.getConstant(7, DL, XLenVT));
14197
14198 return DAG.getMergeValues({Masked, Chain}, DL);
14199}
14200
14201SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14202 SelectionDAG &DAG) const {
14203 const MVT XLenVT = Subtarget.getXLenVT();
14204 SDLoc DL(Op);
14205 SDValue Chain = Op->getOperand(0);
14206 SDValue RMValue = Op->getOperand(1);
14207 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14208
14209 // The encoding used for the rounding mode in RISC-V differs from that used by
14210 // FLT_ROUNDS. To convert between them, the C rounding mode is used as an index
14211 // into a table, which consists of a sequence of 4-bit fields, each holding the
14212 // corresponding RISC-V rounding mode.
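// For example, RoundingMode::TowardZero (0) selects the 4-bit field at bit
// position 0, which holds RISCVFPRndMode::RTZ (1), the frm encoding for
// round-toward-zero.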
14213 static const unsigned Table =
14214 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
14215 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
14216 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
14217 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
14218 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
14219
14220 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
14221
14222 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
14223 DAG.getConstant(2, DL, XLenVT));
14224 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14225 DAG.getConstant(Table, DL, XLenVT), Shift);
14226 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14227 DAG.getConstant(0x7, DL, XLenVT));
14228 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14229 RMValue);
14230}
14231
14232SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
14233 SelectionDAG &DAG) const {
14234 const MVT XLenVT = Subtarget.getXLenVT();
14235 SDLoc DL(Op);
14236 SDValue Chain = Op->getOperand(0);
14237 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14238 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14239 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14240}
14241
14242SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
14243 SelectionDAG &DAG) const {
14244 const MVT XLenVT = Subtarget.getXLenVT();
14245 SDLoc DL(Op);
14246 SDValue Chain = Op->getOperand(0);
14247 SDValue EnvValue = Op->getOperand(1);
14248 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14249
14250 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14251 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14252 EnvValue);
14253}
14254
14255SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
14256 SelectionDAG &DAG) const {
14257 const MVT XLenVT = Subtarget.getXLenVT();
14258 SDLoc DL(Op);
14259 SDValue Chain = Op->getOperand(0);
14260 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
14261 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14262
14263 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14264 EnvValue);
14265}
14266
14269
14270SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
14271 SelectionDAG &DAG) const {
14272 const MVT XLenVT = Subtarget.getXLenVT();
14273 SDLoc DL(Op);
14274 SDValue Chain = Op->getOperand(0);
14275 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14276 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14277 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14278 Chain = Result.getValue(1);
14279 return DAG.getMergeValues({Result, Chain}, DL);
14280}
14281
14282SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
14283 SelectionDAG &DAG) const {
14284 const MVT XLenVT = Subtarget.getXLenVT();
14285 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14286 SDLoc DL(Op);
14287 SDValue Chain = Op->getOperand(0);
14288 SDValue EnvValue = Op->getOperand(1);
14289 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14290 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14291
14292 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14293 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
14294 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14295 ModeMask);
14296 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
14297 EnvValue);
14298}
14299
14300SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
14301 SelectionDAG &DAG) const {
14302 const MVT XLenVT = Subtarget.getXLenVT();
14303 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14304 SDLoc DL(Op);
14305 SDValue Chain = Op->getOperand(0);
14306 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14307 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14308
14309 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14310 ModeMask);
14311}
14312
14313SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
14314 SelectionDAG &DAG) const {
14315 MachineFunction &MF = DAG.getMachineFunction();
14316
14317 bool isRISCV64 = Subtarget.is64Bit();
14318 EVT PtrVT = getPointerTy(DAG.getDataLayout());
14319
14320 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
14321 return DAG.getFrameIndex(FI, PtrVT);
14322}
14323
14324// Returns the opcode of the target-specific SDNode that implements the 32-bit
14325// form of the given Opcode.
14326static unsigned getRISCVWOpcode(unsigned Opcode) {
14327 switch (Opcode) {
14328 default:
14329 llvm_unreachable("Unexpected opcode");
14330 case ISD::SHL:
14331 return RISCVISD::SLLW;
14332 case ISD::SRA:
14333 return RISCVISD::SRAW;
14334 case ISD::SRL:
14335 return RISCVISD::SRLW;
14336 case ISD::SDIV:
14337 return RISCVISD::DIVW;
14338 case ISD::UDIV:
14339 return RISCVISD::DIVUW;
14340 case ISD::UREM:
14341 return RISCVISD::REMUW;
14342 case ISD::ROTL:
14343 return RISCVISD::ROLW;
14344 case ISD::ROTR:
14345 return RISCVISD::RORW;
14346 }
14347}
14348
14349// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
14350// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
14351// otherwise be promoted to i64, making it difficult to select the
14352// SLLW/DIVUW/.../*W instructions later on because the fact that the operation
14353// was originally of type i8/i16/i32 is lost.
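// For example, on RV64 an i32 udiv is lowered here to
//   (trunc i32 (DIVUW (any_ext i64 X), (any_ext i64 Y)))
// so that DIVUW can still be selected even though i32 is not a legal type.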
14354static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
14355 unsigned ExtOpc = ISD::ANY_EXTEND) {
14356 SDLoc DL(N);
14357 unsigned WOpcode = getRISCVWOpcode(N->getOpcode());
14358 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
14359 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
14360 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
14361 // ReplaceNodeResults requires we maintain the same type for the return value.
14362 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
14363}
14364
14365// Converts the given 32-bit operation to an i64 operation with sign-extension
14366// semantics to reduce the number of sign-extension instructions.
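// For example, an i32 add becomes
//   (trunc i32 (sext_inreg (add (any_ext i64 X), (any_ext i64 Y)), i32))
// which matches ADDW and leaves the result sign extended in the register.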
14367static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
14368 SDLoc DL(N);
14369 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14370 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14371 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
14372 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14373 DAG.getValueType(MVT::i32));
14374 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
14375}
14376
14377void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
14378 SmallVectorImpl<SDValue> &Results,
14379 SelectionDAG &DAG) const {
14380 SDLoc DL(N);
14381 switch (N->getOpcode()) {
14382 default:
14383 llvm_unreachable("Don't know how to custom type legalize this operation!");
14384 case ISD::STRICT_FP_TO_SINT:
14385 case ISD::STRICT_FP_TO_UINT:
14386 case ISD::FP_TO_SINT:
14387 case ISD::FP_TO_UINT: {
14388 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14389 "Unexpected custom legalisation");
14390 bool IsStrict = N->isStrictFPOpcode();
14391 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
14392 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
14393 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
14394 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14395 TargetLowering::TypeSoftenFloat) {
14396 if (!isTypeLegal(Op0.getValueType()))
14397 return;
14398 if (IsStrict) {
14399 SDValue Chain = N->getOperand(0);
14400 // In absence of Zfh, promote f16 to f32, then convert.
14401 if (Op0.getValueType() == MVT::f16 &&
14402 !Subtarget.hasStdExtZfhOrZhinx()) {
14403 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
14404 {Chain, Op0});
14405 Chain = Op0.getValue(1);
14406 }
14407 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
14408 : RISCVISD::STRICT_FCVT_WU_RV64;
14409 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14410 SDValue Res = DAG.getNode(
14411 Opc, DL, VTs, Chain, Op0,
14412 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14413 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14414 Results.push_back(Res.getValue(1));
14415 return;
14416 }
14417 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
14418 // convert.
14419 if ((Op0.getValueType() == MVT::f16 &&
14420 !Subtarget.hasStdExtZfhOrZhinx()) ||
14421 Op0.getValueType() == MVT::bf16)
14422 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14423
14424 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14425 SDValue Res =
14426 DAG.getNode(Opc, DL, MVT::i64, Op0,
14427 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14428 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14429 return;
14430 }
14431 // If the FP type needs to be softened, emit a library call using the 'si'
14432 // version. If we left it to default legalization we'd end up with 'di'. If
14433 // the FP type doesn't need to be softened just let generic type
14434 // legalization promote the result type.
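// For example, an f32 -> i32 fptosi on a soft-float target becomes a call to
// __fixsfsi rather than __fixsfdi.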
14435 RTLIB::Libcall LC;
14436 if (IsSigned)
14437 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
14438 else
14439 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
14440 MakeLibCallOptions CallOptions;
14441 EVT OpVT = Op0.getValueType();
14442 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
14443 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
14444 SDValue Result;
14445 std::tie(Result, Chain) =
14446 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
14447 Results.push_back(Result);
14448 if (IsStrict)
14449 Results.push_back(Chain);
14450 break;
14451 }
14452 case ISD::LROUND: {
14453 SDValue Op0 = N->getOperand(0);
14454 EVT Op0VT = Op0.getValueType();
14455 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14456 TargetLowering::TypeSoftenFloat) {
14457 if (!isTypeLegal(Op0VT))
14458 return;
14459
14460 // In absence of Zfh, promote f16 to f32, then convert.
14461 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
14462 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14463
14464 SDValue Res =
14465 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
14466 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
14467 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14468 return;
14469 }
14470 // If the FP type needs to be softened, emit a library call to lround. We'll
14471 // need to truncate the result. We assume that any value which doesn't fit in
14472 // i32 is allowed to produce an unspecified result.
14473 RTLIB::Libcall LC =
14474 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
14475 MakeLibCallOptions CallOptions;
14476 EVT OpVT = Op0.getValueType();
14477 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
14478 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
14479 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
14480 Results.push_back(Result);
14481 break;
14482 }
14483 case ISD::READCYCLECOUNTER:
14484 case ISD::READSTEADYCOUNTER: {
14485 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
14486 "has custom type legalization on riscv32");
14487
14488 SDValue LoCounter, HiCounter;
14489 MVT XLenVT = Subtarget.getXLenVT();
14490 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
14491 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
14492 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
14493 } else {
14494 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
14495 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
14496 }
14497 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
14498 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
14499 N->getOperand(0), LoCounter, HiCounter);
14500
14501 Results.push_back(
14502 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
14503 Results.push_back(RCW.getValue(2));
14504 break;
14505 }
14506 case ISD::LOAD: {
14507 if (!ISD::isNON_EXTLoad(N))
14508 return;
14509
14510 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
14511 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
14512 LoadSDNode *Ld = cast<LoadSDNode>(N);
14513
14514 if (N->getValueType(0) == MVT::i64) {
14515 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
14516 "Unexpected custom legalisation");
14517
14518 if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
14519 return;
14520
14521 SDLoc DL(N);
14522 SDValue Result = DAG.getMemIntrinsicNode(
14523 RISCVISD::LD_RV32, DL,
14524 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
14525 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
14526 SDValue Lo = Result.getValue(0);
14527 SDValue Hi = Result.getValue(1);
14528 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
14529 Results.append({Pair, Result.getValue(2)});
14530 return;
14531 }
14532
14533 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14534 "Unexpected custom legalisation");
14535
14536 SDLoc dl(N);
14537 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
14538 Ld->getBasePtr(), Ld->getMemoryVT(),
14539 Ld->getMemOperand());
14540 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
14541 Results.push_back(Res.getValue(1));
14542 return;
14543 }
14544 case ISD::MUL: {
14545 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
14546 unsigned XLen = Subtarget.getXLen();
14547 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
14548 if (Size > XLen) {
14549 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
14550 SDValue LHS = N->getOperand(0);
14551 SDValue RHS = N->getOperand(1);
14552 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
14553
14554 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
14555 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
14556 // We need exactly one side to be unsigned.
14557 if (LHSIsU == RHSIsU)
14558 return;
14559
14560 auto MakeMULPair = [&](SDValue S, SDValue U) {
14561 MVT XLenVT = Subtarget.getXLenVT();
14562 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
14563 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
14564 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
14565 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
14566 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
14567 };
14568
14569 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
14570 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
14571
14572 // The other operand should be signed, but still prefer MULH when
14573 // possible.
14574 if (RHSIsU && LHSIsS && !RHSIsS)
14575 Results.push_back(MakeMULPair(LHS, RHS));
14576 else if (LHSIsU && RHSIsS && !LHSIsS)
14577 Results.push_back(MakeMULPair(RHS, LHS));
14578
14579 return;
14580 }
14581 [[fallthrough]];
14582 }
14583 case ISD::ADD:
14584 case ISD::SUB:
14585 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14586 "Unexpected custom legalisation");
14587 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
14588 break;
14589 case ISD::SHL:
14590 case ISD::SRA:
14591 case ISD::SRL:
14592 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14593 "Unexpected custom legalisation");
14594 if (N->getOperand(1).getOpcode() != ISD::Constant) {
14595 // If we can use a BSET instruction, allow default promotion to apply.
14596 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
14597 isOneConstant(N->getOperand(0)))
14598 break;
14599 Results.push_back(customLegalizeToWOp(N, DAG));
14600 break;
14601 }
14602
14603 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
14604 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
14605 // shift amount.
14606 if (N->getOpcode() == ISD::SHL) {
14607 SDLoc DL(N);
14608 SDValue NewOp0 =
14609 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14610 SDValue NewOp1 =
14611 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
14612 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
14613 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14614 DAG.getValueType(MVT::i32));
14615 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14616 }
14617
14618 break;
14619 case ISD::ROTL:
14620 case ISD::ROTR:
14621 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14622 "Unexpected custom legalisation");
14623 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
14624 Subtarget.hasVendorXTHeadBb()) &&
14625 "Unexpected custom legalization");
14626 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
14627 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
14628 return;
14629 Results.push_back(customLegalizeToWOp(N, DAG));
14630 break;
14631 case ISD::CTTZ:
14632 case ISD::CTTZ_ZERO_UNDEF:
14633 case ISD::CTLZ:
14634 case ISD::CTLZ_ZERO_UNDEF: {
14635 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14636 "Unexpected custom legalisation");
14637
14638 SDValue NewOp0 =
14639 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14640 bool IsCTZ =
14641 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
14642 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
14643 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
14644 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14645 return;
14646 }
14647 case ISD::SDIV:
14648 case ISD::UDIV:
14649 case ISD::UREM: {
14650 MVT VT = N->getSimpleValueType(0);
14651 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
14652 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
14653 "Unexpected custom legalisation");
14654 // Don't promote division/remainder by constant since we should expand those
14655 // to multiply by magic constant.
14656 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
14657 if (N->getOperand(1).getOpcode() == ISD::Constant &&
14658 !isIntDivCheap(N->getValueType(0), Attr))
14659 return;
14660
14661 // If the input is i32, use ANY_EXTEND since the W instructions don't read
14662 // the upper 32 bits. For other types we need to sign or zero extend
14663 // based on the opcode.
14664 unsigned ExtOpc = ISD::ANY_EXTEND;
14665 if (VT != MVT::i32)
14666 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
14667 : ISD::ZERO_EXTEND;
14668
14669 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
14670 break;
14671 }
14672 case ISD::SADDO: {
14673 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14674 "Unexpected custom legalisation");
14675
14676 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
14677 // use the default legalization.
14678 if (!isa<ConstantSDNode>(N->getOperand(1)))
14679 return;
14680
14681 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14682 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
14683 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
14684 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14685 DAG.getValueType(MVT::i32));
14686
14687 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14688
14689 // For an addition, the result should be less than one of the operands (LHS)
14690 // if and only if the other operand (RHS) is negative; otherwise there will
14691 // be overflow.
14692 // For a subtraction, the result should be less than one of the operands
14693 // (LHS) if and only if the other operand (RHS) is (non-zero) positive;
14694 // otherwise there will be overflow.
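// For example, with i32 LHS = 0x7fffffff and RHS = 1 the 64-bit sum sign
// extends to -2147483648, which is less than LHS while RHS is non-negative,
// so the XOR below reports overflow.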
14695 EVT OType = N->getValueType(1);
14696 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
14697 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
14698
14699 SDValue Overflow =
14700 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
14701 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14702 Results.push_back(Overflow);
14703 return;
14704 }
14705 case ISD::UADDO:
14706 case ISD::USUBO: {
14707 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14708 "Unexpected custom legalisation");
14709 bool IsAdd = N->getOpcode() == ISD::UADDO;
14710 // Create an ADDW or SUBW.
14711 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14712 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14713 SDValue Res =
14714 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
14715 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14716 DAG.getValueType(MVT::i32));
14717
14718 SDValue Overflow;
14719 if (IsAdd && isOneConstant(RHS)) {
14720 // Special case: uaddo X, 1 overflowed if the addition result is 0.
14721 // The general case (X + C) < C is not necessarily beneficial. Although we
14722 // reduce the live range of X, we may introduce the materialization of
14723 // constant C, especially when the setcc result is used by a branch, since
14724 // we have no compare-with-constant branch instructions.
14725 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
14726 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
14727 } else if (IsAdd && isAllOnesConstant(RHS)) {
14728 // Special case uaddo X, -1 overflowed if X != 0.
14729 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
14730 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
14731 } else {
14732 // Sign extend the LHS and perform an unsigned compare with the ADDW
14733 // result. Since the inputs are sign extended from i32, this is equivalent
14734 // to comparing the lower 32 bits.
14735 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14736 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
14737 IsAdd ? ISD::SETULT : ISD::SETUGT);
14738 }
14739
14740 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14741 Results.push_back(Overflow);
14742 return;
14743 }
14744 case ISD::UADDSAT:
14745 case ISD::USUBSAT: {
14746 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14747 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
14748 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
14749 // promotion for UADDO/USUBO.
14750 Results.push_back(expandAddSubSat(N, DAG));
14751 return;
14752 }
14753 case ISD::SADDSAT:
14754 case ISD::SSUBSAT: {
14755 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14756 "Unexpected custom legalisation");
14757 Results.push_back(expandAddSubSat(N, DAG));
14758 return;
14759 }
14760 case ISD::ABS: {
14761 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14762 "Unexpected custom legalisation");
14763
14764 if (Subtarget.hasStdExtZbb()) {
14765 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
14766 // This allows us to remember that the result is sign extended. Expanding
14767 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
14768 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
14769 N->getOperand(0));
14770 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
14771 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
14772 return;
14773 }
14774
14775 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
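// For example, for X = -5: Y = -1, xor(X, Y) = 4, and 4 - (-1) = 5.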
14776 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14777
14778 // Freeze the source so we can increase its use count.
14779 Src = DAG.getFreeze(Src);
14780
14781 // Copy sign bit to all bits using the sraiw pattern.
14782 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
14783 DAG.getValueType(MVT::i32));
14784 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
14785 DAG.getConstant(31, DL, MVT::i64));
14786
14787 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
14788 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
14789
14790 // NOTE: The result is only required to be anyextended, but sext is
14791 // consistent with type legalization of sub.
14792 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
14793 DAG.getValueType(MVT::i32));
14794 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14795 return;
14796 }
14797 case ISD::BITCAST: {
14798 EVT VT = N->getValueType(0);
14799 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
14800 SDValue Op0 = N->getOperand(0);
14801 EVT Op0VT = Op0.getValueType();
14802 MVT XLenVT = Subtarget.getXLenVT();
14803 if (VT == MVT::i16 &&
14804 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
14805 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
14806 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
14807 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
14808 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
14809 Subtarget.hasStdExtFOrZfinx()) {
14810 SDValue FPConv =
14811 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
14812 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
14813 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
14814 Subtarget.hasStdExtDOrZdinx()) {
14815 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
14816 DAG.getVTList(MVT::i32, MVT::i32), Op0);
14817 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
14818 NewReg.getValue(0), NewReg.getValue(1));
14819 Results.push_back(RetReg);
14820 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
14821 isTypeLegal(Op0VT)) {
14822 // Custom-legalize bitcasts from fixed-length vector types to illegal
14823 // scalar types in order to improve codegen. Bitcast the vector to a
14824 // one-element vector type whose element type is the same as the result
14825 // type, and extract the first element.
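// For example, on RV32 with vector support, (i64 (bitcast v4i16 X)) becomes
// an extract of element 0 from (v1i64 (bitcast X)), provided v1i64 is legal.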
14826 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
14827 if (isTypeLegal(BVT)) {
14828 SDValue BVec = DAG.getBitcast(BVT, Op0);
14829 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
14830 }
14831 }
14832 break;
14833 }
14834 case ISD::BITREVERSE: {
14835 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
14836 "Unexpected custom legalisation");
14837 MVT XLenVT = Subtarget.getXLenVT();
14838 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14839 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
14840 // ReplaceNodeResults requires we maintain the same type for the return
14841 // value.
14842 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
14843 break;
14844 }
14845 case RISCVISD::BREV8:
14846 case RISCVISD::ORC_B: {
14847 MVT VT = N->getSimpleValueType(0);
14848 MVT XLenVT = Subtarget.getXLenVT();
14849 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
14850 "Unexpected custom legalisation");
14851 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
14852 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
14853 "Unexpected extension");
14854 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14855 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
14856 // ReplaceNodeResults requires we maintain the same type for the return
14857 // value.
14858 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
14859 break;
14860 }
14861 case ISD::EXTRACT_VECTOR_ELT: {
14862 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
14863 // type is illegal (currently only vXi64 RV32).
14864 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
14865 // transferred to the destination register. We issue two of these from the
14866 // upper- and lower- halves of the SEW-bit vector element, slid down to the
14867 // first element.
14868 SDValue Vec = N->getOperand(0);
14869 SDValue Idx = N->getOperand(1);
14870
14871 // The vector type hasn't been legalized yet so we can't issue target
14872 // specific nodes if it needs legalization.
14873 // FIXME: We would manually legalize if it's important.
14874 if (!isTypeLegal(Vec.getValueType()))
14875 return;
14876
14877 MVT VecVT = Vec.getSimpleValueType();
14878
14879 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
14880 VecVT.getVectorElementType() == MVT::i64 &&
14881 "Unexpected EXTRACT_VECTOR_ELT legalization");
14882
14883 // If this is a fixed vector, we need to convert it to a scalable vector.
14884 MVT ContainerVT = VecVT;
14885 if (VecVT.isFixedLengthVector()) {
14886 ContainerVT = getContainerForFixedLengthVector(VecVT);
14887 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
14888 }
14889
14890 MVT XLenVT = Subtarget.getXLenVT();
14891
14892 // Use a VL of 1 to avoid processing more elements than we need.
14893 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
14894
14895 // Unless the index is known to be 0, we must slide the vector down to get
14896 // the desired element into index 0.
14897 if (!isNullConstant(Idx)) {
14898 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
14899 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
14900 }
14901
14902 // Extract the lower XLEN bits of the correct vector element.
14903 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
14904
14905 // To extract the upper XLEN bits of the vector element, shift the first
14906 // element right by 32 bits and re-extract the lower XLEN bits.
14907 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14908 DAG.getUNDEF(ContainerVT),
14909 DAG.getConstant(32, DL, XLenVT), VL);
14910 SDValue LShr32 =
14911 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
14912 DAG.getUNDEF(ContainerVT), Mask, VL);
14913
14914 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
14915
14916 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
14917 break;
14918 }
14919 case ISD::INTRINSIC_WO_CHAIN: {
14920 unsigned IntNo = N->getConstantOperandVal(0);
14921 switch (IntNo) {
14922 default:
14923 llvm_unreachable(
14924 "Don't know how to custom type legalize this intrinsic!");
14925 case Intrinsic::experimental_get_vector_length: {
14926 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
14927 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14928 return;
14929 }
14930 case Intrinsic::experimental_cttz_elts: {
14931 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
14932 Results.push_back(
14933 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
14934 return;
14935 }
14936 case Intrinsic::riscv_orc_b:
14937 case Intrinsic::riscv_brev8:
14938 case Intrinsic::riscv_sha256sig0:
14939 case Intrinsic::riscv_sha256sig1:
14940 case Intrinsic::riscv_sha256sum0:
14941 case Intrinsic::riscv_sha256sum1:
14942 case Intrinsic::riscv_sm3p0:
14943 case Intrinsic::riscv_sm3p1: {
14944 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14945 return;
14946 unsigned Opc;
14947 switch (IntNo) {
14948 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
14949 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
14950 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
14951 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
14952 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
14953 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
14954 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
14955 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
14956 }
14957
14958 SDValue NewOp =
14959 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14960 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
14961 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14962 return;
14963 }
14964 case Intrinsic::riscv_sm4ks:
14965 case Intrinsic::riscv_sm4ed: {
14966 unsigned Opc =
14967 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
14968 SDValue NewOp0 =
14969 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14970 SDValue NewOp1 =
14971 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
14972 SDValue Res =
14973 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
14974 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14975 return;
14976 }
14977 case Intrinsic::riscv_mopr: {
14978 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14979 return;
14980 SDValue NewOp =
14981 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14982 SDValue Res = DAG.getNode(
14983 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
14984 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
14985 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14986 return;
14987 }
14988 case Intrinsic::riscv_moprr: {
14989 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14990 return;
14991 SDValue NewOp0 =
14992 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14993 SDValue NewOp1 =
14994 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
14995 SDValue Res = DAG.getNode(
14996 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
14997 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
14998 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14999 return;
15000 }
15001 case Intrinsic::riscv_clmul: {
15002 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15003 return;
15004
15005 SDValue NewOp0 =
15006 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15007 SDValue NewOp1 =
15008 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15009 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
15010 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15011 return;
15012 }
15013 case Intrinsic::riscv_clmulh:
15014 case Intrinsic::riscv_clmulr: {
15015 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15016 return;
15017
15018 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
15019 // to the full 128-bit clmul result of multiplying two xlen values.
15020 // Perform clmulr or clmulh on the shifted values. Finally, extract the
15021 // upper 32 bits.
15022 //
15023 // The alternative is to mask the inputs to 32 bits and use clmul, but
15024 // that requires two shifts to mask each input without zext.w.
15025 // FIXME: If the inputs are known zero extended or could be freely
15026 // zero extended, the mask form would be better.
15027 SDValue NewOp0 =
15028 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15029 SDValue NewOp1 =
15030 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15031 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
15032 DAG.getConstant(32, DL, MVT::i64));
15033 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
15034 DAG.getConstant(32, DL, MVT::i64));
15035 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
15036 : RISCVISD::CLMULR;
15037 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
15038 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
15039 DAG.getConstant(32, DL, MVT::i64));
15040 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15041 return;
15042 }
15043 case Intrinsic::riscv_vmv_x_s: {
15044 EVT VT = N->getValueType(0);
15045 MVT XLenVT = Subtarget.getXLenVT();
15046 if (VT.bitsLT(XLenVT)) {
15047 // Simple case: just extract using vmv.x.s and truncate.
15048 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
15049 Subtarget.getXLenVT(), N->getOperand(1));
15050 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
15051 return;
15052 }
15053
15054 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
15055 "Unexpected custom legalization");
15056
15057 // We need to do the move in two steps.
15058 SDValue Vec = N->getOperand(1);
15059 MVT VecVT = Vec.getSimpleValueType();
15060
15061 // First extract the lower XLEN bits of the element.
15062 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15063
15064 // To extract the upper XLEN bits of the vector element, shift the first
15065 // element right by 32 bits and re-extract the lower XLEN bits.
15066 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
15067
15068 SDValue ThirtyTwoV =
15069 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
15070 DAG.getConstant(32, DL, XLenVT), VL);
15071 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
15072 DAG.getUNDEF(VecVT), Mask, VL);
15073 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15074
15075 Results.push_back(
15076 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15077 break;
15078 }
15079 }
15080 break;
15081 }
15082 case ISD::VECREDUCE_ADD:
15083 case ISD::VECREDUCE_AND:
15084 case ISD::VECREDUCE_OR:
15085 case ISD::VECREDUCE_XOR:
15086 case ISD::VECREDUCE_SMAX:
15087 case ISD::VECREDUCE_UMAX:
15088 case ISD::VECREDUCE_SMIN:
15089 case ISD::VECREDUCE_UMIN:
15090 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
15091 Results.push_back(V);
15092 break;
15093 case ISD::VP_REDUCE_ADD:
15094 case ISD::VP_REDUCE_AND:
15095 case ISD::VP_REDUCE_OR:
15096 case ISD::VP_REDUCE_XOR:
15097 case ISD::VP_REDUCE_SMAX:
15098 case ISD::VP_REDUCE_UMAX:
15099 case ISD::VP_REDUCE_SMIN:
15100 case ISD::VP_REDUCE_UMIN:
15101 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
15102 Results.push_back(V);
15103 break;
15104 case ISD::GET_ROUNDING: {
15105 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
15106 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
15107 Results.push_back(Res.getValue(0));
15108 Results.push_back(Res.getValue(1));
15109 break;
15110 }
15111 }
15112}
15113
15114/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
15115/// which corresponds to it.
15116static unsigned getVecReduceOpcode(unsigned Opc) {
15117 switch (Opc) {
15118 default:
15119 llvm_unreachable("Unhandled binary to transform reduction");
15120 case ISD::ADD:
15121 return ISD::VECREDUCE_ADD;
15122 case ISD::UMAX:
15123 return ISD::VECREDUCE_UMAX;
15124 case ISD::SMAX:
15125 return ISD::VECREDUCE_SMAX;
15126 case ISD::UMIN:
15127 return ISD::VECREDUCE_UMIN;
15128 case ISD::SMIN:
15129 return ISD::VECREDUCE_SMIN;
15130 case ISD::AND:
15131 return ISD::VECREDUCE_AND;
15132 case ISD::OR:
15133 return ISD::VECREDUCE_OR;
15134 case ISD::XOR:
15135 return ISD::VECREDUCE_XOR;
15136 case ISD::FADD:
15137 // Note: This is the associative form of the generic reduction opcode.
15138 return ISD::VECREDUCE_FADD;
15139 }
15140}
15141
15142/// Perform two related transforms whose purpose is to incrementally recognize
15143/// an explode_vector followed by scalar reduction as a vector reduction node.
15144/// This exists to recover from a deficiency in SLP which can't handle
15145/// forests with multiple roots sharing common nodes. In some cases, one
15146/// of the trees will be vectorized, and the other will remain (unprofitably)
15147/// scalarized.
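/// For example (illustrative), the scalar chain
///   add (add (extractelt %v, 0), (extractelt %v, 1)), (extractelt %v, 2)
/// is first recognized as vecreduce_add of the first two elements of %v, and is
/// then grown one extracted element at a time into a wider vecreduce_add.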
15148static SDValue
15149 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
15150 const RISCVSubtarget &Subtarget) {
15151
15152 // This transform needs to run before all integer types have been legalized
15153 // to i64 (so that the vector element type matches the add type), and while
15154 // it's safe to introduce odd sized vector types.
15155 if (DAG.NewNodesMustHaveLegalTypes)
15156 return SDValue();
15157
15158 // Without V, this transform isn't useful. We could form the (illegal)
15159 // operations and let them be scalarized again, but there's really no point.
15160 if (!Subtarget.hasVInstructions())
15161 return SDValue();
15162
15163 const SDLoc DL(N);
15164 const EVT VT = N->getValueType(0);
15165 const unsigned Opc = N->getOpcode();
15166
15167 // For FADD, we only handle the case with reassociation allowed. We
15168 // could handle strict reduction order, but at the moment, there's no
15169 // known reason to, and the complexity isn't worth it.
15170 // TODO: Handle fminnum and fmaxnum here
15171 if (!VT.isInteger() &&
15172 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
15173 return SDValue();
15174
15175 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
15176 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
15177 "Inconsistent mappings");
15178 SDValue LHS = N->getOperand(0);
15179 SDValue RHS = N->getOperand(1);
15180
15181 if (!LHS.hasOneUse() || !RHS.hasOneUse())
15182 return SDValue();
15183
15184 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15185 std::swap(LHS, RHS);
15186
15187 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15188 !isa<ConstantSDNode>(RHS.getOperand(1)))
15189 return SDValue();
15190
15191 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
15192 SDValue SrcVec = RHS.getOperand(0);
15193 EVT SrcVecVT = SrcVec.getValueType();
15194 assert(SrcVecVT.getVectorElementType() == VT);
15195 if (SrcVecVT.isScalableVector())
15196 return SDValue();
15197
15198 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
15199 return SDValue();
15200
15201 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
15202 // reduce_op (extract_subvector [2 x VT] from V). This will form the
15203 // root of our reduction tree. TODO: We could extend this to any two
15204 // adjacent aligned constant indices if desired.
15205 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15206 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
15207 uint64_t LHSIdx =
15208 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
15209 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
15210 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
15211 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15212 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
15213 }
15214 }
15215
15216 // Match (binop (reduce (extract_subvector V, 0),
15217 // (extract_vector_elt V, sizeof(SubVec))))
15218 // into a reduction of one more element from the original vector V.
15219 if (LHS.getOpcode() != ReduceOpc)
15220 return SDValue();
15221
15222 SDValue ReduceVec = LHS.getOperand(0);
15223 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15224 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
15225 isNullConstant(ReduceVec.getOperand(1)) &&
15226 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
15227 // For illegal types (e.g. 3xi32), most will be combined again into a
15228 // wider (hopefully legal) type. If this is a terminal state, we are
15229 // relying on type legalization here to produce something reasonable
15230 // and this lowering quality could probably be improved. (TODO)
15231 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
15232 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15233 return DAG.getNode(ReduceOpc, DL, VT, Vec,
15234 ReduceVec->getFlags() & N->getFlags());
15235 }
15236
15237 return SDValue();
15238}
15239
15240
15241// Try to fold (<bop> x, (reduction.<bop> vec, start))
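// For example (illustrative), for an integer add reduction whose start value is
// the neutral element 0, (add x, (vecreduce.add vec, 0)) can be rewritten as
// (vecreduce.add vec, x), folding the scalar add into the reduction's start.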
15242 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
15243 const RISCVSubtarget &Subtarget) {
15244 auto BinOpToRVVReduce = [](unsigned Opc) {
15245 switch (Opc) {
15246 default:
15247 llvm_unreachable("Unhandled binary to transform reduction");
15248 case ISD::ADD:
15249 return RISCVISD::VECREDUCE_ADD_VL;
15250 case ISD::UMAX:
15251 return RISCVISD::VECREDUCE_UMAX_VL;
15252 case ISD::SMAX:
15253 return RISCVISD::VECREDUCE_SMAX_VL;
15254 case ISD::UMIN:
15255 return RISCVISD::VECREDUCE_UMIN_VL;
15256 case ISD::SMIN:
15257 return RISCVISD::VECREDUCE_SMIN_VL;
15258 case ISD::AND:
15259 return RISCVISD::VECREDUCE_AND_VL;
15260 case ISD::OR:
15261 return RISCVISD::VECREDUCE_OR_VL;
15262 case ISD::XOR:
15263 return RISCVISD::VECREDUCE_XOR_VL;
15264 case ISD::FADD:
15265 return RISCVISD::VECREDUCE_FADD_VL;
15266 case ISD::FMAXNUM:
15267 return RISCVISD::VECREDUCE_FMAX_VL;
15268 case ISD::FMINNUM:
15269 return RISCVISD::VECREDUCE_FMIN_VL;
15270 }
15271 };
15272
15273 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
15274 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15275 isNullConstant(V.getOperand(1)) &&
15276 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
15277 };
15278
15279 unsigned Opc = N->getOpcode();
15280 unsigned ReduceIdx;
15281 if (IsReduction(N->getOperand(0), Opc))
15282 ReduceIdx = 0;
15283 else if (IsReduction(N->getOperand(1), Opc))
15284 ReduceIdx = 1;
15285 else
15286 return SDValue();
15287
15288 // Skip if this is an FADD that disallows reassociation, which this combine needs.
15289 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
15290 return SDValue();
15291
15292 SDValue Extract = N->getOperand(ReduceIdx);
15293 SDValue Reduce = Extract.getOperand(0);
15294 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
15295 return SDValue();
15296
15297 SDValue ScalarV = Reduce.getOperand(2);
15298 EVT ScalarVT = ScalarV.getValueType();
15299 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
15300 ScalarV.getOperand(0)->isUndef() &&
15301 isNullConstant(ScalarV.getOperand(2)))
15302 ScalarV = ScalarV.getOperand(1);
15303
15304 // Make sure that ScalarV is a splat with VL=1.
15305 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
15306 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
15307 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
15308 return SDValue();
15309
15310 if (!isNonZeroAVL(ScalarV.getOperand(2)))
15311 return SDValue();
15312
15313 // Check that the scalar of ScalarV is the neutral element.
15314 // TODO: Deal with values other than the neutral element.
15315 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
15316 0))
15317 return SDValue();
15318
15319 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
15320 // FIXME: We might be able to improve this if operand 0 is undef.
15321 if (!isNonZeroAVL(Reduce.getOperand(5)))
15322 return SDValue();
15323
15324 SDValue NewStart = N->getOperand(1 - ReduceIdx);
15325
15326 SDLoc DL(N);
15327 SDValue NewScalarV =
15328 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
15329 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
15330
15331 // If we looked through an INSERT_SUBVECTOR we need to restore it.
15332 if (ScalarVT != ScalarV.getValueType())
15333 NewScalarV =
15334 DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);
15335
15336 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
15337 NewScalarV, Reduce.getOperand(3),
15338 Reduce.getOperand(4), Reduce.getOperand(5)};
15339 SDValue NewReduce =
15340 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
15341 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
15342 Extract.getOperand(1));
15343}
15344
15345// Optimize (add (shl x, c0), (shl y, c1)) ->
15346 // (SLLI (SH*ADD x, y), c0), if c1-c0 is in [1|2|3].
15347 // or
15348 // (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <= 31.
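// For example (illustrative), with Zba:
//   (add (shl x, 1), (shl y, 4)) -> (slli (sh3add y, x), 1)
// since ((y << 3) + x) << 1 == (x << 1) + (y << 4).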
15349 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
15350 const RISCVSubtarget &Subtarget) {
15351 const bool HasStdExtZba = Subtarget.hasStdExtZba();
15352 const bool HasVendorXAndesPerf = Subtarget.hasVendorXAndesPerf();
15353 const bool HasVendorXqciac = Subtarget.hasVendorXqciac();
15354 // Perform this optimization only when the Zba, XAndesPerf, or Xqciac extension is available.
15355 if (!HasStdExtZba && !HasVendorXAndesPerf && !HasVendorXqciac)
15356 return SDValue();
15357
15358 // Skip for vector types and larger types.
15359 EVT VT = N->getValueType(0);
15360 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15361 return SDValue();
15362
15363 // The two operand nodes must be SHL and have no other use.
15364 SDValue N0 = N->getOperand(0);
15365 SDValue N1 = N->getOperand(1);
15366 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
15367 !N0->hasOneUse() || !N1->hasOneUse())
15368 return SDValue();
15369
15370 // Check c0 and c1.
15371 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15372 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
15373 if (!N0C || !N1C)
15374 return SDValue();
15375 int64_t C0 = N0C->getSExtValue();
15376 int64_t C1 = N1C->getSExtValue();
15377 if (C0 <= 0 || C1 <= 0)
15378 return SDValue();
15379
15380 int64_t Diff = std::abs(C0 - C1);
15381 bool IsShXaddDiff = Diff == 1 || Diff == 2 || Diff == 3;
15382 bool HasShXadd = HasStdExtZba || HasVendorXAndesPerf;
15383
15384 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
15385 if ((!IsShXaddDiff && HasShXadd && !HasVendorXqciac) ||
15386 (IsShXaddDiff && !HasShXadd && HasVendorXqciac))
15387 return SDValue();
15388
15389 // Skip if QC_SHLADD is not applicable.
15390 if (Diff == 0 || Diff > 31)
15391 return SDValue();
15392
15393 // Build nodes.
15394 SDLoc DL(N);
15395 int64_t Bits = std::min(C0, C1);
15396 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
15397 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
15398 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
15399 DAG.getConstant(Diff, DL, VT), NS);
15400 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
15401}
15402
15403// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
15404// or 3.
15405 static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
15406 SelectionDAG &DAG) {
15407 using namespace llvm::SDPatternMatch;
15408
15409 // Looking for a reg-reg add and not an addi.
15410 if (isa<ConstantSDNode>(N->getOperand(1)))
15411 return SDValue();
15412
15413 // Based on testing it seems that performance degrades if the ADDI has
15414 // more than 2 uses.
15415 if (AddI->use_size() > 2)
15416 return SDValue();
15417
15418 APInt AddVal;
15419 SDValue SHLVal;
15420 if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
15421 return SDValue();
15422
15423 APInt VShift;
15424 if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
15425 return SDValue();
15426
15427 if (VShift.slt(1) || VShift.sgt(3))
15428 return SDValue();
15429
15430 SDLoc DL(N);
15431 EVT VT = N->getValueType(0);
15432 // The shift must be positive but the add can be signed.
15433 uint64_t ShlConst = VShift.getZExtValue();
15434 int64_t AddConst = AddVal.getSExtValue();
15435
15436 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
15437 DAG.getConstant(ShlConst, DL, VT), Other);
15438 return DAG.getNode(ISD::ADD, DL, VT, SHADD,
15439 DAG.getSignedConstant(AddConst, DL, VT));
15440}
15441
15442// Optimize (add (add (shl x, c0), c1), y) ->
15443 // (ADDI (SH*ADD y, x), c1), if c0 is in [1|2|3].
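// For example (illustrative), with Zba:
//   (add (add (shl x, 2), 100), y) -> (addi (sh2add x, y), 100)
// since ((x << 2) + y) + 100 == ((x << 2) + 100) + y.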
15444 static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
15445 const RISCVSubtarget &Subtarget) {
15446 // Perform this optimization only when the Zba extension is available.
15447 if (!ReassocShlAddiAdd || !Subtarget.hasStdExtZba())
15448 return SDValue();
15449
15450 // Skip for vector types and larger types.
15451 EVT VT = N->getValueType(0);
15452 if (VT != Subtarget.getXLenVT())
15453 return SDValue();
15454
15455 SDValue AddI = N->getOperand(0);
15456 SDValue Other = N->getOperand(1);
15457 if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
15458 return V;
15459 if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
15460 return V;
15461 return SDValue();
15462}
15463
15464// Combine a constant select operand into its use:
15465//
15466// (and (select cond, -1, c), x)
15467// -> (select cond, x, (and x, c)) [AllOnes=1]
15468// (or (select cond, 0, c), x)
15469// -> (select cond, x, (or x, c)) [AllOnes=0]
15470// (xor (select cond, 0, c), x)
15471// -> (select cond, x, (xor x, c)) [AllOnes=0]
15472// (add (select cond, 0, c), x)
15473// -> (select cond, x, (add x, c)) [AllOnes=0]
15474// (sub x, (select cond, 0, c))
15475// -> (select cond, x, (sub x, c)) [AllOnes=0]
15476 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
15477 SelectionDAG &DAG, bool AllOnes,
15478 const RISCVSubtarget &Subtarget) {
15479 EVT VT = N->getValueType(0);
15480
15481 // Skip vectors.
15482 if (VT.isVector())
15483 return SDValue();
15484
15485 if (!Subtarget.hasConditionalMoveFusion()) {
15486 // (select cond, x, (and x, c)) has custom lowering with Zicond.
15487 if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
15488 return SDValue();
15489
15490 // Maybe harmful when the condition code has multiple uses.
15491 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
15492 return SDValue();
15493
15494 // Maybe harmful when VT is wider than XLen.
15495 if (VT.getSizeInBits() > Subtarget.getXLen())
15496 return SDValue();
15497 }
15498
15499 if ((Slct.getOpcode() != ISD::SELECT &&
15500 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
15501 !Slct.hasOneUse())
15502 return SDValue();
15503
15504 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
15505 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
15506 };
15507
15508 bool SwapSelectOps;
15509 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
15510 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
15511 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
15512 SDValue NonConstantVal;
15513 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
15514 SwapSelectOps = false;
15515 NonConstantVal = FalseVal;
15516 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
15517 SwapSelectOps = true;
15518 NonConstantVal = TrueVal;
15519 } else
15520 return SDValue();
15521
15522 // Slct is now known to be the desired identity constant when CC is true.
15523 TrueVal = OtherOp;
15524 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
15525 // Unless SwapSelectOps says the condition should be false.
15526 if (SwapSelectOps)
15527 std::swap(TrueVal, FalseVal);
15528
15529 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
15530 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
15531 {Slct.getOperand(0), Slct.getOperand(1),
15532 Slct.getOperand(2), TrueVal, FalseVal});
15533
15534 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
15535 {Slct.getOperand(0), TrueVal, FalseVal});
15536}
15537
15538// Attempt combineSelectAndUse on each operand of a commutative operator N.
15539 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
15540 bool AllOnes,
15541 const RISCVSubtarget &Subtarget) {
15542 SDValue N0 = N->getOperand(0);
15543 SDValue N1 = N->getOperand(1);
15544 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
15545 return Result;
15546 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
15547 return Result;
15548 return SDValue();
15549}
15550
15551// Transform (add (mul x, c0), c1) ->
15552// (add (mul (add x, c1/c0), c0), c1%c0).
15553// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
15554// that should be excluded is when c0*(c1/c0) is simm12, which will lead
15555// to an infinite loop in DAGCombine if transformed.
15556// Or transform (add (mul x, c0), c1) ->
15557// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
15558// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
15559// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
15560// lead to an infinite loop in DAGCombine if transformed.
15561// Or transform (add (mul x, c0), c1) ->
15562// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
15563// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
15564// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
15565// lead to an infinite loop in DAGCombine if transformed.
15566// Or transform (add (mul x, c0), c1) ->
15567// (mul (add x, c1/c0), c0).
15568// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
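// For example (illustrative), 4501 is not a simm12, but 45 and 1 are:
//   (add (mul x, 100), 4501) -> (add (mul (add x, 45), 100), 1)
// since 100 * 45 + 1 == 4501 and 100 * 45 == 4500 is itself not a simm12.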
15569 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
15570 const RISCVSubtarget &Subtarget) {
15571 // Skip for vector types and larger types.
15572 EVT VT = N->getValueType(0);
15573 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15574 return SDValue();
15575 // The first operand node must be a MUL and have no other use.
15576 SDValue N0 = N->getOperand(0);
15577 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
15578 return SDValue();
15579 // Check if c0 and c1 match above conditions.
15580 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15581 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15582 if (!N0C || !N1C)
15583 return SDValue();
15584 // If N0C has multiple uses it's possible one of the cases in
15585 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
15586 // in an infinite loop.
15587 if (!N0C->hasOneUse())
15588 return SDValue();
15589 int64_t C0 = N0C->getSExtValue();
15590 int64_t C1 = N1C->getSExtValue();
15591 int64_t CA, CB;
15592 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
15593 return SDValue();
15594 // Search for a proper CA (non-zero) and CB such that both are simm12.
15595 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
15596 !isInt<12>(C0 * (C1 / C0))) {
15597 CA = C1 / C0;
15598 CB = C1 % C0;
15599 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
15600 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
15601 CA = C1 / C0 + 1;
15602 CB = C1 % C0 - C0;
15603 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
15604 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
15605 CA = C1 / C0 - 1;
15606 CB = C1 % C0 + C0;
15607 } else
15608 return SDValue();
15609 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
15610 SDLoc DL(N);
15611 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
15612 DAG.getSignedConstant(CA, DL, VT));
15613 SDValue New1 =
15614 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
15615 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
15616}
15617
15618// add (zext, zext) -> zext (add (zext, zext))
15619// sub (zext, zext) -> sext (sub (zext, zext))
15620// mul (zext, zext) -> zext (mul (zext, zext))
15621// sdiv (zext, zext) -> zext (sdiv (zext, zext))
15622// udiv (zext, zext) -> zext (udiv (zext, zext))
15623// srem (zext, zext) -> zext (srem (zext, zext))
15624// urem (zext, zext) -> zext (urem (zext, zext))
15625//
15626 // where the sum of the extend widths matches, and the range of the bin op
15627// fits inside the width of the narrower bin op. (For profitability on rvv, we
15628// use a power of two for both inner and outer extend.)
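// For example (illustrative), for a legal v4i32 result:
//   add (zext v4i8 %a to v4i32), (zext v4i8 %b to v4i32)
//   -> zext (add (zext %a to v4i16), (zext %b to v4i16)) to v4i32
// For sub, the outer extend becomes a sext since the narrow result may be negative.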
15629 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
15630
15631 EVT VT = N->getValueType(0);
15632 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
15633 return SDValue();
15634
15635 SDValue N0 = N->getOperand(0);
15636 SDValue N1 = N->getOperand(1);
15637 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
15638 return SDValue();
15639 if (!N0.hasOneUse() || !N1.hasOneUse())
15640 return SDValue();
15641
15642 SDValue Src0 = N0.getOperand(0);
15643 SDValue Src1 = N1.getOperand(0);
15644 EVT SrcVT = Src0.getValueType();
15645 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
15646 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
15647 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
15648 return SDValue();
15649
15650 LLVMContext &C = *DAG.getContext();
15651 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
15652 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
15653
15654 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
15655 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
15656
15657 // Src0 and Src1 are zero extended, so they're always positive if signed.
15658 //
15659 // sub can produce a negative from two positive operands, so it needs to be
15660 // sign extended. Other nodes produce a positive from two positive operands, so
15661 // zero extend instead.
15662 unsigned OuterExtend =
15663 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
15664
15665 return DAG.getNode(
15666 OuterExtend, SDLoc(N), VT,
15667 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
15668}
15669
15670// Try to turn (add (xor bool, 1) -1) into (neg bool).
15672 SDValue N0 = N->getOperand(0);
15673 SDValue N1 = N->getOperand(1);
15674 EVT VT = N->getValueType(0);
15675 SDLoc DL(N);
15676
15677 // RHS should be -1.
15678 if (!isAllOnesConstant(N1))
15679 return SDValue();
15680
15681 // Look for (xor X, 1).
15682 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
15683 return SDValue();
15684
15685 // First xor input should be 0 or 1.
15686 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15687 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
15688 return SDValue();
15689
15690 // Emit a negate of the setcc.
15691 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
15692 N0.getOperand(0));
15693}
15694
15695 static SDValue performADDCombine(SDNode *N,
15696 TargetLowering::DAGCombinerInfo &DCI,
15697 const RISCVSubtarget &Subtarget) {
15698 SelectionDAG &DAG = DCI.DAG;
15699 if (SDValue V = combineAddOfBooleanXor(N, DAG))
15700 return V;
15701 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
15702 return V;
15703 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
15704 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
15705 return V;
15706 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
15707 return V;
15708 }
15709 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15710 return V;
15711 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15712 return V;
15713 if (SDValue V = combineBinOpOfZExt(N, DAG))
15714 return V;
15715
15716 // fold (add (select lhs, rhs, cc, 0, y), x) ->
15717 // (select lhs, rhs, cc, x, (add x, y))
15718 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
15719}
15720
15721 // Try to turn a sub with a boolean RHS and a constant LHS into an addi.
15722 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
15723 SDValue N0 = N->getOperand(0);
15724 SDValue N1 = N->getOperand(1);
15725 EVT VT = N->getValueType(0);
15726 SDLoc DL(N);
15727
15728 // Require a constant LHS.
15729 auto *N0C = dyn_cast<ConstantSDNode>(N0);
15730 if (!N0C)
15731 return SDValue();
15732
15733 // All our optimizations involve subtracting 1 from the immediate and forming
15734 // an ADDI. Make sure the new immediate is valid for an ADDI.
15735 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
15736 if (!ImmValMinus1.isSignedIntN(12))
15737 return SDValue();
15738
15739 SDValue NewLHS;
15740 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
15741 // (sub constant, (setcc x, y, eq/neq)) ->
15742 // (add (setcc x, y, neq/eq), constant - 1)
15743 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15744 EVT SetCCOpVT = N1.getOperand(0).getValueType();
15745 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
15746 return SDValue();
15747 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15748 NewLHS =
15749 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
15750 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
15751 N1.getOperand(0).getOpcode() == ISD::SETCC) {
15752 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
15753 // Since setcc returns a bool the xor is equivalent to 1-setcc.
15754 NewLHS = N1.getOperand(0);
15755 } else
15756 return SDValue();
15757
15758 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
15759 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
15760}
15761
15762// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
15763// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
15764// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
15765// valid with Y=3, while 0b0000_1000_0000_0100 is not.
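// For example (illustrative), with Y=3 and X=0x0808 (only bit 3 set per byte):
//   (sub (shl X, 5), (srl X, 3)) == 0x10100 - 0x101 == 0xFFFF == (orc.b X).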
15766 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
15767 const RISCVSubtarget &Subtarget) {
15768 if (!Subtarget.hasStdExtZbb())
15769 return SDValue();
15770
15771 EVT VT = N->getValueType(0);
15772
15773 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
15774 return SDValue();
15775
15776 SDValue N0 = N->getOperand(0);
15777 SDValue N1 = N->getOperand(1);
15778
15779 if (N0->getOpcode() != ISD::SHL)
15780 return SDValue();
15781
15782 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
15783 if (!ShAmtCLeft)
15784 return SDValue();
15785 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
15786
15787 if (ShiftedAmount >= 8)
15788 return SDValue();
15789
15790 SDValue LeftShiftOperand = N0->getOperand(0);
15791 SDValue RightShiftOperand = N1;
15792
15793 if (ShiftedAmount != 0) { // Right operand must be a right shift.
15794 if (N1->getOpcode() != ISD::SRL)
15795 return SDValue();
15796 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
15797 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
15798 return SDValue();
15799 RightShiftOperand = N1.getOperand(0);
15800 }
15801
15802 // At least one shift should have a single use.
15803 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
15804 return SDValue();
15805
15806 if (LeftShiftOperand != RightShiftOperand)
15807 return SDValue();
15808
15809 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
15810 Mask <<= ShiftedAmount;
15811 // Check that X has indeed the right shape (only the Y-th bit can be set in
15812 // every byte).
15813 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
15814 return SDValue();
15815
15816 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
15817}
15818
15819 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
15820 const RISCVSubtarget &Subtarget) {
15821 if (SDValue V = combineSubOfBoolean(N, DAG))
15822 return V;
15823
15824 EVT VT = N->getValueType(0);
15825 SDValue N0 = N->getOperand(0);
15826 SDValue N1 = N->getOperand(1);
15827 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
15828 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
15829 isNullConstant(N1.getOperand(1))) {
15830 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15831 if (CCVal == ISD::SETLT) {
15832 SDLoc DL(N);
15833 unsigned ShAmt = N0.getValueSizeInBits() - 1;
15834 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
15835 DAG.getConstant(ShAmt, DL, VT));
15836 }
15837 }
15838
15839 if (SDValue V = combineBinOpOfZExt(N, DAG))
15840 return V;
15841 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
15842 return V;
15843
15844 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
15845 // (select lhs, rhs, cc, x, (sub x, y))
15846 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
15847}
15848
15849// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
15850// Legalizing setcc can introduce xors like this. Doing this transform reduces
15851// the number of xors and may allow the xor to fold into a branch condition.
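// For example (illustrative), with A and B known to be 0/1:
//   (and (xor A, 1), (xor B, 1)) -> (xor (or A, B), 1)
//   (or  (xor A, 1), (xor B, 1)) -> (xor (and A, B), 1)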
15852 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
15853 SDValue N0 = N->getOperand(0);
15854 SDValue N1 = N->getOperand(1);
15855 bool IsAnd = N->getOpcode() == ISD::AND;
15856
15857 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
15858 return SDValue();
15859
15860 if (!N0.hasOneUse() || !N1.hasOneUse())
15861 return SDValue();
15862
15863 SDValue N01 = N0.getOperand(1);
15864 SDValue N11 = N1.getOperand(1);
15865
15866 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
15867 // (xor X, -1) based on the upper bits of the other operand being 0. If the
15868 // operation is And, allow one of the Xors to use -1.
15869 if (isOneConstant(N01)) {
15870 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
15871 return SDValue();
15872 } else if (isOneConstant(N11)) {
15873 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
15874 if (!(IsAnd && isAllOnesConstant(N01)))
15875 return SDValue();
15876 } else
15877 return SDValue();
15878
15879 EVT VT = N->getValueType(0);
15880
15881 SDValue N00 = N0.getOperand(0);
15882 SDValue N10 = N1.getOperand(0);
15883
15884 // The LHS of the xors needs to be 0/1.
15885 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15886 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
15887 return SDValue();
15888
15889 // Invert the opcode and insert a new xor.
15890 SDLoc DL(N);
15891 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15892 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
15893 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
15894}
15895
15896// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
15897// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
15898 // value to an unsigned value. This will be lowered to vmax and a series of
15899 // vnclipu instructions later. This can be extended to truncated types other
15900 // than i8 by replacing 256 and 255 with the equivalent constants for the
15901// type.
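// For example (illustrative), when truncating i16 elements to i8: an element of
// 300 saturates to 255, -7 saturates to 0, and 100 is left unchanged.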
15902 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
15903 EVT VT = N->getValueType(0);
15904 SDValue N0 = N->getOperand(0);
15905 EVT SrcVT = N0.getValueType();
15906
15907 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15908 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
15909 return SDValue();
15910
15911 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
15912 return SDValue();
15913
15914 SDValue Cond = N0.getOperand(0);
15915 SDValue True = N0.getOperand(1);
15916 SDValue False = N0.getOperand(2);
15917
15918 if (Cond.getOpcode() != ISD::SETCC)
15919 return SDValue();
15920
15921 // FIXME: Support the version of this pattern with the select operands
15922 // swapped.
15923 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15924 if (CCVal != ISD::SETULT)
15925 return SDValue();
15926
15927 SDValue CondLHS = Cond.getOperand(0);
15928 SDValue CondRHS = Cond.getOperand(1);
15929
15930 if (CondLHS != True)
15931 return SDValue();
15932
15933 unsigned ScalarBits = VT.getScalarSizeInBits();
15934
15935 // FIXME: Support other constants.
15936 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
15937 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
15938 return SDValue();
15939
15940 if (False.getOpcode() != ISD::SIGN_EXTEND)
15941 return SDValue();
15942
15943 False = False.getOperand(0);
15944
15945 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
15946 return SDValue();
15947
15948 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
15949 if (!FalseRHSC || !FalseRHSC->isZero())
15950 return SDValue();
15951
15952 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
15953 if (CCVal2 != ISD::SETGT)
15954 return SDValue();
15955
15956 // Emit the signed to unsigned saturation pattern.
15957 SDLoc DL(N);
15958 SDValue Max =
15959 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
15960 SDValue Min =
15961 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
15962 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
15963 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
15964}
15965
15966 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
15967 const RISCVSubtarget &Subtarget) {
15968 SDValue N0 = N->getOperand(0);
15969 EVT VT = N->getValueType(0);
15970
15971 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
15972 // extending X. This is safe since we only need the LSB after the shift and
15973 // shift amounts larger than 31 would produce poison. If we wait until
15974 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
15975 // to use a BEXT instruction.
15976 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
15977 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
15978 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
15979 SDLoc DL(N0);
15980 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
15981 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
15982 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
15983 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
15984 }
15985
15986 return combineTruncSelectToSMaxUSat(N, DAG);
15987}
15988
15989// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
15990// truncation. But RVV doesn't have truncation instructions for more than twice
15991// the bitwidth.
15992//
15993// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
15994//
15995// vsetvli a0, zero, e32, m2, ta, ma
15996// vnsrl.wi v12, v8, 0
15997// vsetvli zero, zero, e16, m1, ta, ma
15998// vnsrl.wi v8, v12, 0
15999// vsetvli zero, zero, e8, mf2, ta, ma
16000// vnsrl.wi v8, v8, 0
16001//
16002// So reverse the combine so we generate an vmseq/vmsne again:
16003//
16004// and (lshr (trunc X), ShAmt), 1
16005// -->
16006// zext (icmp ne (and X, (1 << ShAmt)), 0)
16007//
16008// and (lshr (not (trunc X)), ShAmt), 1
16009// -->
16010// zext (icmp eq (and X, (1 << ShAmt)), 0)
16011 static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
16012 const RISCVSubtarget &Subtarget) {
16013 using namespace SDPatternMatch;
16014 SDLoc DL(N);
16015
16016 if (!Subtarget.hasVInstructions())
16017 return SDValue();
16018
16019 EVT VT = N->getValueType(0);
16020 if (!VT.isVector())
16021 return SDValue();
16022
16023 APInt ShAmt;
16024 SDValue Inner;
16025 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
16026 m_One())))
16027 return SDValue();
16028
16029 SDValue X;
16030 bool IsNot;
16031 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
16032 IsNot = true;
16033 else if (sd_match(Inner, m_Trunc(m_Value(X))))
16034 IsNot = false;
16035 else
16036 return SDValue();
16037
16038 EVT WideVT = X.getValueType();
16039 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
16040 return SDValue();
16041
16042 SDValue Res =
16043 DAG.getNode(ISD::AND, DL, WideVT, X,
16044 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
16045 Res = DAG.getSetCC(DL,
16046 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16047 WideVT.getVectorElementCount()),
16048 Res, DAG.getConstant(0, DL, WideVT),
16049 IsNot ? ISD::SETEQ : ISD::SETNE);
16050 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
16051}
16052
16053 static SDValue reduceANDOfAtomicLoad(SDNode *N,
16054 TargetLowering::DAGCombinerInfo &DCI) {
16055 SelectionDAG &DAG = DCI.DAG;
16056 if (N->getOpcode() != ISD::AND)
16057 return SDValue();
16058
16059 SDValue N0 = N->getOperand(0);
16060 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
16061 return SDValue();
16062 if (!N0.hasOneUse())
16063 return SDValue();
16064
16065 AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
16066 if (isStrongerThanMonotonic(ALoad->getSuccessOrdering()))
16067 return SDValue();
16068
16069 EVT LoadedVT = ALoad->getMemoryVT();
16070 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
16071 if (!MaskConst)
16072 return SDValue();
16073 uint64_t Mask = MaskConst->getZExtValue();
16074 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
16075 if (Mask != ExpectedMask)
16076 return SDValue();
16077
16078 SDValue ZextLoad = DAG.getAtomicLoad(
16079 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
16080 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
16081 DCI.CombineTo(N, ZextLoad);
16082 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
16083 DCI.recursivelyDeleteUnusedNodes(N0.getNode());
16084 return SDValue(N, 0);
16085}
16086
16087 // Combines two comparison operations and a logic operation into one selection
16088 // operation (min, max) and a logic operation. Returns the newly constructed node
16089 // if the conditions for the optimization are satisfied.
16090 static SDValue performANDCombine(SDNode *N,
16091 TargetLowering::DAGCombinerInfo &DCI,
16092 const RISCVSubtarget &Subtarget) {
16093 SelectionDAG &DAG = DCI.DAG;
16094
16095 SDValue N0 = N->getOperand(0);
16096 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
16097 // extending X. This is safe since we only need the LSB after the shift and
16098 // shift amounts larger than 31 would produce poison. If we wait until
16099 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16100 // to use a BEXT instruction.
16101 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16102 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
16103 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
16104 N0.hasOneUse()) {
16105 SDLoc DL(N);
16106 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16107 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16108 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16109 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
16110 DAG.getConstant(1, DL, MVT::i64));
16111 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16112 }
16113
16114 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
16115 return V;
16116
16117 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16118 return V;
16119 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16120 return V;
16121 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
16122 return V;
16123
16124 if (DCI.isAfterLegalizeDAG())
16125 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16126 return V;
16127
16128 // fold (and (select lhs, rhs, cc, -1, y), x) ->
16129 // (select lhs, rhs, cc, x, (and x, y))
16130 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
16131}
16132
16133// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
16134// FIXME: Generalize to other binary operators with same operand.
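// For example (illustrative):
//   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
//   -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)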
16135 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
16136 SelectionDAG &DAG) {
16137 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
16138
16139 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
16140 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
16141 !N0.hasOneUse() || !N1.hasOneUse())
16142 return SDValue();
16143
16144 // Should have the same condition.
16145 SDValue Cond = N0.getOperand(1);
16146 if (Cond != N1.getOperand(1))
16147 return SDValue();
16148
16149 SDValue TrueV = N0.getOperand(0);
16150 SDValue FalseV = N1.getOperand(0);
16151
16152 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
16153 TrueV.getOperand(1) != FalseV.getOperand(1) ||
16154 !isOneConstant(TrueV.getOperand(1)) ||
16155 !TrueV.hasOneUse() || !FalseV.hasOneUse())
16156 return SDValue();
16157
16158 EVT VT = N->getValueType(0);
16159 SDLoc DL(N);
16160
16161 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
16162 Cond);
16163 SDValue NewN1 =
16164 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
16165 SDValue NewOr =
16166 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
16167 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
16168}
16169
16170// (xor X, (xor (and X, C2), Y))
16171// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
16172// where C2 is a shifted mask with width = Width and shift = ShAmt
16173// qc_insb might become qc.insb or qc.insbi depending on the operands.
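// For example (illustrative), with C2 = 0xff0 (Width = 8, ShAmt = 4) and Y known
// to be zero outside 0xff0, this replaces bits [11:4] of X with bits [11:4] of Y:
//   (xor X, (xor (and X, 0xff0), Y)) -> (qc_insb X, (sra Y, 4), 8, 4)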
16174 static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16175 const RISCVSubtarget &Subtarget) {
16176 if (!Subtarget.hasVendorXqcibm())
16177 return SDValue();
16178
16179 using namespace SDPatternMatch;
16180
16181 SDValue Base, Inserted;
16182 APInt CMask;
16183 if (!sd_match(N, m_Xor(m_Value(Base),
16184 m_OneUse(m_Xor(m_OneUse(m_And(m_Deferred(Base),
16185 m_ConstInt(CMask))),
16186 m_Value(Inserted))))))
16187 return SDValue();
16188
16189 if (N->getValueType(0) != MVT::i32)
16190 return SDValue();
16191
16192 unsigned Width, ShAmt;
16193 if (!CMask.isShiftedMask(ShAmt, Width))
16194 return SDValue();
16195
16196 // Check if all zero bits in CMask are also zero in Inserted
16197 if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
16198 return SDValue();
16199
16200 SDLoc DL(N);
16201
16202 // `Inserted` needs to be right shifted before it is put into the
16203 // instruction.
16204 Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
16205 DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
16206
16207 SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
16208 DAG.getConstant(ShAmt, DL, MVT::i32)};
16209 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16210}
16211
16212 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16213 const RISCVSubtarget &Subtarget) {
16214 SelectionDAG &DAG = DCI.DAG;
16215
16216 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16217 return V;
16218 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16219 return V;
16220
16221 if (DCI.isAfterLegalizeDAG())
16222 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16223 return V;
16224
16225 // Look for an OR of CZERO_EQZ/NEZ with the same condition, which is the select idiom.
16226 // We may be able to pull a common operation out of the true and false value.
16227 SDValue N0 = N->getOperand(0);
16228 SDValue N1 = N->getOperand(1);
16229 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
16230 return V;
16231 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
16232 return V;
16233
16234 // fold (or (select cond, 0, y), x) ->
16235 // (select cond, x, (or x, y))
16236 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16237}
16238
16239 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
16240 const RISCVSubtarget &Subtarget) {
16241 SDValue N0 = N->getOperand(0);
16242 SDValue N1 = N->getOperand(1);
16243
16244 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
16245 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
16246 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
16247 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16248 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
16249 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
16250 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16251 SDLoc DL(N);
16252 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16253 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16254 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
16255 SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
16256 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
16257 }
16258
16259 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
16260 // NOTE: Assumes ROL being legal means ROLW is legal.
16261 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16262 if (N0.getOpcode() == RISCVISD::SLLW &&
16264 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
16265 SDLoc DL(N);
16266 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
16267 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
16268 }
16269
16270 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
16271 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
16272 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
16273 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
16274 if (ConstN00 && CC == ISD::SETLT) {
16275 EVT VT = N0.getValueType();
16276 SDLoc DL(N0);
16277 const APInt &Imm = ConstN00->getAPIntValue();
16278 if ((Imm + 1).isSignedIntN(12))
16279 return DAG.getSetCC(DL, VT, N0.getOperand(1),
16280 DAG.getConstant(Imm + 1, DL, VT), CC);
16281 }
16282 }
16283
16284 if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
16285 return V;
16286
16287 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16288 return V;
16289 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16290 return V;
16291
16292 // fold (xor (select cond, 0, y), x) ->
16293 // (select cond, x, (xor x, y))
16294 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16295}
16296
16297// Try to expand a multiply to a sequence of shifts and add/subs,
16298 // for a machine without a native mul instruction.
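// For example (illustrative), MulAmt == 7 is handled as 8 - 1, so the loop emits
// x*7 as (shl x, 3) followed by a sub, rather than the three terms of the plain
// binary expansion 4x + 2x + x.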
16299 static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
16300 uint64_t MulAmt) {
16301 SDLoc DL(N);
16302 EVT VT = N->getValueType(0);
16303 const unsigned BitWidth = VT.getFixedSizeInBits();
16304
16305 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
16306 SDValue N0 = N->getOperand(0);
16307
16308 // Find the non-adjacent form (NAF) of the multiplier.
16309 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
16310 if (E & 1) {
16311 bool IsAdd = (E & 3) == 1;
16312 E -= IsAdd ? 1 : -1;
16313 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
16314 DAG.getShiftAmountConstant(I, VT, DL));
16315 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
16316 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
16317 }
16318 }
16319
16320 return Result;
16321}
16322
16323// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
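// For example (illustrative):
//   X * 12 -> (add (shl X, 3), (shl X, 2)) since 12 == 8 + 4
//   X * 14 -> (sub (shl X, 4), (shl X, 1)) since 14 == 16 - 2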
16324 static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
16325 uint64_t MulAmt) {
16326 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
16327 ISD::NodeType Op;
16328 uint64_t ShiftAmt1;
16329 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
16330 Op = ISD::SUB;
16331 ShiftAmt1 = MulAmt + MulAmtLowBit;
16332 } else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
16333 Op = ISD::ADD;
16334 ShiftAmt1 = MulAmt - MulAmtLowBit;
16335 } else {
16336 return SDValue();
16337 }
16338 EVT VT = N->getValueType(0);
16339 SDLoc DL(N);
16340 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16341 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
16342 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16343 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
16344 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
16345}
16346
16347// Try to expand a scalar multiply to a faster sequence.
16348 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
16349 TargetLowering::DAGCombinerInfo &DCI,
16350 const RISCVSubtarget &Subtarget) {
16351
16352 EVT VT = N->getValueType(0);
16353
16354 // LI + MUL is usually smaller than the alternative sequence.
16355 if (DAG.getMachineFunction().getFunction().hasMinSize())
16356 return SDValue();
16357
16358 if (VT != Subtarget.getXLenVT())
16359 return SDValue();
16360
16361 bool ShouldExpandMul =
16362 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
16363 !Subtarget.hasStdExtZmmul();
16364 if (!ShouldExpandMul)
16365 return SDValue();
16366
16367 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
16368 if (!CNode)
16369 return SDValue();
16370 uint64_t MulAmt = CNode->getZExtValue();
16371
16372 // Don't do this if the Xqciac extension is enabled and MulAmt is a simm12.
16373 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
16374 return SDValue();
16375
16376 const bool HasShlAdd = Subtarget.hasStdExtZba() ||
16377 Subtarget.hasVendorXTHeadBa() ||
16378 Subtarget.hasVendorXAndesPerf();
16379
16380 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
16381 // We're adding additional uses of X here, and in principle, we should be freezing
16382 // X before doing so. However, adding freeze here causes real regressions, and no
16383 // other target properly freezes X in these cases either.
16384 SDValue X = N->getOperand(0);
16385
16386 if (HasShlAdd) {
16387 for (uint64_t Divisor : {3, 5, 9}) {
16388 if (MulAmt % Divisor != 0)
16389 continue;
16390 uint64_t MulAmt2 = MulAmt / Divisor;
16391 // 3/5/9 * 2^N -> shl (shXadd X, X), N
16392 if (isPowerOf2_64(MulAmt2)) {
16393 SDLoc DL(N);
16394 SDValue X = N->getOperand(0);
16395 // Put the shift first if we can fold a zext into the
16396 // shift forming a slli.uw.
16397 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
16398 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
16399 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
16400 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16401 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
16402 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
16403 Shl);
16404 }
16405 // Otherwise, put the shl second so that it can fold with following
16406 // instructions (e.g. sext or add).
16407 SDValue Mul359 =
16408 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16409 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16410 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
16411 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16412 }
16413
16414 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
16415 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
16416 SDLoc DL(N);
16417 SDValue Mul359 =
16418 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16419 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16420 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16421 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
16422 Mul359);
16423 }
16424 }
16425
16426 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
16427 // shXadd. First check if this is a sum of two powers of 2 because that's
16428 // easy. Then count how many trailing zeros there are up to the first set bit.
16429 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
16430 unsigned ScaleShift = llvm::countr_zero(MulAmt);
16431 if (ScaleShift >= 1 && ScaleShift < 4) {
16432 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
16433 SDLoc DL(N);
16434 SDValue Shift1 =
16435 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16436 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16437 DAG.getConstant(ScaleShift, DL, VT), Shift1);
16438 }
16439 }
16440
16441 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
16442 // This is the two-instruction form; there are also three-instruction
16443 // variants we could implement, e.g.
16444 // (2^(1,2,3) * 3,5,9 + 1) << C2
16445 // 2^(C1>3) * 3,5,9 +/- 1
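// For example (illustrative), MulAmt == 11 == 5 * 2 + 1:
//   (sh1add (sh2add X, X), X) == ((5 * X) << 1) + X == 11 * X.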
16446 for (uint64_t Divisor : {3, 5, 9}) {
16447 uint64_t C = MulAmt - 1;
16448 if (C <= Divisor)
16449 continue;
16450 unsigned TZ = llvm::countr_zero(C);
16451 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
16452 SDLoc DL(N);
16453 SDValue Mul359 =
16454 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16455 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16456 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16457 DAG.getConstant(TZ, DL, VT), X);
16458 }
16459 }
16460
16461 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
16462 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
16463 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
16464 if (ScaleShift >= 1 && ScaleShift < 4) {
16465 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
16466 SDLoc DL(N);
16467 SDValue Shift1 =
16468 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16469 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
16470 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16471 DAG.getConstant(ScaleShift, DL, VT), X));
16472 }
16473 }
16474
16475 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
16476 for (uint64_t Offset : {3, 5, 9}) {
16477 if (isPowerOf2_64(MulAmt + Offset)) {
16478 unsigned ShAmt = Log2_64(MulAmt + Offset);
16479 if (ShAmt >= VT.getSizeInBits())
16480 continue;
16481 SDLoc DL(N);
16482 SDValue Shift1 =
16483 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
16484 SDValue Mul359 =
16485 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16486 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
16487 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
16488 }
16489 }
16490
16491 for (uint64_t Divisor : {3, 5, 9}) {
16492 if (MulAmt % Divisor != 0)
16493 continue;
16494 uint64_t MulAmt2 = MulAmt / Divisor;
16495 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
16496 // of 25 which happen to be quite common.
16497 for (uint64_t Divisor2 : {3, 5, 9}) {
16498 if (MulAmt2 % Divisor2 != 0)
16499 continue;
16500 uint64_t MulAmt3 = MulAmt2 / Divisor2;
16501 if (isPowerOf2_64(MulAmt3)) {
16502 SDLoc DL(N);
16503 SDValue Mul359A =
16504 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16505 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16506 SDValue Mul359B = DAG.getNode(
16507 RISCVISD::SHL_ADD, DL, VT, Mul359A,
16508 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
16509 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
16510 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
16511 }
16512 }
16513 }
16514 }
16515
16516 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
16517 return V;
16518
16519 if (!Subtarget.hasStdExtZmmul())
16520 return expandMulToNAFSequence(N, DAG, MulAmt);
16521
16522 return SDValue();
16523}
16524
16525// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
16526// (bitcast (sra (v2Xi16 (bitcast X)), 15))
16527// Same for other equivalent types with other equivalent constants.
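// For example (illustrative), for vXi32: (lshr X, 15) & 0x10001 isolates the sign
// bit of each i16 half, and multiplying by 0xffff replicates that bit across the
// half, which is exactly an arithmetic shift right by 15 on the v2Xi16 view.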
16528 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
16529 EVT VT = N->getValueType(0);
16530 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16531
16532 // Do this for legal vectors unless they are i1 or i8 vectors.
16533 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
16534 return SDValue();
16535
16536 if (N->getOperand(0).getOpcode() != ISD::AND ||
16537 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
16538 return SDValue();
16539
16540 SDValue And = N->getOperand(0);
16541 SDValue Srl = And.getOperand(0);
16542
16543 APInt V1, V2, V3;
16544 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
16545 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
16546 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
16547 return SDValue();
16548
16549 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
16550 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
16551 V3 != (HalfSize - 1))
16552 return SDValue();
16553
16554 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
16555 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
16556 VT.getVectorElementCount() * 2);
16557 SDLoc DL(N);
16558 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
16559 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
16560 DAG.getConstant(HalfSize - 1, DL, HalfVT));
16561 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
16562}
16563
16564 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
16565 TargetLowering::DAGCombinerInfo &DCI,
16566 const RISCVSubtarget &Subtarget) {
16567 EVT VT = N->getValueType(0);
16568 if (!VT.isVector())
16569 return expandMul(N, DAG, DCI, Subtarget);
16570
16571 SDLoc DL(N);
16572 SDValue N0 = N->getOperand(0);
16573 SDValue N1 = N->getOperand(1);
16574 SDValue MulOper;
16575 unsigned AddSubOpc;
16576
16577 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
16578 // (mul x, add (y, 1)) -> (add x, (mul x, y))
16579 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
16580 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
16581 auto IsAddSubWith1 = [&](SDValue V) -> bool {
16582 AddSubOpc = V->getOpcode();
16583 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
16584 SDValue Opnd = V->getOperand(1);
16585 MulOper = V->getOperand(0);
16586 if (AddSubOpc == ISD::SUB)
16587 std::swap(Opnd, MulOper);
16588 if (isOneOrOneSplat(Opnd))
16589 return true;
16590 }
16591 return false;
16592 };
16593
16594 if (IsAddSubWith1(N0)) {
16595 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
16596 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
16597 }
16598
16599 if (IsAddSubWith1(N1)) {
16600 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
16601 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
16602 }
16603
16604 if (SDValue V = combineBinOpOfZExt(N, DAG))
16605 return V;
16606
16607  if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
16608 return V;
16609
16610 return SDValue();
16611}
16612
16613/// According to the property that indexed load/store instructions zero-extend
16614/// their indices, try to narrow the type of the index operand.
16615static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
16616 if (isIndexTypeSigned(IndexType))
16617 return false;
16618
16619 if (!N->hasOneUse())
16620 return false;
16621
16622 EVT VT = N.getValueType();
16623 SDLoc DL(N);
16624
16625 // In general, what we're doing here is seeing if we can sink a truncate to
16626 // a smaller element type into the expression tree building our index.
16627 // TODO: We can generalize this and handle a bunch more cases if useful.
16628
16629 // Narrow a buildvector to the narrowest element type. This requires less
16630 // work and less register pressure at high LMUL, and creates smaller constants
16631 // which may be cheaper to materialize.
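  // For example, a build_vector of i64 constants that are all <= 255 has
  // countMaxActiveBits() <= 8, so the index vector is truncated to i8
  // elements before being fed to the indexed load/store.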
16632 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
16633 KnownBits Known = DAG.computeKnownBits(N);
16634 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
16635 LLVMContext &C = *DAG.getContext();
16636 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
16637 if (ResultVT.bitsLT(VT.getVectorElementType())) {
16638 N = DAG.getNode(ISD::TRUNCATE, DL,
16639 VT.changeVectorElementType(ResultVT), N);
16640 return true;
16641 }
16642 }
16643
16644 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
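  // For example, an index built as (shl (zext i8 elements to i64), 2) only
  // needs 8 + 2 = 10 bits, so it is rebuilt as (shl (zext to i16), 2) after
  // rounding up to the next power-of-two element width (and at least 8 bits).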
16645 if (N.getOpcode() != ISD::SHL)
16646 return false;
16647
16648 SDValue N0 = N.getOperand(0);
16649 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
16650 N0.getOpcode() != RISCVISD::VZEXT_VL)
16651 return false;
16652 if (!N0->hasOneUse())
16653 return false;
16654
16655 APInt ShAmt;
16656 SDValue N1 = N.getOperand(1);
16657 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
16658 return false;
16659
16660 SDValue Src = N0.getOperand(0);
16661 EVT SrcVT = Src.getValueType();
16662 unsigned SrcElen = SrcVT.getScalarSizeInBits();
16663 unsigned ShAmtV = ShAmt.getZExtValue();
16664 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
16665 NewElen = std::max(NewElen, 8U);
16666
16667 // Skip if NewElen is not narrower than the original extended type.
16668 if (NewElen >= N0.getValueType().getScalarSizeInBits())
16669 return false;
16670
16671 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
16672 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
16673
16674 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
16675 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
16676 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
16677 return true;
16678}
16679
16680/// Try to map an integer comparison with size > XLEN to vector instructions
16681/// before type legalization splits it up into chunks.
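/// For example, on RV64 with V, (seteq i256 X, Y) where X and Y are loads is
/// rewritten as a v32i8 VP_SETCC (SETNE) followed by a VP_REDUCE_OR, and the
/// scalar reduction result is compared against zero instead of splitting the
/// compare into four XLen-sized pieces.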
16682static SDValue
16683combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
16684 const SDLoc &DL, SelectionDAG &DAG,
16685 const RISCVSubtarget &Subtarget) {
16686 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
16687
16688 if (!Subtarget.hasVInstructions())
16689 return SDValue();
16690
16691 MVT XLenVT = Subtarget.getXLenVT();
16692 EVT OpVT = X.getValueType();
16693 // We're looking for an oversized integer equality comparison.
16694 if (!OpVT.isScalarInteger())
16695 return SDValue();
16696
16697 unsigned OpSize = OpVT.getSizeInBits();
16698 // The size should be larger than XLen and smaller than the maximum vector
16699 // size.
16700 if (OpSize <= Subtarget.getXLen() ||
16701 OpSize > Subtarget.getRealMinVLen() *
16702 Subtarget.getMaxLMULForFixedLengthVectors())
16703 return SDValue();
16704
16705 // Don't perform this combine if constructing the vector will be expensive.
16706 auto IsVectorBitCastCheap = [](SDValue X) {
16707    X = peekThroughBitcasts(X);
16708 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
16709 X.getOpcode() == ISD::LOAD;
16710 };
16711 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
16712 return SDValue();
16713
16714  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
16715 Attribute::NoImplicitFloat))
16716 return SDValue();
16717
16718 // Bail out for non-byte-sized types.
16719 if (!OpVT.isByteSized())
16720 return SDValue();
16721
16722 unsigned VecSize = OpSize / 8;
16723 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
16724 EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
16725
16726 SDValue VecX = DAG.getBitcast(VecVT, X);
16727 SDValue VecY = DAG.getBitcast(VecVT, Y);
16728 SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
16729 SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
16730
16731 SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
16732 DAG.getCondCode(ISD::SETNE), Mask, VL);
16733 return DAG.getSetCC(DL, VT,
16734 DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
16735 DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
16736 VL),
16737 DAG.getConstant(0, DL, XLenVT), CC);
16738}
16739
16740static SDValue performSETCCCombine(SDNode *N,
16741 TargetLowering::DAGCombinerInfo &DCI,
16742 const RISCVSubtarget &Subtarget) {
16743 SelectionDAG &DAG = DCI.DAG;
16744 SDLoc dl(N);
16745 SDValue N0 = N->getOperand(0);
16746 SDValue N1 = N->getOperand(1);
16747 EVT VT = N->getValueType(0);
16748 EVT OpVT = N0.getValueType();
16749
16750 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
16751 // Looking for an equality compare.
16752 if (!isIntEqualitySetCC(Cond))
16753 return SDValue();
16754
16755 if (SDValue V =
16756 combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
16757 return V;
16758
16759 if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
16760 N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
16761      isa<ConstantSDNode>(N0.getOperand(1))) {
16762 const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
16763 // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
16764 if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
16765 AndRHSC.isNegatedPowerOf2()) {
16766 unsigned ShiftBits = AndRHSC.countr_zero();
16767 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
16768 DAG.getConstant(ShiftBits, dl, OpVT));
16769 return DAG.getSetCC(dl, VT, Shift, N1, Cond);
16770 }
16771
16772 // Similar to above but handling the lower 32 bits by using sraiw. Allow
16773 // comparing with constants other than 0 if the constant can be folded into
16774 // addi or xori after shifting.
16775 uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
16776 uint64_t AndRHSInt = AndRHSC.getZExtValue();
16777 if (OpVT == MVT::i64 && AndRHSInt <= 0xffffffff &&
16778 isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
16779 unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
16780 int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
16781 if (NewC >= -2048 && NewC <= 2048) {
16782 SDValue SExt =
16783 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
16784 DAG.getValueType(MVT::i32));
16785 SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
16786 DAG.getConstant(ShiftBits, dl, OpVT));
16787 return DAG.getSetCC(dl, VT, Shift,
16788 DAG.getSignedConstant(NewC, dl, OpVT), Cond);
16789 }
16790 }
16791 }
16792
16793 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
16794 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
16795 // bit 31. Same for setne. C1' may be cheaper to materialize and the
16796 // sext_inreg can become a sext.w instead of a shift pair.
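  // For example, (seteq (and X, 0xffffffff), 0x80000000) becomes
  // (seteq (sext_inreg X, i32), 0xffffffff80000000): the new constant is a
  // single LUI and the sext_inreg selects to sext.w.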
16797 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
16798 return SDValue();
16799
16800 // RHS needs to be a constant.
16801 auto *N1C = dyn_cast<ConstantSDNode>(N1);
16802 if (!N1C)
16803 return SDValue();
16804
16805 // LHS needs to be (and X, 0xffffffff).
16806 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
16807      !isa<ConstantSDNode>(N0.getOperand(1)) ||
16808 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
16809 return SDValue();
16810
16811 // Don't do this if the sign bit is provably zero, it will be turned back into
16812 // an AND.
16813 APInt SignMask = APInt::getOneBitSet(64, 31);
16814 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
16815 return SDValue();
16816
16817 const APInt &C1 = N1C->getAPIntValue();
16818
16819 // If the constant is larger than 2^32 - 1 it is impossible for both sides
16820 // to be equal.
16821 if (C1.getActiveBits() > 32)
16822 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
16823
16824 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
16825 N0.getOperand(0), DAG.getValueType(MVT::i32));
16826 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
16827 dl, OpVT), Cond);
16828}
16829
16830static SDValue
16831performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16832 const RISCVSubtarget &Subtarget) {
16833 SelectionDAG &DAG = DCI.DAG;
16834 SDValue Src = N->getOperand(0);
16835 EVT VT = N->getValueType(0);
16836 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16837 unsigned Opc = Src.getOpcode();
16838 SDLoc DL(N);
16839
16840 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
16841 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
16842 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
16843 Subtarget.hasStdExtZfhmin())
16844 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
16845
16846 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
16847 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
16848 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
16849 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
16850 return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
16851 Src.getOperand(1));
16852
16853 // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
16854 if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
16855 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
16856
16857 // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
16858 if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
16859 isAllOnesConstant(Src.getOperand(1)) &&
16860 Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
16861 return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
16862 DAG.getAllOnesConstant(DL, VT));
16863
16864 return SDValue();
16865}
16866
16867namespace {
16868// Forward declaration of the structure holding the necessary information to
16869// apply a combine.
16870struct CombineResult;
16871
16872enum ExtKind : uint8_t {
16873 ZExt = 1 << 0,
16874 SExt = 1 << 1,
16875 FPExt = 1 << 2,
16876 BF16Ext = 1 << 3
16877};
16878/// Helper class for folding sign/zero extensions.
16879/// In particular, this class is used for the following combines:
16880/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
16881/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
16882/// mul | mul_vl -> vwmul(u) | vwmul_su
16883/// shl | shl_vl -> vwsll
16884/// fadd -> vfwadd | vfwadd_w
16885/// fsub -> vfwsub | vfwsub_w
16886/// fmul -> vfwmul
16887/// An object of this class represents an operand of the operation we want to
16888/// combine.
16889/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
16890/// NodeExtensionHelper for `a` and one for `b`.
16891///
16892/// This class abstracts away how the extension is materialized and
16893/// how its number of users affect the combines.
16894///
16895/// In particular:
16896/// - VWADD_W is conceptually == add(op0, sext(op1))
16897/// - VWADDU_W == add(op0, zext(op1))
16898/// - VWSUB_W == sub(op0, sext(op1))
16899/// - VWSUBU_W == sub(op0, zext(op1))
16900/// - VFWADD_W == fadd(op0, fpext(op1))
16901/// - VFWSUB_W == fsub(op0, fpext(op1))
16902/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
16903/// zext|sext(smaller_value).
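/// For example, for (add_vl (vsext_vl a), (vsext_vl b)) with nxv2i16 sources
/// and an nxv2i32 result, both operands report SupportsSExt and the combine
/// below rebuilds the node as (vwadd_vl a, b).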
16904struct NodeExtensionHelper {
16905 /// Records if this operand is like being zero extended.
16906 bool SupportsZExt;
16907 /// Records if this operand is like being sign extended.
16908 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
16909 /// instance, a splat constant (e.g., 3) would support being both sign and
16910 /// zero extended.
16911 bool SupportsSExt;
16912 /// Records if this operand is like being floating point extended.
16913 bool SupportsFPExt;
16914 /// Records if this operand is extended from bf16.
16915 bool SupportsBF16Ext;
16916 /// This boolean captures whether we care if this operand would still be
16917 /// around after the folding happens.
16918 bool EnforceOneUse;
16919 /// Original value that this NodeExtensionHelper represents.
16920 SDValue OrigOperand;
16921
16922 /// Get the value feeding the extension or the value itself.
16923 /// E.g., for zext(a), this would return a.
16924 SDValue getSource() const {
16925 switch (OrigOperand.getOpcode()) {
16926 case ISD::ZERO_EXTEND:
16927 case ISD::SIGN_EXTEND:
16928 case RISCVISD::VSEXT_VL:
16929 case RISCVISD::VZEXT_VL:
16930 case RISCVISD::FP_EXTEND_VL:
16931 return OrigOperand.getOperand(0);
16932 default:
16933 return OrigOperand;
16934 }
16935 }
16936
16937 /// Check if this instance represents a splat.
16938 bool isSplat() const {
16939 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
16940 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
16941 }
16942
16943 /// Get the extended opcode.
16944 unsigned getExtOpc(ExtKind SupportsExt) const {
16945 switch (SupportsExt) {
16946 case ExtKind::SExt:
16947 return RISCVISD::VSEXT_VL;
16948 case ExtKind::ZExt:
16949 return RISCVISD::VZEXT_VL;
16950 case ExtKind::FPExt:
16951 case ExtKind::BF16Ext:
16952 return RISCVISD::FP_EXTEND_VL;
16953 }
16954 llvm_unreachable("Unknown ExtKind enum");
16955 }
16956
16957 /// Get or create a value that can feed \p Root with the given extension \p
16958 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
16959 /// operand. \see ::getSource().
16960 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
16961 const RISCVSubtarget &Subtarget,
16962 std::optional<ExtKind> SupportsExt) const {
16963 if (!SupportsExt.has_value())
16964 return OrigOperand;
16965
16966 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
16967
16968 SDValue Source = getSource();
16969 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
16970 if (Source.getValueType() == NarrowVT)
16971 return Source;
16972
16973 unsigned ExtOpc = getExtOpc(*SupportsExt);
16974
16975 // If we need an extension, we should be changing the type.
16976 SDLoc DL(OrigOperand);
16977 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
16978 switch (OrigOperand.getOpcode()) {
16979 case ISD::ZERO_EXTEND:
16980 case ISD::SIGN_EXTEND:
16981 case RISCVISD::VSEXT_VL:
16982 case RISCVISD::VZEXT_VL:
16983 case RISCVISD::FP_EXTEND_VL:
16984 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
16985 case ISD::SPLAT_VECTOR:
16986 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
16987 case RISCVISD::VMV_V_X_VL:
16988 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
16989 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
16990 case RISCVISD::VFMV_V_F_VL:
16991 Source = Source.getOperand(1);
16992 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
16993 Source = Source.getOperand(0);
16994 assert(Source.getValueType() == NarrowVT.getVectorElementType());
16995 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
16996 DAG.getUNDEF(NarrowVT), Source, VL);
16997 default:
16998 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
16999 // and that operand should already have the right NarrowVT so no
17000 // extension should be required at this point.
17001 llvm_unreachable("Unsupported opcode");
17002 }
17003 }
17004
17005 /// Helper function to get the narrow type for \p Root.
17006 /// The narrow type is the type of \p Root where we divided the size of each
17007 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
17008 /// \pre Both the narrow type and the original type should be legal.
17009 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
17010 MVT VT = Root->getSimpleValueType(0);
17011
17012 // Determine the narrow size.
17013 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17014
17015 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
17016 : SupportsExt == ExtKind::FPExt
17017 ? MVT::getFloatingPointVT(NarrowSize)
17018 : MVT::getIntegerVT(NarrowSize);
17019
17020 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
17021 "Trying to extend something we can't represent");
17022 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
17023 return NarrowVT;
17024 }
17025
17026 /// Get the opcode to materialize:
17027 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
17028 static unsigned getSExtOpcode(unsigned Opcode) {
17029 switch (Opcode) {
17030 case ISD::ADD:
17031 case RISCVISD::ADD_VL:
17032 case RISCVISD::VWADD_W_VL:
17033 case RISCVISD::VWADDU_W_VL:
17034 case ISD::OR:
17035 case RISCVISD::OR_VL:
17036 return RISCVISD::VWADD_VL;
17037 case ISD::SUB:
17038 case RISCVISD::SUB_VL:
17039 case RISCVISD::VWSUB_W_VL:
17040 case RISCVISD::VWSUBU_W_VL:
17041 return RISCVISD::VWSUB_VL;
17042 case ISD::MUL:
17043 case RISCVISD::MUL_VL:
17044 return RISCVISD::VWMUL_VL;
17045 default:
17046 llvm_unreachable("Unexpected opcode");
17047 }
17048 }
17049
17050 /// Get the opcode to materialize:
17051 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
17052 static unsigned getZExtOpcode(unsigned Opcode) {
17053 switch (Opcode) {
17054 case ISD::ADD:
17055 case RISCVISD::ADD_VL:
17056 case RISCVISD::VWADD_W_VL:
17057 case RISCVISD::VWADDU_W_VL:
17058 case ISD::OR:
17059 case RISCVISD::OR_VL:
17060 return RISCVISD::VWADDU_VL;
17061 case ISD::SUB:
17062 case RISCVISD::SUB_VL:
17063 case RISCVISD::VWSUB_W_VL:
17064 case RISCVISD::VWSUBU_W_VL:
17065 return RISCVISD::VWSUBU_VL;
17066 case ISD::MUL:
17067 case RISCVISD::MUL_VL:
17068 return RISCVISD::VWMULU_VL;
17069 case ISD::SHL:
17070 case RISCVISD::SHL_VL:
17071 return RISCVISD::VWSLL_VL;
17072 default:
17073 llvm_unreachable("Unexpected opcode");
17074 }
17075 }
17076
17077 /// Get the opcode to materialize:
17078 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
17079 static unsigned getFPExtOpcode(unsigned Opcode) {
17080 switch (Opcode) {
17081 case RISCVISD::FADD_VL:
17082 case RISCVISD::VFWADD_W_VL:
17083 return RISCVISD::VFWADD_VL;
17084 case RISCVISD::FSUB_VL:
17085 case RISCVISD::VFWSUB_W_VL:
17086 return RISCVISD::VFWSUB_VL;
17087 case RISCVISD::FMUL_VL:
17088 return RISCVISD::VFWMUL_VL;
17089 case RISCVISD::VFMADD_VL:
17090 return RISCVISD::VFWMADD_VL;
17091 case RISCVISD::VFMSUB_VL:
17092 return RISCVISD::VFWMSUB_VL;
17093 case RISCVISD::VFNMADD_VL:
17094 return RISCVISD::VFWNMADD_VL;
17095 case RISCVISD::VFNMSUB_VL:
17096 return RISCVISD::VFWNMSUB_VL;
17097 default:
17098 llvm_unreachable("Unexpected opcode");
17099 }
17100 }
17101
17102 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
17103 /// newOpcode(a, b).
17104 static unsigned getSUOpcode(unsigned Opcode) {
17105 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
17106 "SU is only supported for MUL");
17107 return RISCVISD::VWMULSU_VL;
17108 }
17109
17110 /// Get the opcode to materialize
17111 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
17112 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
17113 switch (Opcode) {
17114 case ISD::ADD:
17115 case RISCVISD::ADD_VL:
17116 case ISD::OR:
17117 case RISCVISD::OR_VL:
17118 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
17119 : RISCVISD::VWADDU_W_VL;
17120 case ISD::SUB:
17121 case RISCVISD::SUB_VL:
17122 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
17123 : RISCVISD::VWSUBU_W_VL;
17124 case RISCVISD::FADD_VL:
17125 return RISCVISD::VFWADD_W_VL;
17126 case RISCVISD::FSUB_VL:
17127 return RISCVISD::VFWSUB_W_VL;
17128 default:
17129 llvm_unreachable("Unexpected opcode");
17130 }
17131 }
17132
17133 using CombineToTry = std::function<std::optional<CombineResult>(
17134 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
17135 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
17136 const RISCVSubtarget &)>;
17137
17138 /// Check if this node needs to be fully folded or extended for all users.
17139 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
17140
17141 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
17142 const RISCVSubtarget &Subtarget) {
17143 unsigned Opc = OrigOperand.getOpcode();
17144 MVT VT = OrigOperand.getSimpleValueType();
17145
17146 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
17147 "Unexpected Opcode");
17148
17149 // The passthru must be undef for tail agnostic.
17150 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
17151 return;
17152
17153 // Get the scalar value.
17154 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
17155 : OrigOperand.getOperand(1);
17156
17157 // See if we have enough sign bits or zero bits in the scalar to use a
17158 // widening opcode by splatting to smaller element size.
17159 unsigned EltBits = VT.getScalarSizeInBits();
17160 unsigned ScalarBits = Op.getValueSizeInBits();
17161 // If we're not getting all bits from the element, we need special handling.
17162 if (ScalarBits < EltBits) {
17163 // This should only occur on RV32.
17164 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
17165 !Subtarget.is64Bit() && "Unexpected splat");
17166 // vmv.v.x sign extends narrow inputs.
17167 SupportsSExt = true;
17168
17169 // If the input is positive, then sign extend is also zero extend.
17170 if (DAG.SignBitIsZero(Op))
17171 SupportsZExt = true;
17172
17173 EnforceOneUse = false;
17174 return;
17175 }
17176
17177 unsigned NarrowSize = EltBits / 2;
17178 // If the narrow type cannot be expressed with a legal VMV,
17179 // this is not a valid candidate.
17180 if (NarrowSize < 8)
17181 return;
17182
17183 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
17184 SupportsSExt = true;
17185
17186 if (DAG.MaskedValueIsZero(Op,
17187 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
17188 SupportsZExt = true;
17189
17190 EnforceOneUse = false;
17191 }
17192
17193 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17194 return (NarrowEltVT == MVT::f32 ||
17195 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
17196 }
17197
17198 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17199 return NarrowEltVT == MVT::bf16 && Subtarget.hasStdExtZvfbfwma();
17200 }
17201
17202 /// Helper method to set the various fields of this struct based on the
17203 /// type of \p Root.
17204 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
17205 const RISCVSubtarget &Subtarget) {
17206 SupportsZExt = false;
17207 SupportsSExt = false;
17208 SupportsFPExt = false;
17209 SupportsBF16Ext = false;
17210 EnforceOneUse = true;
17211 unsigned Opc = OrigOperand.getOpcode();
17212 // For the nodes we handle below, we end up using their inputs directly: see
17213 // getSource(). However since they either don't have a passthru or we check
17214 // that their passthru is undef, we can safely ignore their mask and VL.
17215 switch (Opc) {
17216 case ISD::ZERO_EXTEND:
17217 case ISD::SIGN_EXTEND: {
17218 MVT VT = OrigOperand.getSimpleValueType();
17219 if (!VT.isVector())
17220 break;
17221
17222 SDValue NarrowElt = OrigOperand.getOperand(0);
17223 MVT NarrowVT = NarrowElt.getSimpleValueType();
17224 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
17225 if (NarrowVT.getVectorElementType() == MVT::i1)
17226 break;
17227
17228 SupportsZExt = Opc == ISD::ZERO_EXTEND;
17229 SupportsSExt = Opc == ISD::SIGN_EXTEND;
17230 break;
17231 }
17232 case RISCVISD::VZEXT_VL:
17233 SupportsZExt = true;
17234 break;
17235 case RISCVISD::VSEXT_VL:
17236 SupportsSExt = true;
17237 break;
17238 case RISCVISD::FP_EXTEND_VL: {
17239 MVT NarrowEltVT =
17240          OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
17241 if (isSupportedFPExtend(NarrowEltVT, Subtarget))
17242 SupportsFPExt = true;
17243 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
17244 SupportsBF16Ext = true;
17245
17246 break;
17247 }
17248 case ISD::SPLAT_VECTOR:
17249 case RISCVISD::VMV_V_X_VL:
17250 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
17251 break;
17252 case RISCVISD::VFMV_V_F_VL: {
17253 MVT VT = OrigOperand.getSimpleValueType();
17254
17255 if (!OrigOperand.getOperand(0).isUndef())
17256 break;
17257
17258 SDValue Op = OrigOperand.getOperand(1);
17259 if (Op.getOpcode() != ISD::FP_EXTEND)
17260 break;
17261
17262 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17263 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
17264 if (NarrowSize != ScalarBits)
17265 break;
17266
17267 if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
17268 SupportsFPExt = true;
17269 if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
17270 Subtarget))
17271 SupportsBF16Ext = true;
17272 break;
17273 }
17274 default:
17275 break;
17276 }
17277 }
17278
17279 /// Check if \p Root supports any extension folding combines.
17280 static bool isSupportedRoot(const SDNode *Root,
17281 const RISCVSubtarget &Subtarget) {
17282 switch (Root->getOpcode()) {
17283 case ISD::ADD:
17284 case ISD::SUB:
17285 case ISD::MUL: {
17286 return Root->getValueType(0).isScalableVector();
17287 }
17288 case ISD::OR: {
17289 return Root->getValueType(0).isScalableVector() &&
17290 Root->getFlags().hasDisjoint();
17291 }
17292 // Vector Widening Integer Add/Sub/Mul Instructions
17293 case RISCVISD::ADD_VL:
17294 case RISCVISD::MUL_VL:
17295 case RISCVISD::VWADD_W_VL:
17296 case RISCVISD::VWADDU_W_VL:
17297 case RISCVISD::SUB_VL:
17298 case RISCVISD::VWSUB_W_VL:
17299 case RISCVISD::VWSUBU_W_VL:
17300 // Vector Widening Floating-Point Add/Sub/Mul Instructions
17301 case RISCVISD::FADD_VL:
17302 case RISCVISD::FSUB_VL:
17303 case RISCVISD::FMUL_VL:
17304 case RISCVISD::VFWADD_W_VL:
17305 case RISCVISD::VFWSUB_W_VL:
17306 return true;
17307 case RISCVISD::OR_VL:
17308 return Root->getFlags().hasDisjoint();
17309 case ISD::SHL:
17310 return Root->getValueType(0).isScalableVector() &&
17311 Subtarget.hasStdExtZvbb();
17312 case RISCVISD::SHL_VL:
17313 return Subtarget.hasStdExtZvbb();
17314 case RISCVISD::VFMADD_VL:
17315 case RISCVISD::VFNMSUB_VL:
17316 case RISCVISD::VFNMADD_VL:
17317 case RISCVISD::VFMSUB_VL:
17318 return true;
17319 default:
17320 return false;
17321 }
17322 }
17323
17324 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
17325 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
17326 const RISCVSubtarget &Subtarget) {
17327 assert(isSupportedRoot(Root, Subtarget) &&
17328 "Trying to build an helper with an "
17329 "unsupported root");
17330 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
17332 OrigOperand = Root->getOperand(OperandIdx);
17333
17334 unsigned Opc = Root->getOpcode();
17335 switch (Opc) {
17336 // We consider
17337 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
17338 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
17339 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
17340 case RISCVISD::VWADD_W_VL:
17341 case RISCVISD::VWADDU_W_VL:
17342 case RISCVISD::VWSUB_W_VL:
17343 case RISCVISD::VWSUBU_W_VL:
17344 case RISCVISD::VFWADD_W_VL:
17345 case RISCVISD::VFWSUB_W_VL:
17346 if (OperandIdx == 1) {
17347 SupportsZExt =
17348 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
17349 SupportsSExt =
17350 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
17351 SupportsFPExt =
17352 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
17353 // There's no existing extension here, so we don't have to worry about
17354 // making sure it gets removed.
17355 EnforceOneUse = false;
17356 break;
17357 }
17358 [[fallthrough]];
17359 default:
17360 fillUpExtensionSupport(Root, DAG, Subtarget);
17361 break;
17362 }
17363 }
17364
17365 /// Helper function to get the Mask and VL from \p Root.
17366 static std::pair<SDValue, SDValue>
17367 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
17368 const RISCVSubtarget &Subtarget) {
17369 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
17370 switch (Root->getOpcode()) {
17371 case ISD::ADD:
17372 case ISD::SUB:
17373 case ISD::MUL:
17374 case ISD::OR:
17375 case ISD::SHL: {
17376 SDLoc DL(Root);
17377 MVT VT = Root->getSimpleValueType(0);
17378 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
17379 }
17380 default:
17381 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
17382 }
17383 }
17384
17385 /// Helper function to check if \p N is commutative with respect to the
17386 /// foldings that are supported by this class.
17387 static bool isCommutative(const SDNode *N) {
17388 switch (N->getOpcode()) {
17389 case ISD::ADD:
17390 case ISD::MUL:
17391 case ISD::OR:
17392 case RISCVISD::ADD_VL:
17393 case RISCVISD::MUL_VL:
17394 case RISCVISD::OR_VL:
17395 case RISCVISD::VWADD_W_VL:
17396 case RISCVISD::VWADDU_W_VL:
17397 case RISCVISD::FADD_VL:
17398 case RISCVISD::FMUL_VL:
17399 case RISCVISD::VFWADD_W_VL:
17400 case RISCVISD::VFMADD_VL:
17401 case RISCVISD::VFNMSUB_VL:
17402 case RISCVISD::VFNMADD_VL:
17403 case RISCVISD::VFMSUB_VL:
17404 return true;
17405 case ISD::SUB:
17406 case RISCVISD::SUB_VL:
17407 case RISCVISD::VWSUB_W_VL:
17408 case RISCVISD::VWSUBU_W_VL:
17409 case RISCVISD::FSUB_VL:
17410 case RISCVISD::VFWSUB_W_VL:
17411 case ISD::SHL:
17412 case RISCVISD::SHL_VL:
17413 return false;
17414 default:
17415 llvm_unreachable("Unexpected opcode");
17416 }
17417 }
17418
17419 /// Get a list of combine to try for folding extensions in \p Root.
17420 /// Note that each returned CombineToTry function doesn't actually modify
17421 /// anything. Instead they produce an optional CombineResult that if not None,
17422 /// need to be materialized for the combine to be applied.
17423 /// \see CombineResult::materialize.
17424 /// If the related CombineToTry function returns std::nullopt, that means the
17425 /// combine didn't match.
17426 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
17427};
17428
17429/// Helper structure that holds all the necessary information to materialize a
17430/// combine that does some extension folding.
17431struct CombineResult {
17432 /// Opcode to be generated when materializing the combine.
17433 unsigned TargetOpcode;
17434 // No value means no extension is needed.
17435 std::optional<ExtKind> LHSExt;
17436 std::optional<ExtKind> RHSExt;
17437 /// Root of the combine.
17438 SDNode *Root;
17439 /// LHS of the TargetOpcode.
17440 NodeExtensionHelper LHS;
17441 /// RHS of the TargetOpcode.
17442 NodeExtensionHelper RHS;
17443
17444 CombineResult(unsigned TargetOpcode, SDNode *Root,
17445 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
17446 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
17447 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
17448 LHS(LHS), RHS(RHS) {}
17449
17450 /// Return a value that uses TargetOpcode and that can be used to replace
17451 /// Root.
17452 /// The actual replacement is *not* done in that method.
17453 SDValue materialize(SelectionDAG &DAG,
17454 const RISCVSubtarget &Subtarget) const {
17455 SDValue Mask, VL, Passthru;
17456 std::tie(Mask, VL) =
17457 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
17458 switch (Root->getOpcode()) {
17459 default:
17460 Passthru = Root->getOperand(2);
17461 break;
17462 case ISD::ADD:
17463 case ISD::SUB:
17464 case ISD::MUL:
17465 case ISD::OR:
17466 case ISD::SHL:
17467 Passthru = DAG.getUNDEF(Root->getValueType(0));
17468 break;
17469 }
17470 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
17471 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
17472 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
17473 Passthru, Mask, VL);
17474 }
17475};
17476
17477/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17478/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17479/// are zext) and LHS and RHS can be folded into Root.
17480/// AllowExtMask define which form `ext` can take in this pattern.
17481///
17482/// \note If the pattern can match with both zext and sext, the returned
17483/// CombineResult will feature the zext result.
17484///
17485/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17486/// can be used to apply the pattern.
17487static std::optional<CombineResult>
17488canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
17489 const NodeExtensionHelper &RHS,
17490 uint8_t AllowExtMask, SelectionDAG &DAG,
17491 const RISCVSubtarget &Subtarget) {
17492 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
17493 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17494 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17495 /*RHSExt=*/{ExtKind::ZExt});
17496 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
17497 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17498 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17499 /*RHSExt=*/{ExtKind::SExt});
17500 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
17501 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17502 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17503 /*RHSExt=*/{ExtKind::FPExt});
17504 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
17505 RHS.SupportsBF16Ext)
17506 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17507 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
17508 /*RHSExt=*/{ExtKind::BF16Ext});
17509 return std::nullopt;
17510}
17511
17512/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17513/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17514/// are zext) and LHS and RHS can be folded into Root.
17515///
17516/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17517/// can be used to apply the pattern.
17518static std::optional<CombineResult>
17519canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
17520 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17521 const RISCVSubtarget &Subtarget) {
17522 return canFoldToVWWithSameExtensionImpl(
17523 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
17524 Subtarget);
17525}
17526
17527/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
17528///
17529/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17530/// can be used to apply the pattern.
17531static std::optional<CombineResult>
17532canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
17533 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17534 const RISCVSubtarget &Subtarget) {
17535 if (RHS.SupportsFPExt)
17536 return CombineResult(
17537 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
17538 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
17539
17540 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
17541 // sext/zext?
17542 // Control this behavior behind an option (AllowSplatInVW_W) for testing
17543 // purposes.
17544 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
17545 return CombineResult(
17546 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
17547 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
17548 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
17549 return CombineResult(
17550 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
17551 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
17552 return std::nullopt;
17553}
17554
17555/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
17556///
17557/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17558/// can be used to apply the pattern.
17559static std::optional<CombineResult>
17560canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17561 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17562 const RISCVSubtarget &Subtarget) {
17563 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
17564 Subtarget);
17565}
17566
17567/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
17568///
17569/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17570/// can be used to apply the pattern.
17571static std::optional<CombineResult>
17572canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17573 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17574 const RISCVSubtarget &Subtarget) {
17575 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
17576 Subtarget);
17577}
17578
17579/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
17580///
17581/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17582/// can be used to apply the pattern.
17583static std::optional<CombineResult>
17584canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17585 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17586 const RISCVSubtarget &Subtarget) {
17587 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
17588 Subtarget);
17589}
17590
17591/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
17592///
17593/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17594/// can be used to apply the pattern.
17595static std::optional<CombineResult>
17596canFoldToVWWithBF16EXT(SDNode *Root, const NodeExtensionHelper &LHS,
17597 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17598 const RISCVSubtarget &Subtarget) {
17599 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
17600 Subtarget);
17601}
17602
17603/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
17604///
17605/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17606/// can be used to apply the pattern.
17607static std::optional<CombineResult>
17608canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
17609 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17610 const RISCVSubtarget &Subtarget) {
17611
17612 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
17613 return std::nullopt;
17614 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
17615 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17616 /*RHSExt=*/{ExtKind::ZExt});
17617}
17618
17619SmallVector<NodeExtensionHelper::CombineToTry>
17620NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
17621 SmallVector<CombineToTry> Strategies;
17622 switch (Root->getOpcode()) {
17623 case ISD::ADD:
17624 case ISD::SUB:
17625 case ISD::OR:
17626 case RISCVISD::ADD_VL:
17627 case RISCVISD::SUB_VL:
17628 case RISCVISD::OR_VL:
17629 case RISCVISD::FADD_VL:
17630 case RISCVISD::FSUB_VL:
17631 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
17632 Strategies.push_back(canFoldToVWWithSameExtension);
17633 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
17634 Strategies.push_back(canFoldToVW_W);
17635 break;
17636 case RISCVISD::FMUL_VL:
17637 case RISCVISD::VFMADD_VL:
17638 case RISCVISD::VFMSUB_VL:
17639 case RISCVISD::VFNMADD_VL:
17640 case RISCVISD::VFNMSUB_VL:
17641 Strategies.push_back(canFoldToVWWithSameExtension);
17642 if (Root->getOpcode() == RISCVISD::VFMADD_VL)
17643 Strategies.push_back(canFoldToVWWithBF16EXT);
17644 break;
17645 case ISD::MUL:
17646 case RISCVISD::MUL_VL:
17647 // mul -> vwmul(u)
17648 Strategies.push_back(canFoldToVWWithSameExtension);
17649 // mul -> vwmulsu
17650 Strategies.push_back(canFoldToVW_SU);
17651 break;
17652 case ISD::SHL:
17653 case RISCVISD::SHL_VL:
17654 // shl -> vwsll
17655 Strategies.push_back(canFoldToVWWithZEXT);
17656 break;
17657 case RISCVISD::VWADD_W_VL:
17658 case RISCVISD::VWSUB_W_VL:
17659 // vwadd_w|vwsub_w -> vwadd|vwsub
17660 Strategies.push_back(canFoldToVWWithSEXT);
17661 break;
17662 case RISCVISD::VWADDU_W_VL:
17663 case RISCVISD::VWSUBU_W_VL:
17664 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
17665 Strategies.push_back(canFoldToVWWithZEXT);
17666 break;
17667 case RISCVISD::VFWADD_W_VL:
17668 case RISCVISD::VFWSUB_W_VL:
17669 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
17670 Strategies.push_back(canFoldToVWWithFPEXT);
17671 break;
17672 default:
17673 llvm_unreachable("Unexpected opcode");
17674 }
17675 return Strategies;
17676}
17677} // End anonymous namespace.
17678
17680 // TODO: Extend this to other binops using generic identity logic
17681 assert(N->getOpcode() == RISCVISD::ADD_VL);
17682 SDValue A = N->getOperand(0);
17683 SDValue B = N->getOperand(1);
17684 SDValue Passthru = N->getOperand(2);
17685 if (!Passthru.isUndef())
17686 // TODO: This could be a vmerge instead
17687 return SDValue();
17688 ;
17689  if (ISD::isConstantSplatVectorAllZeros(B.getNode()))
17690 return A;
17691 // Peek through fixed to scalable
17692 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
17693 ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
17694 return A;
17695 return SDValue();
17696}
17697
17698/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
17699/// The supported combines are:
17700/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
17701/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17702/// mul | mul_vl -> vwmul(u) | vwmul_su
17703/// shl | shl_vl -> vwsll
17704/// fadd_vl -> vfwadd | vfwadd_w
17705/// fsub_vl -> vfwsub | vfwsub_w
17706/// fmul_vl -> vfwmul
17707/// vwadd_w(u) -> vwadd(u)
17708/// vwsub_w(u) -> vwsub(u)
17709/// vfwadd_w -> vfwadd
17710/// vfwsub_w -> vfwsub
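/// For example, (mul_vl (vzext_vl a), (vzext_vl b)) becomes (vwmulu_vl a, b),
/// while (mul_vl (vsext_vl a), (vzext_vl b)) becomes (vwmulsu_vl a, b).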
17711static SDValue combineOp_VLToVWOp_VL(SDNode *N,
17712 TargetLowering::DAGCombinerInfo &DCI,
17713 const RISCVSubtarget &Subtarget) {
17714 SelectionDAG &DAG = DCI.DAG;
17715 if (DCI.isBeforeLegalize())
17716 return SDValue();
17717
17718 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
17719 return SDValue();
17720
17721 SmallVector<SDNode *> Worklist;
17722 SmallPtrSet<SDNode *, 8> Inserted;
17723 Worklist.push_back(N);
17724 Inserted.insert(N);
17725 SmallVector<CombineResult> CombinesToApply;
17726
17727 while (!Worklist.empty()) {
17728 SDNode *Root = Worklist.pop_back_val();
17729
17730 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
17731 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
17732 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
17733 &Inserted](const NodeExtensionHelper &Op) {
17734 if (Op.needToPromoteOtherUsers()) {
17735 for (SDUse &Use : Op.OrigOperand->uses()) {
17736 SDNode *TheUser = Use.getUser();
17737 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
17738 return false;
17739 // We only support the first 2 operands of FMA.
17740 if (Use.getOperandNo() >= 2)
17741 return false;
17742 if (Inserted.insert(TheUser).second)
17743 Worklist.push_back(TheUser);
17744 }
17745 }
17746 return true;
17747 };
17748
17749 // Control the compile time by limiting the number of nodes we look at in
17750 // total.
17751 if (Inserted.size() > ExtensionMaxWebSize)
17752 return SDValue();
17753
17754    SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
17755 NodeExtensionHelper::getSupportedFoldings(Root);
17756
17757 assert(!FoldingStrategies.empty() && "Nothing to be folded");
17758 bool Matched = false;
17759 for (int Attempt = 0;
17760 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
17761 ++Attempt) {
17762
17763 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
17764 FoldingStrategies) {
17765 std::optional<CombineResult> Res =
17766 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
17767 if (Res) {
17768 Matched = true;
17769 CombinesToApply.push_back(*Res);
17770 // All the inputs that are extended need to be folded, otherwise
17771 // we would be leaving the old input (since it may still be used),
17772 // and the new one.
17773 if (Res->LHSExt.has_value())
17774 if (!AppendUsersIfNeeded(LHS))
17775 return SDValue();
17776 if (Res->RHSExt.has_value())
17777 if (!AppendUsersIfNeeded(RHS))
17778 return SDValue();
17779 break;
17780 }
17781 }
17782 std::swap(LHS, RHS);
17783 }
17784 // Right now we do an all or nothing approach.
17785 if (!Matched)
17786 return SDValue();
17787 }
17788 // Store the value for the replacement of the input node separately.
17789 SDValue InputRootReplacement;
17790 // We do the RAUW after we materialize all the combines, because some replaced
17791 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
17792 // some of these nodes may appear in the NodeExtensionHelpers of some of the
17793 // yet-to-be-visited CombinesToApply roots.
17794  SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
17795 ValuesToReplace.reserve(CombinesToApply.size());
17796 for (CombineResult Res : CombinesToApply) {
17797 SDValue NewValue = Res.materialize(DAG, Subtarget);
17798 if (!InputRootReplacement) {
17799 assert(Res.Root == N &&
17800 "First element is expected to be the current node");
17801 InputRootReplacement = NewValue;
17802 } else {
17803 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
17804 }
17805 }
17806 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
17807 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
17808 DCI.AddToWorklist(OldNewValues.second.getNode());
17809 }
17810 return InputRootReplacement;
17811}
17812
17813// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
17814// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
17815// y will be the Passthru and cond will be the Mask.
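// For example, (vwadd.wv y, (vselect cond, x, 0)) is rewritten as a masked
// vwadd.wv with passthru y and mask cond: lanes where cond is false keep y,
// which is exactly y + 0.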
17816static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
17817 unsigned Opc = N->getOpcode();
17818 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17819 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17820
17821 SDValue Y = N->getOperand(0);
17822 SDValue MergeOp = N->getOperand(1);
17823 unsigned MergeOpc = MergeOp.getOpcode();
17824
17825 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
17826 return SDValue();
17827
17828 SDValue X = MergeOp->getOperand(1);
17829
17830 if (!MergeOp.hasOneUse())
17831 return SDValue();
17832
17833 // Passthru should be undef
17834 SDValue Passthru = N->getOperand(2);
17835 if (!Passthru.isUndef())
17836 return SDValue();
17837
17838 // Mask should be all ones
17839 SDValue Mask = N->getOperand(3);
17840 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
17841 return SDValue();
17842
17843 // False value of MergeOp should be all zeros
17844 SDValue Z = MergeOp->getOperand(2);
17845
17846 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
17847 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
17848 Z = Z.getOperand(1);
17849
17850 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
17851 return SDValue();
17852
17853 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
17854 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
17855 N->getFlags());
17856}
17857
17858static SDValue performVWADDSUBW_VLCombine(SDNode *N,
17859 TargetLowering::DAGCombinerInfo &DCI,
17860 const RISCVSubtarget &Subtarget) {
17861 [[maybe_unused]] unsigned Opc = N->getOpcode();
17862 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17863 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17864
17865 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17866 return V;
17867
17868 return combineVWADDSUBWSelect(N, DCI.DAG);
17869}
17870
17871// Helper function for performMemPairCombine.
17872// Try to combine the memory loads/stores LSNode1 and LSNode2
17873// into a single memory pair operation.
17874static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
17875 LSBaseSDNode *LSNode2, SDValue BasePtr,
17876 uint64_t Imm) {
17877  SmallPtrSet<const SDNode *, 32> Visited;
17878 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
17879
17880 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
17881 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
17882 return SDValue();
17883
17884  MachineFunction &MF = DAG.getMachineFunction();
17885 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17886
17887 // The new operation has twice the width.
17888 MVT XLenVT = Subtarget.getXLenVT();
17889 EVT MemVT = LSNode1->getMemoryVT();
17890 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
17891 MachineMemOperand *MMO = LSNode1->getMemOperand();
17892  MachineMemOperand *NewMMO = MF.getMachineMemOperand(
17893 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
17894
17895 if (LSNode1->getOpcode() == ISD::LOAD) {
17896 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
17897 unsigned Opcode;
17898 if (MemVT == MVT::i32)
17899 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
17900 else
17901 Opcode = RISCVISD::TH_LDD;
17902
17903 SDValue Res = DAG.getMemIntrinsicNode(
17904 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
17905 {LSNode1->getChain(), BasePtr,
17906 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
17907 NewMemVT, NewMMO);
17908
17909 SDValue Node1 =
17910 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
17911 SDValue Node2 =
17912 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
17913
17914 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
17915 return Node1;
17916 } else {
17917 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
17918
17919 SDValue Res = DAG.getMemIntrinsicNode(
17920 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
17921 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
17922 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
17923 NewMemVT, NewMMO);
17924
17925 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
17926 return Res;
17927 }
17928}
17929
17930// Try to combine two adjacent loads/stores to a single pair instruction from
17931// the XTHeadMemPair vendor extension.
17932static SDValue performMemPairCombine(SDNode *N,
17933 TargetLowering::DAGCombinerInfo &DCI) {
17934 SelectionDAG &DAG = DCI.DAG;
17935  MachineFunction &MF = DAG.getMachineFunction();
17936 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17937
17938 // Target does not support load/store pair.
17939 if (!Subtarget.hasVendorXTHeadMemPair())
17940 return SDValue();
17941
17942 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
17943 EVT MemVT = LSNode1->getMemoryVT();
17944 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
17945
17946 // No volatile, indexed or atomic loads/stores.
17947 if (!LSNode1->isSimple() || LSNode1->isIndexed())
17948 return SDValue();
17949
17950 // Function to get a base + constant representation from a memory value.
17951 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
17952 if (Ptr->getOpcode() == ISD::ADD)
17953 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
17954 return {Ptr->getOperand(0), C1->getZExtValue()};
17955 return {Ptr, 0};
17956 };
17957
17958 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
17959
17960 SDValue Chain = N->getOperand(0);
17961 for (SDUse &Use : Chain->uses()) {
17962 if (Use.getUser() != N && Use.getResNo() == 0 &&
17963 Use.getUser()->getOpcode() == N->getOpcode()) {
17964      LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
17965
17966 // No volatile, indexed or atomic loads/stores.
17967 if (!LSNode2->isSimple() || LSNode2->isIndexed())
17968 continue;
17969
17970 // Check if LSNode1 and LSNode2 have the same type and extension.
17971 if (LSNode1->getOpcode() == ISD::LOAD)
17972 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
17973            cast<LoadSDNode>(LSNode1)->getExtensionType())
17974 continue;
17975
17976 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
17977 continue;
17978
17979 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
17980
17981 // Check if the base pointer is the same for both instructions.
17982 if (Base1 != Base2)
17983 continue;
17984
17985 // Check if the offsets match the XTHeadMemPair encoding constraints.
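      // For example, two i64 accesses at base+16 and base+24 pair up:
      // Offset1 = 16 is a 2-bit index (1) shifted left by 4, so
      // isShiftedUInt<2, 4>(16) holds and Offset2 is the adjacent slot.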
17986 bool Valid = false;
17987 if (MemVT == MVT::i32) {
17988 // Check for adjacent i32 values and a 2-bit index.
17989 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
17990 Valid = true;
17991 } else if (MemVT == MVT::i64) {
17992 // Check for adjacent i64 values and a 2-bit index.
17993 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
17994 Valid = true;
17995 }
17996
17997 if (!Valid)
17998 continue;
17999
18000 // Try to combine.
18001 if (SDValue Res =
18002 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
18003 return Res;
18004 }
18005 }
18006
18007 return SDValue();
18008}
18009
18010// Fold
18011// (fp_to_int (froundeven X)) -> fcvt X, rne
18012// (fp_to_int (ftrunc X)) -> fcvt X, rtz
18013// (fp_to_int (ffloor X)) -> fcvt X, rdn
18014// (fp_to_int (fceil X)) -> fcvt X, rup
18015// (fp_to_int (fround X)) -> fcvt X, rmm
18016// (fp_to_int (frint X)) -> fcvt X
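// For example, on RV64 (fp_to_sint (ffloor X:f64)) to i64 becomes a single
// FCVT_X node with the rdn static rounding mode, so no separate ffloor and
// no fsrmi/fsrm pair is emitted.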
18017static SDValue performFP_TO_INTCombine(SDNode *N,
18018 TargetLowering::DAGCombinerInfo &DCI,
18019 const RISCVSubtarget &Subtarget) {
18020 SelectionDAG &DAG = DCI.DAG;
18021 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18022 MVT XLenVT = Subtarget.getXLenVT();
18023
18024 SDValue Src = N->getOperand(0);
18025
18026 // Don't do this for strict-fp Src.
18027 if (Src->isStrictFPOpcode())
18028 return SDValue();
18029
18030 // Ensure the FP type is legal.
18031 if (!TLI.isTypeLegal(Src.getValueType()))
18032 return SDValue();
18033
18034 // Don't do this for f16 with Zfhmin and not Zfh.
18035 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18036 return SDValue();
18037
18038 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18039 // If the result is invalid, we didn't find a foldable instruction.
18040 if (FRM == RISCVFPRndMode::Invalid)
18041 return SDValue();
18042
18043 SDLoc DL(N);
18044 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
18045 EVT VT = N->getValueType(0);
18046
18047 if (VT.isVector() && TLI.isTypeLegal(VT)) {
18048 MVT SrcVT = Src.getSimpleValueType();
18049 MVT SrcContainerVT = SrcVT;
18050 MVT ContainerVT = VT.getSimpleVT();
18051 SDValue XVal = Src.getOperand(0);
18052
18053 // For widening and narrowing conversions we just combine it into a
18054 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
18055 // end up getting lowered to their appropriate pseudo instructions based on
18056 // their operand types
18057 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
18058 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
18059 return SDValue();
18060
18061 // Make fixed-length vectors scalable first
18062 if (SrcVT.isFixedLengthVector()) {
18063 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
18064 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
18065 ContainerVT =
18066 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
18067 }
18068
18069 auto [Mask, VL] =
18070 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
18071
18072 SDValue FpToInt;
18073 if (FRM == RISCVFPRndMode::RTZ) {
18074 // Use the dedicated trunc static rounding mode if we're truncating so we
18075 // don't need to generate calls to fsrmi/fsrm
18076 unsigned Opc =
18077 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
18078 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
18079 } else {
18080 unsigned Opc =
18081 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
18082 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
18083 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
18084 }
18085
18086 // If converted from fixed-length to scalable, convert back
18087 if (VT.isFixedLengthVector())
18088 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
18089
18090 return FpToInt;
18091 }
18092
18093 // Only handle XLen or i32 types. Other types narrower than XLen will
18094 // eventually be legalized to XLenVT.
18095 if (VT != MVT::i32 && VT != XLenVT)
18096 return SDValue();
18097
18098 unsigned Opc;
18099 if (VT == XLenVT)
18100 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18101 else
18102 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18103
18104 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
18105 DAG.getTargetConstant(FRM, DL, XLenVT));
18106 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
18107}
18108
18109// Fold
18110// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
18111// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
18112// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
18113// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
18114// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
18115// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
18118 const RISCVSubtarget &Subtarget) {
18119 SelectionDAG &DAG = DCI.DAG;
18120 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18121 MVT XLenVT = Subtarget.getXLenVT();
18122
18123 // Only handle XLen types. Other types narrower than XLen will eventually be
18124 // legalized to XLenVT.
18125 EVT DstVT = N->getValueType(0);
18126 if (DstVT != XLenVT)
18127 return SDValue();
18128
18129 SDValue Src = N->getOperand(0);
18130
18131 // Don't do this for strict-fp Src.
18132 if (Src->isStrictFPOpcode())
18133 return SDValue();
18134
18135 // Ensure the FP type is also legal.
18136 if (!TLI.isTypeLegal(Src.getValueType()))
18137 return SDValue();
18138
18139 // Don't do this for f16 with Zfhmin and not Zfh.
18140 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18141 return SDValue();
18142
18143 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18144
18145 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18146 if (FRM == RISCVFPRndMode::Invalid)
18147 return SDValue();
18148
18149 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
18150
18151 unsigned Opc;
18152 if (SatVT == DstVT)
18153 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18154 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
18155 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18156 else
18157 return SDValue();
18158 // FIXME: Support other SatVTs by clamping before or after the conversion.
18159
18160 Src = Src.getOperand(0);
18161
18162 SDLoc DL(N);
18163 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
18164 DAG.getTargetConstant(FRM, DL, XLenVT));
18165
18166 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
18167 // extend.
18168 if (Opc == RISCVISD::FCVT_WU_RV64)
18169 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
18170
18171 // RISC-V FP-to-int conversions saturate to the destination register size, but
18172 // don't produce 0 for nan.
18173 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
18174 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
18175}
18176
18177// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
18178// smaller than XLenVT.
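// Illustrative example: for an i16 value X, (bswap X) swaps the two bytes and
// the surrounding (bitreverse ...) then reverses all 16 bits, which leaves
// each byte in place with its bits reversed, exactly the BREV8 semantics.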
18180 const RISCVSubtarget &Subtarget) {
18181 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
18182
18183 SDValue Src = N->getOperand(0);
18184 if (Src.getOpcode() != ISD::BSWAP)
18185 return SDValue();
18186
18187 EVT VT = N->getValueType(0);
18188 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
18190 return SDValue();
18191
18192 SDLoc DL(N);
18193 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
18194}
18195
18197 const RISCVSubtarget &Subtarget) {
18198 // Fold:
18199 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
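  // Illustrative example (assumed values): with EVL = 4 and i32 elements, the
  // strided load below starts at ADDR + 12 with a stride of -4, reading the
  // elements at ADDR+12, ADDR+8, ADDR+4 and ADDR+0, i.e. in reversed order.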
18200
18201 // Check if its first operand is a vp.load.
18202 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
18203 if (!VPLoad)
18204 return SDValue();
18205
18206 EVT LoadVT = VPLoad->getValueType(0);
18207 // We do not have a strided_load version for masks, and the evl of vp.reverse
18208 // and vp.load should always be the same.
18209 if (!LoadVT.getVectorElementType().isByteSized() ||
18210 N->getOperand(2) != VPLoad->getVectorLength() ||
18211 !N->getOperand(0).hasOneUse())
18212 return SDValue();
18213
18214  // Check that the mask of the outer vp.reverse is all ones.
18215 if (!isOneOrOneSplat(N->getOperand(1)))
18216 return SDValue();
18217
18218 SDValue LoadMask = VPLoad->getMask();
18219  // If Mask is all ones, then the load is unmasked and can be reversed.
18220 if (!isOneOrOneSplat(LoadMask)) {
18221 // If the mask is not all ones, we can reverse the load if the mask was also
18222 // reversed by an unmasked vp.reverse with the same EVL.
18223 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18224 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
18225 LoadMask.getOperand(2) != VPLoad->getVectorLength())
18226 return SDValue();
18227 LoadMask = LoadMask.getOperand(0);
18228 }
18229
18230 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
18231 SDLoc DL(N);
18232 MVT XLenVT = Subtarget.getXLenVT();
18233 SDValue NumElem = VPLoad->getVectorLength();
18234 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
18235
18236 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18237 DAG.getConstant(1, DL, XLenVT));
18238 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18239 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18240 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
18241 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18242
18244 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
18246 PtrInfo, VPLoad->getMemOperand()->getFlags(),
18247 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
18248
18249 SDValue Ret = DAG.getStridedLoadVP(
18250 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
18251 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
18252
18253 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
18254
18255 return Ret;
18256}
18257
18259 const RISCVSubtarget &Subtarget) {
18260 // Fold:
18261 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
18262 // -1, MASK)
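  // Illustrative example (assumed values): with EVL = 4 and i32 elements, the
  // strided store writes VAL[0] to ADDR+12, VAL[1] to ADDR+8, and so on with
  // a stride of -4, matching a contiguous store of the reversed vector.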
18263 auto *VPStore = cast<VPStoreSDNode>(N);
18264
18265 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
18266 return SDValue();
18267
18268 SDValue VPReverse = VPStore->getValue();
18269 EVT ReverseVT = VPReverse->getValueType(0);
18270
18271 // We do not have a strided_store version for masks, and the evl of vp.reverse
18272 // and vp.store should always be the same.
18273 if (!ReverseVT.getVectorElementType().isByteSized() ||
18274 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
18275 !VPReverse.hasOneUse())
18276 return SDValue();
18277
18278 SDValue StoreMask = VPStore->getMask();
18279  // If Mask is all ones, then the store is unmasked and can be reversed.
18280 if (!isOneOrOneSplat(StoreMask)) {
18281 // If the mask is not all ones, we can reverse the store if the mask was
18282 // also reversed by an unmasked vp.reverse with the same EVL.
18283 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18284 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
18285 StoreMask.getOperand(2) != VPStore->getVectorLength())
18286 return SDValue();
18287 StoreMask = StoreMask.getOperand(0);
18288 }
18289
18290 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
18291 SDLoc DL(N);
18292 MVT XLenVT = Subtarget.getXLenVT();
18293 SDValue NumElem = VPStore->getVectorLength();
18294 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
18295
18296 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18297 DAG.getConstant(1, DL, XLenVT));
18298 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18299 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18300 SDValue Base =
18301 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
18302 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18303
18305 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
18307 PtrInfo, VPStore->getMemOperand()->getFlags(),
18308 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
18309
18310 return DAG.getStridedStoreVP(
18311 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
18312 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
18313 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
18314 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
18315}
18316
18317// Peephole avgceil pattern.
18318// %1 = zext <N x i8> %a to <N x i32>
18319// %2 = zext <N x i8> %b to <N x i32>
18320// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
18321// %4 = add nuw nsw <N x i32> %3, %2
18322// %5 = lshr <N x i32> %4, splat (i32 1)
18323// %6 = trunc <N x i32> %5 to <N x i8>
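// In other words, the IR above computes trunc(((zext a) + (zext b) + 1) >> 1),
// an unsigned round-up average, which is selected as vaaddu with the RNU
// (round-to-nearest-up) fixed-point rounding mode.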
18325 const RISCVSubtarget &Subtarget) {
18326 EVT VT = N->getValueType(0);
18327
18328 // Ignore fixed vectors.
18329 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18330 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
18331 return SDValue();
18332
18333 SDValue In = N->getOperand(0);
18334 SDValue Mask = N->getOperand(1);
18335 SDValue VL = N->getOperand(2);
18336
18337 // Input should be a vp_srl with same mask and VL.
18338 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
18339 In.getOperand(3) != VL)
18340 return SDValue();
18341
18342 // Shift amount should be 1.
18343 if (!isOneOrOneSplat(In.getOperand(1)))
18344 return SDValue();
18345
18346 // Shifted value should be a vp_add with same mask and VL.
18347 SDValue LHS = In.getOperand(0);
18348 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
18349 LHS.getOperand(3) != VL)
18350 return SDValue();
18351
18352 SDValue Operands[3];
18353
18354 // Matches another VP_ADD with same VL and Mask.
18355 auto FindAdd = [&](SDValue V, SDValue Other) {
18356 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
18357 V.getOperand(3) != VL)
18358 return false;
18359
18360 Operands[0] = Other;
18361 Operands[1] = V.getOperand(1);
18362 Operands[2] = V.getOperand(0);
18363 return true;
18364 };
18365
18366 // We need to find another VP_ADD in one of the operands.
18367 SDValue LHS0 = LHS.getOperand(0);
18368 SDValue LHS1 = LHS.getOperand(1);
18369 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
18370 return SDValue();
18371
18372 // Now we have three operands of two additions. Check that one of them is a
18373 // constant vector with ones.
18374 auto I = llvm::find_if(Operands,
18375 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
18376 if (I == std::end(Operands))
18377 return SDValue();
18378  // We found a vector of ones; move it to the end of the Operands array.
18379 std::swap(*I, Operands[2]);
18380
18381 // Make sure the other 2 operands can be promoted from the result type.
18382 for (SDValue Op : drop_end(Operands)) {
18383 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
18384 Op.getOperand(2) != VL)
18385 return SDValue();
18386 // Input must be the same size or smaller than our result.
18387 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
18388 return SDValue();
18389 }
18390
18391 // Pattern is detected.
18392 // Rebuild the zero extends in case the inputs are smaller than our result.
18393 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
18394 Operands[0].getOperand(0), Mask, VL);
18395 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
18396 Operands[1].getOperand(0), Mask, VL);
18397  // Build an AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
18398 // mode.
18399 SDLoc DL(N);
18400 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
18401 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
18402}
18403
18404// Convert from one FMA opcode to another based on whether we are negating the
18405// multiply result and/or the accumulator.
18406// NOTE: Only supports RVV operations with VL.
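// For example, VFMADD_VL computes (A * B) + C; negating the multiply result
// yields -(A * B) + C, i.e. VFNMSUB_VL, while negating the accumulator yields
// (A * B) - C, i.e. VFMSUB_VL.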
18407static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
18408 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
18409 if (NegMul) {
18410 // clang-format off
18411 switch (Opcode) {
18412 default: llvm_unreachable("Unexpected opcode");
18413 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18414 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18415 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18416 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18417 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18418 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18419 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18420 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18421 }
18422 // clang-format on
18423 }
18424
18425 // Negating the accumulator changes ADD<->SUB.
18426 if (NegAcc) {
18427 // clang-format off
18428 switch (Opcode) {
18429 default: llvm_unreachable("Unexpected opcode");
18430 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18431 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18432 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18433 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18434 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18435 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18436 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18437 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18438 }
18439 // clang-format on
18440 }
18441
18442 return Opcode;
18443}
18444
18446 // Fold FNEG_VL into FMA opcodes.
18447 // The first operand of strict-fp is chain.
18448 bool IsStrict =
18449 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
18450 unsigned Offset = IsStrict ? 1 : 0;
18451 SDValue A = N->getOperand(0 + Offset);
18452 SDValue B = N->getOperand(1 + Offset);
18453 SDValue C = N->getOperand(2 + Offset);
18454 SDValue Mask = N->getOperand(3 + Offset);
18455 SDValue VL = N->getOperand(4 + Offset);
18456
18457 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
18458 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
18459 V.getOperand(2) == VL) {
18460 // Return the negated input.
18461 V = V.getOperand(0);
18462 return true;
18463 }
18464
18465 return false;
18466 };
18467
18468 bool NegA = invertIfNegative(A);
18469 bool NegB = invertIfNegative(B);
18470 bool NegC = invertIfNegative(C);
18471
18472 // If no operands are negated, we're done.
18473 if (!NegA && !NegB && !NegC)
18474 return SDValue();
18475
18476 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
18477 if (IsStrict)
18478 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
18479 {N->getOperand(0), A, B, C, Mask, VL});
18480 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
18481 VL);
18482}
18483
18486 const RISCVSubtarget &Subtarget) {
18487 SelectionDAG &DAG = DCI.DAG;
18488
18490 return V;
18491
18492 // FIXME: Ignore strict opcodes for now.
18493 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
18494 return SDValue();
18495
18496 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18497}
18498
18500 const RISCVSubtarget &Subtarget) {
18501 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
18502
18503 EVT VT = N->getValueType(0);
18504
18505 if (VT != Subtarget.getXLenVT())
18506 return SDValue();
18507
18508 if (!isa<ConstantSDNode>(N->getOperand(1)))
18509 return SDValue();
18510 uint64_t ShAmt = N->getConstantOperandVal(1);
18511
18512 SDValue N0 = N->getOperand(0);
18513
18514 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
18515 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
18516 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
18517 unsigned ExtSize =
18518 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
18519 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
18520 N0.getOperand(0).hasOneUse() &&
18522 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
18523 if (LShAmt < ExtSize) {
18524 unsigned Size = VT.getSizeInBits();
18525 SDLoc ShlDL(N0.getOperand(0));
18526 SDValue Shl =
18527 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
18528 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
18529 SDLoc DL(N);
18530 return DAG.getNode(ISD::SRA, DL, VT, Shl,
18531 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
18532 }
18533 }
18534 }
18535
18536 if (ShAmt > 32 || VT != MVT::i64)
18537 return SDValue();
18538
18539 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
18540 // FIXME: Should this be a generic combine? There's a similar combine on X86.
18541 //
18542 // Also try these folds where an add or sub is in the middle.
18543  // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
18544  // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
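  // For example, with C = 5: (sra (shl X, 32), 27) becomes
  // (shl (sext_inreg X, i32), 5), and when an add/sub sits in the middle the
  // sext_inreg can be shared by every sra user of that add/sub.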
18545 SDValue Shl;
18546 ConstantSDNode *AddC = nullptr;
18547
18548 // We might have an ADD or SUB between the SRA and SHL.
18549 bool IsAdd = N0.getOpcode() == ISD::ADD;
18550 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
18551 // Other operand needs to be a constant we can modify.
18552 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
18553 if (!AddC)
18554 return SDValue();
18555
18556 // AddC needs to have at least 32 trailing zeros.
18557 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
18558 return SDValue();
18559
18560 // All users should be a shift by constant less than or equal to 32. This
18561 // ensures we'll do this optimization for each of them to produce an
18562 // add/sub+sext_inreg they can all share.
18563 for (SDNode *U : N0->users()) {
18564 if (U->getOpcode() != ISD::SRA ||
18565 !isa<ConstantSDNode>(U->getOperand(1)) ||
18566 U->getConstantOperandVal(1) > 32)
18567 return SDValue();
18568 }
18569
18570 Shl = N0.getOperand(IsAdd ? 0 : 1);
18571 } else {
18572 // Not an ADD or SUB.
18573 Shl = N0;
18574 }
18575
18576 // Look for a shift left by 32.
18577 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
18578 Shl.getConstantOperandVal(1) != 32)
18579 return SDValue();
18580
18581  // If we didn't look through an add/sub, then the shl should have one use.
18582 // If we did look through an add/sub, the sext_inreg we create is free so
18583 // we're only creating 2 new instructions. It's enough to only remove the
18584 // original sra+add/sub.
18585 if (!AddC && !Shl.hasOneUse())
18586 return SDValue();
18587
18588 SDLoc DL(N);
18589 SDValue In = Shl.getOperand(0);
18590
18591 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
18592 // constant.
18593 if (AddC) {
18594 SDValue ShiftedAddC =
18595 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
18596 if (IsAdd)
18597 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
18598 else
18599 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
18600 }
18601
18602 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
18603 DAG.getValueType(MVT::i32));
18604 if (ShAmt == 32)
18605 return SExt;
18606
18607 return DAG.getNode(
18608 ISD::SHL, DL, MVT::i64, SExt,
18609 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
18610}
18611
18612// Invert (and/or (setcc X, Y), (xor Z, 1)) to (or/and (setcc !cc X, Y), Z) if
18613// the result is used as the condition of a br_cc or select_cc we can invert,
18614// inverting the setcc is free, and Z is 0/1. Caller will invert the
18615// br_cc/select_cc.
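// For example, a br_cc on (and (seteq X, Y), (xor Z, 1)) can instead branch on
// the inverse of (or (setne X, Y), Z) by De Morgan's laws, removing the xor
// when Z is already known to be 0/1.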
18616static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
18617  bool IsAnd = Cond.getOpcode() == ISD::AND;
18618 if (!IsAnd && Cond.getOpcode() != ISD::OR)
18619 return SDValue();
18620
18621 if (!Cond.hasOneUse())
18622 return SDValue();
18623
18624 SDValue Setcc = Cond.getOperand(0);
18625 SDValue Xor = Cond.getOperand(1);
18626 // Canonicalize setcc to LHS.
18627 if (Setcc.getOpcode() != ISD::SETCC)
18628 std::swap(Setcc, Xor);
18629 // LHS should be a setcc and RHS should be an xor.
18630 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
18631 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
18632 return SDValue();
18633
18634 // If the condition is an And, SimplifyDemandedBits may have changed
18635 // (xor Z, 1) to (not Z).
18636 SDValue Xor1 = Xor.getOperand(1);
18637 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
18638 return SDValue();
18639
18640 EVT VT = Cond.getValueType();
18641 SDValue Xor0 = Xor.getOperand(0);
18642
18643 // The LHS of the xor needs to be 0/1.
18645 if (!DAG.MaskedValueIsZero(Xor0, Mask))
18646 return SDValue();
18647
18648 // We can only invert integer setccs.
18649 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
18650 if (!SetCCOpVT.isScalarInteger())
18651 return SDValue();
18652
18653 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
18654 if (ISD::isIntEqualitySetCC(CCVal)) {
18655 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
18656 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
18657 Setcc.getOperand(1), CCVal);
18658 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
18659 // Invert (setlt 0, X) by converting to (setlt X, 1).
18660 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
18661 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
18662 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
18663    // Invert (setlt X, 1) by converting to (setlt 0, X).
18664 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
18665 DAG.getConstant(0, SDLoc(Setcc), VT),
18666 Setcc.getOperand(0), CCVal);
18667 } else
18668 return SDValue();
18669
18670 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
18671 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
18672}
18673
18674// Perform common combines for BR_CC and SELECT_CC conditions.
18675static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
18676 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
18677 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
18678
18679  // Since an arithmetic right shift always preserves the sign,
18680  // the shift can be omitted.
18681 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
18682 // setge (sra X, N), 0 -> setge X, 0
18683 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
18684 LHS.getOpcode() == ISD::SRA) {
18685 LHS = LHS.getOperand(0);
18686 return true;
18687 }
18688
18689 if (!ISD::isIntEqualitySetCC(CCVal))
18690 return false;
18691
18692 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
18693 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
18694 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
18695 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
18696 // If we're looking for eq 0 instead of ne 0, we need to invert the
18697 // condition.
18698 bool Invert = CCVal == ISD::SETEQ;
18699 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
18700 if (Invert)
18701 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18702
18703 RHS = LHS.getOperand(1);
18704 LHS = LHS.getOperand(0);
18705 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
18706
18707 CC = DAG.getCondCode(CCVal);
18708 return true;
18709 }
18710
18711 // If XOR is reused and has an immediate that will fit in XORI,
18712 // do not fold.
18713 auto isXorImmediate = [](const SDValue &Op) -> bool {
18714 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
18715 return isInt<12>(XorCnst->getSExtValue());
18716 return false;
18717 };
18718 // Fold (X(i1) ^ 1) == 0 -> X != 0
18719 auto singleBitOp = [&DAG](const SDValue &VarOp,
18720 const SDValue &ConstOp) -> bool {
18721 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
18722 const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
18723 return (XorCnst->getSExtValue() == 1) &&
18724 DAG.MaskedValueIsZero(VarOp, Mask);
18725 }
18726 return false;
18727 };
18728 auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
18729 for (const SDNode *UserNode : Op->users()) {
18730 const unsigned Opcode = UserNode->getOpcode();
18731 if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
18732 return false;
18733 }
18734 return true;
18735 };
18736 auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
18737 const SDValue &LHS, const SDValue &RHS) -> bool {
18738 return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
18739 (!isXorImmediate(LHS.getOperand(1)) ||
18740 singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
18741 onlyUsedBySelectOrBR(LHS));
18742 };
18743 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
18744 if (isFoldableXorEq(LHS, RHS)) {
18745 RHS = LHS.getOperand(1);
18746 LHS = LHS.getOperand(0);
18747 return true;
18748 }
18749  // Fold ((sext (xor X, C)), 0, eq/ne) -> ((sext X), C, eq/ne)
18750 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
18751 const SDValue LHS0 = LHS.getOperand(0);
18752 if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
18753      // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y))
18754 RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18755 LHS0.getOperand(1), LHS.getOperand(1));
18756 LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18757 LHS0.getOperand(0), LHS.getOperand(1));
18758 return true;
18759 }
18760 }
18761
18762 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
18763 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
18764 LHS.getOperand(1).getOpcode() == ISD::Constant) {
18765 SDValue LHS0 = LHS.getOperand(0);
18766 if (LHS0.getOpcode() == ISD::AND &&
18767 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
18768 uint64_t Mask = LHS0.getConstantOperandVal(1);
18769 uint64_t ShAmt = LHS.getConstantOperandVal(1);
18770 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
18771 // XAndesPerf supports branch on test bit.
18772 if (Subtarget.hasVendorXAndesPerf()) {
18773 LHS =
18774 DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
18775 DAG.getConstant(Mask, DL, LHS.getValueType()));
18776 return true;
18777 }
18778
18779 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
18780 CC = DAG.getCondCode(CCVal);
18781
18782 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
18783 LHS = LHS0.getOperand(0);
18784 if (ShAmt != 0)
18785 LHS =
18786 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
18787 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
18788 return true;
18789 }
18790 }
18791 }
18792
18793  // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
18794 // This can occur when legalizing some floating point comparisons.
18795 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
18796 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
18797 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18798 CC = DAG.getCondCode(CCVal);
18799 RHS = DAG.getConstant(0, DL, LHS.getValueType());
18800 return true;
18801 }
18802
18803 if (isNullConstant(RHS)) {
18804 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
18805 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18806 CC = DAG.getCondCode(CCVal);
18807 LHS = NewCond;
18808 return true;
18809 }
18810 }
18811
18812 return false;
18813}
18814
18815// Fold
18816// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
18817// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
18818// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
18819// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
18820// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
18821// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
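// For example, (select C, (add Y, X), Y) always yields Y plus either X or the
// identity value 0, so the select can be narrowed to choose between X and 0
// while the add itself executes unconditionally.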
18822static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
18823                                   SDValue TrueVal, SDValue FalseVal,
18824 bool Swapped) {
18825 bool Commutative = true;
18826 unsigned Opc = TrueVal.getOpcode();
18827 switch (Opc) {
18828 default:
18829 return SDValue();
18830 case ISD::SHL:
18831 case ISD::SRA:
18832 case ISD::SRL:
18833 case ISD::SUB:
18834 case ISD::ROTL:
18835 case ISD::ROTR:
18836 Commutative = false;
18837 break;
18838 case ISD::ADD:
18839 case ISD::OR:
18840 case ISD::XOR:
18841 break;
18842 }
18843
18844 if (!TrueVal.hasOneUse())
18845 return SDValue();
18846
18847 unsigned OpToFold;
18848 if (FalseVal == TrueVal.getOperand(0))
18849 OpToFold = 0;
18850 else if (Commutative && FalseVal == TrueVal.getOperand(1))
18851 OpToFold = 1;
18852 else
18853 return SDValue();
18854
18855 EVT VT = N->getValueType(0);
18856 SDLoc DL(N);
18857 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
18858 EVT OtherOpVT = OtherOp.getValueType();
18859 SDValue IdentityOperand =
18860 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
18861 if (!Commutative)
18862 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
18863 assert(IdentityOperand && "No identity operand!");
18864
18865 if (Swapped)
18866 std::swap(OtherOp, IdentityOperand);
18867 SDValue NewSel =
18868 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
18869 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
18870}
18871
18872// This tries to get rid of the `select` and `icmp` that are used to handle
18873// targets that do not support `cttz(0)`/`ctlz(0)`.
18874static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
18875  SDValue Cond = N->getOperand(0);
18876
18877 // This represents either CTTZ or CTLZ instruction.
18878 SDValue CountZeroes;
18879
18880 SDValue ValOnZero;
18881
18882 if (Cond.getOpcode() != ISD::SETCC)
18883 return SDValue();
18884
18885 if (!isNullConstant(Cond->getOperand(1)))
18886 return SDValue();
18887
18888 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
18889 if (CCVal == ISD::CondCode::SETEQ) {
18890 CountZeroes = N->getOperand(2);
18891 ValOnZero = N->getOperand(1);
18892 } else if (CCVal == ISD::CondCode::SETNE) {
18893 CountZeroes = N->getOperand(1);
18894 ValOnZero = N->getOperand(2);
18895 } else {
18896 return SDValue();
18897 }
18898
18899 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
18900 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
18901 CountZeroes = CountZeroes.getOperand(0);
18902
18903 if (CountZeroes.getOpcode() != ISD::CTTZ &&
18904 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
18905 CountZeroes.getOpcode() != ISD::CTLZ &&
18906 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
18907 return SDValue();
18908
18909 if (!isNullConstant(ValOnZero))
18910 return SDValue();
18911
18912 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
18913 if (Cond->getOperand(0) != CountZeroesArgument)
18914 return SDValue();
18915
18916 unsigned BitWidth = CountZeroes.getValueSizeInBits();
18917 if (!isPowerOf2_32(BitWidth))
18918 return SDValue();
18919
18920 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
18921 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
18922 CountZeroes.getValueType(), CountZeroesArgument);
18923 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
18924 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
18925 CountZeroes.getValueType(), CountZeroesArgument);
18926 }
18927
18928 SDValue BitWidthMinusOne =
18929 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
18930
18931 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
18932 CountZeroes, BitWidthMinusOne);
18933 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
18934}
18935
18936static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
18937                                const RISCVSubtarget &Subtarget) {
18938 SDValue Cond = N->getOperand(0);
18939 SDValue True = N->getOperand(1);
18940 SDValue False = N->getOperand(2);
18941 SDLoc DL(N);
18942 EVT VT = N->getValueType(0);
18943 EVT CondVT = Cond.getValueType();
18944
18945 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
18946 return SDValue();
18947
18948  // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
18949  // BEXTI, where C is a power of 2.
18950 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
18951 (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
18952 SDValue LHS = Cond.getOperand(0);
18953 SDValue RHS = Cond.getOperand(1);
18954 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18955 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
18956 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
18957 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
18958 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
18959 return DAG.getSelect(DL, VT,
18960 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
18961 False, True);
18962 }
18963 }
18964 return SDValue();
18965}
18966
18967static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
18968 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
18969 return false;
18970
18971 SwapCC = false;
18972 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
18973 std::swap(TrueVal, FalseVal);
18974 SwapCC = true;
18975 }
18976
18977 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
18978 return false;
18979
18980 SDValue A = FalseVal.getOperand(0);
18981 SDValue B = FalseVal.getOperand(1);
18982 // Add is commutative, so check both orders
18983 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
18984 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
18985}
18986
18987/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
18988/// This allows us to match a vadd.vv fed by a masked vrsub, which reduces
18989/// register pressure over the add followed by masked vsub sequence.
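/// For example, (vselect CC, (add a, b), (sub a, b)) always computes a plus
/// either b or -b, so the conditional negate of b can be emitted as a masked
/// vrsub.vi with immediate 0, leaving an unconditional vadd.vv.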
18991 SDLoc DL(N);
18992 EVT VT = N->getValueType(0);
18993 SDValue CC = N->getOperand(0);
18994 SDValue TrueVal = N->getOperand(1);
18995 SDValue FalseVal = N->getOperand(2);
18996
18997 bool SwapCC;
18998 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
18999 return SDValue();
19000
19001 SDValue Sub = SwapCC ? TrueVal : FalseVal;
19002 SDValue A = Sub.getOperand(0);
19003 SDValue B = Sub.getOperand(1);
19004
19005 // Arrange the select such that we can match a masked
19006 // vrsub.vi to perform the conditional negate
19007 SDValue NegB = DAG.getNegative(B, DL, VT);
19008 if (!SwapCC)
19009 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19010 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19011 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19012}
19013
19015 const RISCVSubtarget &Subtarget) {
19016 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
19017 return Folded;
19018
19019 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
19020 return V;
19021
19022 if (Subtarget.hasConditionalMoveFusion())
19023 return SDValue();
19024
19025 SDValue TrueVal = N->getOperand(1);
19026 SDValue FalseVal = N->getOperand(2);
19027 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
19028 return V;
19029 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
19030}
19031
19032/// If we have a build_vector where each lane is binop X, C, where C
19033/// is a constant (but not necessarily the same constant on all lanes),
19034/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
19035/// We assume that materializing a constant build vector will be no more
19036/// expensive than performing O(n) binops.
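/// Illustrative example: (build_vector (add x0, 1), (add x1, 2)) can be
/// rewritten as (add (build_vector x0, x1), (build_vector 1, 2)), replacing N
/// scalar adds with one vector add and one constant build_vector.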
19038 const RISCVSubtarget &Subtarget,
19039 const RISCVTargetLowering &TLI) {
19040 SDLoc DL(N);
19041 EVT VT = N->getValueType(0);
19042
19043 assert(!VT.isScalableVector() && "unexpected build vector");
19044
19045 if (VT.getVectorNumElements() == 1)
19046 return SDValue();
19047
19048 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
19049 if (!TLI.isBinOp(Opcode))
19050 return SDValue();
19051
19052 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
19053 return SDValue();
19054
19055 // This BUILD_VECTOR involves an implicit truncation, and sinking
19056 // truncates through binops is non-trivial.
19057 if (N->op_begin()->getValueType() != VT.getVectorElementType())
19058 return SDValue();
19059
19060 SmallVector<SDValue> LHSOps;
19061 SmallVector<SDValue> RHSOps;
19062 for (SDValue Op : N->ops()) {
19063 if (Op.isUndef()) {
19064 // We can't form a divide or remainder from undef.
19065 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
19066 return SDValue();
19067
19068 LHSOps.push_back(Op);
19069 RHSOps.push_back(Op);
19070 continue;
19071 }
19072
19073    // TODO: We can handle operations which have a neutral rhs value
19074 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
19075 // of profit in a more explicit manner.
19076 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
19077 return SDValue();
19078
19079 LHSOps.push_back(Op.getOperand(0));
19080 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
19081 !isa<ConstantFPSDNode>(Op.getOperand(1)))
19082 return SDValue();
19083 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19084 // have different LHS and RHS types.
19085 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
19086 return SDValue();
19087
19088 RHSOps.push_back(Op.getOperand(1));
19089 }
19090
19091 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
19092 DAG.getBuildVector(VT, DL, RHSOps));
19093}
19094
19095static MVT getQDOTXResultType(MVT OpVT) {
19096  ElementCount OpEC = OpVT.getVectorElementCount();
19097 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
19098 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
19099}
19100
19101/// Given fixed length vectors A and B with equal element types, but possibly
19102/// different number of elements, return A + B where either A or B is zero
19103/// padded to the larger number of elements.
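/// Illustrative example: for a v2i32 and a v4i32 operand, the low v2i32 of the
/// wider vector is extracted, added to the narrower vector, and the sum is
/// inserted back at index 0 of the wider vector, leaving its upper elements
/// unchanged.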
19104static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B,
19105                                SelectionDAG &DAG) {
19106 // NOTE: Manually doing the extract/add/insert scheme produces
19107 // significantly better codegen than the naive pad with zeros
19108 // and add scheme.
19109 EVT AVT = A.getValueType();
19110 EVT BVT = B.getValueType();
19113 std::swap(A, B);
19114 std::swap(AVT, BVT);
19115 }
19116
19117 SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
19118 SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
19119 return DAG.getInsertSubvector(DL, B, Res, 0);
19120}
19121
19123 SelectionDAG &DAG,
19124 const RISCVSubtarget &Subtarget,
19125 const RISCVTargetLowering &TLI) {
19126 using namespace SDPatternMatch;
19127 // Note: We intentionally do not check the legality of the reduction type.
19128 // We want to handle the m4/m8 *src* types, and thus need to let illegal
19129 // intermediate types flow through here.
19130 if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
19132 return SDValue();
19133
19134 // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
19135 // form).
19136 SDValue A, B;
19137 if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
19138 SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
19139 SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
19140 if (AOpt || BOpt) {
19141 if (AOpt)
19142 A = AOpt;
19143 if (BOpt)
19144 B = BOpt;
19145 // From here, we're doing A + B with mixed types, implicitly zero
19146 // padded to the wider type. Note that we *don't* need the result
19147 // type to be the original VT, and in fact prefer narrower ones
19148 // if possible.
19149 return getZeroPaddedAdd(DL, A, B, DAG);
19150 }
19151 }
19152
19153 // zext a <--> partial_reduce_umla 0, a, 1
19154 // sext a <--> partial_reduce_smla 0, a, 1
19155 if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
19156 InVec.getOpcode() == ISD::SIGN_EXTEND) {
19157 SDValue A = InVec.getOperand(0);
19158 EVT OpVT = A.getValueType();
19159 if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
19160 return SDValue();
19161
19162 MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
19163 SDValue B = DAG.getConstant(0x1, DL, OpVT);
19164 bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
19165 unsigned Opc =
19166 IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
19167 return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
19168 }
19169
19170 // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
19171 // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
19172  // mul (sext a, zext b) -> partial_reduce_sumla 0, a, b
19173  // mul (zext a, sext b) -> partial_reduce_sumla 0, b, a (swapped)
19174 if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
19175 return SDValue();
19176
19177 if (!ISD::isExtOpcode(A.getOpcode()))
19178 return SDValue();
19179
19180 EVT OpVT = A.getOperand(0).getValueType();
19181 if (OpVT.getVectorElementType() != MVT::i8 ||
19182 OpVT != B.getOperand(0).getValueType() ||
19183 !TLI.isTypeLegal(A.getValueType()))
19184 return SDValue();
19185
19186 unsigned Opc;
19187 if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
19188 Opc = ISD::PARTIAL_REDUCE_SMLA;
19189 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19190 B.getOpcode() == ISD::ZERO_EXTEND)
19191 Opc = ISD::PARTIAL_REDUCE_UMLA;
19192 else if (A.getOpcode() == ISD::SIGN_EXTEND &&
19193 B.getOpcode() == ISD::ZERO_EXTEND)
19194 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19195 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19196 B.getOpcode() == ISD::SIGN_EXTEND) {
19197 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19198 std::swap(A, B);
19199 } else
19200 return SDValue();
19201
19202 MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
19203 return DAG.getNode(
19204 Opc, DL, ResVT,
19205 {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
19206}
19207
19209 const RISCVSubtarget &Subtarget,
19210 const RISCVTargetLowering &TLI) {
19211 if (!Subtarget.hasStdExtZvqdotq())
19212 return SDValue();
19213
19214 SDLoc DL(N);
19215 EVT VT = N->getValueType(0);
19216 SDValue InVec = N->getOperand(0);
19217 if (SDValue V = foldReduceOperandViaVQDOT(InVec, DL, DAG, Subtarget, TLI))
19218 return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
19219 return SDValue();
19220}
19221
19223 const RISCVSubtarget &Subtarget,
19224 const RISCVTargetLowering &TLI) {
19225 SDValue InVec = N->getOperand(0);
19226 SDValue InVal = N->getOperand(1);
19227 SDValue EltNo = N->getOperand(2);
19228 SDLoc DL(N);
19229
19230 EVT VT = InVec.getValueType();
19231 if (VT.isScalableVector())
19232 return SDValue();
19233
19234 if (!InVec.hasOneUse())
19235 return SDValue();
19236
19237 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
19238 // move the insert_vector_elts into the arms of the binop. Note that
19239 // the new RHS must be a constant.
19240 const unsigned InVecOpcode = InVec->getOpcode();
19241 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
19242 InVal.hasOneUse()) {
19243 SDValue InVecLHS = InVec->getOperand(0);
19244 SDValue InVecRHS = InVec->getOperand(1);
19245 SDValue InValLHS = InVal->getOperand(0);
19246 SDValue InValRHS = InVal->getOperand(1);
19247
19249 return SDValue();
19250 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
19251 return SDValue();
19252 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19253 // have different LHS and RHS types.
19254 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
19255 return SDValue();
19257 InVecLHS, InValLHS, EltNo);
19259 InVecRHS, InValRHS, EltNo);
19260 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
19261 }
19262
19263 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
19264 // move the insert_vector_elt to the source operand of the concat_vector.
19265 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
19266 return SDValue();
19267
19268 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19269 if (!IndexC)
19270 return SDValue();
19271 unsigned Elt = IndexC->getZExtValue();
19272
19273 EVT ConcatVT = InVec.getOperand(0).getValueType();
19274 if (ConcatVT.getVectorElementType() != InVal.getValueType())
19275 return SDValue();
19276 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19277 unsigned NewIdx = Elt % ConcatNumElts;
19278
19279 unsigned ConcatOpIdx = Elt / ConcatNumElts;
19280 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
19281 ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);
19282
19283 SmallVector<SDValue> ConcatOps(InVec->ops());
19284 ConcatOps[ConcatOpIdx] = ConcatOp;
19285 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19286}
19287
19288// If we're concatenating a series of vector loads like
19289// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
19290// Then we can turn this into a strided load by widening the vector elements
19291// vlse32 p, stride=n
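// Illustrative example (stride chosen for exposition): four v4i8 loads at p,
// p+16, p+32 and p+48 become a single v4i32 strided load with stride 16, and
// the result is bitcast back to the original v16i8 concatenation.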
19293 const RISCVSubtarget &Subtarget,
19294 const RISCVTargetLowering &TLI) {
19295 SDLoc DL(N);
19296 EVT VT = N->getValueType(0);
19297
19298 // Only perform this combine on legal MVTs.
19299 if (!TLI.isTypeLegal(VT))
19300 return SDValue();
19301
19302 // TODO: Potentially extend this to scalable vectors
19303 if (VT.isScalableVector())
19304 return SDValue();
19305
19306 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
19307 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
19308 !SDValue(BaseLd, 0).hasOneUse())
19309 return SDValue();
19310
19311 EVT BaseLdVT = BaseLd->getValueType(0);
19312
19313 // Go through the loads and check that they're strided
19315 Lds.push_back(BaseLd);
19316 Align Align = BaseLd->getAlign();
19317 for (SDValue Op : N->ops().drop_front()) {
19318 auto *Ld = dyn_cast<LoadSDNode>(Op);
19319 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
19320 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
19321 Ld->getValueType(0) != BaseLdVT)
19322 return SDValue();
19323
19324 Lds.push_back(Ld);
19325
19326 // The common alignment is the most restrictive (smallest) of all the loads
19327 Align = std::min(Align, Ld->getAlign());
19328 }
19329
19330 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
19331 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
19332 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
19333 // If the load ptrs can be decomposed into a common (Base + Index) with a
19334 // common constant stride, then return the constant stride.
19335 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
19336 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
19337 if (BIO1.equalBaseIndex(BIO2, DAG))
19338 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
19339
19340 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
19341 SDValue P1 = Ld1->getBasePtr();
19342 SDValue P2 = Ld2->getBasePtr();
19343 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
19344 return {{P2.getOperand(1), false}};
19345 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
19346 return {{P1.getOperand(1), true}};
19347
19348 return std::nullopt;
19349 };
19350
19351 // Get the distance between the first and second loads
19352 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
19353 if (!BaseDiff)
19354 return SDValue();
19355
19356 // Check all the loads are the same distance apart
19357 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
19358 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
19359 return SDValue();
19360
19361 // TODO: At this point, we've successfully matched a generalized gather
19362 // load. Maybe we should emit that, and then move the specialized
19363 // matchers above and below into a DAG combine?
19364
19365  // Get the widened scalar type, e.g. v4i8 -> i32
19366 unsigned WideScalarBitWidth =
19367 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
19368 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
19369
19370  // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
19371 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
19372 if (!TLI.isTypeLegal(WideVecVT))
19373 return SDValue();
19374
19375 // Check that the operation is legal
19376 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
19377 return SDValue();
19378
19379 auto [StrideVariant, MustNegateStride] = *BaseDiff;
19380 SDValue Stride =
19381 std::holds_alternative<SDValue>(StrideVariant)
19382 ? std::get<SDValue>(StrideVariant)
19383 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
19384 Lds[0]->getOffset().getValueType());
19385 if (MustNegateStride)
19386 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
19387
19388 SDValue AllOneMask =
19389 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
19390 DAG.getConstant(1, DL, MVT::i1));
19391
19392 uint64_t MemSize;
19393 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
19394 ConstStride && ConstStride->getSExtValue() >= 0)
19395 // total size = (elsize * n) + (stride - elsize) * (n-1)
19396 // = elsize + stride * (n-1)
19397 MemSize = WideScalarVT.getSizeInBits() +
19398 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
19399 else
19400 // If Stride isn't constant, then we can't know how much it will load
19402
19404 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
19405 Align);
19406
19407 SDValue StridedLoad = DAG.getStridedLoadVP(
19408 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
19409 AllOneMask,
19410 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
19411
19412 for (SDValue Ld : N->ops())
19413 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
19414
19415 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
19416}
19417
19419 const RISCVSubtarget &Subtarget,
19420 const RISCVTargetLowering &TLI) {
19421 SDLoc DL(N);
19422 EVT VT = N->getValueType(0);
19423 const unsigned ElementSize = VT.getScalarSizeInBits();
19424 const unsigned NumElts = VT.getVectorNumElements();
19425 SDValue V1 = N->getOperand(0);
19426 SDValue V2 = N->getOperand(1);
19427 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
19428 MVT XLenVT = Subtarget.getXLenVT();
19429
19430  // Recognize a disguised select of add/sub.
19431 bool SwapCC;
19432 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
19433 matchSelectAddSub(V1, V2, SwapCC)) {
19434 SDValue Sub = SwapCC ? V1 : V2;
19435 SDValue A = Sub.getOperand(0);
19436 SDValue B = Sub.getOperand(1);
19437
19438 SmallVector<SDValue> MaskVals;
19439 for (int MaskIndex : Mask) {
19440 bool SelectMaskVal = (MaskIndex < (int)NumElts);
19441 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
19442 }
19443 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
19444 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
19445 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
19446
19447 // Arrange the select such that we can match a masked
19448 // vrsub.vi to perform the conditional negate
19449 SDValue NegB = DAG.getNegative(B, DL, VT);
19450 if (!SwapCC)
19451 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19452 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19453 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19454 }
19455
19456 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
19457 // during the combine phase before type legalization, and relies on
19458 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
19459 // for the source mask.
19460 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
19461 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
19462 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
19463 return SDValue();
19464
19465 SmallVector<int, 8> NewMask;
19466 narrowShuffleMaskElts(2, Mask, NewMask);
19467
19468 LLVMContext &C = *DAG.getContext();
19469 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
19470 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
19471 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
19472 DAG.getBitcast(NewVT, V2), NewMask);
19473 return DAG.getBitcast(VT, Res);
19474}
19475
19477 const RISCVSubtarget &Subtarget) {
19478 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19479
19480 if (N->getValueType(0).isFixedLengthVector())
19481 return SDValue();
19482
19483 SDValue Addend = N->getOperand(0);
19484 SDValue MulOp = N->getOperand(1);
19485
19486 if (N->getOpcode() == RISCVISD::ADD_VL) {
19487 SDValue AddPassthruOp = N->getOperand(2);
19488 if (!AddPassthruOp.isUndef())
19489 return SDValue();
19490 }
19491
19492 auto IsVWMulOpc = [](unsigned Opc) {
19493 switch (Opc) {
19494 case RISCVISD::VWMUL_VL:
19495 case RISCVISD::VWMULU_VL:
19496 case RISCVISD::VWMULSU_VL:
19497 return true;
19498 default:
19499 return false;
19500 }
19501 };
19502
19503 if (!IsVWMulOpc(MulOp.getOpcode()))
19504 std::swap(Addend, MulOp);
19505
19506 if (!IsVWMulOpc(MulOp.getOpcode()))
19507 return SDValue();
19508
19509 SDValue MulPassthruOp = MulOp.getOperand(2);
19510
19511 if (!MulPassthruOp.isUndef())
19512 return SDValue();
19513
19514 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19515 const RISCVSubtarget &Subtarget) {
19516 if (N->getOpcode() == ISD::ADD) {
19517 SDLoc DL(N);
19518 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19519 Subtarget);
19520 }
19521 return std::make_pair(N->getOperand(3), N->getOperand(4));
19522 }(N, DAG, Subtarget);
19523
19524 SDValue MulMask = MulOp.getOperand(3);
19525 SDValue MulVL = MulOp.getOperand(4);
19526
19527 if (AddMask != MulMask || AddVL != MulVL)
19528 return SDValue();
19529
19530 const auto &TSInfo =
19531 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
19532 unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());
19533
19534 SDLoc DL(N);
19535 EVT VT = N->getValueType(0);
19536 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
19537 AddVL};
19538 return DAG.getNode(Opc, DL, VT, Ops);
19539}
19540
19542 const RISCVSubtarget &Subtarget) {
19543
19544 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19545
19546 if (!N->getValueType(0).isVector())
19547 return SDValue();
19548
19549 SDValue Addend = N->getOperand(0);
19550 SDValue DotOp = N->getOperand(1);
19551
19552 if (N->getOpcode() == RISCVISD::ADD_VL) {
19553 SDValue AddPassthruOp = N->getOperand(2);
19554 if (!AddPassthruOp.isUndef())
19555 return SDValue();
19556 }
19557
19558 auto IsVqdotqOpc = [](unsigned Opc) {
19559 switch (Opc) {
19560 case RISCVISD::VQDOT_VL:
19561 case RISCVISD::VQDOTU_VL:
19562 case RISCVISD::VQDOTSU_VL:
19563 return true;
19564 default:
19565 return false;
19566 }
19567 };
19568
19569 if (!IsVqdotqOpc(DotOp.getOpcode()))
19570 std::swap(Addend, DotOp);
19571
19572 if (!IsVqdotqOpc(DotOp.getOpcode()))
19573 return SDValue();
19574
19575 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19576 const RISCVSubtarget &Subtarget) {
19577 if (N->getOpcode() == ISD::ADD) {
19578 SDLoc DL(N);
19579 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19580 Subtarget);
19581 }
19582 return std::make_pair(N->getOperand(3), N->getOperand(4));
19583 }(N, DAG, Subtarget);
19584
19585 SDValue MulVL = DotOp.getOperand(4);
19586 if (AddVL != MulVL)
19587 return SDValue();
19588
19589 if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
19590 AddMask.getOperand(0) != MulVL)
19591 return SDValue();
19592
19593 SDValue AccumOp = DotOp.getOperand(2);
19594 SDLoc DL(N);
19595 EVT VT = N->getValueType(0);
19596 Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
19597 DAG.getUNDEF(VT), AddMask, AddVL);
19598
19599 SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
19600 DotOp.getOperand(3), DotOp->getOperand(4)};
19601 return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
19602}
19603
19604static bool
19606 ISD::MemIndexType &IndexType,
19608 if (!DCI.isBeforeLegalize())
19609 return false;
19610
19611 SelectionDAG &DAG = DCI.DAG;
19612 const MVT XLenVT =
19613 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
19614
19615 const EVT IndexVT = Index.getValueType();
19616
19617 // RISC-V indexed loads only support the "unsigned unscaled" addressing
19618 // mode, so anything else must be manually legalized.
19619 if (!isIndexTypeSigned(IndexType))
19620 return false;
19621
19622 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
19623 // Any index legalization should first promote to XLenVT, so we don't lose
19624 // bits when scaling. This may create an illegal index type so we let
19625 // LLVM's legalization take care of the splitting.
19626 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
19627 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
19628 IndexVT.changeVectorElementType(XLenVT), Index);
19629 }
19630 IndexType = ISD::UNSIGNED_SCALED;
19631 return true;
19632}
19633
19634/// Match the index vector of a scatter or gather node as the shuffle mask
19635/// which performs the rearrangement if possible. Will only match if
19636/// all lanes are touched, and thus replacing the scatter or gather with
19637/// a unit strided access and shuffle is legal.
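/// Illustrative example: an i32 gather whose constant index vector encodes the
/// byte offsets {4, 0, 12, 8} touches every lane exactly once, so it can be
/// rewritten as a unit-strided load followed by the shuffle mask {1, 0, 3, 2}.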
19638static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
19639 SmallVector<int> &ShuffleMask) {
19640 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19641 return false;
19642 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19643 return false;
19644
19645 const unsigned ElementSize = VT.getScalarStoreSize();
19646 const unsigned NumElems = VT.getVectorNumElements();
19647
19648 // Create the shuffle mask and check all bits active
19649 assert(ShuffleMask.empty());
19650 BitVector ActiveLanes(NumElems);
19651 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19652 // TODO: We've found an active bit of UB, and could be
19653 // more aggressive here if desired.
19654 if (Index->getOperand(i)->isUndef())
19655 return false;
19656 uint64_t C = Index->getConstantOperandVal(i);
19657 if (C % ElementSize != 0)
19658 return false;
19659 C = C / ElementSize;
19660 if (C >= NumElems)
19661 return false;
19662 ShuffleMask.push_back(C);
19663 ActiveLanes.set(C);
19664 }
19665 return ActiveLanes.all();
19666}
19667
19668/// Match the index of a gather or scatter operation as an operation
19669/// with twice the element width and half the number of elements. This is
19670/// generally profitable (if legal) because these operations are linear
19671 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
19672/// come out ahead.
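/// As an illustrative sketch (values chosen for exposition, assuming ELEN and
/// alignment permit 64-bit elements): a v4i32 gather with an all-ones mask and
/// constant byte-offset index <0, 4, 8, 12> pairs up as (0, 4) and (8, 12), so
/// it can instead be issued as a v2i64 gather with index <0, 8> and the result
/// bitcast back to v4i32.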
19673static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
19674 Align BaseAlign, const RISCVSubtarget &ST) {
19675 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19676 return false;
19677 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19678 return false;
19679
19680 // Attempt a doubling. If we can use an element type 4x or 8x in
19681 // size, this will happen via multiple iterations of the transform.
19682 const unsigned NumElems = VT.getVectorNumElements();
19683 if (NumElems % 2 != 0)
19684 return false;
19685
19686 const unsigned ElementSize = VT.getScalarStoreSize();
19687 const unsigned WiderElementSize = ElementSize * 2;
19688 if (WiderElementSize > ST.getELen()/8)
19689 return false;
19690
19691 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
19692 return false;
19693
19694 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19695 // TODO: We've found an active bit of UB, and could be
19696 // more aggressive here if desired.
19697 if (Index->getOperand(i)->isUndef())
19698 return false;
19699 // TODO: This offset check is too strict if we support fully
19700 // misaligned memory operations.
19701 uint64_t C = Index->getConstantOperandVal(i);
19702 if (i % 2 == 0) {
19703 if (C % WiderElementSize != 0)
19704 return false;
19705 continue;
19706 }
19707 uint64_t Last = Index->getConstantOperandVal(i-1);
19708 if (C != Last + ElementSize)
19709 return false;
19710 }
19711 return true;
19712}
19713
19714// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
19715 // This is beneficial for the cases where X and Y are both the same value
19716// type of low precision vectors. Since the truncate would be lowered into
19717// n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
19718// restriction, such pattern would be expanded into a series of "vsetvli"
19719// and "vnsrl" instructions later to reach this point.
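// As an illustrative sketch (types chosen for exposition): for v4i8 values X
// and Y, (v4i8 trunc (v4i32 sra (sext X), (zext Y))) becomes
// (v4i8 sra X, (smin Y, 7)), clamping the shift amount to stay within the
// narrow element width.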
19720 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
19721 SDValue Mask = N->getOperand(1);
19722 SDValue VL = N->getOperand(2);
19723
19724 bool IsVLMAX = isAllOnesConstant(VL) ||
19725 (isa<RegisterSDNode>(VL) &&
19726 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
19727 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
19728 Mask.getOperand(0) != VL)
19729 return SDValue();
19730
19731 auto IsTruncNode = [&](SDValue V) {
19732 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19733 V.getOperand(1) == Mask && V.getOperand(2) == VL;
19734 };
19735
19736 SDValue Op = N->getOperand(0);
19737
19738 // We first need to find the innermost TRUNCATE_VECTOR_VL node
19739 // in order to match this pattern.
19740 while (IsTruncNode(Op)) {
19741 if (!Op.hasOneUse())
19742 return SDValue();
19743 Op = Op.getOperand(0);
19744 }
19745
19746 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
19747 return SDValue();
19748
19749 SDValue N0 = Op.getOperand(0);
19750 SDValue N1 = Op.getOperand(1);
19751 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
19752 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
19753 return SDValue();
19754
19755 SDValue N00 = N0.getOperand(0);
19756 SDValue N10 = N1.getOperand(0);
19757 if (!N00.getValueType().isVector() ||
19758 N00.getValueType() != N10.getValueType() ||
19759 N->getValueType(0) != N10.getValueType())
19760 return SDValue();
19761
19762 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
19763 SDValue SMin =
19764 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
19765 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
19766 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
19767}
19768
19769// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
19770// maximum value for the truncated type.
19771// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
19772// is the signed maximum value for the truncated type and C2 is the signed
19773// minimum value.
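// As an illustrative sketch for an i16 -> i8 truncate: C is 255 in the
// unsigned case, while C1 is 127 and C2 is -128 in the signed case.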
19774 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
19775 const RISCVSubtarget &Subtarget) {
19776 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
19777
19778 MVT VT = N->getSimpleValueType(0);
19779
19780 SDValue Mask = N->getOperand(1);
19781 SDValue VL = N->getOperand(2);
19782
19783 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
19784 APInt &SplatVal) {
19785 if (V.getOpcode() != Opc &&
19786 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
19787 V.getOperand(3) == Mask && V.getOperand(4) == VL))
19788 return SDValue();
19789
19790 SDValue Op = V.getOperand(1);
19791
19792 // Peek through conversion between fixed and scalable vectors.
19793 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
19794 isNullConstant(Op.getOperand(2)) &&
19795 Op.getOperand(1).getValueType().isFixedLengthVector() &&
19796 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19797 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
19798 isNullConstant(Op.getOperand(1).getOperand(1)))
19799 Op = Op.getOperand(1).getOperand(0);
19800
19801 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
19802 return V.getOperand(0);
19803
19804 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
19805 Op.getOperand(2) == VL) {
19806 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
19807 SplatVal =
19808 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
19809 return V.getOperand(0);
19810 }
19811 }
19812
19813 return SDValue();
19814 };
19815
19816 SDLoc DL(N);
19817
19818 auto DetectUSatPattern = [&](SDValue V) {
19819 APInt LoC, HiC;
19820
19821 // Simple case, V is a UMIN.
19822 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
19823 if (HiC.isMask(VT.getScalarSizeInBits()))
19824 return UMinOp;
19825
19826 // If we have an SMAX that removes negative numbers first, then we can match
19827 // SMIN instead of UMIN.
19828 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19829 if (SDValue SMaxOp =
19830 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19831 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
19832 return SMinOp;
19833
19834 // If we have an SMIN before an SMAX and the SMAX constant is less than or
19835 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
19836 // first.
19837 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19838 if (SDValue SMinOp =
19839 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19840 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
19841 HiC.uge(LoC))
19842 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
19843 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
19844 Mask, VL);
19845
19846 return SDValue();
19847 };
19848
19849 auto DetectSSatPattern = [&](SDValue V) {
19850 unsigned NumDstBits = VT.getScalarSizeInBits();
19851 unsigned NumSrcBits = V.getScalarValueSizeInBits();
19852 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
19853 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
19854
19855 APInt HiC, LoC;
19856 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19857 if (SDValue SMaxOp =
19858 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19859 if (HiC == SignedMax && LoC == SignedMin)
19860 return SMaxOp;
19861
19862 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19863 if (SDValue SMinOp =
19864 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19865 if (HiC == SignedMax && LoC == SignedMin)
19866 return SMinOp;
19867
19868 return SDValue();
19869 };
19870
19871 SDValue Src = N->getOperand(0);
19872
19873 // Look through multiple layers of truncates.
19874 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19875 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
19876 Src.hasOneUse())
19877 Src = Src.getOperand(0);
19878
19879 SDValue Val;
19880 unsigned ClipOpc;
19881 if ((Val = DetectUSatPattern(Src)))
19882 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
19883 else if ((Val = DetectSSatPattern(Src)))
19884 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
19885 else
19886 return SDValue();
19887
19888 MVT ValVT = Val.getSimpleValueType();
19889
19890 do {
19891 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
19892 ValVT = ValVT.changeVectorElementType(ValEltVT);
19893 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
19894 } while (ValVT != VT);
19895
19896 return Val;
19897}
19898
19899// Convert
19900// (iX ctpop (bitcast (vXi1 A)))
19901// ->
19902// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19903// and
19904// (iN reduce.add (zext (vXi1 A to vXiN))
19905// ->
19906// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19907// FIXME: It's complicated to match all the variations of this after type
19908// legalization so we only handle the pre-type legalization pattern, but that
19909// requires the fixed vector type to be legal.
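// As an illustrative sketch: (i64 (vecreduce_add (zext v8i1 A to v8i64)))
// counts the set bits of the mask A, so it becomes a single vcpop.m on A whose
// XLenVT result is zero-extended or truncated to the destination type.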
19910 static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
19911 const RISCVSubtarget &Subtarget) {
19912 unsigned Opc = N->getOpcode();
19913 assert((Opc == ISD::CTPOP || Opc == ISD::VECREDUCE_ADD) &&
19914 "Unexpected opcode");
19915 EVT VT = N->getValueType(0);
19916 if (!VT.isScalarInteger())
19917 return SDValue();
19918
19919 SDValue Src = N->getOperand(0);
19920
19921 if (Opc == ISD::CTPOP) {
19922 // Peek through zero_extend. It doesn't change the count.
19923 if (Src.getOpcode() == ISD::ZERO_EXTEND)
19924 Src = Src.getOperand(0);
19925
19926 if (Src.getOpcode() != ISD::BITCAST)
19927 return SDValue();
19928 Src = Src.getOperand(0);
19929 } else if (Opc == ISD::VECREDUCE_ADD) {
19930 if (Src.getOpcode() != ISD::ZERO_EXTEND)
19931 return SDValue();
19932 Src = Src.getOperand(0);
19933 }
19934
19935 EVT SrcEVT = Src.getValueType();
19936 if (!SrcEVT.isSimple())
19937 return SDValue();
19938
19939 MVT SrcMVT = SrcEVT.getSimpleVT();
19940 // Make sure the input is an i1 vector.
19941 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
19942 return SDValue();
19943
19944 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19945 if (!TLI.isTypeLegal(SrcMVT))
19946 return SDValue();
19947
19948 // Check that destination type is large enough to hold result without
19949 // overflow.
19950 if (Opc == ISD::VECREDUCE_ADD) {
19951 unsigned EltSize = SrcMVT.getScalarSizeInBits();
19952 unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
19953 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
19954 unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
19955 ? SrcMVT.getVectorNumElements()
19956 : RISCVTargetLowering::computeVLMAX(
19957 VectorBitsMax, EltSize, MinSize);
19958 if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
19959 return SDValue();
19960 }
19961
19962 MVT ContainerVT = SrcMVT;
19963 if (SrcMVT.isFixedLengthVector()) {
19964 ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
19965 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
19966 }
19967
19968 SDLoc DL(N);
19969 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
19970
19971 MVT XLenVT = Subtarget.getXLenVT();
19972 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
19973 return DAG.getZExtOrTrunc(Pop, DL, VT);
19974}
19975
19976 static SDValue performSHLCombine(SDNode *N,
19977 TargetLowering::DAGCombinerInfo &DCI,
19978 const RISCVSubtarget &Subtarget) {
19979 // (shl (zext x), y) -> (vwsll x, y)
19980 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
19981 return V;
19982
19983 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
19984 // (shl (zext x), C) -> (vwmulu x, 1u << C)
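// As an illustrative sketch: (shl (zext v4i8 X to v4i16), 3) becomes
// (vwmulu X, 8), i.e. a widening multiply by 1 << 3.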
19985
19986 if (!DCI.isAfterLegalizeDAG())
19987 return SDValue();
19988
19989 SDValue LHS = N->getOperand(0);
19990 if (!LHS.hasOneUse())
19991 return SDValue();
19992 unsigned Opcode;
19993 switch (LHS.getOpcode()) {
19994 case ISD::SIGN_EXTEND:
19995 case RISCVISD::VSEXT_VL:
19996 Opcode = RISCVISD::VWMULSU_VL;
19997 break;
19998 case ISD::ZERO_EXTEND:
19999 case RISCVISD::VZEXT_VL:
20000 Opcode = RISCVISD::VWMULU_VL;
20001 break;
20002 default:
20003 return SDValue();
20004 }
20005
20006 SDValue RHS = N->getOperand(1);
20007 APInt ShAmt;
20008 uint64_t ShAmtInt;
20009 if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
20010 ShAmtInt = ShAmt.getZExtValue();
20011 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
20012 RHS.getOperand(1).getOpcode() == ISD::Constant)
20013 ShAmtInt = RHS.getConstantOperandVal(1);
20014 else
20015 return SDValue();
20016
20017 // Better foldings:
20018 // (shl (sext x), 1) -> (vwadd x, x)
20019 // (shl (zext x), 1) -> (vwaddu x, x)
20020 if (ShAmtInt <= 1)
20021 return SDValue();
20022
20023 SDValue NarrowOp = LHS.getOperand(0);
20024 MVT NarrowVT = NarrowOp.getSimpleValueType();
20025 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
20026 if (ShAmtInt >= NarrowBits)
20027 return SDValue();
20028 MVT VT = N->getSimpleValueType(0);
20029 if (NarrowBits * 2 != VT.getScalarSizeInBits())
20030 return SDValue();
20031
20032 SelectionDAG &DAG = DCI.DAG;
20033 SDLoc DL(N);
20034 SDValue Passthru, Mask, VL;
20035 switch (N->getOpcode()) {
20036 case ISD::SHL:
20037 Passthru = DAG.getUNDEF(VT);
20038 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
20039 break;
20040 case RISCVISD::SHL_VL:
20041 Passthru = N->getOperand(2);
20042 Mask = N->getOperand(3);
20043 VL = N->getOperand(4);
20044 break;
20045 default:
20046 llvm_unreachable("Expected SHL");
20047 }
20048 return DAG.getNode(Opcode, DL, VT, NarrowOp,
20049 DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
20050 Passthru, Mask, VL);
20051}
20052
20053 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
20054 DAGCombinerInfo &DCI) const {
20055 SelectionDAG &DAG = DCI.DAG;
20056 const MVT XLenVT = Subtarget.getXLenVT();
20057 SDLoc DL(N);
20058
20059 // Helper to call SimplifyDemandedBits on an operand of N where only some low
20060 // bits are demanded. N will be added to the Worklist if it was not deleted.
20061 // Caller should return SDValue(N, 0) if this returns true.
20062 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
20063 SDValue Op = N->getOperand(OpNo);
20064 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
20065 if (!SimplifyDemandedBits(Op, Mask, DCI))
20066 return false;
20067
20068 if (N->getOpcode() != ISD::DELETED_NODE)
20069 DCI.AddToWorklist(N);
20070 return true;
20071 };
20072
20073 switch (N->getOpcode()) {
20074 default:
20075 break;
20076 case RISCVISD::SplitF64: {
20077 SDValue Op0 = N->getOperand(0);
20078 // If the input to SplitF64 is just BuildPairF64 then the operation is
20079 // redundant. Instead, use BuildPairF64's operands directly.
20080 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
20081 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
20082
20083 if (Op0->isUndef()) {
20084 SDValue Lo = DAG.getUNDEF(MVT::i32);
20085 SDValue Hi = DAG.getUNDEF(MVT::i32);
20086 return DCI.CombineTo(N, Lo, Hi);
20087 }
20088
20089 // It's cheaper to materialise two 32-bit integers than to load a double
20090 // from the constant pool and transfer it to integer registers through the
20091 // stack.
20092 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
20093 APInt V = C->getValueAPF().bitcastToAPInt();
20094 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
20095 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
20096 return DCI.CombineTo(N, Lo, Hi);
20097 }
20098
20099 // This is a target-specific version of a DAGCombine performed in
20100 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20101 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20102 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20103 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20104 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
20105 break;
20106 SDValue NewSplitF64 =
20107 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
20108 Op0.getOperand(0));
20109 SDValue Lo = NewSplitF64.getValue(0);
20110 SDValue Hi = NewSplitF64.getValue(1);
20111 APInt SignBit = APInt::getSignMask(32);
20112 if (Op0.getOpcode() == ISD::FNEG) {
20113 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
20114 DAG.getConstant(SignBit, DL, MVT::i32));
20115 return DCI.CombineTo(N, Lo, NewHi);
20116 }
20117 assert(Op0.getOpcode() == ISD::FABS);
20118 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
20119 DAG.getConstant(~SignBit, DL, MVT::i32));
20120 return DCI.CombineTo(N, Lo, NewHi);
20121 }
20122 case RISCVISD::SLLW:
20123 case RISCVISD::SRAW:
20124 case RISCVISD::SRLW:
20125 case RISCVISD::RORW:
20126 case RISCVISD::ROLW: {
20127 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
20128 if (SimplifyDemandedLowBitsHelper(0, 32) ||
20129 SimplifyDemandedLowBitsHelper(1, 5))
20130 return SDValue(N, 0);
20131
20132 break;
20133 }
20134 case RISCVISD::CLZW:
20135 case RISCVISD::CTZW: {
20136 // Only the lower 32 bits of the first operand are read
20137 if (SimplifyDemandedLowBitsHelper(0, 32))
20138 return SDValue(N, 0);
20139 break;
20140 }
20141 case RISCVISD::FMV_W_X_RV64: {
20142 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
20143 // conversion is unnecessary and can be replaced with the
20144 // FMV_X_ANYEXTW_RV64 operand.
20145 SDValue Op0 = N->getOperand(0);
20146 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
20147 return Op0.getOperand(0);
20148 break;
20149 }
20150 case RISCVISD::FMV_X_ANYEXTH:
20151 case RISCVISD::FMV_X_ANYEXTW_RV64: {
20152 SDLoc DL(N);
20153 SDValue Op0 = N->getOperand(0);
20154 MVT VT = N->getSimpleValueType(0);
20155
20156 // Constant fold.
20157 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
20158 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
20159 return DAG.getConstant(Val, DL, VT);
20160 }
20161
20162 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
20163 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
20164 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
20165 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
20166 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
20167 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
20168 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
20169 assert(Op0.getOperand(0).getValueType() == VT &&
20170 "Unexpected value type!");
20171 return Op0.getOperand(0);
20172 }
20173
20174 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
20175 cast<LoadSDNode>(Op0)->isSimple()) {
20176 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
20177 auto *LN0 = cast<LoadSDNode>(Op0);
20178 SDValue Load =
20179 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
20180 LN0->getBasePtr(), IVT, LN0->getMemOperand());
20181 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
20182 return Load;
20183 }
20184
20185 // This is a target-specific version of a DAGCombine performed in
20186 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20187 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20188 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20189 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20190 !Op0.getNode()->hasOneUse())
20191 break;
20192 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
20193 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
20194 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
20195 if (Op0.getOpcode() == ISD::FNEG)
20196 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
20197 DAG.getConstant(SignBit, DL, VT));
20198
20199 assert(Op0.getOpcode() == ISD::FABS);
20200 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
20201 DAG.getConstant(~SignBit, DL, VT));
20202 }
20203 case ISD::ABS: {
20204 EVT VT = N->getValueType(0);
20205 SDValue N0 = N->getOperand(0);
20206 // abs (sext) -> zext (abs)
20207 // abs (zext) -> zext (handled elsewhere)
20208 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
20209 SDValue Src = N0.getOperand(0);
20210 SDLoc DL(N);
20211 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
20212 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
20213 }
20214 break;
20215 }
20216 case ISD::ADD: {
20217 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20218 return V;
20219 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
20220 return V;
20221 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20222 return V;
20223 return performADDCombine(N, DCI, Subtarget);
20224 }
20225 case ISD::SUB: {
20226 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20227 return V;
20228 return performSUBCombine(N, DAG, Subtarget);
20229 }
20230 case ISD::AND:
20231 return performANDCombine(N, DCI, Subtarget);
20232 case ISD::OR: {
20233 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20234 return V;
20235 return performORCombine(N, DCI, Subtarget);
20236 }
20237 case ISD::XOR:
20238 return performXORCombine(N, DAG, Subtarget);
20239 case ISD::MUL:
20240 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20241 return V;
20242 return performMULCombine(N, DAG, DCI, Subtarget);
20243 case ISD::SDIV:
20244 case ISD::UDIV:
20245 case ISD::SREM:
20246 case ISD::UREM:
20247 if (SDValue V = combineBinOpOfZExt(N, DAG))
20248 return V;
20249 break;
20250 case ISD::FMUL: {
20251 using namespace SDPatternMatch;
20252 SDLoc DL(N);
20253 EVT VT = N->getValueType(0);
20254 SDValue X, Y;
20255 // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
20256 // hoistFNegAboveFMulFDiv.
20257 // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
20258 if (sd_match(N, m_FMul(m_Value(X), m_FNeg(m_Value(Y)))))
20259 return DAG.getNode(ISD::FNEG, DL, VT,
20260 DAG.getNode(ISD::FMUL, DL, VT, X, Y));
20261
20262 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
20263 SDValue N0 = N->getOperand(0);
20264 SDValue N1 = N->getOperand(1);
20265 if (N0->getOpcode() != ISD::FCOPYSIGN)
20266 std::swap(N0, N1);
20267 if (N0->getOpcode() != ISD::FCOPYSIGN)
20268 return SDValue();
20269 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
20270 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
20271 return SDValue();
20272 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
20273 return SDValue();
20274 SDValue Sign = N0->getOperand(1);
20275 if (Sign.getValueType() != VT)
20276 return SDValue();
20277 return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
20278 }
20279 case ISD::FADD:
20280 case ISD::UMAX:
20281 case ISD::UMIN:
20282 case ISD::SMAX:
20283 case ISD::SMIN:
20284 case ISD::FMAXNUM:
20285 case ISD::FMINNUM: {
20286 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
20287 return V;
20288 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
20289 return V;
20290 return SDValue();
20291 }
20292 case ISD::SETCC:
20293 return performSETCCCombine(N, DCI, Subtarget);
20294 case ISD::SIGN_EXTEND_INREG:
20295 return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
20296 case ISD::ZERO_EXTEND:
20297 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
20298 // type legalization. This is safe because fp_to_uint produces poison if
20299 // it overflows.
20300 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
20301 SDValue Src = N->getOperand(0);
20302 if (Src.getOpcode() == ISD::FP_TO_UINT &&
20303 isTypeLegal(Src.getOperand(0).getValueType()))
20304 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
20305 Src.getOperand(0));
20306 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
20307 isTypeLegal(Src.getOperand(1).getValueType())) {
20308 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
20309 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
20310 Src.getOperand(0), Src.getOperand(1));
20311 DCI.CombineTo(N, Res);
20312 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
20313 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
20314 return SDValue(N, 0); // Return N so it doesn't get rechecked.
20315 }
20316 }
20317 return SDValue();
20318 case RISCVISD::TRUNCATE_VECTOR_VL:
20319 if (SDValue V = combineTruncOfSraSext(N, DAG))
20320 return V;
20321 return combineTruncToVnclip(N, DAG, Subtarget);
20322 case ISD::VP_TRUNCATE:
20323 return performVP_TRUNCATECombine(N, DAG, Subtarget);
20324 case ISD::TRUNCATE:
20325 return performTRUNCATECombine(N, DAG, Subtarget);
20326 case ISD::SELECT:
20327 return performSELECTCombine(N, DAG, Subtarget);
20328 case ISD::VSELECT:
20329 return performVSELECTCombine(N, DAG);
20330 case RISCVISD::CZERO_EQZ:
20331 case RISCVISD::CZERO_NEZ: {
20332 SDValue Val = N->getOperand(0);
20333 SDValue Cond = N->getOperand(1);
20334
20335 unsigned Opc = N->getOpcode();
20336
20337 // czero_eqz x, x -> x
20338 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
20339 return Val;
20340
20341 unsigned InvOpc =
20342 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
20343
20344 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
20345 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
20346 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
20347 SDValue NewCond = Cond.getOperand(0);
20348 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
20349 if (DAG.MaskedValueIsZero(NewCond, Mask))
20350 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
20351 }
20352 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
20353 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
20354 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
20355 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
20356 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
20357 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
20358 if (ISD::isIntEqualitySetCC(CCVal))
20359 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
20360 N->getValueType(0), Val, Cond.getOperand(0));
20361 }
20362 return SDValue();
20363 }
20364 case RISCVISD::SELECT_CC: {
20365 // Transform select_cc into cheaper forms where possible.
20366 SDValue LHS = N->getOperand(0);
20367 SDValue RHS = N->getOperand(1);
20368 SDValue CC = N->getOperand(2);
20369 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
20370 SDValue TrueV = N->getOperand(3);
20371 SDValue FalseV = N->getOperand(4);
20372 SDLoc DL(N);
20373 EVT VT = N->getValueType(0);
20374
20375 // If the True and False values are the same, we don't need a select_cc.
20376 if (TrueV == FalseV)
20377 return TrueV;
20378
20379 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
20380 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
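// As an illustrative sketch on RV64 with y = 4 and z = 1:
// (select (x < 0), 4, 1) becomes ((x >> 63) & (4 - 1)) + 1, which yields 4
// when x is negative ((-1 & 3) + 1) and 1 otherwise ((0 & 3) + 1).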
20381 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
20382 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
20383 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
20384 if (CCVal == ISD::CondCode::SETGE)
20385 std::swap(TrueV, FalseV);
20386
20387 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
20388 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
20389 // Only handle simm12; if the constant is not in this range, it can be
20390 // treated as a register operand.
20391 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
20392 isInt<12>(TrueSImm - FalseSImm)) {
20393 SDValue SRA =
20394 DAG.getNode(ISD::SRA, DL, VT, LHS,
20395 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
20396 SDValue AND =
20397 DAG.getNode(ISD::AND, DL, VT, SRA,
20398 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
20399 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
20400 }
20401
20402 if (CCVal == ISD::CondCode::SETGE)
20403 std::swap(TrueV, FalseV);
20404 }
20405
20406 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20407 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
20408 {LHS, RHS, CC, TrueV, FalseV});
20409
20410 if (!Subtarget.hasConditionalMoveFusion()) {
20411 // (select c, -1, y) -> -c | y
20412 if (isAllOnesConstant(TrueV)) {
20413 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20414 SDValue Neg = DAG.getNegative(C, DL, VT);
20415 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
20416 }
20417 // (select c, y, -1) -> -!c | y
20418 if (isAllOnesConstant(FalseV)) {
20419 SDValue C =
20420 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20421 SDValue Neg = DAG.getNegative(C, DL, VT);
20422 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
20423 }
20424
20425 // (select c, 0, y) -> -!c & y
20426 if (isNullConstant(TrueV)) {
20427 SDValue C =
20428 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20429 SDValue Neg = DAG.getNegative(C, DL, VT);
20430 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
20431 }
20432 // (select c, y, 0) -> -c & y
20433 if (isNullConstant(FalseV)) {
20434 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20435 SDValue Neg = DAG.getNegative(C, DL, VT);
20436 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
20437 }
20438 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
20439 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
20440 if (((isOneConstant(FalseV) && LHS == TrueV &&
20441 CCVal == ISD::CondCode::SETNE) ||
20442 (isOneConstant(TrueV) && LHS == FalseV &&
20443 CCVal == ISD::CondCode::SETEQ)) &&
20444 isNullConstant(RHS)) {
20445 // freeze it to be safe.
20446 LHS = DAG.getFreeze(LHS);
20447 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
20448 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
20449 }
20450 }
20451
20452 // If both true/false are an xor with 1, pull through the select.
20453 // This can occur after op legalization if both operands are setccs that
20454 // require an xor to invert.
20455 // FIXME: Generalize to other binary ops with identical operand?
20456 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
20457 TrueV.getOperand(1) == FalseV.getOperand(1) &&
20458 isOneConstant(TrueV.getOperand(1)) &&
20459 TrueV.hasOneUse() && FalseV.hasOneUse()) {
20460 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
20461 TrueV.getOperand(0), FalseV.getOperand(0));
20462 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
20463 }
20464
20465 return SDValue();
20466 }
20467 case RISCVISD::BR_CC: {
20468 SDValue LHS = N->getOperand(1);
20469 SDValue RHS = N->getOperand(2);
20470 SDValue CC = N->getOperand(3);
20471 SDLoc DL(N);
20472
20473 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20474 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
20475 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
20476
20477 return SDValue();
20478 }
20479 case ISD::BITREVERSE:
20480 return performBITREVERSECombine(N, DAG, Subtarget);
20481 case ISD::FP_TO_SINT:
20482 case ISD::FP_TO_UINT:
20483 return performFP_TO_INTCombine(N, DCI, Subtarget);
20484 case ISD::FP_TO_SINT_SAT:
20485 case ISD::FP_TO_UINT_SAT:
20486 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
20487 case ISD::FCOPYSIGN: {
20488 EVT VT = N->getValueType(0);
20489 if (!VT.isVector())
20490 break;
20491 // There is a form of VFSGNJ which injects the negated sign of its second
20492 // operand. Try and bubble any FNEG up after the extend/round to produce
20493 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
20494 // its TRUNC operand set to 1.
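// As an illustrative sketch: (fcopysign X, (fp_extend (fneg Y))) is rewritten
// to (fcopysign X, (fneg (fp_extend Y))), which a later pattern can select as
// a sign-negating VFSGNJ.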
20495 SDValue In2 = N->getOperand(1);
20496 // Avoid cases where the extend/round has multiple uses, as duplicating
20497 // those is typically more expensive than removing a fneg.
20498 if (!In2.hasOneUse())
20499 break;
20500 if (In2.getOpcode() != ISD::FP_EXTEND &&
20501 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
20502 break;
20503 In2 = In2.getOperand(0);
20504 if (In2.getOpcode() != ISD::FNEG)
20505 break;
20506 SDLoc DL(N);
20507 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
20508 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
20509 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
20510 }
20511 case ISD::MGATHER: {
20512 const auto *MGN = cast<MaskedGatherSDNode>(N);
20513 const EVT VT = N->getValueType(0);
20514 SDValue Index = MGN->getIndex();
20515 SDValue ScaleOp = MGN->getScale();
20516 ISD::MemIndexType IndexType = MGN->getIndexType();
20517 assert(!MGN->isIndexScaled() &&
20518 "Scaled gather/scatter should not be formed");
20519
20520 SDLoc DL(N);
20521 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20522 return DAG.getMaskedGather(
20523 N->getVTList(), MGN->getMemoryVT(), DL,
20524 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20525 MGN->getBasePtr(), Index, ScaleOp},
20526 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20527
20528 if (narrowIndex(Index, IndexType, DAG))
20529 return DAG.getMaskedGather(
20530 N->getVTList(), MGN->getMemoryVT(), DL,
20531 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20532 MGN->getBasePtr(), Index, ScaleOp},
20533 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20534
20535 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
20536 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
20537 // The sequence will be XLenVT, not the type of Index. Tell
20538 // isSimpleVIDSequence this so we avoid overflow.
20539 if (std::optional<VIDSequence> SimpleVID =
20540 isSimpleVIDSequence(Index, Subtarget.getXLen());
20541 SimpleVID && SimpleVID->StepDenominator == 1) {
20542 const int64_t StepNumerator = SimpleVID->StepNumerator;
20543 const int64_t Addend = SimpleVID->Addend;
20544
20545 // Note: We don't need to check alignment here since (by assumption
20546 // from the existence of the gather), our offsets must be sufficiently
20547 // aligned.
20548
20549 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
20550 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
20551 assert(IndexType == ISD::UNSIGNED_SCALED);
20552 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
20553 DAG.getSignedConstant(Addend, DL, PtrVT));
20554
20555 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
20556 VT.getVectorElementCount());
20557 SDValue StridedLoad = DAG.getStridedLoadVP(
20558 VT, DL, MGN->getChain(), BasePtr,
20559 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
20560 EVL, MGN->getMemOperand());
20561 SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad,
20562 MGN->getPassThru());
20563 return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)},
20564 DL);
20565 }
20566 }
20567
20568 SmallVector<int> ShuffleMask;
20569 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20570 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
20571 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
20572 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
20573 MGN->getMask(), DAG.getUNDEF(VT),
20574 MGN->getMemoryVT(), MGN->getMemOperand(),
20575 ISD::UNINDEXED, ISD::NON_EXTLOAD);
20576 SDValue Shuffle =
20577 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
20578 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
20579 }
20580
20581 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20582 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
20583 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
20584 SmallVector<SDValue> NewIndices;
20585 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
20586 NewIndices.push_back(Index.getOperand(i));
20587 EVT IndexVT = Index.getValueType()
20588 .getHalfNumVectorElementsVT(*DAG.getContext());
20589 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
20590
20591 unsigned ElementSize = VT.getScalarStoreSize();
20592 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
20593 auto EltCnt = VT.getVectorElementCount();
20594 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
20595 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
20596 EltCnt.divideCoefficientBy(2));
20597 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
20598 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
20599 EltCnt.divideCoefficientBy(2));
20600 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
20601
20602 SDValue Gather =
20603 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
20604 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
20605 Index, ScaleOp},
20606 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
20607 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
20608 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
20609 }
20610 break;
20611 }
20612 case ISD::MSCATTER: {
20613 const auto *MSN = cast<MaskedScatterSDNode>(N);
20614 SDValue Index = MSN->getIndex();
20615 SDValue ScaleOp = MSN->getScale();
20616 ISD::MemIndexType IndexType = MSN->getIndexType();
20617 assert(!MSN->isIndexScaled() &&
20618 "Scaled gather/scatter should not be formed");
20619
20620 SDLoc DL(N);
20621 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20622 return DAG.getMaskedScatter(
20623 N->getVTList(), MSN->getMemoryVT(), DL,
20624 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20625 Index, ScaleOp},
20626 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20627
20628 if (narrowIndex(Index, IndexType, DAG))
20629 return DAG.getMaskedScatter(
20630 N->getVTList(), MSN->getMemoryVT(), DL,
20631 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20632 Index, ScaleOp},
20633 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20634
20635 EVT VT = MSN->getValue()->getValueType(0);
20636 SmallVector<int> ShuffleMask;
20637 if (!MSN->isTruncatingStore() &&
20638 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
20639 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
20640 DAG.getUNDEF(VT), ShuffleMask);
20641 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
20642 DAG.getUNDEF(XLenVT), MSN->getMask(),
20643 MSN->getMemoryVT(), MSN->getMemOperand(),
20644 ISD::UNINDEXED, false);
20645 }
20646 break;
20647 }
20648 case ISD::VP_GATHER: {
20649 const auto *VPGN = cast<VPGatherSDNode>(N);
20650 SDValue Index = VPGN->getIndex();
20651 SDValue ScaleOp = VPGN->getScale();
20652 ISD::MemIndexType IndexType = VPGN->getIndexType();
20653 assert(!VPGN->isIndexScaled() &&
20654 "Scaled gather/scatter should not be formed");
20655
20656 SDLoc DL(N);
20657 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20658 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20659 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20660 ScaleOp, VPGN->getMask(),
20661 VPGN->getVectorLength()},
20662 VPGN->getMemOperand(), IndexType);
20663
20664 if (narrowIndex(Index, IndexType, DAG))
20665 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20666 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20667 ScaleOp, VPGN->getMask(),
20668 VPGN->getVectorLength()},
20669 VPGN->getMemOperand(), IndexType);
20670
20671 break;
20672 }
20673 case ISD::VP_SCATTER: {
20674 const auto *VPSN = cast<VPScatterSDNode>(N);
20675 SDValue Index = VPSN->getIndex();
20676 SDValue ScaleOp = VPSN->getScale();
20677 ISD::MemIndexType IndexType = VPSN->getIndexType();
20678 assert(!VPSN->isIndexScaled() &&
20679 "Scaled gather/scatter should not be formed");
20680
20681 SDLoc DL(N);
20682 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20683 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20684 {VPSN->getChain(), VPSN->getValue(),
20685 VPSN->getBasePtr(), Index, ScaleOp,
20686 VPSN->getMask(), VPSN->getVectorLength()},
20687 VPSN->getMemOperand(), IndexType);
20688
20689 if (narrowIndex(Index, IndexType, DAG))
20690 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20691 {VPSN->getChain(), VPSN->getValue(),
20692 VPSN->getBasePtr(), Index, ScaleOp,
20693 VPSN->getMask(), VPSN->getVectorLength()},
20694 VPSN->getMemOperand(), IndexType);
20695 break;
20696 }
20697 case RISCVISD::SHL_VL:
20698 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20699 return V;
20700 [[fallthrough]];
20701 case RISCVISD::SRA_VL:
20702 case RISCVISD::SRL_VL: {
20703 SDValue ShAmt = N->getOperand(1);
20704 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20705 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20706 SDLoc DL(N);
20707 SDValue VL = N->getOperand(4);
20708 EVT VT = N->getValueType(0);
20709 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20710 ShAmt.getOperand(1), VL);
20711 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
20712 N->getOperand(2), N->getOperand(3), N->getOperand(4));
20713 }
20714 break;
20715 }
20716 case ISD::SRA:
20717 if (SDValue V = performSRACombine(N, DAG, Subtarget))
20718 return V;
20719 [[fallthrough]];
20720 case ISD::SRL:
20721 case ISD::SHL: {
20722 if (N->getOpcode() == ISD::SHL) {
20723 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20724 return V;
20725 }
20726 SDValue ShAmt = N->getOperand(1);
20727 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20728 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20729 SDLoc DL(N);
20730 EVT VT = N->getValueType(0);
20731 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20732 ShAmt.getOperand(1),
20733 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
20734 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
20735 }
20736 break;
20737 }
20738 case RISCVISD::ADD_VL:
20739 if (SDValue V = simplifyOp_VL(N))
20740 return V;
20741 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20742 return V;
20743 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20744 return V;
20745 return combineToVWMACC(N, DAG, Subtarget);
20746 case RISCVISD::VWADD_W_VL:
20747 case RISCVISD::VWADDU_W_VL:
20748 case RISCVISD::VWSUB_W_VL:
20749 case RISCVISD::VWSUBU_W_VL:
20750 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
20751 case RISCVISD::OR_VL:
20752 case RISCVISD::SUB_VL:
20753 case RISCVISD::MUL_VL:
20754 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20755 case RISCVISD::VFMADD_VL:
20756 case RISCVISD::VFNMADD_VL:
20757 case RISCVISD::VFMSUB_VL:
20758 case RISCVISD::VFNMSUB_VL:
20759 case RISCVISD::STRICT_VFMADD_VL:
20760 case RISCVISD::STRICT_VFNMADD_VL:
20761 case RISCVISD::STRICT_VFMSUB_VL:
20762 case RISCVISD::STRICT_VFNMSUB_VL:
20763 return performVFMADD_VLCombine(N, DCI, Subtarget);
20764 case RISCVISD::FADD_VL:
20765 case RISCVISD::FSUB_VL:
20766 case RISCVISD::FMUL_VL:
20767 case RISCVISD::VFWADD_W_VL:
20768 case RISCVISD::VFWSUB_W_VL:
20769 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20770 case ISD::LOAD:
20771 case ISD::STORE: {
20772 if (DCI.isAfterLegalizeDAG())
20773 if (SDValue V = performMemPairCombine(N, DCI))
20774 return V;
20775
20776 if (N->getOpcode() != ISD::STORE)
20777 break;
20778
20779 auto *Store = cast<StoreSDNode>(N);
20780 SDValue Chain = Store->getChain();
20781 EVT MemVT = Store->getMemoryVT();
20782 SDValue Val = Store->getValue();
20783 SDLoc DL(N);
20784
20785 bool IsScalarizable =
20786 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
20787 Store->isSimple() &&
20788 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
20789 isPowerOf2_64(MemVT.getSizeInBits()) &&
20790 MemVT.getSizeInBits() <= Subtarget.getXLen();
20791
20792 // If sufficiently aligned we can scalarize stores of constant vectors of
20793 // any power-of-two size up to XLen bits, provided that they aren't too
20794 // expensive to materialize.
20795 // vsetivli zero, 2, e8, m1, ta, ma
20796 // vmv.v.i v8, 4
20797 // vse64.v v8, (a0)
20798 // ->
20799 // li a1, 1028
20800 // sh a1, 0(a0)
20801 if (DCI.isBeforeLegalize() && IsScalarizable &&
20802 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
20803 // Get the constant vector bits
20804 APInt NewC(Val.getValueSizeInBits(), 0);
20805 uint64_t EltSize = Val.getScalarValueSizeInBits();
20806 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
20807 if (Val.getOperand(i).isUndef())
20808 continue;
20809 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
20810 i * EltSize);
20811 }
20812 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20813
20814 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
20815 true) <= 2 &&
20816 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20817 NewVT, *Store->getMemOperand())) {
20818 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
20819 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
20820 Store->getPointerInfo(), Store->getBaseAlign(),
20821 Store->getMemOperand()->getFlags());
20822 }
20823 }
20824
20825 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
20826 // vsetivli zero, 2, e16, m1, ta, ma
20827 // vle16.v v8, (a0)
20828 // vse16.v v8, (a1)
20829 if (auto *L = dyn_cast<LoadSDNode>(Val);
20830 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
20831 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
20832 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
20833 L->getMemoryVT() == MemVT) {
20834 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20835 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20836 NewVT, *Store->getMemOperand()) &&
20837 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20838 NewVT, *L->getMemOperand())) {
20839 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
20840 L->getPointerInfo(), L->getBaseAlign(),
20841 L->getMemOperand()->getFlags());
20842 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
20843 Store->getPointerInfo(), Store->getBaseAlign(),
20844 Store->getMemOperand()->getFlags());
20845 }
20846 }
20847
20848 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
20849 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
20850 // any illegal types.
20851 if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
20852 (DCI.isAfterLegalizeDAG() &&
20853 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20854 isNullConstant(Val.getOperand(1)))) &&
20855 Val.hasOneUse()) {
20856 SDValue Src = Val.getOperand(0);
20857 MVT VecVT = Src.getSimpleValueType();
20858 // VecVT should be scalable and memory VT should match the element type.
20859 if (!Store->isIndexed() && VecVT.isScalableVector() &&
20860 MemVT == VecVT.getVectorElementType()) {
20861 SDLoc DL(N);
20862 MVT MaskVT = getMaskTypeFor(VecVT);
20863 return DAG.getStoreVP(
20864 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
20865 DAG.getConstant(1, DL, MaskVT),
20866 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
20867 Store->getMemOperand(), Store->getAddressingMode(),
20868 Store->isTruncatingStore(), /*IsCompress*/ false);
20869 }
20870 }
20871
20872 break;
20873 }
20874 case ISD::SPLAT_VECTOR: {
20875 EVT VT = N->getValueType(0);
20876 // Only perform this combine on legal MVT types.
20877 if (!isTypeLegal(VT))
20878 break;
20879 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
20880 DAG, Subtarget))
20881 return Gather;
20882 break;
20883 }
20884 case ISD::BUILD_VECTOR:
20885 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
20886 return V;
20887 break;
20888 case ISD::CONCAT_VECTORS:
20889 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
20890 return V;
20891 break;
20892 case ISD::VECTOR_SHUFFLE:
20893 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
20894 return V;
20895 break;
20896 case ISD::INSERT_VECTOR_ELT:
20897 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
20898 return V;
20899 break;
20900 case RISCVISD::VFMV_V_F_VL: {
20901 const MVT VT = N->getSimpleValueType(0);
20902 SDValue Passthru = N->getOperand(0);
20903 SDValue Scalar = N->getOperand(1);
20904 SDValue VL = N->getOperand(2);
20905
20906 // If VL is 1, we can use vfmv.s.f.
20907 if (isOneConstant(VL))
20908 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
20909 break;
20910 }
20911 case RISCVISD::VMV_V_X_VL: {
20912 const MVT VT = N->getSimpleValueType(0);
20913 SDValue Passthru = N->getOperand(0);
20914 SDValue Scalar = N->getOperand(1);
20915 SDValue VL = N->getOperand(2);
20916
20917 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
20918 // scalar input.
20919 unsigned ScalarSize = Scalar.getValueSizeInBits();
20920 unsigned EltWidth = VT.getScalarSizeInBits();
20921 if (ScalarSize > EltWidth && Passthru.isUndef())
20922 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
20923 return SDValue(N, 0);
20924
20925 // If VL is 1 and the scalar value won't benefit from immediate, we can
20926 // use vmv.s.x.
20927 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
20928 if (isOneConstant(VL) &&
20929 (!Const || Const->isZero() ||
20930 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
20931 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
20932
20933 break;
20934 }
20935 case RISCVISD::VFMV_S_F_VL: {
20936 SDValue Src = N->getOperand(1);
20937 // Try to remove vector->scalar->vector if the scalar->vector is inserting
20938 // into an undef vector.
20939 // TODO: Could use a vslide or vmv.v.v for non-undef.
20940 if (N->getOperand(0).isUndef() &&
20941 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20942 isNullConstant(Src.getOperand(1)) &&
20943 Src.getOperand(0).getValueType().isScalableVector()) {
20944 EVT VT = N->getValueType(0);
20945 SDValue EVSrc = Src.getOperand(0);
20946 EVT EVSrcVT = EVSrc.getValueType();
20948 // Widths match, just return the original vector.
20949 if (EVSrcVT == VT)
20950 return EVSrc;
20951 SDLoc DL(N);
20952 // Width is narrower, using insert_subvector.
20953 if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
20954 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
20955 EVSrc,
20956 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
20957 }
20958 // Width is wider, using extract_subvector.
20959 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
20960 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
20961 }
20962 [[fallthrough]];
20963 }
20964 case RISCVISD::VMV_S_X_VL: {
20965 const MVT VT = N->getSimpleValueType(0);
20966 SDValue Passthru = N->getOperand(0);
20967 SDValue Scalar = N->getOperand(1);
20968 SDValue VL = N->getOperand(2);
20969
20970 // The vmv.s.x instruction copies the scalar integer register to element 0
20971 // of the destination vector register. If SEW < XLEN, the least-significant
20972 // bits are copied and the upper XLEN-SEW bits are ignored.
20973 unsigned ScalarSize = Scalar.getValueSizeInBits();
20974 unsigned EltWidth = VT.getScalarSizeInBits();
20975 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
20976 return SDValue(N, 0);
20977
20978 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
20979 Scalar.getOperand(0).getValueType() == N->getValueType(0))
20980 return Scalar.getOperand(0);
20981
20982 // Use M1 or smaller to avoid over constraining register allocation
20983 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
20984 if (M1VT.bitsLT(VT)) {
20985 SDValue M1Passthru = DAG.getExtractSubvector(DL, M1VT, Passthru, 0);
20986 SDValue Result =
20987 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
20988 Result = DAG.getInsertSubvector(DL, Passthru, Result, 0);
20989 return Result;
20990 }
20991
20992 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
20993 // higher would involve overly constraining the register allocator for
20994 // no purpose.
20995 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
20996 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
20997 VT.bitsLE(RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
20998 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
20999
21000 break;
21001 }
21002 case RISCVISD::VMV_X_S: {
21003 SDValue Vec = N->getOperand(0);
21004 MVT VecVT = N->getOperand(0).getSimpleValueType();
21005 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
21006 if (M1VT.bitsLT(VecVT)) {
21007 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, 0);
21008 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
21009 }
21010 break;
21011 }
21012 case ISD::INTRINSIC_VOID:
21013 case ISD::INTRINSIC_W_CHAIN:
21014 case ISD::INTRINSIC_WO_CHAIN: {
21015 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
21016 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
21017 switch (IntNo) {
21018 // By default we do not combine any intrinsic.
21019 default:
21020 return SDValue();
21021 case Intrinsic::riscv_vcpop:
21022 case Intrinsic::riscv_vcpop_mask:
21023 case Intrinsic::riscv_vfirst:
21024 case Intrinsic::riscv_vfirst_mask: {
21025 SDValue VL = N->getOperand(2);
21026 if (IntNo == Intrinsic::riscv_vcpop_mask ||
21027 IntNo == Intrinsic::riscv_vfirst_mask)
21028 VL = N->getOperand(3);
21029 if (!isNullConstant(VL))
21030 return SDValue();
21031 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
21032 SDLoc DL(N);
21033 EVT VT = N->getValueType(0);
21034 if (IntNo == Intrinsic::riscv_vfirst ||
21035 IntNo == Intrinsic::riscv_vfirst_mask)
21036 return DAG.getAllOnesConstant(DL, VT);
21037 return DAG.getConstant(0, DL, VT);
21038 }
21039 case Intrinsic::riscv_vsseg2_mask:
21040 case Intrinsic::riscv_vsseg3_mask:
21041 case Intrinsic::riscv_vsseg4_mask:
21042 case Intrinsic::riscv_vsseg5_mask:
21043 case Intrinsic::riscv_vsseg6_mask:
21044 case Intrinsic::riscv_vsseg7_mask:
21045 case Intrinsic::riscv_vsseg8_mask: {
21046 SDValue Tuple = N->getOperand(2);
21047 unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21048
21049 if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
21050 Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
21051 !Tuple.getOperand(0).isUndef())
21052 return SDValue();
21053
21054 SDValue Val = Tuple.getOperand(1);
21055 unsigned Idx = Tuple.getConstantOperandVal(2);
21056
21057 unsigned SEW = Val.getValueType().getScalarSizeInBits();
21058 assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
21059 "Type mismatch without bitcast?");
21060 unsigned Stride = SEW / 8 * NF;
21061 unsigned Offset = SEW / 8 * Idx;
21062
21063 SDValue Ops[] = {
21064 /*Chain=*/N->getOperand(0),
21065 /*IntID=*/
21066 DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
21067 /*StoredVal=*/Val,
21068 /*Ptr=*/
21069 DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
21070 DAG.getConstant(Offset, DL, XLenVT)),
21071 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21072 /*Mask=*/N->getOperand(4),
21073 /*VL=*/N->getOperand(5)};
21074
21075 auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
21076 // Match getTgtMemIntrinsic for non-unit stride case
21077 EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
21078 MachineMemOperand *MMO =
21079 DAG.getMachineFunction().getMachineMemOperand(
21080 OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21081
21082 SDVTList VTs = DAG.getVTList(MVT::Other);
21083 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
21084 MMO);
21085 }
21086 }
21087 }
21088 case ISD::EXPERIMENTAL_VP_REVERSE:
21089 return performVP_REVERSECombine(N, DAG, Subtarget);
21090 case ISD::VP_STORE:
21091 return performVP_STORECombine(N, DAG, Subtarget);
21092 case ISD::BITCAST: {
21093 assert(Subtarget.useRVVForFixedLengthVectors());
21094 SDValue N0 = N->getOperand(0);
21095 EVT VT = N->getValueType(0);
21096 EVT SrcVT = N0.getValueType();
21097 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
21098 unsigned NF = VT.getRISCVVectorTupleNumFields();
21099 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
21100 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
21101 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
21102
21103 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
21104
21105 SDValue Result = DAG.getUNDEF(VT);
21106 for (unsigned i = 0; i < NF; ++i)
21107 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
21108 DAG.getTargetConstant(i, DL, MVT::i32));
21109 return Result;
21110 }
21111 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
21112 // type, widen both sides to avoid a trip through memory.
21113 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
21114 VT.isScalarInteger()) {
21115 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
21116 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
21117 Ops[0] = N0;
21118 SDLoc DL(N);
21119 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
21120 N0 = DAG.getBitcast(MVT::i8, N0);
21121 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
21122 }
21123
21124 return SDValue();
21125 }
21126 case ISD::VECREDUCE_ADD:
21127 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, *this))
21128 return V;
21129 [[fallthrough]];
21130 case ISD::CTPOP:
21131 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
21132 return V;
21133 break;
21134 case RISCVISD::VRGATHER_VX_VL: {
21135 // Note this assumes that out of bounds indices produce poison
21136 // and can thus be replaced without having to prove them inbounds.
21137 EVT VT = N->getValueType(0);
21138 SDValue Src = N->getOperand(0);
21139 SDValue Idx = N->getOperand(1);
21140 SDValue Passthru = N->getOperand(2);
21141 SDValue VL = N->getOperand(4);
21142
21143 // Warning: Unlike most cases we strip an insert_subvector, this one
21144 // does not require the first operand to be undef.
21145 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
21146 isNullConstant(Src.getOperand(2)))
21147 Src = Src.getOperand(1);
21148
21149 switch (Src.getOpcode()) {
21150 default:
21151 break;
21152 case RISCVISD::VMV_V_X_VL:
21153 case RISCVISD::VFMV_V_F_VL:
21154 // Drop a redundant vrgather_vx.
21155 // TODO: Remove the type restriction if we find a motivating
21156 // test case?
21157 if (Passthru.isUndef() && VL == Src.getOperand(2) &&
21158 Src.getValueType() == VT)
21159 return Src;
21160 break;
21161 case RISCVISD::VMV_S_X_VL:
21162 case RISCVISD::VFMV_S_F_VL:
21163 // If this use only demands lane zero from the source vmv.s.x, and
21164 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
21165 // a vmv.v.x. Note that there can be other uses of the original
21166 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
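// For instance (illustrative), a vrgather.vx with index 0 whose source is a
// vmv.s.x of scalar X broadcasts lane 0 to every active lane; with no
// passthru and a matching VL that is exactly what vmv.v.x of X produces, so
// the gather can be rewritten as a vmv.v.x.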
21167 if (isNullConstant(Idx) && Passthru.isUndef() &&
21168 VL == Src.getOperand(2)) {
21169 unsigned Opc =
21170 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
21171 return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
21172 VL);
21173 }
21174 break;
21175 }
21176 break;
21177 }
21178 case RISCVISD::TUPLE_EXTRACT: {
21179 EVT VT = N->getValueType(0);
21180 SDValue Tuple = N->getOperand(0);
21181 unsigned Idx = N->getConstantOperandVal(1);
21182 if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
21183 break;
21184
21185 unsigned NF = 0;
21186 switch (Tuple.getConstantOperandVal(1)) {
21187 default:
21188 break;
21189 case Intrinsic::riscv_vlseg2_mask:
21190 case Intrinsic::riscv_vlseg3_mask:
21191 case Intrinsic::riscv_vlseg4_mask:
21192 case Intrinsic::riscv_vlseg5_mask:
21193 case Intrinsic::riscv_vlseg6_mask:
21194 case Intrinsic::riscv_vlseg7_mask:
21195 case Intrinsic::riscv_vlseg8_mask:
21196 NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21197 break;
21198 }
21199
21200 if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
21201 break;
21202
21203 unsigned SEW = VT.getScalarSizeInBits();
21204 assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
21205 "Type mismatch without bitcast?");
21206 unsigned Stride = SEW / 8 * NF;
21207 unsigned Offset = SEW / 8 * Idx;
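// Illustrative numbers (not taken from the source): extracting field Idx from
// an NF = 3, SEW = 16 segment load reads every NF-th element of the
// interleaved layout, i.e. a strided load with Stride = 2 * 3 = 6 bytes
// starting at byte Offset = 2 * Idx.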
21208
21209 SDValue Ops[] = {
21210 /*Chain=*/Tuple.getOperand(0),
21211 /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
21212 /*Passthru=*/Tuple.getOperand(2),
21213 /*Ptr=*/
21214 DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
21215 DAG.getConstant(Offset, DL, XLenVT)),
21216 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21217 /*Mask=*/Tuple.getOperand(4),
21218 /*VL=*/Tuple.getOperand(5),
21219 /*Policy=*/Tuple.getOperand(6)};
21220
21221 auto *TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
21222 // Match getTgtMemIntrinsic for non-unit stride case
21223 EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
21224 MachineFunction &MF = DAG.getMachineFunction();
21225 MachineMemOperand *MMO = MF.getMachineMemOperand(
21226 TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21227
21228 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
21229 SDValue Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
21230 Ops, MemVT, MMO);
21231 DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
21232 return Result.getValue(0);
21233 }
21234 case RISCVISD::TUPLE_INSERT: {
21235 // tuple_insert tuple, undef, idx -> tuple
21236 if (N->getOperand(1).isUndef())
21237 return N->getOperand(0);
21238 break;
21239 }
21240 case RISCVISD::VSLIDE1UP_VL:
21241 case RISCVISD::VFSLIDE1UP_VL: {
21242 using namespace SDPatternMatch;
21243 SDValue SrcVec;
21244 SDLoc DL(N);
21245 MVT VT = N->getSimpleValueType(0);
21246 // If the scalar we're sliding in was extracted from the first element of a
21247 // vector, we can use that vector as the passthru in a normal slideup of 1.
21248 // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
21249 if (!N->getOperand(0).isUndef() ||
21250 !sd_match(N->getOperand(2),
21251 m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
21252 m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
21253 break;
21254
21255 MVT SrcVecVT = SrcVec.getSimpleValueType();
21256 if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
21257 break;
21258 // Adapt the value type of source vector.
21259 if (SrcVecVT.isFixedLengthVector()) {
21260 SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
21261 SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
21262 }
21264 SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
21265 else
21266 SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
21267
21268 return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
21269 DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
21270 N->getOperand(4));
21271 }
21272 }
21273
21274 return SDValue();
21275}
21276
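// Illustrative example: with KeptBits == 16 and Zbb available, a signed
// truncation check of the form `(sext_inreg x, i16) == x` costs only a sext.h
// plus a compare, which is why KeptBits == 16 is accepted below.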
21277 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
21278 EVT XVT, unsigned KeptBits) const {
21279 // For vectors, we don't have a preference.
21280 if (XVT.isVector())
21281 return false;
21282
21283 if (XVT != MVT::i32 && XVT != MVT::i64)
21284 return false;
21285
21286 // We can use sext.w for RV64 or an srai 31 on RV32.
21287 if (KeptBits == 32 || KeptBits == 64)
21288 return true;
21289
21290 // With Zbb we can use sext.h/sext.b.
21291 return Subtarget.hasStdExtZbb() &&
21292 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
21293 KeptBits == 16);
21294}
21295
21296 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
21297 const SDNode *N, CombineLevel Level) const {
21298 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
21299 N->getOpcode() == ISD::SRL) &&
21300 "Expected shift op");
21301
21302 // The following folds are only desirable if `(OP _, c1 << c2)` can be
21303 // materialised in fewer instructions than `(OP _, c1)`:
21304 //
21305 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
21306 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
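// Illustrative example (values not from the source): for (shl (add x, 2047), 3),
// c1 = 2047 fits an addi but c1 << c2 = 16376 does not, so the commute is
// refused below and the cheap addi is kept.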
21307 SDValue N0 = N->getOperand(0);
21308 EVT Ty = N0.getValueType();
21309
21310 // Loads and stores can fold a constant offset into their addressing, so when
21311 // the add node's other uses are loads/stores, the folding above can still be
21312 // performed.
21313 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
21314 for (SDNode *Use : X->users()) {
21315 // This use is the one we're on right now. Skip it
21316 if (Use == User || Use->getOpcode() == ISD::SELECT)
21317 continue;
21318 if (!isa<LoadSDNode>(Use) && !isa<StoreSDNode>(Use))
21319 return false;
21320 }
21321 return true;
21322 };
21323
21324 if (Ty.isScalarInteger() &&
21325 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
21326 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
21327 return isUsedByLdSt(N0.getNode(), N);
21328
21329 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
21330 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
21331
21332 bool IsShXAdd =
21333 (Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
21334 C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3;
21335 bool IsQCShlAdd = Subtarget.hasVendorXqciac() && C2 &&
21336 C2->getZExtValue() >= 4 && C2->getZExtValue() <= 31;
21337
21338 // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
21339 if ((IsShXAdd || IsQCShlAdd) && N->hasOneUse() &&
21340 N->user_begin()->getOpcode() == ISD::ADD &&
21341 !isUsedByLdSt(*N->user_begin(), nullptr) &&
21342 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
21343 return false;
21344
21345 if (C1 && C2) {
21346 const APInt &C1Int = C1->getAPIntValue();
21347 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
21348
21349 // We can materialise `c1 << c2` into an add immediate, so it's "free",
21350 // and the combine should happen, to potentially allow further combines
21351 // later.
21352 if (ShiftedC1Int.getSignificantBits() <= 64 &&
21353 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
21354 return true;
21355
21356 // We can materialise `c1` in an add immediate, so it's "free", and the
21357 // combine should be prevented.
21358 if (C1Int.getSignificantBits() <= 64 &&
21359 isLegalAddImmediate(C1Int.getSExtValue()))
21360 return false;
21361
21362 // Neither constant will fit into an immediate, so find materialisation
21363 // costs.
21364 int C1Cost =
21365 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
21366 /*CompressionCost*/ true);
21367 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
21368 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
21369 /*CompressionCost*/ true);
21370
21371 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
21372 // combine should be prevented.
21373 if (C1Cost < ShiftedC1Cost)
21374 return false;
21375 }
21376 }
21377
21378 if (!N0->hasOneUse())
21379 return false;
21380
21381 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
21382 N0->getOperand(0)->getOpcode() == ISD::ADD &&
21383 !N0->getOperand(0)->hasOneUse())
21384 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
21385
21386 return true;
21387}
21388
21389 bool RISCVTargetLowering::targetShrinkDemandedConstant(
21390 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
21391 TargetLoweringOpt &TLO) const {
21392 // Delay this optimization as late as possible.
21393 if (!TLO.LegalOps)
21394 return false;
21395
21396 EVT VT = Op.getValueType();
21397 if (VT.isVector())
21398 return false;
21399
21400 unsigned Opcode = Op.getOpcode();
21401 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
21402 return false;
21403
21404 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
21405 if (!C)
21406 return false;
21407
21408 const APInt &Mask = C->getAPIntValue();
21409
21410 // Clear all non-demanded bits initially.
21411 APInt ShrunkMask = Mask & DemandedBits;
21412
21413 // Try to make a smaller immediate by setting undemanded bits.
21414
21415 APInt ExpandedMask = Mask | ~DemandedBits;
21416
21417 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
21418 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
21419 };
21420 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
21421 if (NewMask == Mask)
21422 return true;
21423 SDLoc DL(Op);
21424 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
21425 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
21426 Op.getOperand(0), NewC);
21427 return TLO.CombineTo(Op, NewOp);
21428 };
21429
21430 // If the shrunk mask fits in sign extended 12 bits, let the target
21431 // independent code apply it.
21432 if (ShrunkMask.isSignedIntN(12))
21433 return false;
21434
21435 // And has a few special cases for zext.
21436 if (Opcode == ISD::AND) {
21437 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
21438 // otherwise use SLLI + SRLI.
21439 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
21440 if (IsLegalMask(NewMask))
21441 return UseMask(NewMask);
21442
21443 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
21444 if (VT == MVT::i64) {
21445 APInt NewMask = APInt(64, 0xffffffff);
21446 if (IsLegalMask(NewMask))
21447 return UseMask(NewMask);
21448 }
21449 }
21450
21451 // For the remaining optimizations, we need to be able to make a negative
21452 // number through a combination of mask and undemanded bits.
21453 if (!ExpandedMask.isNegative())
21454 return false;
21455
21456 // Compute the fewest number of bits needed to represent the negative number.
21457 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
21458
21459 // Try to make a 12 bit negative immediate. If that fails try to make a 32
21460 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
21461 // If we can't create a simm12, we shouldn't change opaque constants.
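// Illustrative example (values not from the source): if Mask == 0xF800 and only
// the low 12 bits are demanded, ShrunkMask == 0x800 does not fit simm12, but
// ExpandedMask is negative with only 12 significant bits, so setting bits 11
// and up yields -2048, which can be encoded directly in an andi.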
21462 APInt NewMask = ShrunkMask;
21463 if (MinSignedBits <= 12)
21464 NewMask.setBitsFrom(11);
21465 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
21466 NewMask.setBitsFrom(31);
21467 else
21468 return false;
21469
21470 // Check that our new mask is a subset of the demanded mask.
21471 assert(IsLegalMask(NewMask));
21472 return UseMask(NewMask);
21473}
21474
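// Worked example: computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80 (each
// bit is mirrored within its byte, i.e. brev8), while with IsGORC == true the
// result is 0xFF (every bit of a non-zero byte becomes set, i.e. orc.b).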
21475static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
21476 static const uint64_t GREVMasks[] = {
21477 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
21478 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
21479
21480 for (unsigned Stage = 0; Stage != 6; ++Stage) {
21481 unsigned Shift = 1 << Stage;
21482 if (ShAmt & Shift) {
21483 uint64_t Mask = GREVMasks[Stage];
21484 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
21485 if (IsGORC)
21486 Res |= x;
21487 x = Res;
21488 }
21489 }
21490
21491 return x;
21492}
21493
21494 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
21495 KnownBits &Known,
21496 const APInt &DemandedElts,
21497 const SelectionDAG &DAG,
21498 unsigned Depth) const {
21499 unsigned BitWidth = Known.getBitWidth();
21500 unsigned Opc = Op.getOpcode();
21505 "Should use MaskedValueIsZero if you don't know whether Op"
21506 " is a target node!");
21507
21508 Known.resetAll();
21509 switch (Opc) {
21510 default: break;
21511 case RISCVISD::SELECT_CC: {
21512 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
21513 // If we don't know any bits, early out.
21514 if (Known.isUnknown())
21515 break;
21516 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
21517
21518 // Only known if known in both the LHS and RHS.
21519 Known = Known.intersectWith(Known2);
21520 break;
21521 }
21522 case RISCVISD::VCPOP_VL: {
21523 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
21524 Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
21525 break;
21526 }
21527 case RISCVISD::CZERO_EQZ:
21528 case RISCVISD::CZERO_NEZ:
21529 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21530 // Result is either all zero or operand 0. We can propagate zeros, but not
21531 // ones.
21532 Known.One.clearAllBits();
21533 break;
21534 case RISCVISD::REMUW: {
21535 KnownBits Known2;
21536 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21537 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21538 // We only care about the lower 32 bits.
21539 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
21540 // Restore the original width by sign extending.
21541 Known = Known.sext(BitWidth);
21542 break;
21543 }
21544 case RISCVISD::DIVUW: {
21545 KnownBits Known2;
21546 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21547 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21548 // We only care about the lower 32 bits.
21549 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
21550 // Restore the original width by sign extending.
21551 Known = Known.sext(BitWidth);
21552 break;
21553 }
21554 case RISCVISD::SLLW: {
21555 KnownBits Known2;
21556 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21557 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21558 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
21559 // Restore the original width by sign extending.
21560 Known = Known.sext(BitWidth);
21561 break;
21562 }
21563 case RISCVISD::SRLW: {
21564 KnownBits Known2;
21565 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21566 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21567 Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32));
21568 // Restore the original width by sign extending.
21569 Known = Known.sext(BitWidth);
21570 break;
21571 }
21572 case RISCVISD::SRAW: {
21573 KnownBits Known2;
21574 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21575 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21576 Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
21577 // Restore the original width by sign extending.
21578 Known = Known.sext(BitWidth);
21579 break;
21580 }
21581 case RISCVISD::CTZW: {
21582 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21583 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
21584 unsigned LowBits = llvm::bit_width(PossibleTZ);
21585 Known.Zero.setBitsFrom(LowBits);
21586 break;
21587 }
21588 case RISCVISD::CLZW: {
21589 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21590 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
21591 unsigned LowBits = llvm::bit_width(PossibleLZ);
21592 Known.Zero.setBitsFrom(LowBits);
21593 break;
21594 }
21595 case RISCVISD::BREV8:
21596 case RISCVISD::ORC_B: {
21597 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
21598 // control value of 7 is equivalent to brev8 and orc.b.
21599 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21600 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21601 // To compute zeros for ORC_B, we need to invert the value and invert it
21602 // back after. This inverting is harmless for BREV8.
21603 Known.Zero =
21604 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
21605 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
21606 break;
21607 }
21608 case RISCVISD::READ_VLENB: {
21609 // We can use the minimum and maximum VLEN values to bound VLENB. We
21610 // know VLEN must be a power of two.
21611 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
21612 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
21613 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
21614 Known.Zero.setLowBits(Log2_32(MinVLenB));
21615 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
21616 if (MaxVLenB == MinVLenB)
21617 Known.One.setBit(Log2_32(MinVLenB));
21618 break;
21619 }
21620 case RISCVISD::FCLASS: {
21621 // fclass will only set one of the low 10 bits.
21622 Known.Zero.setBitsFrom(10);
21623 break;
21624 }
21625 case ISD::INTRINSIC_W_CHAIN:
21626 case ISD::INTRINSIC_WO_CHAIN: {
21627 unsigned IntNo =
21628 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
21629 switch (IntNo) {
21630 default:
21631 // We can't do anything for most intrinsics.
21632 break;
21633 case Intrinsic::riscv_vsetvli:
21634 case Intrinsic::riscv_vsetvlimax: {
21635 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
21636 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
21637 RISCVVType::VLMUL VLMUL =
21638 static_cast<RISCVVType::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
21639 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
21640 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
21641 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
21642 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
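// Illustrative numbers: if the maximum VLEN were 512, then with SEW = 32 and
// LMUL = 2 this gives MaxVL = 512 / 32 * 2 = 32, so every bit of the result
// above bit 5 is known to be zero.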
21643
21644 // The result of vsetvli must not be larger than AVL.
21645 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
21646 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
21647
21648 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
21649 if (BitWidth > KnownZeroFirstBit)
21650 Known.Zero.setBitsFrom(KnownZeroFirstBit);
21651 break;
21652 }
21653 }
21654 break;
21655 }
21656 }
21657}
21658
21659 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
21660 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21661 unsigned Depth) const {
21662 switch (Op.getOpcode()) {
21663 default:
21664 break;
21665 case RISCVISD::SELECT_CC: {
21666 unsigned Tmp =
21667 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
21668 if (Tmp == 1) return 1; // Early out.
21669 unsigned Tmp2 =
21670 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
21671 return std::min(Tmp, Tmp2);
21672 }
21673 case RISCVISD::CZERO_EQZ:
21674 case RISCVISD::CZERO_NEZ:
21675 // Output is either all zero or operand 0. We can propagate sign bit count
21676 // from operand 0.
21677 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21678 case RISCVISD::ABSW: {
21679 // We expand this at isel to negw+max. The result will have 33 sign bits
21680 // if the input has at least 33 sign bits.
21681 unsigned Tmp =
21682 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21683 if (Tmp < 33) return 1;
21684 return 33;
21685 }
21686 case RISCVISD::SRAW: {
21687 unsigned Tmp =
21688 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21689 // sraw produces at least 33 sign bits. If the input already has more than
21690 // 33 sign bits, sraw will preserve them.
21691 // TODO: A more precise answer could be calculated depending on known bits
21692 // in the shift amount.
21693 return std::max(Tmp, 33U);
21694 }
21695 case RISCVISD::SLLW:
21696 case RISCVISD::SRLW:
21697 case RISCVISD::DIVW:
21698 case RISCVISD::DIVUW:
21699 case RISCVISD::REMUW:
21700 case RISCVISD::ROLW:
21701 case RISCVISD::RORW:
21702 case RISCVISD::FCVT_W_RV64:
21703 case RISCVISD::FCVT_WU_RV64:
21704 case RISCVISD::STRICT_FCVT_W_RV64:
21705 case RISCVISD::STRICT_FCVT_WU_RV64:
21706 // TODO: As the result is sign-extended, this is conservatively correct.
21707 return 33;
21708 case RISCVISD::VMV_X_S: {
21709 // The number of sign bits of the scalar result is computed by obtaining the
21710 // element type of the input vector operand, subtracting its width from the
21711 // XLEN, and then adding one (sign bit within the element type). If the
21712 // element type is wider than XLen, the least-significant XLEN bits are
21713 // taken.
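// For example, on RV64 reading an i8 element with vmv.x.s yields a value with
// 64 - 8 + 1 = 57 known sign bits.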
21714 unsigned XLen = Subtarget.getXLen();
21715 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
21716 if (EltBits <= XLen)
21717 return XLen - EltBits + 1;
21718 break;
21719 }
21720 case ISD::INTRINSIC_W_CHAIN: {
21721 unsigned IntNo = Op.getConstantOperandVal(1);
21722 switch (IntNo) {
21723 default:
21724 break;
21725 case Intrinsic::riscv_masked_atomicrmw_xchg:
21726 case Intrinsic::riscv_masked_atomicrmw_add:
21727 case Intrinsic::riscv_masked_atomicrmw_sub:
21728 case Intrinsic::riscv_masked_atomicrmw_nand:
21729 case Intrinsic::riscv_masked_atomicrmw_max:
21730 case Intrinsic::riscv_masked_atomicrmw_min:
21731 case Intrinsic::riscv_masked_atomicrmw_umax:
21732 case Intrinsic::riscv_masked_atomicrmw_umin:
21733 case Intrinsic::riscv_masked_cmpxchg:
21734 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
21735 // narrow atomic operation. These are implemented using atomic
21736 // operations at the minimum supported atomicrmw/cmpxchg width whose
21737 // result is then sign extended to XLEN. With +A, the minimum width is
21738 // 32 for both RV64 and RV32.
21740 assert(Subtarget.hasStdExtA());
21741 return Op.getValueSizeInBits() - 31;
21742 }
21743 break;
21744 }
21745 }
21746
21747 return 1;
21748}
21749
21750 bool RISCVTargetLowering::SimplifyDemandedBitsForTargetNode(
21751 SDValue Op, const APInt &OriginalDemandedBits,
21752 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
21753 unsigned Depth) const {
21754 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
21755
21756 switch (Op.getOpcode()) {
21757 case RISCVISD::BREV8:
21758 case RISCVISD::ORC_B: {
21759 KnownBits Known2;
21760 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21761 // For BREV8, we need to do BREV8 on the demanded bits.
21762 // For ORC_B, any bit in the output demands all bits from the same byte.
21763 // So we need to do ORC_B on the demanded bits.
21764 APInt DemandedBits =
21765 APInt(BitWidth, computeGREVOrGORC(OriginalDemandedBits.getZExtValue(),
21766 7, IsGORC));
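// For example, if only bit 0 of a BREV8 result is demanded, the transformed
// mask asks for bit 7 of the corresponding input byte, since brev8 mirrors
// the bit order within each byte.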
21767 if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits,
21768 OriginalDemandedElts, Known2, TLO, Depth + 1))
21769 return true;
21770
21771 // To compute zeros for ORC_B, we need to invert the value and invert it
21772 // back after. This inverting is harmless for BREV8.
21773 Known.Zero = ~computeGREVOrGORC(~Known2.Zero.getZExtValue(), 7, IsGORC);
21774 Known.One = computeGREVOrGORC(Known2.One.getZExtValue(), 7, IsGORC);
21775 return false;
21776 }
21777 }
21778
21779 return TargetLowering::SimplifyDemandedBitsForTargetNode(
21780 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
21781}
21782
21783 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
21784 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21785 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
21786
21787 // TODO: Add more target nodes.
21788 switch (Op.getOpcode()) {
21789 case RISCVISD::SLLW:
21790 case RISCVISD::SRAW:
21791 case RISCVISD::SRLW:
21792 case RISCVISD::RORW:
21793 case RISCVISD::ROLW:
21794 // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
21795 // amount is in bounds.
21796 return false;
21797 case RISCVISD::SELECT_CC:
21798 // Integer comparisons cannot create poison.
21799 assert(Op.getOperand(0).getValueType().isInteger() &&
21800 "RISCVISD::SELECT_CC only compares integers");
21801 return false;
21802 }
21803 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
21804 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
21805}
21806
21807const Constant *
21808 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
21809 assert(Ld && "Unexpected null LoadSDNode");
21810 if (!ISD::isNormalLoad(Ld))
21811 return nullptr;
21812
21813 SDValue Ptr = Ld->getBasePtr();
21814
21815 // Only constant pools with no offset are supported.
21816 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
21817 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
21818 if (!CNode || CNode->isMachineConstantPoolEntry() ||
21819 CNode->getOffset() != 0)
21820 return nullptr;
21821
21822 return CNode;
21823 };
21824
21825 // Simple case, LLA.
21826 if (Ptr.getOpcode() == RISCVISD::LLA) {
21827 auto *CNode = GetSupportedConstantPool(Ptr.getOperand(0));
21828 if (!CNode || CNode->getTargetFlags() != 0)
21829 return nullptr;
21830
21831 return CNode->getConstVal();
21832 }
21833
21834 // Look for a HI and ADD_LO pair.
21835 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
21836 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
21837 return nullptr;
21838
21839 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
21840 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
21841
21842 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
21843 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
21844 return nullptr;
21845
21846 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
21847 return nullptr;
21848
21849 return CNodeLo->getConstVal();
21850}
21851
21852 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
21853 MachineBasicBlock *BB) {
21854 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
21855
21856 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
21857 // Should the count have wrapped while it was being read, we need to try
21858 // again.
21859 // For example:
21860 // ```
21861 // read:
21862 // csrrs x3, counterh # load high word of counter
21863 // csrrs x2, counter # load low word of counter
21864 // csrrs x4, counterh # load high word of counter
21865 // bne x3, x4, read # check if high word reads match, otherwise try again
21866 // ```
21867
21868 MachineFunction &MF = *BB->getParent();
21869 const BasicBlock *LLVMBB = BB->getBasicBlock();
21870 MachineFunction::iterator It = ++BB->getIterator();
21871
21872 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
21873 MF.insert(It, LoopMBB);
21874
21875 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
21876 MF.insert(It, DoneMBB);
21877
21878 // Transfer the remainder of BB and its successor edges to DoneMBB.
21879 DoneMBB->splice(DoneMBB->begin(), BB,
21880 std::next(MachineBasicBlock::iterator(MI)), BB->end());
21881 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
21882
21883 BB->addSuccessor(LoopMBB);
21884
21885 MachineRegisterInfo &RegInfo = MF.getRegInfo();
21886 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
21887 Register LoReg = MI.getOperand(0).getReg();
21888 Register HiReg = MI.getOperand(1).getReg();
21889 int64_t LoCounter = MI.getOperand(2).getImm();
21890 int64_t HiCounter = MI.getOperand(3).getImm();
21891 DebugLoc DL = MI.getDebugLoc();
21892
21893 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
21894 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
21895 .addImm(HiCounter)
21896 .addReg(RISCV::X0);
21897 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
21898 .addImm(LoCounter)
21899 .addReg(RISCV::X0);
21900 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
21901 .addImm(HiCounter)
21902 .addReg(RISCV::X0);
21903
21904 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
21905 .addReg(HiReg)
21906 .addReg(ReadAgainReg)
21907 .addMBB(LoopMBB);
21908
21909 LoopMBB->addSuccessor(LoopMBB);
21910 LoopMBB->addSuccessor(DoneMBB);
21911
21912 MI.eraseFromParent();
21913
21914 return DoneMBB;
21915}
21916
21917 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
21918 MachineBasicBlock *BB,
21919 const RISCVSubtarget &Subtarget) {
21920 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
21921
21922 MachineFunction &MF = *BB->getParent();
21923 DebugLoc DL = MI.getDebugLoc();
21926 Register LoReg = MI.getOperand(0).getReg();
21927 Register HiReg = MI.getOperand(1).getReg();
21928 Register SrcReg = MI.getOperand(2).getReg();
21929
21930 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
21931 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
21932
21933 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
21934 RI, Register());
21936 MachineMemOperand *MMOLo =
21940 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
21941 .addFrameIndex(FI)
21942 .addImm(0)
21943 .addMemOperand(MMOLo);
21944 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
21945 .addFrameIndex(FI)
21946 .addImm(4)
21947 .addMemOperand(MMOHi);
21948 MI.eraseFromParent(); // The pseudo instruction is gone now.
21949 return BB;
21950}
21951
21954 const RISCVSubtarget &Subtarget) {
21955 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
21956 "Unexpected instruction");
21957
21958 MachineFunction &MF = *BB->getParent();
21959 DebugLoc DL = MI.getDebugLoc();
21962 Register DstReg = MI.getOperand(0).getReg();
21963 Register LoReg = MI.getOperand(1).getReg();
21964 Register HiReg = MI.getOperand(2).getReg();
21965
21966 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
21967 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
21968
21970 MachineMemOperand *MMOLo =
21974 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
21975 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
21976 .addFrameIndex(FI)
21977 .addImm(0)
21978 .addMemOperand(MMOLo);
21979 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
21980 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
21981 .addFrameIndex(FI)
21982 .addImm(4)
21983 .addMemOperand(MMOHi);
21984 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
21985 MI.eraseFromParent(); // The pseudo instruction is gone now.
21986 return BB;
21987}
21988
21990 unsigned RelOpcode, unsigned EqOpcode,
21991 const RISCVSubtarget &Subtarget) {
21992 DebugLoc DL = MI.getDebugLoc();
21993 Register DstReg = MI.getOperand(0).getReg();
21994 Register Src1Reg = MI.getOperand(1).getReg();
21995 Register Src2Reg = MI.getOperand(2).getReg();
21996 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
21997 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
21998 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
21999
22000 // Save the current FFLAGS.
22001 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
22002
22003 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
22004 .addReg(Src1Reg)
22005 .addReg(Src2Reg);
22008
22009 // Restore the FFLAGS.
22010 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22011 .addReg(SavedFFlags, RegState::Kill);
22012
22013 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
22014 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
22015 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
22016 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
22019
22020 // Erase the pseudoinstruction.
22021 MI.eraseFromParent();
22022 return BB;
22023}
22024
22025static MachineBasicBlock *
22026 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
22027 MachineBasicBlock *ThisMBB,
22028 const RISCVSubtarget &Subtarget) {
22029 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22030 // Without this, custom-inserter would have generated:
22031 //
22032 // A
22033 // | \
22034 // | B
22035 // | /
22036 // C
22037 // | \
22038 // | D
22039 // | /
22040 // E
22041 //
22042 // A: X = ...; Y = ...
22043 // B: empty
22044 // C: Z = PHI [X, A], [Y, B]
22045 // D: empty
22046 // E: PHI [X, C], [Z, D]
22047 //
22048 // If we lower both Select_FPRX_ in a single step, we can instead generate:
22049 //
22050 // A
22051 // | \
22052 // | C
22053 // | /|
22054 // |/ |
22055 // | |
22056 // | D
22057 // | /
22058 // E
22059 //
22060 // A: X = ...; Y = ...
22061 // D: empty
22062 // E: PHI [X, A], [X, C], [Y, D]
22063
22064 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22065 const DebugLoc &DL = First.getDebugLoc();
22066 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
22067 MachineFunction *F = ThisMBB->getParent();
22068 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
22069 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
22070 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
22071 MachineFunction::iterator It = ++ThisMBB->getIterator();
22072 F->insert(It, FirstMBB);
22073 F->insert(It, SecondMBB);
22074 F->insert(It, SinkMBB);
22075
22076 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
22077 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
22079 ThisMBB->end());
22080 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
22081
22082 // Fallthrough block for ThisMBB.
22083 ThisMBB->addSuccessor(FirstMBB);
22084 // Fallthrough block for FirstMBB.
22085 FirstMBB->addSuccessor(SecondMBB);
22086 ThisMBB->addSuccessor(SinkMBB);
22087 FirstMBB->addSuccessor(SinkMBB);
22088 // This is fallthrough.
22089 SecondMBB->addSuccessor(SinkMBB);
22090
22091 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
22092 Register FLHS = First.getOperand(1).getReg();
22093 Register FRHS = First.getOperand(2).getReg();
22094 // Insert appropriate branch.
22095 BuildMI(FirstMBB, DL, TII.get(RISCVCC::getBrCond(FirstCC, First.getOpcode())))
22096 .addReg(FLHS)
22097 .addReg(FRHS)
22098 .addMBB(SinkMBB);
22099
22100 Register SLHS = Second.getOperand(1).getReg();
22101 Register SRHS = Second.getOperand(2).getReg();
22102 Register Op1Reg4 = First.getOperand(4).getReg();
22103 Register Op1Reg5 = First.getOperand(5).getReg();
22104
22105 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
22106 // Insert appropriate branch.
22107 BuildMI(ThisMBB, DL,
22108 TII.get(RISCVCC::getBrCond(SecondCC, Second.getOpcode())))
22109 .addReg(SLHS)
22110 .addReg(SRHS)
22111 .addMBB(SinkMBB);
22112
22113 Register DestReg = Second.getOperand(0).getReg();
22114 Register Op2Reg4 = Second.getOperand(4).getReg();
22115 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
22116 .addReg(Op2Reg4)
22117 .addMBB(ThisMBB)
22118 .addReg(Op1Reg4)
22119 .addMBB(FirstMBB)
22120 .addReg(Op1Reg5)
22121 .addMBB(SecondMBB);
22122
22123 // Now remove the Select_FPRX_s.
22124 First.eraseFromParent();
22125 Second.eraseFromParent();
22126 return SinkMBB;
22127}
22128
22129 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
22130 MachineBasicBlock *BB,
22131 const RISCVSubtarget &Subtarget) {
22132 // To "insert" Select_* instructions, we actually have to insert the triangle
22133 // control-flow pattern. The incoming instructions know the destination vreg
22134 // to set, the condition code register to branch on, the true/false values to
22135 // select between, and the condcode to use to select the appropriate branch.
22136 //
22137 // We produce the following control flow:
22138 // HeadMBB
22139 // | \
22140 // | IfFalseMBB
22141 // | /
22142 // TailMBB
22143 //
22144 // When we find a sequence of selects we attempt to optimize their emission
22145 // by sharing the control flow. Currently we only handle cases where we have
22146 // multiple selects with the exact same condition (same LHS, RHS and CC).
22147 // The selects may be interleaved with other instructions if the other
22148 // instructions meet some requirements we deem safe:
22149 // - They are not pseudo instructions.
22150 // - They are debug instructions. Otherwise,
22151 // - They do not have side-effects, do not access memory and their inputs do
22152 // not depend on the results of the select pseudo-instructions.
22153 // The TrueV/FalseV operands of the selects cannot depend on the result of
22154 // previous selects in the sequence.
22155 // These conditions could be further relaxed. See the X86 target for a
22156 // related approach and more information.
22157 //
22158 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22159 // is checked here and handled by a separate function -
22160 // EmitLoweredCascadedSelect.
22161
22162 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
22163 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
22164 MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
22165 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
22166 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
22167 Next->getOperand(5).isKill())
22168 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
22169
22170 Register LHS = MI.getOperand(1).getReg();
22171 Register RHS;
22172 if (MI.getOperand(2).isReg())
22173 RHS = MI.getOperand(2).getReg();
22174 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
22175
22176 SmallVector<MachineInstr *, 4> SelectDebugValues;
22177 SmallSet<Register, 4> SelectDests;
22178 SelectDests.insert(MI.getOperand(0).getReg());
22179
22180 MachineInstr *LastSelectPseudo = &MI;
22181 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
22182 SequenceMBBI != E; ++SequenceMBBI) {
22183 if (SequenceMBBI->isDebugInstr())
22184 continue;
22185 if (RISCVInstrInfo::isSelectPseudo(*SequenceMBBI)) {
22186 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
22187 !SequenceMBBI->getOperand(2).isReg() ||
22188 SequenceMBBI->getOperand(2).getReg() != RHS ||
22189 SequenceMBBI->getOperand(3).getImm() != CC ||
22190 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
22191 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
22192 break;
22193 LastSelectPseudo = &*SequenceMBBI;
22194 SequenceMBBI->collectDebugValues(SelectDebugValues);
22195 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
22196 continue;
22197 }
22198 if (SequenceMBBI->hasUnmodeledSideEffects() ||
22199 SequenceMBBI->mayLoadOrStore() ||
22200 SequenceMBBI->usesCustomInsertionHook())
22201 break;
22202 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
22203 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
22204 }))
22205 break;
22206 }
22207
22208 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22209 const BasicBlock *LLVM_BB = BB->getBasicBlock();
22210 DebugLoc DL = MI.getDebugLoc();
22211 MachineFunction::iterator I = ++BB->getIterator();
22212
22213 MachineBasicBlock *HeadMBB = BB;
22214 MachineFunction *F = BB->getParent();
22215 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
22216 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
22217
22218 F->insert(I, IfFalseMBB);
22219 F->insert(I, TailMBB);
22220
22221 // Set the call frame size on entry to the new basic blocks.
22222 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
22223 IfFalseMBB->setCallFrameSize(CallFrameSize);
22224 TailMBB->setCallFrameSize(CallFrameSize);
22225
22226 // Transfer debug instructions associated with the selects to TailMBB.
22227 for (MachineInstr *DebugInstr : SelectDebugValues) {
22228 TailMBB->push_back(DebugInstr->removeFromParent());
22229 }
22230
22231 // Move all instructions after the sequence to TailMBB.
22232 TailMBB->splice(TailMBB->end(), HeadMBB,
22233 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
22234 // Update machine-CFG edges by transferring all successors of the current
22235 // block to the new block which will contain the Phi nodes for the selects.
22236 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
22237 // Set the successors for HeadMBB.
22238 HeadMBB->addSuccessor(IfFalseMBB);
22239 HeadMBB->addSuccessor(TailMBB);
22240
22241 // Insert appropriate branch.
22242 if (MI.getOperand(2).isImm())
22243 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22244 .addReg(LHS)
22245 .addImm(MI.getOperand(2).getImm())
22246 .addMBB(TailMBB);
22247 else
22248 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22249 .addReg(LHS)
22250 .addReg(RHS)
22251 .addMBB(TailMBB);
22252
22253 // IfFalseMBB just falls through to TailMBB.
22254 IfFalseMBB->addSuccessor(TailMBB);
22255
22256 // Create PHIs for all of the select pseudo-instructions.
22257 auto SelectMBBI = MI.getIterator();
22258 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
22259 auto InsertionPoint = TailMBB->begin();
22260 while (SelectMBBI != SelectEnd) {
22261 auto Next = std::next(SelectMBBI);
22262 if (RISCVInstrInfo::isSelectPseudo(*SelectMBBI)) {
22263 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
22264 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
22265 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
22266 .addReg(SelectMBBI->getOperand(4).getReg())
22267 .addMBB(HeadMBB)
22268 .addReg(SelectMBBI->getOperand(5).getReg())
22269 .addMBB(IfFalseMBB);
22270 SelectMBBI->eraseFromParent();
22271 }
22272 SelectMBBI = Next;
22273 }
22274
22275 F->getProperties().resetNoPHIs();
22276 return TailMBB;
22277}
22278
22279// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
22280static const RISCV::RISCVMaskedPseudoInfo *
22281lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW) {
22282 const RISCVVInversePseudosTable::PseudoInfo *Inverse =
22283 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
22284 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
22285 const RISCV::RISCVMaskedPseudoInfo *Masked =
22286 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
22287 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
22288 return Masked;
22289}
22290
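// Lowers PseudoVFROUND_NOEXCEPT_V_*_MASK by converting to integer and back
// (a masked vfcvt.x.f.v followed by a masked vfcvt.f.x.v) under the dynamic
// rounding mode, saving and restoring FFLAGS around the sequence so that no
// exception flags raised by the conversions are observed.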
22291 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
22292 MachineBasicBlock *BB,
22293 unsigned CVTXOpc) {
22294 DebugLoc DL = MI.getDebugLoc();
22295
22297
22299 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22300
22301 // Save the old value of FFLAGS.
22302 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
22303
22304 assert(MI.getNumOperands() == 7);
22305
22306 // Emit a VFCVT_X_F
22307 const TargetRegisterInfo *TRI =
22309 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
22310 Register Tmp = MRI.createVirtualRegister(RC);
22311 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
22312 .add(MI.getOperand(1))
22313 .add(MI.getOperand(2))
22314 .add(MI.getOperand(3))
22315 .add(MachineOperand::CreateImm(7)) // frm = DYN
22316 .add(MI.getOperand(4))
22317 .add(MI.getOperand(5))
22318 .add(MI.getOperand(6))
22319 .add(MachineOperand::CreateReg(RISCV::FRM,
22320 /*IsDef*/ false,
22321 /*IsImp*/ true));
22322
22323 // Emit a VFCVT_F_X
22324 RISCVVType::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
22325 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
22326 // There is no E8 variant for VFCVT_F_X.
22327 assert(Log2SEW >= 4);
22328 unsigned CVTFOpc =
22329 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
22330 ->MaskedPseudo;
22331
22332 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
22333 .add(MI.getOperand(0))
22334 .add(MI.getOperand(1))
22335 .addReg(Tmp)
22336 .add(MI.getOperand(3))
22337 .add(MachineOperand::CreateImm(7)) // frm = DYN
22338 .add(MI.getOperand(4))
22339 .add(MI.getOperand(5))
22340 .add(MI.getOperand(6))
22341 .add(MachineOperand::CreateReg(RISCV::FRM,
22342 /*IsDef*/ false,
22343 /*IsImp*/ true));
22344
22345 // Restore FFLAGS.
22346 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22347 .addReg(SavedFFLAGS, RegState::Kill);
22348
22349 // Erase the pseudoinstruction.
22350 MI.eraseFromParent();
22351 return BB;
22352}
22353
22354 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
22355 const RISCVSubtarget &Subtarget) {
22356 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
22357 const TargetRegisterClass *RC;
22358 switch (MI.getOpcode()) {
22359 default:
22360 llvm_unreachable("Unexpected opcode");
22361 case RISCV::PseudoFROUND_H:
22362 CmpOpc = RISCV::FLT_H;
22363 F2IOpc = RISCV::FCVT_W_H;
22364 I2FOpc = RISCV::FCVT_H_W;
22365 FSGNJOpc = RISCV::FSGNJ_H;
22366 FSGNJXOpc = RISCV::FSGNJX_H;
22367 RC = &RISCV::FPR16RegClass;
22368 break;
22369 case RISCV::PseudoFROUND_H_INX:
22370 CmpOpc = RISCV::FLT_H_INX;
22371 F2IOpc = RISCV::FCVT_W_H_INX;
22372 I2FOpc = RISCV::FCVT_H_W_INX;
22373 FSGNJOpc = RISCV::FSGNJ_H_INX;
22374 FSGNJXOpc = RISCV::FSGNJX_H_INX;
22375 RC = &RISCV::GPRF16RegClass;
22376 break;
22377 case RISCV::PseudoFROUND_S:
22378 CmpOpc = RISCV::FLT_S;
22379 F2IOpc = RISCV::FCVT_W_S;
22380 I2FOpc = RISCV::FCVT_S_W;
22381 FSGNJOpc = RISCV::FSGNJ_S;
22382 FSGNJXOpc = RISCV::FSGNJX_S;
22383 RC = &RISCV::FPR32RegClass;
22384 break;
22385 case RISCV::PseudoFROUND_S_INX:
22386 CmpOpc = RISCV::FLT_S_INX;
22387 F2IOpc = RISCV::FCVT_W_S_INX;
22388 I2FOpc = RISCV::FCVT_S_W_INX;
22389 FSGNJOpc = RISCV::FSGNJ_S_INX;
22390 FSGNJXOpc = RISCV::FSGNJX_S_INX;
22391 RC = &RISCV::GPRF32RegClass;
22392 break;
22393 case RISCV::PseudoFROUND_D:
22394 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22395 CmpOpc = RISCV::FLT_D;
22396 F2IOpc = RISCV::FCVT_L_D;
22397 I2FOpc = RISCV::FCVT_D_L;
22398 FSGNJOpc = RISCV::FSGNJ_D;
22399 FSGNJXOpc = RISCV::FSGNJX_D;
22400 RC = &RISCV::FPR64RegClass;
22401 break;
22402 case RISCV::PseudoFROUND_D_INX:
22403 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22404 CmpOpc = RISCV::FLT_D_INX;
22405 F2IOpc = RISCV::FCVT_L_D_INX;
22406 I2FOpc = RISCV::FCVT_D_L_INX;
22407 FSGNJOpc = RISCV::FSGNJ_D_INX;
22408 FSGNJXOpc = RISCV::FSGNJX_D_INX;
22409 RC = &RISCV::GPRRegClass;
22410 break;
22411 }
22412
22413 const BasicBlock *BB = MBB->getBasicBlock();
22414 DebugLoc DL = MI.getDebugLoc();
22415 MachineFunction::iterator I = ++MBB->getIterator();
22416
22417 MachineFunction *F = MBB->getParent();
22418 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
22419 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
22420
22421 F->insert(I, CvtMBB);
22422 F->insert(I, DoneMBB);
22423 // Move all instructions after the sequence to DoneMBB.
22424 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
22425 MBB->end());
22426 // Update machine-CFG edges by transferring all successors of the current
22427 // block to the new block which will contain the Phi nodes for the selects.
22428 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
22429 // Set the successors for MBB.
22430 MBB->addSuccessor(CvtMBB);
22431 MBB->addSuccessor(DoneMBB);
22432
22433 Register DstReg = MI.getOperand(0).getReg();
22434 Register SrcReg = MI.getOperand(1).getReg();
22435 Register MaxReg = MI.getOperand(2).getReg();
22436 int64_t FRM = MI.getOperand(3).getImm();
22437
22438 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22439 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
22440
22441 Register FabsReg = MRI.createVirtualRegister(RC);
22442 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
22443
22444 // Compare the FP value to the max value.
22445 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22446 auto MIB =
22447 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
22450
22451 // Insert branch.
22452 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
22453 .addReg(CmpReg)
22454 .addReg(RISCV::X0)
22455 .addMBB(DoneMBB);
22456
22457 CvtMBB->addSuccessor(DoneMBB);
22458
22459 // Convert to integer.
22460 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22461 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
22464
22465 // Convert back to FP.
22466 Register I2FReg = MRI.createVirtualRegister(RC);
22467 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
22470
22471 // Restore the sign bit.
22472 Register CvtReg = MRI.createVirtualRegister(RC);
22473 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
22474
22475 // Merge the results.
22476 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
22477 .addReg(SrcReg)
22478 .addMBB(MBB)
22479 .addReg(CvtReg)
22480 .addMBB(CvtMBB);
22481
22482 MI.eraseFromParent();
22483 return DoneMBB;
22484}
22485
22486 MachineBasicBlock *
22487 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
22488 MachineBasicBlock *BB) const {
22489 switch (MI.getOpcode()) {
22490 default:
22491 llvm_unreachable("Unexpected instr type to insert");
22492 case RISCV::ReadCounterWide:
22493 assert(!Subtarget.is64Bit() &&
22494 "ReadCounterWide is only to be used on riscv32");
22495 return emitReadCounterWidePseudo(MI, BB);
22496 case RISCV::Select_GPR_Using_CC_GPR:
22497 case RISCV::Select_GPR_Using_CC_SImm5_CV:
22498 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
22499 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
22500 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
22501 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
22502 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
22503 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
22504 case RISCV::Select_FPR16_Using_CC_GPR:
22505 case RISCV::Select_FPR16INX_Using_CC_GPR:
22506 case RISCV::Select_FPR32_Using_CC_GPR:
22507 case RISCV::Select_FPR32INX_Using_CC_GPR:
22508 case RISCV::Select_FPR64_Using_CC_GPR:
22509 case RISCV::Select_FPR64INX_Using_CC_GPR:
22510 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
22511 return emitSelectPseudo(MI, BB, Subtarget);
22512 case RISCV::BuildPairF64Pseudo:
22513 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
22514 case RISCV::SplitF64Pseudo:
22515 return emitSplitF64Pseudo(MI, BB, Subtarget);
22516 case RISCV::PseudoQuietFLE_H:
22517 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
22518 case RISCV::PseudoQuietFLE_H_INX:
22519 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
22520 case RISCV::PseudoQuietFLT_H:
22521 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
22522 case RISCV::PseudoQuietFLT_H_INX:
22523 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
22524 case RISCV::PseudoQuietFLE_S:
22525 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
22526 case RISCV::PseudoQuietFLE_S_INX:
22527 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
22528 case RISCV::PseudoQuietFLT_S:
22529 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
22530 case RISCV::PseudoQuietFLT_S_INX:
22531 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
22532 case RISCV::PseudoQuietFLE_D:
22533 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
22534 case RISCV::PseudoQuietFLE_D_INX:
22535 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
22536 case RISCV::PseudoQuietFLE_D_IN32X:
22537 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
22538 Subtarget);
22539 case RISCV::PseudoQuietFLT_D:
22540 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
22541 case RISCV::PseudoQuietFLT_D_INX:
22542 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
22543 case RISCV::PseudoQuietFLT_D_IN32X:
22544 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
22545 Subtarget);
22546
22547 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
22548 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
22549 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
22550 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
22551 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
22552 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
22553 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
22554 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
22555 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
22556 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
22557 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
22558 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
22559 case RISCV::PseudoFROUND_H:
22560 case RISCV::PseudoFROUND_H_INX:
22561 case RISCV::PseudoFROUND_S:
22562 case RISCV::PseudoFROUND_S_INX:
22563 case RISCV::PseudoFROUND_D:
22564 case RISCV::PseudoFROUND_D_INX:
22565 case RISCV::PseudoFROUND_D_IN32X:
22566 return emitFROUND(MI, BB, Subtarget);
22567 case RISCV::PROBED_STACKALLOC_DYN:
22568 return emitDynamicProbedAlloc(MI, BB);
22569 case TargetOpcode::STATEPOINT:
22570 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
22571 // while the jal call instruction (to which the statepoint is lowered in the
22572 // end) has an implicit def. This def is early-clobber as it is set at
22573 // the moment of the call, before any use is read.
22574 // Add this implicit dead def here as a workaround.
22575 MI.addOperand(*MI.getMF(),
22576 MachineOperand::CreateReg(
22577 RISCV::X1, /*isDef*/ true,
22578 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
22579 /*isUndef*/ false, /*isEarlyClobber*/ true));
22580 [[fallthrough]];
22581 case TargetOpcode::STACKMAP:
22582 case TargetOpcode::PATCHPOINT:
22583 if (!Subtarget.is64Bit())
22584 reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only "
22585 "supported on 64-bit targets");
22586 return emitPatchPoint(MI, BB);
22587 }
22588}
22589
22590 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
22591 SDNode *Node) const {
22592 // If the instruction defines the FRM operand, conservatively mark it as
22593 // non-dead to express a data dependency with FRM users and prevent incorrect
22594 // instruction reordering.
22595 if (auto *FRMDef = MI.findRegisterDefOperand(RISCV::FRM, /*TRI=*/nullptr)) {
22596 FRMDef->setIsDead(false);
22597 return;
22598 }
22599 // Add FRM dependency to any instructions with dynamic rounding mode.
22600 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
22601 if (Idx < 0) {
22602 // Vector pseudos have FRM index indicated by TSFlags.
22603 Idx = RISCVII::getFRMOpNum(MI.getDesc());
22604 if (Idx < 0)
22605 return;
22606 }
22607 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
22608 return;
22609 // If the instruction already reads FRM, don't add another read.
22610 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
22611 return;
22612 MI.addOperand(
22613 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
22614}
22615
22616void RISCVTargetLowering::analyzeInputArgs(
22617 MachineFunction &MF, CCState &CCInfo,
22618 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
22619 RISCVCCAssignFn Fn) const {
22620 for (const auto &[Idx, In] : enumerate(Ins)) {
22621 MVT ArgVT = In.VT;
22622 ISD::ArgFlagsTy ArgFlags = In.Flags;
22623
22624 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22625 In.OrigTy)) {
22626 LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
22627 << ArgVT << '\n');
22628 llvm_unreachable(nullptr);
22629 }
22630 }
22631}
22632
22633void RISCVTargetLowering::analyzeOutputArgs(
22634 MachineFunction &MF, CCState &CCInfo,
22635 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
22636 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
22637 for (const auto &[Idx, Out] : enumerate(Outs)) {
22638 MVT ArgVT = Out.VT;
22639 ISD::ArgFlagsTy ArgFlags = Out.Flags;
22640
22641 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22642 Out.OrigTy)) {
22643 LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
22644 << ArgVT << "\n");
22645 llvm_unreachable(nullptr);
22646 }
22647 }
22648}
22649
22650// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
22651// values.
22652 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
22653 const CCValAssign &VA, const SDLoc &DL,
22654 const RISCVSubtarget &Subtarget) {
22655 if (VA.needsCustom()) {
22656 if (VA.getLocVT().isInteger() &&
22657 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22658 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
22659 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
22660 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
22661 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
22662 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
22663 llvm_unreachable("Unexpected Custom handling.");
22664 }
22665
22666 switch (VA.getLocInfo()) {
22667 default:
22668 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22669 case CCValAssign::Full:
22670 break;
22671 case CCValAssign::BCvt:
22672 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
22673 break;
22674 }
22675 return Val;
22676}
22677
22678// The caller is responsible for loading the full value if the argument is
22679// passed with CCValAssign::Indirect.
22680 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
22681 const CCValAssign &VA, const SDLoc &DL,
22682 const ISD::InputArg &In,
22683 const RISCVTargetLowering &TLI) {
22684 MachineFunction &MF = DAG.getMachineFunction();
22685 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22686 EVT LocVT = VA.getLocVT();
22687 SDValue Val;
22688 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
22689 Register VReg = RegInfo.createVirtualRegister(RC);
22690 RegInfo.addLiveIn(VA.getLocReg(), VReg);
22691 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
22692
22693 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
22694 if (In.isOrigArg()) {
22695 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
22696 if (OrigArg->getType()->isIntegerTy()) {
22697 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
22698 // An input zero extended from i31 can also be considered sign extended.
22699 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
22700 (BitWidth < 32 && In.Flags.isZExt())) {
22701 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
22702 RVFI->addSExt32Register(VReg);
22703 }
22704 }
22705 }
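 // For example, an i16 argument that the ABI zero-extends has bits [31:16]
 // known to be zero, so bit 31 is clear and the value is also sign-extended
 // from 32 bits, making it eligible for addSExt32Register. An i32
 // zero-extended argument does not qualify, since bit 31 may be set.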
22706
 22707 if (VA.getLocInfo() == CCValAssign::Indirect)
 22708 return Val;
22709
22710 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
22711}
22712
22713static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
 22714 const CCValAssign &VA, const SDLoc &DL,
22715 const RISCVSubtarget &Subtarget) {
22716 EVT LocVT = VA.getLocVT();
22717
22718 if (VA.needsCustom()) {
22719 if (LocVT.isInteger() &&
22720 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22721 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
22722 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
22723 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
22724 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
22725 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
22726 llvm_unreachable("Unexpected Custom handling.");
22727 }
22728
22729 switch (VA.getLocInfo()) {
22730 default:
22731 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22732 case CCValAssign::Full:
22733 break;
22734 case CCValAssign::BCvt:
22735 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
22736 break;
22737 }
22738 return Val;
22739}
22740
22741// The caller is responsible for loading the full value if the argument is
22742// passed with CCValAssign::Indirect.
22743static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
 22744 const CCValAssign &VA, const SDLoc &DL) {
 22745 MachineFunction &MF = DAG.getMachineFunction();
 22746 MachineFrameInfo &MFI = MF.getFrameInfo();
22747 EVT LocVT = VA.getLocVT();
22748 EVT ValVT = VA.getValVT();
 22749 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
 22750 if (VA.getLocInfo() == CCValAssign::Indirect) {
22751 // When the value is a scalable vector, we save the pointer which points to
22752 // the scalable vector value in the stack. The ValVT will be the pointer
22753 // type, instead of the scalable vector type.
22754 ValVT = LocVT;
22755 }
22756 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
22757 /*IsImmutable=*/true);
22758 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22759 SDValue Val;
22760
 22761 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
 22762 switch (VA.getLocInfo()) {
22763 default:
22764 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22765 case CCValAssign::Full:
 22766 case CCValAssign::Indirect:
 22767 case CCValAssign::BCvt:
22768 break;
22769 }
22770 Val = DAG.getExtLoad(
22771 ExtType, DL, LocVT, Chain, FIN,
 22772 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
 22773 return Val;
22774}
22775
22776static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
 22777 const CCValAssign &VA,
22778 const CCValAssign &HiVA,
22779 const SDLoc &DL) {
22780 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
22781 "Unexpected VA");
 22782 MachineFunction &MF = DAG.getMachineFunction();
 22783 MachineFrameInfo &MFI = MF.getFrameInfo();
 22784 MachineRegisterInfo &RegInfo = MF.getRegInfo();
 22785
22786 assert(VA.isRegLoc() && "Expected register VA assignment");
22787
22788 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22789 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
22790 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
22791 SDValue Hi;
22792 if (HiVA.isMemLoc()) {
22793 // Second half of f64 is passed on the stack.
22794 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
22795 /*IsImmutable=*/true);
22796 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
22797 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
 22798 MachinePointerInfo::getFixedStack(MF, FI));
 22799 } else {
22800 // Second half of f64 is passed in another GPR.
22801 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22802 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
22803 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
22804 }
22805 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
22806}
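 // For example, with a soft-float ABI on RV32D an f64 argument is lowered as
 // two i32 halves: the low half arrives in a GPR, and the high half arrives
 // either in the next GPR or, when the GPRs are exhausted, in a 4-byte stack
 // slot; BuildPairF64 then reassembles the halves into a single f64 value.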
22807
22808// Transform physical registers into virtual registers.
22809SDValue RISCVTargetLowering::LowerFormalArguments(
 22810 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
22811 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
22812 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
22813
 22814 MachineFunction &MF = DAG.getMachineFunction();
 22815
22816 switch (CallConv) {
22817 default:
22818 reportFatalUsageError("Unsupported calling convention");
22819 case CallingConv::C:
22820 case CallingConv::Fast:
22823 case CallingConv::GRAAL:
22825#define CC_VLS_CASE(ABI_VLEN) case CallingConv::RISCV_VLSCall_##ABI_VLEN:
22826 CC_VLS_CASE(32)
22827 CC_VLS_CASE(64)
22828 CC_VLS_CASE(128)
22829 CC_VLS_CASE(256)
22830 CC_VLS_CASE(512)
22831 CC_VLS_CASE(1024)
22832 CC_VLS_CASE(2048)
22833 CC_VLS_CASE(4096)
22834 CC_VLS_CASE(8192)
22835 CC_VLS_CASE(16384)
22836 CC_VLS_CASE(32768)
22837 CC_VLS_CASE(65536)
22838#undef CC_VLS_CASE
22839 break;
22840 case CallingConv::GHC:
22841 if (Subtarget.hasStdExtE())
22842 reportFatalUsageError("GHC calling convention is not supported on RVE!");
22843 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
22844 reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and "
22845 "(Zdinx/D) instruction set extensions");
22846 }
22847
22848 const Function &Func = MF.getFunction();
22849 if (Func.hasFnAttribute("interrupt")) {
22850 if (!Func.arg_empty())
 22851 reportFatalUsageError(
 22852 "Functions with the interrupt attribute cannot have arguments!");
22853
22854 StringRef Kind =
22855 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
22856
22857 constexpr StringLiteral SupportedInterruptKinds[] = {
22858 "machine",
22859 "supervisor",
22860 "rnmi",
22861 "qci-nest",
22862 "qci-nonest",
22863 "SiFive-CLIC-preemptible",
22864 "SiFive-CLIC-stack-swap",
22865 "SiFive-CLIC-preemptible-stack-swap",
22866 };
22867 if (!llvm::is_contained(SupportedInterruptKinds, Kind))
 22868 reportFatalUsageError(
 22869 "Function interrupt attribute argument not supported!");
22870
22871 if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint())
 22872 reportFatalUsageError(
 22873 "'qci-*' interrupt kinds require Xqciint extension");
22874
22875 if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
 22876 reportFatalUsageError(
 22877 "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");
22878
22879 if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
22880 reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension");
22881 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
22882 if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
22883 reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
22884 "have a frame pointer");
22885 }
22886
22887 EVT PtrVT = getPointerTy(DAG.getDataLayout());
22888 MVT XLenVT = Subtarget.getXLenVT();
22889 unsigned XLenInBytes = Subtarget.getXLen() / 8;
 22890 // Used with varargs to accumulate store chains.
22891 std::vector<SDValue> OutChains;
22892
22893 // Assign locations to all of the incoming arguments.
 22894 SmallVector<CCValAssign, 16> ArgLocs;
 22895 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
22896
22897 if (CallConv == CallingConv::GHC)
 22898 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
 22899 else
22900 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
 22901 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
 22902 : CC_RISCV);
22903
22904 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
22905 CCValAssign &VA = ArgLocs[i];
22906 SDValue ArgValue;
22907 // Passing f64 on RV32D with a soft float ABI must be handled as a special
22908 // case.
22909 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
22910 assert(VA.needsCustom());
22911 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
22912 } else if (VA.isRegLoc())
22913 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
22914 else
22915 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
22916
22917 if (VA.getLocInfo() == CCValAssign::Indirect) {
22918 // If the original argument was split and passed by reference (e.g. i128
22919 // on RV32), we need to load all parts of it here (using the same
22920 // address). Vectors may be partly split to registers and partly to the
22921 // stack, in which case the base address is partly offset and subsequent
22922 // stores are relative to that.
22923 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
 22924 MachinePointerInfo()));
 22925 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
22926 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
22927 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
22928 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
22929 CCValAssign &PartVA = ArgLocs[i + 1];
22930 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
22931 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
22932 if (PartVA.getValVT().isScalableVector())
22933 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
22934 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
22935 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
 22936 MachinePointerInfo()));
 22937 ++i;
22938 ++InsIdx;
22939 }
22940 continue;
22941 }
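 // For example, an i128 argument on RV32 is passed indirectly: Ins holds four
 // i32 parts that share one OrigArgIndex with PartOffsets 0, 4, 8 and 12, and
 // the loop above loads each part from ArgValue plus its relative offset.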
22942 InVals.push_back(ArgValue);
22943 }
22944
22945 if (any_of(ArgLocs,
22946 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
 22947 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
 22948
22949 if (IsVarArg) {
22950 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
22951 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
22952 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
22953 MachineFrameInfo &MFI = MF.getFrameInfo();
22954 MachineRegisterInfo &RegInfo = MF.getRegInfo();
 22955 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
 22956
22957 // Size of the vararg save area. For now, the varargs save area is either
22958 // zero or large enough to hold a0-a7.
22959 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
22960 int FI;
22961
22962 // If all registers are allocated, then all varargs must be passed on the
22963 // stack and we don't need to save any argregs.
22964 if (VarArgsSaveSize == 0) {
22965 int VaArgOffset = CCInfo.getStackSize();
22966 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
22967 } else {
22968 int VaArgOffset = -VarArgsSaveSize;
22969 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
22970
22971 // If saving an odd number of registers then create an extra stack slot to
22972 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
 22973 // offsets to even-numbered registers remain 2*XLEN-aligned.
22974 if (Idx % 2) {
 22975 FI = MFI.CreateFixedObject(
 22976 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
22977 VarArgsSaveSize += XLenInBytes;
22978 }
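 // For example, on RV64 (XLenInBytes == 8) with three named arguments in
 // a0-a2, Idx is 3 and a3-a7 are saved: VarArgsSaveSize starts at 5 * 8 = 40
 // bytes at offset -40. Because Idx is odd, the extra slot above grows the
 // area to 48 bytes so that even-numbered registers stay 16-byte aligned.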
22979
22980 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22981
22982 // Copy the integer registers that may have been used for passing varargs
22983 // to the vararg save area.
22984 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
22985 const Register Reg = RegInfo.createVirtualRegister(RC);
22986 RegInfo.addLiveIn(ArgRegs[I], Reg);
22987 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
22988 SDValue Store = DAG.getStore(
22989 Chain, DL, ArgValue, FIN,
22990 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
22991 OutChains.push_back(Store);
22992 FIN =
22993 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
22994 }
22995 }
22996
22997 // Record the frame index of the first variable argument
 22998 // which is needed when lowering VASTART.
22999 RVFI->setVarArgsFrameIndex(FI);
23000 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
23001 }
23002
23003 // All stores are grouped in one node to allow the matching between
23004 // the size of Ins and InVals. This only happens for vararg functions.
23005 if (!OutChains.empty()) {
23006 OutChains.push_back(Chain);
23007 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
23008 }
23009
23010 return Chain;
23011}
23012
23013/// isEligibleForTailCallOptimization - Check whether the call is eligible
23014/// for tail call optimization.
23015/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
23016bool RISCVTargetLowering::isEligibleForTailCallOptimization(
23017 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
23018 const SmallVector<CCValAssign, 16> &ArgLocs) const {
23019
23020 auto CalleeCC = CLI.CallConv;
23021 auto &Outs = CLI.Outs;
23022 auto &Caller = MF.getFunction();
23023 auto CallerCC = Caller.getCallingConv();
23024
23025 // Exception-handling functions need a special set of instructions to
23026 // indicate a return to the hardware. Tail-calling another function would
23027 // probably break this.
23028 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
23029 // should be expanded as new function attributes are introduced.
23030 if (Caller.hasFnAttribute("interrupt"))
23031 return false;
23032
23033 // Do not tail call opt if the stack is used to pass parameters.
23034 if (CCInfo.getStackSize() != 0)
23035 return false;
23036
23037 // Do not tail call opt if any parameters need to be passed indirectly.
23038 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
23039 // passed indirectly. So the address of the value will be passed in a
23040 // register, or if not available, then the address is put on the stack. In
23041 // order to pass indirectly, space on the stack often needs to be allocated
 23042 // in order to store the value. In this case the CCInfo.getStackSize() != 0
 23043 // check is not enough and we need to check if any CCValAssign in ArgLocs
 23044 // is passed CCValAssign::Indirect.
23045 for (auto &VA : ArgLocs)
23046 if (VA.getLocInfo() == CCValAssign::Indirect)
23047 return false;
23048
23049 // Do not tail call opt if either caller or callee uses struct return
23050 // semantics.
23051 auto IsCallerStructRet = Caller.hasStructRetAttr();
23052 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
23053 if (IsCallerStructRet || IsCalleeStructRet)
23054 return false;
23055
23056 // The callee has to preserve all registers the caller needs to preserve.
23057 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
23058 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
23059 if (CalleeCC != CallerCC) {
23060 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
23061 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
23062 return false;
23063 }
23064
23065 // Byval parameters hand the function a pointer directly into the stack area
23066 // we want to reuse during a tail call. Working around this *is* possible
23067 // but less efficient and uglier in LowerCall.
23068 for (auto &Arg : Outs)
23069 if (Arg.Flags.isByVal())
23070 return false;
23071
23072 return true;
23073}
23074
23075static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
 23076 return DAG.getDataLayout().getPrefTypeAlign(
23077 VT.getTypeForEVT(*DAG.getContext()));
23078}
23079
23080// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
23081// and output parameter nodes.
23082SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
 23083 SmallVectorImpl<SDValue> &InVals) const {
23084 SelectionDAG &DAG = CLI.DAG;
23085 SDLoc &DL = CLI.DL;
 23086 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
 23087 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
 23088 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
 23089 SDValue Chain = CLI.Chain;
23090 SDValue Callee = CLI.Callee;
23091 bool &IsTailCall = CLI.IsTailCall;
23092 CallingConv::ID CallConv = CLI.CallConv;
23093 bool IsVarArg = CLI.IsVarArg;
23094 EVT PtrVT = getPointerTy(DAG.getDataLayout());
23095 MVT XLenVT = Subtarget.getXLenVT();
23096 const CallBase *CB = CLI.CB;
23097
 23098 MachineFunction &MF = DAG.getMachineFunction();
 23099 MachineFunction::CallSiteInfo CSInfo;
 23100
23101 // Set type id for call site info.
23102 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
23103 CSInfo = MachineFunction::CallSiteInfo(*CB);
23104
23105 // Analyze the operands of the call, assigning locations to each operand.
 23106 SmallVector<CCValAssign, 16> ArgLocs;
 23107 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
23108
23109 if (CallConv == CallingConv::GHC) {
23110 if (Subtarget.hasStdExtE())
23111 reportFatalUsageError("GHC calling convention is not supported on RVE!");
23112 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
23113 } else
23114 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
 23115 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
 23116 : CC_RISCV);
23117
23118 // Check if it's really possible to do a tail call.
23119 if (IsTailCall)
23120 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
23121
23122 if (IsTailCall)
23123 ++NumTailCalls;
23124 else if (CLI.CB && CLI.CB->isMustTailCall())
23125 reportFatalInternalError("failed to perform tail call elimination on a "
23126 "call site marked musttail");
23127
23128 // Get a count of how many bytes are to be pushed on the stack.
23129 unsigned NumBytes = ArgCCInfo.getStackSize();
23130
23131 // Create local copies for byval args
23132 SmallVector<SDValue, 8> ByValArgs;
23133 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23134 ISD::ArgFlagsTy Flags = Outs[i].Flags;
23135 if (!Flags.isByVal())
23136 continue;
23137
23138 SDValue Arg = OutVals[i];
23139 unsigned Size = Flags.getByValSize();
23140 Align Alignment = Flags.getNonZeroByValAlign();
23141
23142 int FI =
23143 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
23144 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
23145 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
23146
23147 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
23148 /*IsVolatile=*/false,
23149 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
 23150 MachinePointerInfo(), MachinePointerInfo());
 23151 ByValArgs.push_back(FIPtr);
23152 }
23153
23154 if (!IsTailCall)
23155 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
23156
23157 // Copy argument values to their designated locations.
 23158 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
 23159 SmallVector<SDValue, 8> MemOpChains;
23160 SDValue StackPtr;
23161 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
23162 ++i, ++OutIdx) {
23163 CCValAssign &VA = ArgLocs[i];
23164 SDValue ArgValue = OutVals[OutIdx];
23165 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
23166
23167 // Handle passing f64 on RV32D with a soft float ABI as a special case.
23168 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23169 assert(VA.isRegLoc() && "Expected register VA assignment");
23170 assert(VA.needsCustom());
23171 SDValue SplitF64 = DAG.getNode(
23172 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
23173 SDValue Lo = SplitF64.getValue(0);
23174 SDValue Hi = SplitF64.getValue(1);
23175
23176 Register RegLo = VA.getLocReg();
23177 RegsToPass.push_back(std::make_pair(RegLo, Lo));
23178
23179 // Get the CCValAssign for the Hi part.
23180 CCValAssign &HiVA = ArgLocs[++i];
23181
23182 if (HiVA.isMemLoc()) {
23183 // Second half of f64 is passed on the stack.
23184 if (!StackPtr.getNode())
23185 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
 23186 SDValue Address =
 23187 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23188 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
23189 // Emit the store.
23190 MemOpChains.push_back(DAG.getStore(
23191 Chain, DL, Hi, Address,
 23192 MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
 23193 } else {
23194 // Second half of f64 is passed in another GPR.
23195 Register RegHigh = HiVA.getLocReg();
23196 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
23197 }
23198 continue;
23199 }
23200
23201 // Promote the value if needed.
23202 // For now, only handle fully promoted and indirect arguments.
23203 if (VA.getLocInfo() == CCValAssign::Indirect) {
23204 // Store the argument in a stack slot and pass its address.
23205 Align StackAlign =
23206 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
23207 getPrefTypeAlign(ArgValue.getValueType(), DAG));
23208 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
23209 // If the original argument was split (e.g. i128), we need
23210 // to store the required parts of it here (and pass just one address).
23211 // Vectors may be partly split to registers and partly to the stack, in
23212 // which case the base address is partly offset and subsequent stores are
23213 // relative to that.
23214 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
23215 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
23216 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23217 // Calculate the total size to store. We don't have access to what we're
23218 // actually storing other than performing the loop and collecting the
23219 // info.
 23220 SmallVector<std::pair<SDValue, SDValue>> Parts;
 23221 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
23222 SDValue PartValue = OutVals[OutIdx + 1];
23223 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
23224 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23225 EVT PartVT = PartValue.getValueType();
23226 if (PartVT.isScalableVector())
23227 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23228 StoredSize += PartVT.getStoreSize();
23229 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
23230 Parts.push_back(std::make_pair(PartValue, Offset));
23231 ++i;
23232 ++OutIdx;
23233 }
23234 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
23235 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
23236 MemOpChains.push_back(
23237 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
 23238 MachinePointerInfo::getFixedStack(MF, FI)));
 23239 for (const auto &Part : Parts) {
23240 SDValue PartValue = Part.first;
23241 SDValue PartOffset = Part.second;
 23242 SDValue Address =
 23243 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
23244 MemOpChains.push_back(
23245 DAG.getStore(Chain, DL, PartValue, Address,
 23246 MachinePointerInfo::getUnknownStack(MF)));
 23247 }
23248 ArgValue = SpillSlot;
23249 } else {
23250 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
23251 }
23252
23253 // Use local copy if it is a byval arg.
23254 if (Flags.isByVal())
23255 ArgValue = ByValArgs[j++];
23256
23257 if (VA.isRegLoc()) {
23258 // Queue up the argument copies and emit them at the end.
23259 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
23260 } else {
23261 assert(VA.isMemLoc() && "Argument not register or memory");
23262 assert(!IsTailCall && "Tail call not allowed if stack is used "
23263 "for passing parameters");
23264
23265 // Work out the address of the stack slot.
23266 if (!StackPtr.getNode())
23267 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
 23268 SDValue Address =
 23269 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
 23270 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
 23271
23272 // Emit the store.
23273 MemOpChains.push_back(
23274 DAG.getStore(Chain, DL, ArgValue, Address,
 23275 MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
 23276 }
23277 }
23278
23279 // Join the stores, which are independent of one another.
23280 if (!MemOpChains.empty())
23281 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
23282
23283 SDValue Glue;
23284
23285 // Build a sequence of copy-to-reg nodes, chained and glued together.
23286 for (auto &Reg : RegsToPass) {
23287 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
23288 Glue = Chain.getValue(1);
23289 }
23290
23291 // Validate that none of the argument registers have been marked as
23292 // reserved, if so report an error. Do the same for the return address if this
23293 // is not a tailcall.
23294 validateCCReservedRegs(RegsToPass, MF);
23295 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
 23296 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
 23297 MF.getFunction(),
23298 "Return address register required, but has been reserved."});
23299
23300 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
23301 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
23302 // split it and then direct call can be matched by PseudoCALL.
23303 bool CalleeIsLargeExternalSymbol = false;
 23304 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
 23305 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
23306 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
23307 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23308 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
23309 CalleeIsLargeExternalSymbol = true;
23310 }
23311 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
23312 const GlobalValue *GV = S->getGlobal();
23313 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
23314 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23315 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
23316 }
23317
23318 // The first call operand is the chain and the second is the target address.
 23319 SmallVector<SDValue, 8> Ops;
 23320 Ops.push_back(Chain);
23321 Ops.push_back(Callee);
23322
23323 // Add argument registers to the end of the list so that they are
23324 // known live into the call.
23325 for (auto &Reg : RegsToPass)
23326 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
23327
23328 // Add a register mask operand representing the call-preserved registers.
23329 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
23330 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
23331 assert(Mask && "Missing call preserved mask for calling convention");
23332 Ops.push_back(DAG.getRegisterMask(Mask));
23333
23334 // Glue the call to the argument copies, if any.
23335 if (Glue.getNode())
23336 Ops.push_back(Glue);
23337
23338 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
23339 "Unexpected CFI type for a direct call");
23340
23341 // Emit the call.
23342 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
23343
 23344 // Use a software-guarded branch for large code model non-indirect calls.
 23345 // A tail call to an external symbol will have a null CLI.CB, so we need
 23346 // another way to determine the call site type.
23347 bool NeedSWGuarded = false;
 23348 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
 23349 Subtarget.hasStdExtZicfilp() &&
23350 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
23351 NeedSWGuarded = true;
23352
23353 if (IsTailCall) {
 23354 MF.getFrameInfo().setHasTailCall();
 23355 unsigned CallOpc =
23356 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
23357 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23358 if (CLI.CFIType)
23359 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23360 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
23361 if (MF.getTarget().Options.EmitCallGraphSection && CB &&
23362 CB->isIndirectCall())
23363 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
23364 return Ret;
23365 }
23366
23367 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
23368 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23369 if (CLI.CFIType)
23370 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23371
23372 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
23373 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
23374
23375 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
23376 Glue = Chain.getValue(1);
23377
23378 // Mark the end of the call, which is glued to the call itself.
23379 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
23380 Glue = Chain.getValue(1);
23381
23382 // Assign locations to each value returned by this call.
 23383 SmallVector<CCValAssign, 16> RVLocs;
 23384 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
23385 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
23386
23387 // Copy all of the result registers out of their specified physreg.
23388 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
23389 auto &VA = RVLocs[i];
23390 // Copy the value out
23391 SDValue RetValue =
23392 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
23393 // Glue the RetValue to the end of the call sequence
23394 Chain = RetValue.getValue(1);
23395 Glue = RetValue.getValue(2);
23396
23397 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23398 assert(VA.needsCustom());
23399 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
23400 MVT::i32, Glue);
23401 Chain = RetValue2.getValue(1);
23402 Glue = RetValue2.getValue(2);
23403 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
23404 RetValue2);
23405 } else
23406 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
23407
23408 InVals.push_back(RetValue);
23409 }
23410
23411 return Chain;
23412}
23413
23414bool RISCVTargetLowering::CanLowerReturn(
 23415 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
23416 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
23417 const Type *RetTy) const {
 23418 SmallVector<CCValAssign, 16> RVLocs;
 23419 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
23420
23421 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23422 MVT VT = Outs[i].VT;
23423 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
23424 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
23425 /*IsRet=*/true, Outs[i].OrigTy))
23426 return false;
23427 }
23428 return true;
23429}
23430
23431SDValue
23432RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
 23433 bool IsVarArg,
 23434 const SmallVectorImpl<ISD::OutputArg> &Outs,
 23435 const SmallVectorImpl<SDValue> &OutVals,
23436 const SDLoc &DL, SelectionDAG &DAG) const {
 23437 MachineFunction &MF = DAG.getMachineFunction();
 23438 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
23439
23440 // Stores the assignment of the return value to a location.
 23441 SmallVector<CCValAssign, 16> RVLocs;
 23442
23443 // Info about the registers and stack slot.
23444 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
23445 *DAG.getContext());
23446
23447 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
23448 nullptr, CC_RISCV);
23449
23450 if (CallConv == CallingConv::GHC && !RVLocs.empty())
23451 reportFatalUsageError("GHC functions return void only");
23452
23453 SDValue Glue;
23454 SmallVector<SDValue, 4> RetOps(1, Chain);
23455
23456 // Copy the result values into the output registers.
23457 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
23458 SDValue Val = OutVals[OutIdx];
23459 CCValAssign &VA = RVLocs[i];
23460 assert(VA.isRegLoc() && "Can only return in registers!");
23461
23462 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23463 // Handle returning f64 on RV32D with a soft float ABI.
23464 assert(VA.isRegLoc() && "Expected return via registers");
23465 assert(VA.needsCustom());
23466 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
23467 DAG.getVTList(MVT::i32, MVT::i32), Val);
23468 SDValue Lo = SplitF64.getValue(0);
23469 SDValue Hi = SplitF64.getValue(1);
23470 Register RegLo = VA.getLocReg();
23471 Register RegHi = RVLocs[++i].getLocReg();
23472
23473 if (STI.isRegisterReservedByUser(RegLo) ||
23474 STI.isRegisterReservedByUser(RegHi))
 23475 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
 23476 MF.getFunction(),
23477 "Return value register required, but has been reserved."});
23478
23479 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
23480 Glue = Chain.getValue(1);
23481 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
23482 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
23483 Glue = Chain.getValue(1);
23484 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
23485 } else {
23486 // Handle a 'normal' return.
23487 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
23488 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
23489
23490 if (STI.isRegisterReservedByUser(VA.getLocReg()))
 23491 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
 23492 MF.getFunction(),
23493 "Return value register required, but has been reserved."});
23494
23495 // Guarantee that all emitted copies are stuck together.
23496 Glue = Chain.getValue(1);
23497 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
23498 }
23499 }
23500
23501 RetOps[0] = Chain; // Update chain.
23502
23503 // Add the glue node if we have it.
23504 if (Glue.getNode()) {
23505 RetOps.push_back(Glue);
23506 }
23507
23508 if (any_of(RVLocs,
23509 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
 23510 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
 23511
23512 unsigned RetOpc = RISCVISD::RET_GLUE;
23513 // Interrupt service routines use different return instructions.
23514 const Function &Func = DAG.getMachineFunction().getFunction();
23515 if (Func.hasFnAttribute("interrupt")) {
23516 if (!Func.getReturnType()->isVoidTy())
 23517 reportFatalUsageError(
 23518 "Functions with the interrupt attribute must have void return type!");
23519
23521 StringRef Kind =
23522 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
23523
23524 if (Kind == "supervisor")
23525 RetOpc = RISCVISD::SRET_GLUE;
23526 else if (Kind == "rnmi") {
23527 assert(STI.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
23528 "Need Smrnmi extension for rnmi");
23529 RetOpc = RISCVISD::MNRET_GLUE;
23530 } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
23531 assert(STI.hasFeature(RISCV::FeatureVendorXqciint) &&
23532 "Need Xqciint for qci-(no)nest");
23533 RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
23534 } else
23535 RetOpc = RISCVISD::MRET_GLUE;
23536 }
23537
23538 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
23539}
23540
23541void RISCVTargetLowering::validateCCReservedRegs(
23542 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
23543 MachineFunction &MF) const {
23544 const Function &F = MF.getFunction();
23545 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
23546
23547 if (llvm::any_of(Regs, [&STI](auto Reg) {
23548 return STI.isRegisterReservedByUser(Reg.first);
23549 }))
23550 F.getContext().diagnose(DiagnosticInfoUnsupported{
23551 F, "Argument register required, but has been reserved."});
23552}
23553
23554// Check if the result of the node is only used as a return value, as
23555// otherwise we can't perform a tail-call.
23556bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
 23557 if (N->getNumValues() != 1)
23558 return false;
23559 if (!N->hasNUsesOfValue(1, 0))
23560 return false;
23561
23562 SDNode *Copy = *N->user_begin();
23563
23564 if (Copy->getOpcode() == ISD::BITCAST) {
23565 return isUsedByReturnOnly(Copy, Chain);
23566 }
23567
23568 // TODO: Handle additional opcodes in order to support tail-calling libcalls
23569 // with soft float ABIs.
23570 if (Copy->getOpcode() != ISD::CopyToReg) {
23571 return false;
23572 }
23573
23574 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
23575 // isn't safe to perform a tail call.
23576 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
23577 return false;
23578
23579 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
23580 bool HasRet = false;
23581 for (SDNode *Node : Copy->users()) {
23582 if (Node->getOpcode() != RISCVISD::RET_GLUE)
23583 return false;
23584 HasRet = true;
23585 }
23586 if (!HasRet)
23587 return false;
23588
23589 Chain = Copy->getOperand(0);
23590 return true;
23591}
23592
23593bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
 23594 return CI->isTailCall();
23595}
23596
23597/// getConstraintType - Given a constraint letter, return the type of
23598/// constraint it is for this target.
23599RISCVTargetLowering::ConstraintType
23600RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
 23601 if (Constraint.size() == 1) {
23602 switch (Constraint[0]) {
23603 default:
23604 break;
23605 case 'f':
23606 case 'R':
23607 return C_RegisterClass;
23608 case 'I':
23609 case 'J':
23610 case 'K':
23611 return C_Immediate;
23612 case 'A':
23613 return C_Memory;
23614 case 's':
23615 case 'S': // A symbolic address
23616 return C_Other;
23617 }
23618 } else {
23619 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
23620 return C_RegisterClass;
23621 if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
23622 return C_RegisterClass;
23623 }
23624 return TargetLowering::getConstraintType(Constraint);
23625}
23626
23627std::pair<unsigned, const TargetRegisterClass *>
23628RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 23629 StringRef Constraint,
23630 MVT VT) const {
23631 // First, see if this is a constraint that directly corresponds to a RISC-V
23632 // register class.
23633 if (Constraint.size() == 1) {
23634 switch (Constraint[0]) {
23635 case 'r':
23636 // TODO: Support fixed vectors up to XLen for P extension?
23637 if (VT.isVector())
23638 break;
23639 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23640 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23641 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23642 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23643 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23644 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23645 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23646 case 'f':
23647 if (VT == MVT::f16) {
23648 if (Subtarget.hasStdExtZfhmin())
23649 return std::make_pair(0U, &RISCV::FPR16RegClass);
23650 if (Subtarget.hasStdExtZhinxmin())
23651 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23652 } else if (VT == MVT::f32) {
23653 if (Subtarget.hasStdExtF())
23654 return std::make_pair(0U, &RISCV::FPR32RegClass);
23655 if (Subtarget.hasStdExtZfinx())
23656 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23657 } else if (VT == MVT::f64) {
23658 if (Subtarget.hasStdExtD())
23659 return std::make_pair(0U, &RISCV::FPR64RegClass);
23660 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23661 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23662 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23663 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23664 }
23665 break;
23666 case 'R':
23667 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23668 (VT == MVT::i128 && Subtarget.is64Bit()))
23669 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23670 break;
23671 default:
23672 break;
23673 }
23674 } else if (Constraint == "vr") {
23675 for (const auto *RC :
23676 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
23677 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
23678 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
23679 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
23680 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
23681 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
23682 &RISCV::VRN2M4RegClass}) {
23683 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23684 return std::make_pair(0U, RC);
23685
23686 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23687 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23688 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23689 return std::make_pair(0U, RC);
23690 }
23691 }
23692 } else if (Constraint == "vd") {
23693 for (const auto *RC :
23694 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
23695 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
23696 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
23697 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
23698 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
23699 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
23700 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
23701 &RISCV::VRN2M4NoV0RegClass}) {
23702 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23703 return std::make_pair(0U, RC);
23704
23705 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23706 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23707 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23708 return std::make_pair(0U, RC);
23709 }
23710 }
23711 } else if (Constraint == "vm") {
23712 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
23713 return std::make_pair(0U, &RISCV::VMV0RegClass);
23714
23715 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23716 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23717 // VT here might be coerced to vector with i8 elements, so we need to
23718 // check if this is a M1 register here instead of checking VMV0RegClass.
23719 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT))
23720 return std::make_pair(0U, &RISCV::VMV0RegClass);
23721 }
23722 } else if (Constraint == "cr") {
23723 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23724 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23725 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23726 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23727 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23728 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23729 if (!VT.isVector())
23730 return std::make_pair(0U, &RISCV::GPRCRegClass);
23731 } else if (Constraint == "cR") {
23732 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23733 (VT == MVT::i128 && Subtarget.is64Bit()))
23734 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23735 } else if (Constraint == "cf") {
23736 if (VT == MVT::f16) {
23737 if (Subtarget.hasStdExtZfhmin())
23738 return std::make_pair(0U, &RISCV::FPR16CRegClass);
23739 if (Subtarget.hasStdExtZhinxmin())
23740 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23741 } else if (VT == MVT::f32) {
23742 if (Subtarget.hasStdExtF())
23743 return std::make_pair(0U, &RISCV::FPR32CRegClass);
23744 if (Subtarget.hasStdExtZfinx())
23745 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23746 } else if (VT == MVT::f64) {
23747 if (Subtarget.hasStdExtD())
23748 return std::make_pair(0U, &RISCV::FPR64CRegClass);
23749 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23750 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23751 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23752 return std::make_pair(0U, &RISCV::GPRCRegClass);
23753 }
23754 }
23755
23756 // Clang will correctly decode the usage of register name aliases into their
23757 // official names. However, other frontends like `rustc` do not. This allows
23758 // users of these frontends to use the ABI names for registers in LLVM-style
23759 // register constraints.
23760 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
23761 .Case("{zero}", RISCV::X0)
23762 .Case("{ra}", RISCV::X1)
23763 .Case("{sp}", RISCV::X2)
23764 .Case("{gp}", RISCV::X3)
23765 .Case("{tp}", RISCV::X4)
23766 .Case("{t0}", RISCV::X5)
23767 .Case("{t1}", RISCV::X6)
23768 .Case("{t2}", RISCV::X7)
23769 .Cases("{s0}", "{fp}", RISCV::X8)
23770 .Case("{s1}", RISCV::X9)
23771 .Case("{a0}", RISCV::X10)
23772 .Case("{a1}", RISCV::X11)
23773 .Case("{a2}", RISCV::X12)
23774 .Case("{a3}", RISCV::X13)
23775 .Case("{a4}", RISCV::X14)
23776 .Case("{a5}", RISCV::X15)
23777 .Case("{a6}", RISCV::X16)
23778 .Case("{a7}", RISCV::X17)
23779 .Case("{s2}", RISCV::X18)
23780 .Case("{s3}", RISCV::X19)
23781 .Case("{s4}", RISCV::X20)
23782 .Case("{s5}", RISCV::X21)
23783 .Case("{s6}", RISCV::X22)
23784 .Case("{s7}", RISCV::X23)
23785 .Case("{s8}", RISCV::X24)
23786 .Case("{s9}", RISCV::X25)
23787 .Case("{s10}", RISCV::X26)
23788 .Case("{s11}", RISCV::X27)
23789 .Case("{t3}", RISCV::X28)
23790 .Case("{t4}", RISCV::X29)
23791 .Case("{t5}", RISCV::X30)
23792 .Case("{t6}", RISCV::X31)
23793 .Default(RISCV::NoRegister);
23794 if (XRegFromAlias != RISCV::NoRegister)
23795 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
23796
23797 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
23798 // TableGen record rather than the AsmName to choose registers for InlineAsm
23799 // constraints, plus we want to match those names to the widest floating point
23800 // register type available, manually select floating point registers here.
23801 //
23802 // The second case is the ABI name of the register, so that frontends can also
23803 // use the ABI names in register constraint lists.
23804 if (Subtarget.hasStdExtF()) {
23805 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
23806 .Cases("{f0}", "{ft0}", RISCV::F0_F)
23807 .Cases("{f1}", "{ft1}", RISCV::F1_F)
23808 .Cases("{f2}", "{ft2}", RISCV::F2_F)
23809 .Cases("{f3}", "{ft3}", RISCV::F3_F)
23810 .Cases("{f4}", "{ft4}", RISCV::F4_F)
23811 .Cases("{f5}", "{ft5}", RISCV::F5_F)
23812 .Cases("{f6}", "{ft6}", RISCV::F6_F)
23813 .Cases("{f7}", "{ft7}", RISCV::F7_F)
23814 .Cases("{f8}", "{fs0}", RISCV::F8_F)
23815 .Cases("{f9}", "{fs1}", RISCV::F9_F)
23816 .Cases("{f10}", "{fa0}", RISCV::F10_F)
23817 .Cases("{f11}", "{fa1}", RISCV::F11_F)
23818 .Cases("{f12}", "{fa2}", RISCV::F12_F)
23819 .Cases("{f13}", "{fa3}", RISCV::F13_F)
23820 .Cases("{f14}", "{fa4}", RISCV::F14_F)
23821 .Cases("{f15}", "{fa5}", RISCV::F15_F)
23822 .Cases("{f16}", "{fa6}", RISCV::F16_F)
23823 .Cases("{f17}", "{fa7}", RISCV::F17_F)
23824 .Cases("{f18}", "{fs2}", RISCV::F18_F)
23825 .Cases("{f19}", "{fs3}", RISCV::F19_F)
23826 .Cases("{f20}", "{fs4}", RISCV::F20_F)
23827 .Cases("{f21}", "{fs5}", RISCV::F21_F)
23828 .Cases("{f22}", "{fs6}", RISCV::F22_F)
23829 .Cases("{f23}", "{fs7}", RISCV::F23_F)
23830 .Cases("{f24}", "{fs8}", RISCV::F24_F)
23831 .Cases("{f25}", "{fs9}", RISCV::F25_F)
23832 .Cases("{f26}", "{fs10}", RISCV::F26_F)
23833 .Cases("{f27}", "{fs11}", RISCV::F27_F)
23834 .Cases("{f28}", "{ft8}", RISCV::F28_F)
23835 .Cases("{f29}", "{ft9}", RISCV::F29_F)
23836 .Cases("{f30}", "{ft10}", RISCV::F30_F)
23837 .Cases("{f31}", "{ft11}", RISCV::F31_F)
23838 .Default(RISCV::NoRegister);
23839 if (FReg != RISCV::NoRegister) {
23840 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
23841 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
23842 unsigned RegNo = FReg - RISCV::F0_F;
23843 unsigned DReg = RISCV::F0_D + RegNo;
23844 return std::make_pair(DReg, &RISCV::FPR64RegClass);
23845 }
23846 if (VT == MVT::f32 || VT == MVT::Other)
23847 return std::make_pair(FReg, &RISCV::FPR32RegClass);
23848 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
23849 unsigned RegNo = FReg - RISCV::F0_F;
23850 unsigned HReg = RISCV::F0_H + RegNo;
23851 return std::make_pair(HReg, &RISCV::FPR16RegClass);
23852 }
23853 }
23854 }
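 // For example, the constraint "{fa0}" (or "{f10}") maps to F10_F above; with
 // the D extension and an f64 or unknown operand type it is widened to F10_D
 // in FPR64, and with Zfhmin and an f16 operand it is narrowed to F10_H.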
23855
23856 if (Subtarget.hasVInstructions()) {
23857 Register VReg = StringSwitch<Register>(Constraint.lower())
23858 .Case("{v0}", RISCV::V0)
23859 .Case("{v1}", RISCV::V1)
23860 .Case("{v2}", RISCV::V2)
23861 .Case("{v3}", RISCV::V3)
23862 .Case("{v4}", RISCV::V4)
23863 .Case("{v5}", RISCV::V5)
23864 .Case("{v6}", RISCV::V6)
23865 .Case("{v7}", RISCV::V7)
23866 .Case("{v8}", RISCV::V8)
23867 .Case("{v9}", RISCV::V9)
23868 .Case("{v10}", RISCV::V10)
23869 .Case("{v11}", RISCV::V11)
23870 .Case("{v12}", RISCV::V12)
23871 .Case("{v13}", RISCV::V13)
23872 .Case("{v14}", RISCV::V14)
23873 .Case("{v15}", RISCV::V15)
23874 .Case("{v16}", RISCV::V16)
23875 .Case("{v17}", RISCV::V17)
23876 .Case("{v18}", RISCV::V18)
23877 .Case("{v19}", RISCV::V19)
23878 .Case("{v20}", RISCV::V20)
23879 .Case("{v21}", RISCV::V21)
23880 .Case("{v22}", RISCV::V22)
23881 .Case("{v23}", RISCV::V23)
23882 .Case("{v24}", RISCV::V24)
23883 .Case("{v25}", RISCV::V25)
23884 .Case("{v26}", RISCV::V26)
23885 .Case("{v27}", RISCV::V27)
23886 .Case("{v28}", RISCV::V28)
23887 .Case("{v29}", RISCV::V29)
23888 .Case("{v30}", RISCV::V30)
23889 .Case("{v31}", RISCV::V31)
23890 .Default(RISCV::NoRegister);
23891 if (VReg != RISCV::NoRegister) {
23892 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
23893 return std::make_pair(VReg, &RISCV::VMRegClass);
23894 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
23895 return std::make_pair(VReg, &RISCV::VRRegClass);
23896 for (const auto *RC :
23897 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
23898 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
23899 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
23900 return std::make_pair(VReg, RC);
23901 }
23902 }
23903 }
23904 }
23905
23906 std::pair<Register, const TargetRegisterClass *> Res =
 23907 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
 23908
23909 // If we picked one of the Zfinx register classes, remap it to the GPR class.
23910 // FIXME: When Zfinx is supported in CodeGen this will need to take the
23911 // Subtarget into account.
23912 if (Res.second == &RISCV::GPRF16RegClass ||
23913 Res.second == &RISCV::GPRF32RegClass ||
23914 Res.second == &RISCV::GPRPairRegClass)
23915 return std::make_pair(Res.first, &RISCV::GPRRegClass);
23916
23917 return Res;
23918}
23919
23920InlineAsm::ConstraintCode
23921RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
 23922 // Currently only support length 1 constraints.
23923 if (ConstraintCode.size() == 1) {
23924 switch (ConstraintCode[0]) {
23925 case 'A':
 23926 return InlineAsm::ConstraintCode::A;
 23927 default:
23928 break;
23929 }
23930 }
23931
23932 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
23933}
23934
23935void RISCVTargetLowering::LowerAsmOperandForConstraint(
 23936 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
23937 SelectionDAG &DAG) const {
23938 // Currently only support length 1 constraints.
23939 if (Constraint.size() == 1) {
23940 switch (Constraint[0]) {
23941 case 'I':
23942 // Validate & create a 12-bit signed immediate operand.
23943 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
23944 uint64_t CVal = C->getSExtValue();
23945 if (isInt<12>(CVal))
23946 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
23947 Subtarget.getXLenVT()));
23948 }
23949 return;
23950 case 'J':
23951 // Validate & create an integer zero operand.
23952 if (isNullConstant(Op))
23953 Ops.push_back(
23954 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
23955 return;
23956 case 'K':
23957 // Validate & create a 5-bit unsigned immediate operand.
23958 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
23959 uint64_t CVal = C->getZExtValue();
23960 if (isUInt<5>(CVal))
23961 Ops.push_back(
23962 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
23963 }
23964 return;
23965 case 'S':
 23966 TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
 23967 return;
23968 default:
23969 break;
23970 }
23971 }
 23972 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
 23973}
23974
23975Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
 23976 Instruction *Inst,
23977 AtomicOrdering Ord) const {
23978 if (Subtarget.hasStdExtZtso()) {
 23979 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
 23980 return Builder.CreateFence(Ord);
23981 return nullptr;
23982 }
23983
 23984 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
 23985 return Builder.CreateFence(Ord);
23986 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
23987 return Builder.CreateFence(AtomicOrdering::Release);
23988 return nullptr;
23989}
23990
23991Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
 23992 Instruction *Inst,
23993 AtomicOrdering Ord) const {
23994 if (Subtarget.hasStdExtZtso()) {
 23995 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
 23996 return Builder.CreateFence(Ord);
23997 return nullptr;
23998 }
23999
24000 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
24001 return Builder.CreateFence(AtomicOrdering::Acquire);
24002 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
 24003 Ord == AtomicOrdering::SequentiallyConsistent)
 24004 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
24005 return nullptr;
24006}
24007
24008TargetLowering::AtomicExpansionKind
24009RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
 24010 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
24011 // point operations can't be used in an lr/sc sequence without breaking the
24012 // forward-progress guarantee.
24013 if (AI->isFloatingPointOperation() ||
 24014 AI->getOperation() == AtomicRMWInst::UIncWrap ||
 24015 AI->getOperation() == AtomicRMWInst::UDecWrap ||
 24016 AI->getOperation() == AtomicRMWInst::USubCond ||
 24017 AI->getOperation() == AtomicRMWInst::USubSat)
 24018 return AtomicExpansionKind::CmpXChg;
 24019
24020 // Don't expand forced atomics, we want to have __sync libcalls instead.
24021 if (Subtarget.hasForcedAtomics())
 24022 return AtomicExpansionKind::None;
 24023
24024 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
24025 if (AI->getOperation() == AtomicRMWInst::Nand) {
24026 if (Subtarget.hasStdExtZacas() &&
24027 (Size >= 32 || Subtarget.hasStdExtZabha()))
 24028 return AtomicExpansionKind::CmpXChg;
 24029 if (Size < 32)
 24030 return AtomicExpansionKind::MaskedIntrinsic;
 24031 }
24032
24033 if (Size < 32 && !Subtarget.hasStdExtZabha())
 24034 return AtomicExpansionKind::MaskedIntrinsic;
 24035
 24036 return AtomicExpansionKind::None;
 24037}
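 // For example, without Zabha an i8 atomicrmw add is narrower than 32 bits and
 // is expanded via the masked intrinsics: the LR/SC loop operates on the
 // aligned containing word and updates only the byte selected by the mask.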
24038
24039static Intrinsic::ID
24040getIntrinsicForMaskedAtomicRMWBinOp(AtomicRMWInst::BinOp BinOp) {
 24041 switch (BinOp) {
24042 default:
24043 llvm_unreachable("Unexpected AtomicRMW BinOp");
 24044 case AtomicRMWInst::Xchg:
 24045 return Intrinsic::riscv_masked_atomicrmw_xchg;
24046 case AtomicRMWInst::Add:
24047 return Intrinsic::riscv_masked_atomicrmw_add;
24048 case AtomicRMWInst::Sub:
24049 return Intrinsic::riscv_masked_atomicrmw_sub;
 24050 case AtomicRMWInst::Nand:
 24051 return Intrinsic::riscv_masked_atomicrmw_nand;
24052 case AtomicRMWInst::Max:
24053 return Intrinsic::riscv_masked_atomicrmw_max;
24054 case AtomicRMWInst::Min:
24055 return Intrinsic::riscv_masked_atomicrmw_min;
 24056 case AtomicRMWInst::UMax:
 24057 return Intrinsic::riscv_masked_atomicrmw_umax;
 24058 case AtomicRMWInst::UMin:
 24059 return Intrinsic::riscv_masked_atomicrmw_umin;
24060 }
24061}
24062
24063Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
 24064 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
24065 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
24066 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
24067 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
24068 // mask, as this produces better code than the LR/SC loop emitted by
24069 // int_riscv_masked_atomicrmw_xchg.
 24070 if (AI->getOperation() == AtomicRMWInst::Xchg &&
 24071 isa<ConstantInt>(AI->getValOperand())) {
 24072 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
 24073 if (CVal->isZero())
24074 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
24075 Builder.CreateNot(Mask, "Inv_Mask"),
24076 AI->getAlign(), Ord);
24077 if (CVal->isMinusOne())
24078 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
24079 AI->getAlign(), Ord);
24080 }
24081
24082 unsigned XLen = Subtarget.getXLen();
24083 Value *Ordering =
24084 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
24085 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
 24086 Function *LrwOpScwLoop = Intrinsic::getOrInsertDeclaration(
 24087 AI->getModule(),
 24088 getIntrinsicForMaskedAtomicRMWBinOp(AI->getOperation()), Tys);
 24089
24090 if (XLen == 64) {
24091 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
24092 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24093 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
24094 }
24095
24096 Value *Result;
24097
24098 // Must pass the shift amount needed to sign extend the loaded value prior
24099 // to performing a signed comparison for min/max. ShiftAmt is the number of
24100 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
24101 // is the number of bits to left+right shift the value in order to
24102 // sign-extend.
24103 if (AI->getOperation() == AtomicRMWInst::Min ||
 24104 AI->getOperation() == AtomicRMWInst::Max) {
 24105 const DataLayout &DL = AI->getDataLayout();
24106 unsigned ValWidth =
24107 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
24108 Value *SextShamt =
24109 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
24110 Result = Builder.CreateCall(LrwOpScwLoop,
24111 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
24112 } else {
24113 Result =
24114 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
24115 }
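 // For example, with XLen == 32 an i8 atomicrmw min whose byte lives at offset
 // 2 of the aligned word has ShiftAmt == 16 and ValWidth == 8, so SextShamt is
 // 32 - 16 - 8 == 8: shifting the loaded word left by 8 moves the field's sign
 // bit (bit 23) to bit 31, and an arithmetic shift right by 8 restores it with
 // the bits above the field sign-extended for the signed comparison.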
24116
24117 if (XLen == 64)
24118 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24119 return Result;
24120}
24121
24122TargetLowering::AtomicExpansionKind
24123RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
 24124 AtomicCmpXchgInst *CI) const {
24125 // Don't expand forced atomics, we want to have __sync libcalls instead.
24126 if (Subtarget.hasForcedAtomics())
 24127 return AtomicExpansionKind::None;
 24128
 24129 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
 24130 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
24131 (Size == 8 || Size == 16))
 24132 return AtomicExpansionKind::MaskedIntrinsic;
 24133 return AtomicExpansionKind::None;
 24134}
24135
24136Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
 24137 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
24138 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
24139 unsigned XLen = Subtarget.getXLen();
24140 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
24141 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg;
24142 if (XLen == 64) {
24143 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
24144 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
24145 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24146 }
24147 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24148 Value *Result = Builder.CreateIntrinsic(
24149 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
24150 if (XLen == 64)
24151 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24152 return Result;
24153}
24154
24155bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
 24156 EVT DataVT) const {
24157 // We have indexed loads for all supported EEW types. Indices are always
24158 // zero extended.
24159 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
24160 isTypeLegal(Extend.getValueType()) &&
24161 isTypeLegal(Extend.getOperand(0).getValueType()) &&
24162 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
24163}
24164
24165bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
 24166 EVT VT) const {
24167 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
24168 return false;
24169
24170 switch (FPVT.getSimpleVT().SimpleTy) {
24171 case MVT::f16:
24172 return Subtarget.hasStdExtZfhmin();
24173 case MVT::f32:
24174 return Subtarget.hasStdExtF();
24175 case MVT::f64:
24176 return Subtarget.hasStdExtD();
24177 default:
24178 return false;
24179 }
24180}
24181
24182unsigned RISCVTargetLowering::getJumpTableEncoding() const {
 24183 // If we are using the small code model, we can reduce size of jump table
24184 // entry to 4 bytes.
24185 if (Subtarget.is64Bit() && !isPositionIndependent() &&
 24186 getTargetMachine().getCodeModel() == CodeModel::Small) {
 24187 return MachineJumpTableInfo::EK_Custom32;
 24188 }
 24189 return TargetLowering::getJumpTableEncoding();
 24190}
24191
24192const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
 24193 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
24194 unsigned uid, MCContext &Ctx) const {
24195 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
 24196 getTargetMachine().getCodeModel() == CodeModel::Small);
 24197 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
24198}
24199
24200bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
 24201 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
24202 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
24203 // a power of two as well.
24204 // FIXME: This doesn't work for zve32, but that's already broken
24205 // elsewhere for the same reason.
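 // For example, an implementation with VLEN == 256 gives
 // vscale == 256 / RVVBitsPerBlock == 4, which is a power of two.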
24206 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
24207 static_assert(RISCV::RVVBitsPerBlock == 64,
24208 "RVVBitsPerBlock changed, audit needed");
24209 return true;
24210}
24211
24212bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
 24213 SDValue &Offset,
 24214 ISD::MemIndexedMode &AM,
 24215 SelectionDAG &DAG) const {
24216 // Target does not support indexed loads.
24217 if (!Subtarget.hasVendorXTHeadMemIdx())
24218 return false;
24219
24220 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
24221 return false;
24222
24223 Base = Op->getOperand(0);
24224 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
24225 int64_t RHSC = RHS->getSExtValue();
24226 if (Op->getOpcode() == ISD::SUB)
24227 RHSC = -(uint64_t)RHSC;
24228
24229 // The constants that can be encoded in the THeadMemIdx instructions
24230 // are of the form (sign_extend(imm5) << imm2).
24231 bool isLegalIndexedOffset = false;
24232 for (unsigned i = 0; i < 4; i++)
24233 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
24234 isLegalIndexedOffset = true;
24235 break;
24236 }
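 // For example, an offset of 48 is legal: with i == 2, 48 >> 2 == 12 fits in a
 // signed 5-bit immediate and 48 is a multiple of 4 (imm5 == 12, imm2 == 2).
 // An offset of 100 is rejected, since no i in 0..3 yields both a 5-bit
 // quotient and an exact multiple of 1 << i.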
24237
24238 if (!isLegalIndexedOffset)
24239 return false;
24240
24241 Offset = Op->getOperand(1);
24242 return true;
24243 }
24244
24245 return false;
24246}
24247
24248bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
 24249 SDValue &Offset,
 24250 ISD::MemIndexedMode &AM,
 24251 SelectionDAG &DAG) const {
24252 EVT VT;
24253 SDValue Ptr;
24254 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24255 VT = LD->getMemoryVT();
24256 Ptr = LD->getBasePtr();
24257 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24258 VT = ST->getMemoryVT();
24259 Ptr = ST->getBasePtr();
24260 } else
24261 return false;
24262
24263 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
24264 return false;
24265
24266 AM = ISD::PRE_INC;
24267 return true;
24268}
24269
24270bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
 24271 SDValue &Base,
24272 SDValue &Offset,
 24273 ISD::MemIndexedMode &AM,
 24274 SelectionDAG &DAG) const {
24275 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
24276 if (Op->getOpcode() != ISD::ADD)
24277 return false;
24278
 24279 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
 24280 Base = LS->getBasePtr();
24281 else
24282 return false;
24283
24284 if (Base == Op->getOperand(0))
24285 Offset = Op->getOperand(1);
24286 else if (Base == Op->getOperand(1))
24287 Offset = Op->getOperand(0);
24288 else
24289 return false;
24290
24291 AM = ISD::POST_INC;
24292 return true;
24293 }
24294
24295 EVT VT;
24296 SDValue Ptr;
24297 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24298 VT = LD->getMemoryVT();
24299 Ptr = LD->getBasePtr();
24300 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24301 VT = ST->getMemoryVT();
24302 Ptr = ST->getBasePtr();
24303 } else
24304 return false;
24305
24306 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
24307 return false;
24308 // Post-indexing updates the base, so it's not a valid transform
24309 // if that's not the same as the load's pointer.
24310 if (Ptr != Base)
24311 return false;
24312
24313 AM = ISD::POST_INC;
24314 return true;
24315}
24316
24318 EVT VT) const {
24319 EVT SVT = VT.getScalarType();
24320
24321 if (!SVT.isSimple())
24322 return false;
24323
24324 switch (SVT.getSimpleVT().SimpleTy) {
24325 case MVT::f16:
24326 return VT.isVector() ? Subtarget.hasVInstructionsF16()
24327 : Subtarget.hasStdExtZfhOrZhinx();
24328 case MVT::f32:
24329 return Subtarget.hasStdExtFOrZfinx();
24330 case MVT::f64:
24331 return Subtarget.hasStdExtDOrZdinx();
24332 default:
24333 break;
24334 }
24335
24336 return false;
24337}
24338
24340 // Zacas will use amocas.w which does not require extension.
24341 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
24342}
24343
24345 const Constant *PersonalityFn) const {
24346 return RISCV::X10;
24347}
24348
24350 const Constant *PersonalityFn) const {
24351 return RISCV::X11;
24352}
24353
24355 // Return false to suppress the unnecessary extensions if the LibCall
24356 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
24357 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
24358 Type.getSizeInBits() < Subtarget.getXLen()))
24359 return false;
24360
24361 return true;
24362}
24363
24365 bool IsSigned) const {
24366 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
24367 return true;
24368
24369 return IsSigned;
24370}
24371
24373 SDValue C) const {
24374 // Check integral scalar types.
24375 if (!VT.isScalarInteger())
24376 return false;
24377
24378 // Omit the optimization if the subtarget has the Zmmul extension and the
24379 // data size exceeds XLen.
24380 const bool HasZmmul = Subtarget.hasStdExtZmmul();
24381 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
24382 return false;
24383
24384 auto *ConstNode = cast<ConstantSDNode>(C);
24385 const APInt &Imm = ConstNode->getAPIntValue();
24386
24387 // Don't do this if the Xqciac extension is enabled and the Imm fits in simm12.
24388 if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
24389 return false;
24390
24391 // Break the MUL to a SLLI and an ADD/SUB.
24392 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
24393 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
24394 return true;
24395
24396 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
24397 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
24398 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
24399 (Imm - 8).isPowerOf2()))
24400 return true;
24401
24402 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
24403 // a pair of LUI/ADDI.
24404 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
24405 ConstNode->hasOneUse()) {
24406 APInt ImmS = Imm.ashr(Imm.countr_zero());
24407 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
24408 (1 - ImmS).isPowerOf2())
24409 return true;
24410 }
24411
24412 return false;
24413}
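// A few worked examples (editorial, not from the upstream file) of constants
// for which the hook above returns true, with the expansions the backend can
// then form:
//   x * 7    -> (x << 3) - x            // Imm + 1 is a power of two
//   x * 65   -> (x << 6) + x            // Imm - 1 is a power of two
//   x * 4100 -> sh2add x, (x << 12)     // with Zba: Imm - 4 is 1 << 12
//   x * 6144 -> ((x << 2) - x) << 11    // 6144 = 3 << 11, avoids a LUI/ADDI pair
// whereas e.g. x * 11 falls through to the generic multiplication lowering,
// since none of the patterns checked here apply to it.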
24414
24416 SDValue ConstNode) const {
24417 // Let the DAGCombiner decide for vectors.
24418 EVT VT = AddNode.getValueType();
24419 if (VT.isVector())
24420 return true;
24421
24422 // Let the DAGCombiner decide for larger types.
24423 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
24424 return true;
24425
24426 // It is worse if c1 is simm12 while c1*c2 is not.
24427 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
24428 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
24429 const APInt &C1 = C1Node->getAPIntValue();
24430 const APInt &C2 = C2Node->getAPIntValue();
24431 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
24432 return false;
24433
24434 // Default to true and let the DAGCombiner decide.
24435 return true;
24436}
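// Worked example (editorial): for (x + 100) * 64 the hook above returns
// false, because 100 fits in a simm12 addi while the folded form
// x * 64 + 6400 would need a LUI/ADDI pair to materialize 6400. For
// (x + 3) * 5, both 3 and 15 fit in simm12, so the hook returns true and
// leaves the final decision to the generic DAG combiner.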
24437
24439 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
24440 unsigned *Fast) const {
24441 if (!VT.isVector()) {
24442 if (Fast)
24443 *Fast = Subtarget.enableUnalignedScalarMem();
24444 return Subtarget.enableUnalignedScalarMem();
24445 }
24446
24447 // All vector implementations must support element alignment
24448 EVT ElemVT = VT.getVectorElementType();
24449 if (Alignment >= ElemVT.getStoreSize()) {
24450 if (Fast)
24451 *Fast = 1;
24452 return true;
24453 }
24454
24455 // Note: We lower an unmasked unaligned vector access to an equally sized
24456 // e8 element type access. Given this, we effectively support all unmasked
24457 // misaligned accesses. TODO: Work through the codegen implications of
24458 // allowing such accesses to be formed, and considered fast.
24459 if (Fast)
24460 *Fast = Subtarget.enableUnalignedVectorMem();
24461 return Subtarget.enableUnalignedVectorMem();
24462}
24463
24465 LLVMContext &Context, const MemOp &Op,
24466 const AttributeList &FuncAttributes) const {
24467 if (!Subtarget.hasVInstructions())
24468 return MVT::Other;
24469
24470 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
24471 return MVT::Other;
24472
24473 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
24474 // has an expansion threshold, and we want the number of hardware memory
24475 // operations to correspond roughly to that threshold. LMUL>1 operations
24476 // are typically expanded linearly internally, and thus correspond to more
24477 // than one actual memory operation. Note that store merging and load
24478 // combining will typically form larger LMUL operations from the LMUL1
24479 // operations emitted here, and that's okay because combining isn't
24480 // introducing new memory operations; it's just merging existing ones.
24481 // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
24482 const unsigned MinVLenInBytes =
24483 std::min(Subtarget.getRealMinVLen() / 8, 1024U);
24484
24485 if (Op.size() < MinVLenInBytes)
24486 // TODO: Figure out short memops. For the moment, do the default thing
24487 // which ends up using scalar sequences.
24488 return MVT::Other;
24489
24490 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
24491 // fixed vectors.
24492 if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
24493 return MVT::Other;
24494
24495 // Prefer i8 for non-zero memset as it allows us to avoid materializing
24496 // a large scalar constant and instead use vmv.v.x/i to do the
24497 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
24498 // maximize the chance we can encode the size in the vsetvli.
24499 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
24500 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
24501
24502 // Do we have sufficient alignment for our preferred VT? If not, revert
24503 // to largest size allowed by our alignment criteria.
24504 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
24505 Align RequiredAlign(PreferredVT.getStoreSize());
24506 if (Op.isFixedDstAlign())
24507 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
24508 if (Op.isMemcpy())
24509 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
24510 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
24511 }
24512 return MVT::getVectorVT(PreferredVT, MinVLenInBytes / PreferredVT.getStoreSize());
24513}
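// Editorial illustration of the selection above, assuming VLEN=128 (so
// MinVLenInBytes=16), ELEN=64, and unaligned vector accesses disabled:
//   * a 32-byte, 8-byte-aligned memcpy  -> MVT::v2i64 (one LMUL1 op per 16 bytes)
//   * a memset to a non-zero value      -> MVT::v16i8 (splat via vmv.v.x)
//   * an 8-byte copy (below 16 bytes)   -> MVT::Other, i.e. scalar expansion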
24514
24516 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
24517 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
24518 bool IsABIRegCopy = CC.has_value();
24519 EVT ValueVT = Val.getValueType();
24520
24521 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24522 if ((ValueVT == PairVT ||
24523 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24524 ValueVT == MVT::f64)) &&
24525 NumParts == 1 && PartVT == MVT::Untyped) {
24526 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24527 MVT XLenVT = Subtarget.getXLenVT();
24528 if (ValueVT == MVT::f64)
24529 Val = DAG.getBitcast(MVT::i64, Val);
24530 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
24531 // Always creating an MVT::Untyped part, so always use
24532 // RISCVISD::BuildGPRPair.
24533 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
24534 return true;
24535 }
24536
24537 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24538 PartVT == MVT::f32) {
24539 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
24540 // nan, and cast to f32.
24541 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
24542 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
24543 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
24544 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
24545 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24546 Parts[0] = Val;
24547 return true;
24548 }
24549
24550 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
24551#ifndef NDEBUG
24552 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
24553 [[maybe_unused]] unsigned ValLMUL =
24555 ValNF * RISCV::RVVBitsPerBlock);
24556 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
24557 [[maybe_unused]] unsigned PartLMUL =
24559 PartNF * RISCV::RVVBitsPerBlock);
24560 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
24561 "RISC-V vector tuple type only accepts same register class type "
24562 "TUPLE_INSERT");
24563#endif
24564
24565 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
24566 Val, DAG.getTargetConstant(0, DL, MVT::i32));
24567 Parts[0] = Val;
24568 return true;
24569 }
24570
24571 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24572 PartVT.isScalableVector()) {
24573 if (ValueVT.isFixedLengthVector()) {
24574 ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
24575 Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
24576 }
24577 LLVMContext &Context = *DAG.getContext();
24578 EVT ValueEltVT = ValueVT.getVectorElementType();
24579 EVT PartEltVT = PartVT.getVectorElementType();
24580 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24581 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24582 if (PartVTBitSize % ValueVTBitSize == 0) {
24583 assert(PartVTBitSize >= ValueVTBitSize);
24584 // If the element types are different, widen the value in its own element
24585 // type first and then bitcast to PartVT.
24586 // For example, to copy a <vscale x 1 x i8> value into a
24587 // <vscale x 4 x i16> part,
24588 // we need to widen <vscale x 1 x i8> to <vscale x 8 x i8> with an insert
24589 // subvector, then we can bitcast the result to <vscale x 4 x i16>.
24590 if (ValueEltVT != PartEltVT) {
24591 if (PartVTBitSize > ValueVTBitSize) {
24592 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24593 assert(Count != 0 && "The number of elements should not be zero.");
24594 EVT SameEltTypeVT =
24595 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24596 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(SameEltTypeVT), Val, 0);
24597 }
24598 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24599 } else {
24600 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(PartVT), Val, 0);
24601 }
24602 Parts[0] = Val;
24603 return true;
24604 }
24605 }
24606
24607 return false;
24608}
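// Editorial note: the f16/bf16-in-f32 path above performs NaN-boxing, i.e.
// the 16-bit value is kept in the low half of the 32-bit register and the
// high half is set to all ones. A minimal sketch of the resulting bit
// pattern (the helper name is illustrative only):
static uint32_t nanBoxHalf(uint16_t HalfBits) {
  return 0xFFFF0000u | HalfBits; // e.g. f16 1.0 (0x3C00) -> 0xFFFF3C00
}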
24609
24611 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
24612 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
24613 bool IsABIRegCopy = CC.has_value();
24614
24615 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24616 if ((ValueVT == PairVT ||
24617 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24618 ValueVT == MVT::f64)) &&
24619 NumParts == 1 && PartVT == MVT::Untyped) {
24620 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24621 MVT XLenVT = Subtarget.getXLenVT();
24622
24623 SDValue Val = Parts[0];
24624 // Always starting with an MVT::Untyped part, so always use
24625 // RISCVISD::SplitGPRPair
24626 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
24627 Val);
24628 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
24629 Val.getValue(1));
24630 if (ValueVT == MVT::f64)
24631 Val = DAG.getBitcast(ValueVT, Val);
24632 return Val;
24633 }
24634
24635 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24636 PartVT == MVT::f32) {
24637 SDValue Val = Parts[0];
24638
24639 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
24640 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
24641 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
24642 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
24643 return Val;
24644 }
24645
24646 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24647 PartVT.isScalableVector()) {
24648 LLVMContext &Context = *DAG.getContext();
24649 SDValue Val = Parts[0];
24650 EVT ValueEltVT = ValueVT.getVectorElementType();
24651 EVT PartEltVT = PartVT.getVectorElementType();
24652 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24653 if (ValueVT.isFixedLengthVector())
24654 ValueVTBitSize = getContainerForFixedLengthVector(ValueVT.getSimpleVT())
24655 .getSizeInBits()
24656 .getKnownMinValue();
24657 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24658 if (PartVTBitSize % ValueVTBitSize == 0) {
24659 assert(PartVTBitSize >= ValueVTBitSize);
24660 EVT SameEltTypeVT = ValueVT;
24661 // If the element types are different, convert the part to a vector with
24662 // the same element type as ValueVT first.
24663 // For example, to copy a <vscale x 1 x i8> value out of a
24664 // <vscale x 4 x i16> part,
24665 // we need to bitcast <vscale x 4 x i16> to <vscale x 8 x i8> first,
24666 // then we can extract the <vscale x 1 x i8>.
24667 if (ValueEltVT != PartEltVT) {
24668 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24669 assert(Count != 0 && "The number of elements should not be zero.");
24670 SameEltTypeVT =
24671 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24672 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
24673 }
24674 if (ValueVT.isFixedLengthVector())
24675 Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget);
24676 else
24677 Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
24678 return Val;
24679 }
24680 }
24681 return SDValue();
24682}
24683
24684bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
24685 // When aggressively optimizing for code size, we prefer to use a div
24686 // instruction, as it is usually smaller than the alternative sequence.
24687 // TODO: Add vector division?
24688 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
24689 return OptSize && !VT.isVector();
24690}
24691
24693 // Scalarizing zero_ext and sign_ext might prevent them from matching a
24694 // widening instruction in some situations.
24695 unsigned Opc = N->getOpcode();
24697 return false;
24698 return true;
24699}
24700
24701static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
24702 Module *M = IRB.GetInsertBlock()->getModule();
24703 Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration(
24704 M, Intrinsic::thread_pointer, IRB.getPtrTy());
24705 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
24706 IRB.CreateCall(ThreadPointerFunc), Offset);
24707}
24708
24710 // Fuchsia provides a fixed TLS slot for the stack cookie.
24711 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
24712 if (Subtarget.isTargetFuchsia())
24713 return useTpOffset(IRB, -0x10);
24714
24715 // Android provides a fixed TLS slot for the stack cookie. See the definition
24716 // of TLS_SLOT_STACK_GUARD in
24717 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
24718 if (Subtarget.isTargetAndroid())
24719 return useTpOffset(IRB, -0x18);
24720
24721 Module *M = IRB.GetInsertBlock()->getModule();
24722
24723 if (M->getStackProtectorGuard() == "tls") {
24724 // Users must specify the offset explicitly
24725 int Offset = M->getStackProtectorGuardOffset();
24726 return useTpOffset(IRB, Offset);
24727 }
24728
24729 return TargetLowering::getIRStackGuard(IRB);
24730}
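// Editorial example of roughly the IR the TLS-offset path above produces on
// Android (offset -0x18); Fuchsia has the same shape with offset -16:
//
//   %tp = call ptr @llvm.thread.pointer.p0()
//   %guard.slot = getelementptr i8, ptr %tp, i32 -24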
24731
24733 Align Alignment) const {
24734 if (!Subtarget.hasVInstructions())
24735 return false;
24736
24737 // Only support fixed vectors if we know the minimum vector size.
24738 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
24739 return false;
24740
24741 EVT ScalarType = DataType.getScalarType();
24742 if (!isLegalElementTypeForRVV(ScalarType))
24743 return false;
24744
24745 if (!Subtarget.enableUnalignedVectorMem() &&
24746 Alignment < ScalarType.getStoreSize())
24747 return false;
24748
24749 return true;
24750}
24751
24755 const TargetInstrInfo *TII) const {
24756 assert(MBBI->isCall() && MBBI->getCFIType() &&
24757 "Invalid call instruction for a KCFI check");
24758 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
24759 MBBI->getOpcode()));
24760
24761 MachineOperand &Target = MBBI->getOperand(0);
24762 Target.setIsRenamable(false);
24763
24764 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
24765 .addReg(Target.getReg())
24766 .addImm(MBBI->getCFIType())
24767 .getInstr();
24768}
24769
24770#define GET_REGISTER_MATCHER
24771#include "RISCVGenAsmMatcher.inc"
24772
24775 const MachineFunction &MF) const {
24777 if (!Reg)
24779 if (!Reg)
24780 return Reg;
24781
24782 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
24783 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
24784 reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" +
24785 StringRef(RegName) + "\"."));
24786 return Reg;
24787}
24788
24791 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
24792
24793 if (NontemporalInfo == nullptr)
24794 return MachineMemOperand::MONone;
24795
24796 // 1 is the default value and works as __RISCV_NTLH_ALL
24797 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
24798 // 3 -> __RISCV_NTLH_ALL_PRIVATE
24799 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
24800 // 5 -> __RISCV_NTLH_ALL
24801 int NontemporalLevel = 5;
24802 const MDNode *RISCVNontemporalInfo =
24803 I.getMetadata("riscv-nontemporal-domain");
24804 if (RISCVNontemporalInfo != nullptr)
24805 NontemporalLevel =
24807 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
24808 ->getValue())
24809 ->getZExtValue();
24810
24811 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
24812 "RISC-V target doesn't support this non-temporal domain.");
24813
24814 NontemporalLevel -= 2;
24815 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
24816 if (NontemporalLevel & 0b1)
24817 Flags |= MONontemporalBit0;
24818 if (NontemporalLevel & 0b10)
24819 Flags |= MONontemporalBit1;
24820
24821 return Flags;
24822}
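// Editorial example: an IR store annotated roughly as
//   store i32 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
//   !0 = !{i32 1}
//   !1 = !{i32 3}        ; __RISCV_NTLH_ALL_PRIVATE
// maps to level 3, so after the -2 adjustment above only MONontemporalBit0
// ends up set on the resulting memory operand.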
24823
24826
24827 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
24829 TargetFlags |= (NodeFlags & MONontemporalBit0);
24830 TargetFlags |= (NodeFlags & MONontemporalBit1);
24831 return TargetFlags;
24832}
24833
24835 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
24836 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
24837}
24838
24840 if (VT.isScalableVector())
24841 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
24842 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
24843 return true;
24844 return Subtarget.hasStdExtZbb() &&
24845 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
24846}
24847
24849 ISD::CondCode Cond) const {
24850 return isCtpopFast(VT) ? 0 : 1;
24851}
24852
24854 const Instruction *I) const {
24855 if (Subtarget.hasStdExtZalasr()) {
24856 if (Subtarget.hasStdExtZtso()) {
24857 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
24858 // should be lowered to plain load/store. The easiest way to do this is
24859 // to say we should insert fences for them, and the fence insertion code
24860 // will just not insert any fences
24861 auto *LI = dyn_cast<LoadInst>(I);
24862 auto *SI = dyn_cast<StoreInst>(I);
24863 if ((LI &&
24864 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
24865 (SI &&
24866 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
24867 // Here, this is a load or store which is seq_cst, and needs a .aq or
24868 // .rl, so we shouldn't try to insert fences
24869 return false;
24870 }
24871 // Here, we are a TSO inst that isn't a seq_cst load/store
24872 return isa<LoadInst>(I) || isa<StoreInst>(I);
24873 }
24874 return false;
24875 }
24876 // Note that one specific case requires fence insertion for an
24877 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
24878 // than this hook due to limitations in the interface here.
24879 return isa<LoadInst>(I) || isa<StoreInst>(I);
24880}
24881
24883
24884 // GISel support is in progress or complete for these opcodes.
24885 unsigned Op = Inst.getOpcode();
24886 if (Op == Instruction::Add || Op == Instruction::Sub ||
24887 Op == Instruction::And || Op == Instruction::Or ||
24888 Op == Instruction::Xor || Op == Instruction::InsertElement ||
24889 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
24890 Op == Instruction::Freeze || Op == Instruction::Store)
24891 return false;
24892
24893 if (Inst.getType()->isScalableTy())
24894 return true;
24895
24896 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
24897 if (Inst.getOperand(i)->getType()->isScalableTy() &&
24898 !isa<ReturnInst>(&Inst))
24899 return true;
24900
24901 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
24902 if (AI->getAllocatedType()->isScalableTy())
24903 return true;
24904 }
24905
24906 return false;
24907}
24908
24909SDValue
24910RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
24911 SelectionDAG &DAG,
24912 SmallVectorImpl<SDNode *> &Created) const {
24913 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
24914 if (isIntDivCheap(N->getValueType(0), Attr))
24915 return SDValue(N, 0); // Lower SDIV as SDIV
24916
24917 // Only perform this transform if short forward branch opt is supported.
24918 if (!Subtarget.hasShortForwardBranchOpt())
24919 return SDValue();
24920 EVT VT = N->getValueType(0);
24921 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
24922 return SDValue();
24923
24924 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
24925 if (Divisor.sgt(2048) || Divisor.slt(-2048))
24926 return SDValue();
24927 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
24928}
24929
24930bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
24931 EVT VT, const APInt &AndMask) const {
24932 if (Subtarget.hasCZEROLike())
24933 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
24934 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
24935}
24936
24938 return Subtarget.getMinimumJumpTableEntries();
24939}
24940
24942 SDValue Value, SDValue Addr,
24943 int JTI,
24944 SelectionDAG &DAG) const {
24945 if (Subtarget.hasStdExtZicfilp()) {
24946 // When Zicfilp is enabled, we need to use a software-guarded branch for
24947 // the jump table branch.
24948 SDValue Chain = Value;
24949 // Jump table debug info is only needed if CodeView is enabled.
24951 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
24952 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
24953 }
24954 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
24955}
24956
24957// If an output pattern produces multiple instructions tablegen may pick an
24958 // arbitrary type from an instruction's destination register class to use for the
24959// VT of that MachineSDNode. This VT may be used to look up the representative
24960// register class. If the type isn't legal, the default implementation will
24961// not find a register class.
24962//
24963// Some integer types smaller than XLen are listed in the GPR register class to
24964// support isel patterns for GISel, but are not legal in SelectionDAG. The
24965// arbitrary type tablegen picks may be one of these smaller types.
24966//
24967// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
24968// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
24969std::pair<const TargetRegisterClass *, uint8_t>
24970RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
24971 MVT VT) const {
24972 switch (VT.SimpleTy) {
24973 default:
24974 break;
24975 case MVT::i8:
24976 case MVT::i16:
24977 case MVT::i32:
24979 case MVT::bf16:
24980 case MVT::f16:
24982 }
24983
24985}
24986
24987namespace llvm::RISCVVIntrinsicsTable {
24988
24989#define GET_RISCVVIntrinsicsTable_IMPL
24990#include "RISCVGenSearchableTables.inc"
24991
24992} // namespace llvm::RISCVVIntrinsicsTable
24993
24995
24996 // If the function specifically requests inline stack probes, emit them.
24997 if (MF.getFunction().hasFnAttribute("probe-stack"))
24998 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
24999 "inline-asm";
25000
25001 return false;
25002}
25003
25005 Align StackAlign) const {
25006 // The default stack probe size is 4096 if the function has no
25007 // stack-probe-size attribute.
25008 const Function &Fn = MF.getFunction();
25009 unsigned StackProbeSize =
25010 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
25011 // Round down to the stack alignment.
25012 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
25013 return StackProbeSize ? StackProbeSize : StackAlign.value();
25014}
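// Editorial example: with "stack-probe-size"="5000" and a 16-byte stack
// alignment, alignDown(5000, 16) yields 4992, so the probing code touches the
// stack every 4992 bytes; a value smaller than the alignment rounds down to 0
// and the function falls back to the alignment value itself.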
25015
25016SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
25017 SelectionDAG &DAG) const {
25018 MachineFunction &MF = DAG.getMachineFunction();
25019 if (!hasInlineStackProbe(MF))
25020 return SDValue();
25021
25022 MVT XLenVT = Subtarget.getXLenVT();
25023 // Get the inputs.
25024 SDValue Chain = Op.getOperand(0);
25025 SDValue Size = Op.getOperand(1);
25026
25027 MaybeAlign Align =
25028 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
25029 SDLoc dl(Op);
25030 EVT VT = Op.getValueType();
25031
25032 // Construct the new SP value in a GPR.
25033 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
25034 Chain = SP.getValue(1);
25035 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
25036 if (Align)
25037 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
25038 DAG.getSignedConstant(-Align->value(), dl, VT));
25039
25040 // Set the real SP to the new value with a probing loop.
25041 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
25042 return DAG.getMergeValues({SP, Chain}, dl);
25043}
25044
25047 MachineBasicBlock *MBB) const {
25048 MachineFunction &MF = *MBB->getParent();
25049 MachineBasicBlock::iterator MBBI = MI.getIterator();
25050 DebugLoc DL = MBB->findDebugLoc(MBBI);
25051 Register TargetReg = MI.getOperand(0).getReg();
25052
25053 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
25054 bool IsRV64 = Subtarget.is64Bit();
25055 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
25056 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
25057 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
25058
25059 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
25060 MachineBasicBlock *LoopTestMBB =
25061 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25062 MF.insert(MBBInsertPoint, LoopTestMBB);
25063 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25064 MF.insert(MBBInsertPoint, ExitMBB);
25065 Register SPReg = RISCV::X2;
25066 Register ScratchReg =
25067 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
25068
25069 // ScratchReg = ProbeSize
25070 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
25071
25072 // LoopTest:
25073 // SUB SP, SP, ProbeSize
25074 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
25075 .addReg(SPReg)
25076 .addReg(ScratchReg);
25077
25078 // s[d|w] zero, 0(sp)
25079 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
25080 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
25081 .addReg(RISCV::X0)
25082 .addReg(SPReg)
25083 .addImm(0);
25084
25085 // BLT TargetReg, SP, LoopTest
25086 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
25087 .addReg(TargetReg)
25088 .addReg(SPReg)
25089 .addMBB(LoopTestMBB);
25090
25091 // Adjust with: MV SP, TargetReg.
25092 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
25093 .addReg(TargetReg)
25094 .addImm(0);
25095
25096 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
25097 ExitMBB->transferSuccessorsAndUpdatePHIs(MBB);
25098
25099 LoopTestMBB->addSuccessor(ExitMBB);
25100 LoopTestMBB->addSuccessor(LoopTestMBB);
25101 MBB->addSuccessor(LoopTestMBB);
25102
25103 MI.eraseFromParent();
25104 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
25105 return ExitMBB->begin()->getParent();
25106}
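// Editorial sketch of the probing loop built above on RV64 with the default
// 4 KiB probe size (register names are illustrative; the scratch register is
// still virtual at this point and the target comes from the pseudo's operand):
//
//   li    t0, 4096
// .LloopTest:
//   sub   sp, sp, t0
//   sd    zero, 0(sp)
//   blt   t1, sp, .LloopTest      # keep probing while sp is above the target
//   mv    sp, t1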
25107
25109 if (Subtarget.hasStdExtFOrZfinx()) {
25110 static const MCPhysReg RCRegs[] = {RISCV::FRM, RISCV::FFLAGS};
25111 return RCRegs;
25112 }
25113 return {};
25114}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Definition CostModel.cpp:74
#define Check(C,...)
#define DEBUG_TYPE
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define CC_VLS_CASE(ABIVlen)
#define RegName(no)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
mir Rename Register Operands
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr MCPhysReg SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static bool isLowSourceShuffle(ArrayRef< int > Mask, int Span)
Is this mask only using elements from the first span of the input?
static bool isZipOdd(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipodd idiom.
static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2)
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static bool isLegalBitRotate(ArrayRef< int > Mask, EVT VT, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static SDValue simplifyOp_VL(SDNode *N)
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isAlternating(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned Factor, bool RequiredPolarity)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< bool > ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, cl::desc("Swap add and addi in cases where the add may " "be combined with a shift"), cl::init(true))
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue reduceANDOfAtomicLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool isLocalRepeatingShuffle(ArrayRef< int > Mask, int Span)
Is this mask local (i.e.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static bool isSpanSplatShuffle(ArrayRef< int > Mask, int Span)
Return true for a mask which performs an arbitrary shuffle within the first span, and then repeats th...
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
const uint64_t ModeMask64
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, SelectionDAG &DAG)
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static bool isValidVisniInsertExtractIndex(SDValue Idx)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B, SelectionDAG &DAG)
Given fixed length vectors A and B with equal element types, but possibly different number of element...
const uint32_t ModeMask32
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue lowerVectorXRINT_XROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSimm12Constant(SDValue V)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static bool isElementRotate(const std::array< std::pair< int, int >, 2 > &SrcInfo, unsigned NumElts)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match a single source shuffle which is an identity except that some particular element is repeated.
static bool isNonZeroAVL(SDValue AVL)
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getQDOTXResultType(MVT OpVT)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue foldConcatVector(SDValue V1, SDValue V2)
If concat_vector(V1,V2) could be folded away to some existing vector source, return it.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isZipEven(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipeven idiom.
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
#define ROTR(x, n)
Definition SHA256.cpp:32
static bool isCommutative(Instruction *I, Value *ValWithUses)
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1332
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1109
Class for arbitrary precision integers.
Definition APInt.h:78
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition APInt.cpp:1644
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1396
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bit signed integer value.
Definition APInt.h:435
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:510
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
bool isMask(unsigned numBits) const
Definition APInt.h:488
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:334
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
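The APInt predicates listed above (power-of-two tests, mask and shifted-mask tests, splat construction) are the kind of queries lowering code uses to classify immediates. A small self-contained illustration of the listed APIs, independent of any lowering logic:

// Self-contained illustration of a few APInt queries listed above.
#include "llvm/ADT/APInt.h"
#include <cassert>

using llvm::APInt;

int main() {
  APInt A(64, 0x40);                 // 64-bit value 0b100'0000
  assert(A.isPowerOf2());
  assert(A.countr_zero() == 6);      // Trailing-zero count gives the shift amount.

  APInt M = APInt::getLowBitsSet(64, 12);
  assert(M.isMask(12));              // 0xFFF is a low-bit mask.
  assert((M << 4).isShiftedMask());  // 0xFFF0 is a contiguous run of ones.

  // Broadcast an 8-bit pattern across 64 bits, as a splat constant would be.
  APInt Splat = APInt::getSplat(64, APInt(8, 0xAA));
  assert(Splat.getZExtValue() == 0xAAAAAAAAAAAAAAAAULL);
  return 0;
}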
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ USubSat
*p = usub.sat(old, v); usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
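The update rules quoted for each BinOp above read naturally as plain scalar operations on the old memory value. A tiny scalar model over unsigned 32-bit values, mirroring a few of those descriptions (no atomicity, not tied to any LLVM API):

// Scalar model of a few atomicrmw update rules from the list above.
#include <algorithm>
#include <cstdint>

enum class RMWOp { Add, Sub, And, Nand, UMin, UMax };

static uint32_t applyRMW(RMWOp Op, uint32_t Old, uint32_t V) {
  switch (Op) {
  case RMWOp::Add:  return Old + V;           // *p = old + v
  case RMWOp::Sub:  return Old - V;           // *p = old - v
  case RMWOp::And:  return Old & V;           // *p = old & v
  case RMWOp::Nand: return ~(Old & V);        // *p = ~(old & v)
  case RMWOp::UMin: return std::min(Old, V);  // *p = old <unsigned v ? old : v
  case RMWOp::UMax: return std::max(Old, V);  // *p = old >unsigned v ? old : v
  }
  return Old;
}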
This is an SDNode representing atomic operations.
const SDValue & getBasePtr() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool test(unsigned Idx) const
Definition BitVector.h:461
BitVector & set()
Definition BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:390
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:229
unsigned size() const
Definition DenseMap.h:108
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:205
Implements a dense probed hash-table based set.
Definition DenseSet.h:261
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Tagged union holding either a T or a Error.
Definition Error.h:485
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Argument * getArg(unsigned i) const
Definition Function.h:884
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
Helper struct to store a base, index and offset that forms an address.
bool isDSOLocal() const
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition IRBuilder.h:1936
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2508
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:605
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
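The IRBuilder entries above (pointer and i8 type helpers, constant GEPs, call creation) are the pieces a target uses when it has to emit small IR sequences. A minimal self-contained sketch that exercises only the listed calls; the function name "guard_addr" and the byte offset are made up for illustration:

// Minimal sketch using the IRBuilder entries listed above; "guard_addr" and
// the offset 16 are illustration values only.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("example", Ctx);

  // void @f(ptr %p)
  FunctionType *FnTy = FunctionType::get(
      Type::getVoidTy(Ctx), {PointerType::get(Ctx, 0)}, /*isVarArg=*/false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "f", M);
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);

  IRBuilder<> B(Entry);
  // Byte-offset GEP using the i8 type helper: &p[16].
  Value *Slot = B.CreateConstGEP1_32(B.getInt8Ty(), F->getArg(0), 16, "slot");
  // Declare and call a hypothetical helper taking that pointer.
  FunctionType *HelperTy =
      FunctionType::get(Type::getVoidTy(Ctx), {B.getPtrTy()}, false);
  FunctionCallee Helper = M.getOrInsertFunction("guard_addr", HelperTy);
  B.CreateCall(HelperTy, Helper.getCallee(), {Slot});
  B.CreateRetVoid();
  return 0;
}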
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
MCContext & getContext() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1077
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1445
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector type has a power-of-2 number of elements.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
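The MVT queries above are pure value-type arithmetic, so they can be exercised in isolation. A small sketch using only the listed calls; note the header providing MVT has moved between LLVM releases, so the include path is an assumption to adjust for your tree:

// Sketch of the MVT queries listed above. MVT's header has moved across LLVM
// versions (llvm/Support vs. llvm/CodeGenTypes); adjust the include to match.
#include "llvm/CodeGenTypes/MachineValueType.h"
#include <cassert>

using llvm::MVT;

int main() {
  MVT V = MVT::getVectorVT(MVT::i32, 4);          // v4i32, fixed length
  assert(V.isVector() && V.isInteger());
  assert(V.getVectorNumElements() == 4);
  assert(V.getScalarSizeInBits() == 32);

  MVT S = MVT::getScalableVectorVT(MVT::f32, 2);  // nxv2f32, scalable
  assert(S.isScalableVector() && S.isFloatingPoint());

  // Same shape, but with integer elements instead of FP.
  MVT SInt = S.changeVectorElementTypeToInteger();
  assert(SInt.getVectorElementType() == MVT::i32);
  return 0;
}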
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
Definition Module.cpp:353
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasVInstructionsBF16Minimal() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
bool hasCZEROLike() const
unsigned getELen() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether the given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
static unsigned getRegClassIDForLMUL(RISCVVType::VLMUL LMul)
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Match a mask which "spreads" the leading elements of a vector evenly across the result.
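As a rough picture of what "spreading" might mean here: for a factor F and offset Index, the leading source elements land at every F-th output lane, with the remaining lanes left undefined. The hypothetical generator below builds a mask of that shape under this reading; it is not the matcher itself, and the name is invented.

// Hypothetical generator for a "spread" mask under one reading of the brief
// above: source element i goes to output lane i * Factor + Index, all other
// lanes are undef (-1).
#include <vector>

static std::vector<int> makeSpreadMask(unsigned NumElts, unsigned Factor,
                                       unsigned Index) {
  std::vector<int> Mask(NumElts, -1);
  for (unsigned I = 0; I * Factor + Index < NumElts; ++I)
    Mask[I * Factor + Index] = static_cast<int>(I);
  return Mask;
}
// e.g. NumElts=8, Factor=2, Index=0 -> {0,-1,1,-1,2,-1,3,-1}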
static RISCVVType::VLMUL getLMUL(MVT VT)
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
std::optional< APInt > bitcastToAPInt() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
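A hedged sketch of how the extend-or-truncate helpers above are typically used to normalize an integer operand to the target's XLenVT; the function name and parameters are illustrative only:

  // Zero-extends narrower values and truncates wider ones; no-op if V is already XLenVT.
  static llvm::SDValue legalizeToXLen(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                                      llvm::SDValue V, llvm::MVT XLenVT) {
    return DAG.getZExtOrTrunc(V, DL, XLenVT);
  }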
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static LLVM_ABI bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a de-interleave mask of the given factor Factor, like: <Index,...
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static LLVM_ABI bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
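A small, self-contained sketch of the SmallSet interface listed above; insert returns a pair whose second member reports whether the element was newly added:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SmallSet.h"

  // Collects register numbers, deduplicating as it goes.
  static unsigned countUnique(llvm::ArrayRef<unsigned> Regs) {
    llvm::SmallSet<unsigned, 8> Seen;
    for (unsigned R : Regs)
      Seen.insert(R);          // no-op if R is already present
    return Seen.size();
  }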
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:862
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
LLVM_ABI std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
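A hedged sketch of the StringSwitch pattern listed above, as commonly used for string dispatch; the strings and return values are illustrative:

  #include "llvm/ADT/StringSwitch.h"

  // Maps a rounding-mode string to a small integer code, -1 if unrecognized.
  static int parseRoundingMode(llvm::StringRef S) {
    return llvm::StringSwitch<int>(S)
        .Case("rne", 0)
        .Case("rtz", 1)
        .Cases("rdn", "down", 2)
        .Default(-1);
  }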
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
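A sketch (under assumed opcodes and types, not code from this file) of how setOperationAction is driven from a TargetLowering constructor to pick a LegalizeAction per opcode and type:

  // Inside a hypothetical TargetLowering subclass constructor:
  //   Expand - let the legalizer rewrite the node in terms of other operations
  //   Custom - route the node through the target's LowerOperation hook
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::f32, Custom);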
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual MVT getVPExplicitVectorLengthTy() const
Returns the type to be used for the EVL/AVL operand of VP nodes: ISD::VP_ADD, ISD::VP_SUB,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetOptions Options
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:774
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:62
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr bool isZero() const
Definition TypeSize.h:154
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
self_iterator getIterator()
Definition ilist_node.h:134
#define INT64_MIN
Definition DataTypes.h:74
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:622
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:682
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition ISDOpcodes.h:952
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:627
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition ISDOpcodes.h:672
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:648
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively places vector elements based on mask e....
Definition ISDOpcodes.h:690
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition ISDOpcodes.h:611
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:853
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:857
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
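A minimal sketch (illustrative, not from this file) of the IR-level matchers listed above; match() ties a pattern to a value and binds X on success:

  #include "llvm/IR/PatternMatch.h"

  // Returns true if V computes X + 1 for some value X, binding X.
  static bool matchIncrement(llvm::Value *V, llvm::Value *&X) {
    using namespace llvm::PatternMatch;
    return match(V, m_Add(m_Value(X), m_One()));
  }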
unsigned getBrCond(CondCode CC, unsigned SelectOpc=0)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
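A hedged sketch of querying RISCVMatInt::generateInstSeq, declared above, for the length of a constant's materialization sequence; the header path is an assumption:

  #include "MCTargetDesc/RISCVMatInt.h"

  // Number of instructions needed to materialize Val on the given subtarget.
  static unsigned materializationCost(int64_t Val, const llvm::MCSubtargetInfo &STI) {
    llvm::RISCVMatInt::InstSeq Seq = llvm::RISCVMatInt::generateInstSeq(Val, STI);
    return Seq.size();
  }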
static VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
static constexpr unsigned RVVBytesPerBlock
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
auto m_Node(unsigned Opcode, const OpndPreds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
initializer< Ty > init(const Ty &Val)
uint32_t read32le(const void *P)
Definition Endian.h:429
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition DWP.cpp:477
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
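A short sketch of the range-based all_of wrapper listed above, in the style of shuffle-mask checks; the predicate is illustrative:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/STLExtras.h"

  // True if every mask element is defined (no -1 sentinel entries).
  static bool hasNoUndefLanes(llvm::ArrayRef<int> Mask) {
    return llvm::all_of(Mask, [](int M) { return M >= 0; });
  }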
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
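A sketch of the signed-range check above, commonly used to test whether an addend fits the 12-bit immediate field of ADDI-like instructions; the helper name is made up:

  #include "llvm/Support/MathExtras.h"

  // True if Imm can be encoded as a signed 12-bit immediate.
  static bool fitsSImm12(int64_t Imm) { return llvm::isInt<12>(Imm); }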
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2474
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isStrongerThanMonotonic(AtomicOrdering AO)
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:270
static const MachineMemOperand::Flags MONontemporalBit0
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition MathExtras.h:551
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1587
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
uint64_t PowerOf2Ceil(uint64_t A)
Returns the smallest power of two which is greater than or equal to the given value.
Definition MathExtras.h:390
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:157
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1970
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:336
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
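A hedged sketch combining isPowerOf2_32 and Log2_32 from the listing above, the usual way a multiply by a power-of-two constant is turned into a shift amount:

  #include <optional>
  #include "llvm/Support/MathExtras.h"

  // Returns the shift amount when C is a power of two, std::nullopt otherwise.
  static std::optional<unsigned> mulToShiftAmount(uint32_t C) {
    if (!llvm::isPowerOf2_32(C))
      return std::nullopt;
    return llvm::Log2_32(C); // exact because C is a power of two
  }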
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:337
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:399
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
CombineLevel
Definition DAGCombine.h:15
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
@ Xor
Bitwise or logical XOR of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1963
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1760
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1899
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:577
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:86
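A one-line sketch of maskTrailingOnes above, handy for extracting low bit fields; the field width of 12 is illustrative:

  #include <cstdint>
  #include "llvm/Support/MathExtras.h"

  // Keeps only the low 12 bits of V: maskTrailingOnes<uint64_t>(12) == 0xFFF.
  static uint64_t low12(uint64_t V) { return V & llvm::maskTrailingOnes<uint64_t>(12); }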
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:207
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
#define NC
Definition regutils.h:42
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:324
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition ValueTypes.h:397
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:354
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:425
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:376
bool isFixedLengthVector() const
Definition ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:414
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:287
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
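A minimal sketch (under assumed headers) that exercises a few of the EVT accessors listed above:

  #include <cassert>
  #include "llvm/CodeGen/ValueTypes.h"

  // Builds a fixed 4 x i32 vector type and returns its element type (i32).
  static llvm::EVT elementOfV4I32(llvm::LLVMContext &Ctx) {
    llvm::EVT VT = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4);
    assert(VT.isFixedLengthVector() && VT.getVectorNumElements() == 4);
    return VT.getVectorElementType();
  }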
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
static LLVM_ABI KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:267
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:154
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:165
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:289
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:304
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:173
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:273
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
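A hedged sketch of driving the KnownBits queries listed above through SelectionDAG::computeKnownBits, in the spirit of the narrowing checks performed during lowering:

  // True if the DAG can prove Op needs at most 32 bits as an unsigned value.
  static bool provablyFitsInU32(const llvm::SelectionDAG &DAG, llvm::SDValue Op) {
    llvm::KnownBits Known = DAG.computeKnownBits(Op);
    return Known.countMaxActiveBits() <= 32;
  }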
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...