1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/IntrinsicsRISCV.h"
45#include "llvm/Support/Debug.h"
51#include <optional>
52
53using namespace llvm;
54
55#define DEBUG_TYPE "riscv-lower"
56
57STATISTIC(NumTailCalls, "Number of tail calls");
58
60 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
61 cl::desc("Give the maximum size (in number of nodes) of the web of "
62 "instructions that we will consider for VW expansion"),
63 cl::init(18));
64
65static cl::opt<bool>
66 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
67 cl::desc("Allow the formation of VW_W operations (e.g., "
68 "VWADD_W) with splat constants"),
69 cl::init(false));
70
72 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
73 cl::desc("Set the minimum number of repetitions of a divisor to allow "
74 "transformation to multiplications by the reciprocal"),
75 cl::init(2));
76
77static cl::opt<int>
79 cl::desc("Give the maximum number of instructions that we will "
80 "use for creating a floating-point immediate value"),
81 cl::init(2));
82
83static cl::opt<bool>
84 ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
85 cl::desc("Swap add and addi in cases where the add may "
86 "be combined with a shift"),
87 cl::init(true));
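// Note (illustrative, not from the original file): these are hidden debug
// flags rather than user-facing options; a developer build can be driven with
// something like "llc -mtriple=riscv64 -riscv-lower-ext-max-web-size=8 ..."
// to experiment with the VW-widening web size. The flag names are derived
// from DEBUG_TYPE ("riscv-lower-..."), except reassoc-shl-addi-add.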
88
90 const RISCVSubtarget &STI)
91 : TargetLowering(TM), Subtarget(STI) {
92
93 RISCVABI::ABI ABI = Subtarget.getTargetABI();
94 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
95
96 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
97 !Subtarget.hasStdExtF()) {
98 errs() << "Hard-float 'f' ABI can't be used for a target that "
99 "doesn't support the F instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
103 !Subtarget.hasStdExtD()) {
104 errs() << "Hard-float 'd' ABI can't be used for a target that "
105 "doesn't support the D instruction set extension (ignoring "
106 "target-abi)\n";
107 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
108 }
109
110 switch (ABI) {
111 default:
112 reportFatalUsageError("Don't know how to lower this ABI");
121 break;
122 }
123
124 MVT XLenVT = Subtarget.getXLenVT();
125
126 // Set up the register classes.
127 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
128
129 if (Subtarget.hasStdExtZfhmin())
130 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
131 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
132 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
133 if (Subtarget.hasStdExtF())
134 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
135 if (Subtarget.hasStdExtD())
136 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
137 if (Subtarget.hasStdExtZhinxmin())
138 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
139 if (Subtarget.hasStdExtZfinx())
140 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
141 if (Subtarget.hasStdExtZdinx()) {
142 if (Subtarget.is64Bit())
143 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
144 else
145 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
146 }
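  // In short: with F/D/Zfh*-style extensions scalar FP values live in the
  // dedicated FPR16/FPR32/FPR64 files, while the Z*inx variants keep them in
  // integer registers (GPRF16/GPRF32/GPR); the two families are mutually
  // exclusive, so each FP type ends up with exactly one register class. On
  // RV32 with Zdinx an f64 occupies an even/odd GPR pair.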
147
148 static const MVT::SimpleValueType BoolVecVTs[] = {
149 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
150 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
151 static const MVT::SimpleValueType IntVecVTs[] = {
152 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
153 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
154 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
155 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
156 MVT::nxv4i64, MVT::nxv8i64};
157 static const MVT::SimpleValueType F16VecVTs[] = {
158 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
159 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
160 static const MVT::SimpleValueType BF16VecVTs[] = {
161 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
162 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
163 static const MVT::SimpleValueType F32VecVTs[] = {
164 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
165 static const MVT::SimpleValueType F64VecVTs[] = {
166 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
167 static const MVT::SimpleValueType VecTupleVTs[] = {
168 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
169 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
170 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
171 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
172 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
173 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
174 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
175 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
176 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
177 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
178 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
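  // The riscv_nxv<M>i8x<N> tuple types model the values produced/consumed by
  // segment loads and stores (vlseg<N>e*/vsseg<N>e*): N fields, where the
  // nxv<M>i8 part encodes the per-field size. N times the per-field LMUL
  // never exceeds 8, which is why wider fields only appear with small N.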
179
180 if (Subtarget.hasVInstructions()) {
181 auto addRegClassForRVV = [this](MVT VT) {
182 // Disable the smallest fractional LMUL types if ELEN is less than
183 // RVVBitsPerBlock.
184 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
185 if (VT.getVectorMinNumElements() < MinElts)
186 return;
187
188 unsigned Size = VT.getSizeInBits().getKnownMinValue();
189 const TargetRegisterClass *RC;
191 RC = &RISCV::VRRegClass;
192 else if (Size == 2 * RISCV::RVVBitsPerBlock)
193 RC = &RISCV::VRM2RegClass;
194 else if (Size == 4 * RISCV::RVVBitsPerBlock)
195 RC = &RISCV::VRM4RegClass;
196 else if (Size == 8 * RISCV::RVVBitsPerBlock)
197 RC = &RISCV::VRM8RegClass;
198 else
199 llvm_unreachable("Unexpected size");
200
201 addRegisterClass(VT, RC);
202 };
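    // For illustration, with RVVBitsPerBlock = 64: nxv1i64 (known-min 64
    // bits) and all fractional-LMUL types map to the base VR class, nxv2i64
    // (128 bits) to VRM2, nxv4i64 to VRM4 and nxv8i64 to VRM8. With ELEN=32
    // (Zve32x), MinElts above becomes 2, so e.g. nxv1i8 is skipped entirely.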
203
204 for (MVT VT : BoolVecVTs)
205 addRegClassForRVV(VT);
206 for (MVT VT : IntVecVTs) {
207 if (VT.getVectorElementType() == MVT::i64 &&
208 !Subtarget.hasVInstructionsI64())
209 continue;
210 addRegClassForRVV(VT);
211 }
212
213 if (Subtarget.hasVInstructionsF16Minimal() ||
214 Subtarget.hasVendorXAndesVPackFPH())
215 for (MVT VT : F16VecVTs)
216 addRegClassForRVV(VT);
217
218 if (Subtarget.hasVInstructionsBF16Minimal() ||
219 Subtarget.hasVendorXAndesVBFHCvt())
220 for (MVT VT : BF16VecVTs)
221 addRegClassForRVV(VT);
222
223 if (Subtarget.hasVInstructionsF32())
224 for (MVT VT : F32VecVTs)
225 addRegClassForRVV(VT);
226
227 if (Subtarget.hasVInstructionsF64())
228 for (MVT VT : F64VecVTs)
229 addRegClassForRVV(VT);
230
231 if (Subtarget.useRVVForFixedLengthVectors()) {
232 auto addRegClassForFixedVectors = [this](MVT VT) {
233 MVT ContainerVT = getContainerForFixedLengthVector(VT);
234 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
235 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
236 addRegisterClass(VT, TRI.getRegClass(RCID));
237 };
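      // Fixed-length vectors borrow the register class of their scalable
      // "container" type; e.g. a v4i32 is operated on inside something like
      // an nxv2i32 (the exact container depends on the subtarget's known
      // VLEN bounds), so it simply reuses that container's VR* class.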
239 if (useRVVForFixedLengthVectorVT(VT))
240 addRegClassForFixedVectors(VT);
241
243 if (useRVVForFixedLengthVectorVT(VT))
244 addRegClassForFixedVectors(VT);
245 }
246
247 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
248 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
249 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
250 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
251 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
252 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
253 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
254 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
255 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
256 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
257 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
258 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
259 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
260 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
261 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
262 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
263 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
264 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
265 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
266 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
267 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
268 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
269 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
270 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
271 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
272 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
273 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
274 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
275 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
276 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
277 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
278 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
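    // The VRN<N>M<M> classes mirror the tuple shape: N fields of LMUL=M
    // each, e.g. riscv_nxv8i8x3 -> three LMUL-1 groups (VRN3M1) and
    // riscv_nxv16i8x2 -> two LMUL-2 groups (VRN2M2), keeping the total at or
    // below eight vector registers.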
279 }
280
281 // Compute derived properties from the register classes.
283
285
287 MVT::i1, Promote);
288 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
290 MVT::i1, Promote);
291
292 // TODO: add all necessary setOperationAction calls.
294
299
304 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
307 }
308
310
313
314 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
315 !Subtarget.hasVendorXAndesPerf())
317
319
320 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
321 !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
322 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
323 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
324
325 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
328 }
329
330 if (Subtarget.is64Bit()) {
332
335 MVT::i32, Custom);
337 if (!Subtarget.hasStdExtZbb())
340 Custom);
342 }
343 if (!Subtarget.hasStdExtZmmul()) {
345 } else if (Subtarget.is64Bit()) {
348 } else {
350 }
351
352 if (!Subtarget.hasStdExtM()) {
354 Expand);
355 } else if (Subtarget.is64Bit()) {
357 {MVT::i8, MVT::i16, MVT::i32}, Custom);
358 }
359
362 Expand);
363
365 Custom);
366
367 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
368 if (Subtarget.is64Bit())
370 } else if (Subtarget.hasVendorXTHeadBb()) {
371 if (Subtarget.is64Bit())
374 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
376 } else {
378 }
379
380 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
381 // pattern match it directly in isel.
383 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
384 Subtarget.hasVendorXTHeadBb())
385 ? Legal
386 : Expand);
387
388 if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
389 !Subtarget.is64Bit()) {
391 } else {
392 // Zbkb can use rev8+brev8 to implement bitreverse.
394 Subtarget.hasStdExtZbkb() ? Custom : Expand);
395 if (Subtarget.hasStdExtZbkb())
397 }
398
399 if (Subtarget.hasStdExtZbb() ||
400 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
402 Legal);
403 }
404
405 if (Subtarget.hasStdExtZbb() ||
406 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
407 if (Subtarget.is64Bit())
409 } else {
411 // TODO: These should be set to LibCall, but this currently breaks
412 // the Linux kernel build. See #101786. Lacks i128 tests, too.
413 if (Subtarget.is64Bit())
415 else
418 }
419
420 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
421 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
422 // We need the custom lowering to make sure that the resulting sequence
423 // for the 32bit case is efficient on 64bit targets.
424 if (Subtarget.is64Bit())
426 } else {
428 }
429
430 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
432 } else if (Subtarget.hasShortForwardBranchOpt()) {
433 // We can use PseudoCCSUB to implement ABS.
435 } else if (Subtarget.is64Bit()) {
437 }
438
439 if (!Subtarget.useCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov() &&
440 !Subtarget.hasVendorXqcicm() && !Subtarget.hasVendorXqcics())
442
443 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
450 }
451
452 static const unsigned FPLegalNodeTypes[] = {
460
461 static const ISD::CondCode FPCCToExpand[] = {
465
466 static const unsigned FPOpToExpand[] = {
468 ISD::FREM};
469
470 static const unsigned FPRndMode[] = {
473
474 static const unsigned ZfhminZfbfminPromoteOps[] = {
484
485 if (Subtarget.hasStdExtZfbfmin()) {
491 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
498 }
499
500 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
501 if (Subtarget.hasStdExtZfhOrZhinx()) {
502 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
503 setOperationAction(FPRndMode, MVT::f16,
504 Subtarget.hasStdExtZfa() ? Legal : Custom);
507 Subtarget.hasStdExtZfa() ? Legal : Custom);
508 if (Subtarget.hasStdExtZfa())
510 } else {
511 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
516 setOperationAction(Op, MVT::f16, Custom);
522 }
523
525
528 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
532
534 ISD::FNEARBYINT, MVT::f16,
535 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
540 MVT::f16, Promote);
541
542 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
543 // complete support for all operations in LegalizeDAG.
548 MVT::f16, Promote);
549
550 // We need to custom promote this.
551 if (Subtarget.is64Bit())
553 }
554
555 if (Subtarget.hasStdExtFOrZfinx()) {
556 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
557 setOperationAction(FPRndMode, MVT::f32,
558 Subtarget.hasStdExtZfa() ? Legal : Custom);
559 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
563 setOperationAction(FPOpToExpand, MVT::f32, Expand);
564 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
565 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
566 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
567 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
571 Subtarget.isSoftFPABI() ? LibCall : Custom);
576
577 if (Subtarget.hasStdExtZfa()) {
581 } else {
583 }
584 }
585
586 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
588
589 if (Subtarget.hasStdExtDOrZdinx()) {
590 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
591
592 if (!Subtarget.is64Bit())
594
595 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
596 !Subtarget.is64Bit()) {
599 }
600
601 if (Subtarget.hasStdExtZfa()) {
603 setOperationAction(FPRndMode, MVT::f64, Legal);
606 } else {
607 if (Subtarget.is64Bit())
608 setOperationAction(FPRndMode, MVT::f64, Custom);
609
611 }
612
615 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
619 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
620 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
621 setOperationAction(FPOpToExpand, MVT::f64, Expand);
622 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
623 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
624 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
625 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
629 Subtarget.isSoftFPABI() ? LibCall : Custom);
634 }
635
636 if (Subtarget.is64Bit()) {
639 MVT::i32, Custom);
641 }
642
643 if (Subtarget.hasStdExtFOrZfinx()) {
645 Custom);
646
647 // f16/bf16 require custom handling.
649 Custom);
651 Custom);
652
661 }
662
665 XLenVT, Custom);
666
668
669 if (Subtarget.is64Bit())
671
672 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
673 // Unfortunately this can't be determined just from the ISA naming string.
675 Subtarget.is64Bit() ? Legal : Custom);
677 Subtarget.is64Bit() ? Legal : Custom);
678
679 if (Subtarget.is64Bit()) {
682 }
683
686 if (Subtarget.is64Bit())
688
689 if (Subtarget.hasVendorXMIPSCBOP())
691 else if (Subtarget.hasStdExtZicbop())
693
694 if (Subtarget.hasStdExtA()) {
696 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
698 else
700 } else if (Subtarget.hasForcedAtomics()) {
702 } else {
704 }
705
707
709
710 if (getTargetMachine().getTargetTriple().isOSLinux()) {
711 // Custom lowering of llvm.clear_cache.
713 }
714
715 if (Subtarget.hasVInstructions()) {
717
719
720 // RVV intrinsics may have illegal operands.
721 // We also need to custom legalize vmv.x.s.
724 {MVT::i8, MVT::i16}, Custom);
725 if (Subtarget.is64Bit())
727 MVT::i32, Custom);
728 else
730 MVT::i64, Custom);
731
733 MVT::Other, Custom);
734
735 static const unsigned IntegerVPOps[] = {
736 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
737 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
738 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
739 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
740 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
741 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
742 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
743 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
744 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
745 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
746 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
747 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
748 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
749 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
750 ISD::EXPERIMENTAL_VP_SPLAT};
751
752 static const unsigned FloatingPointVPOps[] = {
753 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
754 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
755 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
756 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
757 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
758 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
759 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
760 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
761 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
762 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
763 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
764 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
765 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
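    // The ISD::VP_* nodes above correspond to the llvm.vp.* vector-predicated
    // intrinsics; they carry an explicit mask and vector length, which maps
    // naturally onto RVV's VL/mask-driven execution, so most of them receive
    // Custom lowering further down rather than generic expansion.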
766
767 static const unsigned IntegerVecReduceOps[] = {
771
772 static const unsigned FloatingPointVecReduceOps[] = {
775
776 static const unsigned FloatingPointLibCallOps[] = {
779
780 if (!Subtarget.is64Bit()) {
781 // We must custom-lower certain vXi64 operations on RV32 due to the vector
782 // element type being illegal.
784 MVT::i64, Custom);
785
786 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
787
788 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
789 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
790 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
791 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
792 MVT::i64, Custom);
793 }
794
795 for (MVT VT : BoolVecVTs) {
796 if (!isTypeLegal(VT))
797 continue;
798
800
801 // Mask VTs are custom-expanded into a series of standard nodes
805 VT, Custom);
806
808 Custom);
809
811 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
812 Expand);
813 setOperationAction(ISD::VP_MERGE, VT, Custom);
814
815 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
816 Custom);
817
818 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
819
822 Custom);
823
825 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
826 Custom);
827
828 // RVV has native int->float & float->int conversions where the
829 // element type sizes are within one power-of-two of each other. Any
830 // wider distances between type sizes have to be lowered as sequences
831 // which progressively narrow the gap in stages.
836 VT, Custom);
838 Custom);
839
840 // Expand all extending loads to types larger than this, and truncating
841 // stores from types larger than this.
843 setTruncStoreAction(VT, OtherVT, Expand);
845 OtherVT, Expand);
846 }
847
848 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
849 ISD::VP_TRUNCATE, ISD::VP_SETCC},
850 VT, Custom);
851
854
856
857 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
858 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
859 setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
860
863 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
864 }
865
866 for (MVT VT : IntVecVTs) {
867 if (!isTypeLegal(VT))
868 continue;
869
872
873 // Vectors implement MULHS/MULHU.
875
876 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
877 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
879
881 Legal);
882
884
885 // Custom-lower extensions and truncations from/to mask types.
887 VT, Custom);
888
889 // RVV has native int->float & float->int conversions where the
890 // element type sizes are within one power-of-two of each other. Any
891 // wider distances between type sizes have to be lowered as sequences
892 // which progressively narrow the gap in stages.
897 VT, Custom);
899 Custom);
903 VT, Legal);
904
905 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
906 // nodes which truncate by one power of two at a time.
909 Custom);
910
911 // Custom-lower insert/extract operations to simplify patterns.
913 Custom);
914
915 // Custom-lower reduction operations to set up the corresponding custom
916 // nodes' operands.
917 setOperationAction(IntegerVecReduceOps, VT, Custom);
918
919 setOperationAction(IntegerVPOps, VT, Custom);
920
922
924 VT, Custom);
925
927 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
928 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
929 VT, Custom);
930 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
931
934 VT, Custom);
935
938
940
942 setTruncStoreAction(VT, OtherVT, Expand);
944 OtherVT, Expand);
945 }
946
949
950 // Splice
952
953 if (Subtarget.hasStdExtZvkb()) {
955 setOperationAction(ISD::VP_BSWAP, VT, Custom);
956 } else {
957 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
959 }
960
961 if (Subtarget.hasStdExtZvbb()) {
963 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
964 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
965 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
966 VT, Custom);
967 } else {
968 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
970 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
971 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
972 VT, Expand);
973
974 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
975 // in the range of f32.
976 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
977 if (isTypeLegal(FloatVT)) {
979 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
980 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
981 VT, Custom);
982 }
983 }
984
986 }
987
988 for (MVT VT : VecTupleVTs) {
989 if (!isTypeLegal(VT))
990 continue;
991
993 }
994
995 // Expand various CCs to best match the RVV ISA, which natively supports UNE
996 // but no other unordered comparisons, and supports all ordered comparisons
997 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
998 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
999 // and we pattern-match those back to the "original", swapping operands once
1000 // more. This way we catch both operations and both "vf" and "fv" forms with
1001 // fewer patterns.
1002 static const ISD::CondCode VFPCCToExpand[] = {
1006 };
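    // For example, a vector (setcc x, y, setogt) is expanded to
    // (setcc y, x, setolt); instruction selection then only needs patterns
    // for the LT/LE/EQ/UNE-style compares (vmflt, vmfle, vmfeq, vmfne, ...)
    // and recovers GT/GE by swapping operands once more, which also covers
    // the scalar-operand vmfgt.vf/vmfge.vf forms without extra patterns.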
1007
1008 // TODO: support more ops.
1009 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1014 ISD::FADD,
1015 ISD::FSUB,
1016 ISD::FMUL,
1017 ISD::FMA,
1018 ISD::FDIV,
1019 ISD::FSQRT,
1020 ISD::FCEIL,
1025 ISD::FRINT,
1028 ISD::SETCC,
1041
1042 // TODO: support more vp ops.
1043 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1044 ISD::VP_FADD,
1045 ISD::VP_FSUB,
1046 ISD::VP_FMUL,
1047 ISD::VP_FDIV,
1048 ISD::VP_FMA,
1049 ISD::VP_REDUCE_FMIN,
1050 ISD::VP_REDUCE_FMAX,
1051 ISD::VP_SQRT,
1052 ISD::VP_FMINNUM,
1053 ISD::VP_FMAXNUM,
1054 ISD::VP_FCEIL,
1055 ISD::VP_FFLOOR,
1056 ISD::VP_FROUND,
1057 ISD::VP_FROUNDEVEN,
1058 ISD::VP_FROUNDTOZERO,
1059 ISD::VP_FRINT,
1060 ISD::VP_FNEARBYINT,
1061 ISD::VP_SETCC,
1062 ISD::VP_FMINIMUM,
1063 ISD::VP_FMAXIMUM,
1064 ISD::VP_REDUCE_FMINIMUM,
1065 ISD::VP_REDUCE_FMAXIMUM};
1066
1067 // Sets common operation actions on RVV floating-point vector types.
1068 const auto SetCommonVFPActions = [&](MVT VT) {
1070 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1071 // sizes are within one power-of-two of each other. Therefore conversions
1072 // between vXf16 and vXf64 must be lowered as sequences which convert via
1073 // vXf32.
1077 // Custom-lower insert/extract operations to simplify patterns.
1079 Custom);
1080 // Expand various condition codes (explained above).
1081 setCondCodeAction(VFPCCToExpand, VT, Expand);
1082
1085 Legal);
1087
1091 VT, Custom);
1092
1093 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1094
1095 // Expand FP operations that need libcalls.
1096 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1097
1099
1101
1103 VT, Custom);
1104
1106 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1107 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1108 VT, Custom);
1109 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1110
1113
1116 VT, Custom);
1117
1120
1122 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1123 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1124
1125 setOperationAction(FloatingPointVPOps, VT, Custom);
1126
1128 Custom);
1131 VT, Legal);
1136 VT, Custom);
1137
1139 };
1140
1141 // Sets common extload/truncstore actions on RVV floating-point vector
1142 // types.
1143 const auto SetCommonVFPExtLoadTruncStoreActions =
1144 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1145 for (auto SmallVT : SmallerVTs) {
1146 setTruncStoreAction(VT, SmallVT, Expand);
1147 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1148 }
1149 };
1150
1151 // Sets common actions for f16 and bf16 for when there's only
1152 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
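    // Roughly: an op such as fadd on nxv4f16 under Zvfhmin-only is promoted
    // to nxv4f32, i.e. the operands are widened (vfwcvt.f.f.v), the
    // arithmetic is done in f32, and the result is narrowed back
    // (vfncvt.f.f.w); only the LMUL=8 types are custom-split instead, since
    // their f32 counterpart (nxv32f32) is not a legal type.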
1153 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1156 Custom);
1157 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1160 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1161 Custom);
1163 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1169 VT, Custom);
1170 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1171 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1172 MVT EltVT = VT.getVectorElementType();
1173 if (isTypeLegal(EltVT))
1174 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1176 VT, Custom);
1177 else
1178 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1179 EltVT, Custom);
1181 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1182 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1183 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1184 ISD::VP_SCATTER},
1185 VT, Custom);
1186 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1187
1191
1192 // Expand FP operations that need libcalls.
1193 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1194
1195 // Custom split nxv32[b]f16 since nxv32f32 is not legal.
1196 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1197 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1198 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1199 } else {
1200 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1201 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1202 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1203 }
1204 };
1205
1206 if (Subtarget.hasVInstructionsF16()) {
1207 for (MVT VT : F16VecVTs) {
1208 if (!isTypeLegal(VT))
1209 continue;
1210 SetCommonVFPActions(VT);
1211 }
1212 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1213 for (MVT VT : F16VecVTs) {
1214 if (!isTypeLegal(VT))
1215 continue;
1216 SetCommonPromoteToF32Actions(VT);
1217 }
1218 }
1219
1220 if (Subtarget.hasVInstructionsBF16Minimal()) {
1221 for (MVT VT : BF16VecVTs) {
1222 if (!isTypeLegal(VT))
1223 continue;
1224 SetCommonPromoteToF32Actions(VT);
1225 }
1226 }
1227
1228 if (Subtarget.hasVInstructionsF32()) {
1229 for (MVT VT : F32VecVTs) {
1230 if (!isTypeLegal(VT))
1231 continue;
1232 SetCommonVFPActions(VT);
1233 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1234 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1235 }
1236 }
1237
1238 if (Subtarget.hasVInstructionsF64()) {
1239 for (MVT VT : F64VecVTs) {
1240 if (!isTypeLegal(VT))
1241 continue;
1242 SetCommonVFPActions(VT);
1243 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1244 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1245 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1246 }
1247 }
1248
1249 if (Subtarget.useRVVForFixedLengthVectors()) {
1251 if (!useRVVForFixedLengthVectorVT(VT))
1252 continue;
1253
1254 // By default everything must be expanded.
1255 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1258 setTruncStoreAction(VT, OtherVT, Expand);
1260 OtherVT, Expand);
1261 }
1262
1263 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1264 // expansion to a build_vector of 0s.
1266
1267 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1269 Custom);
1270
1273 Custom);
1274
1276 VT, Custom);
1277
1279 VT, Custom);
1280
1282
1284
1286
1288
1291 Custom);
1292
1294
1297 Custom);
1298
1300 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1301 Custom);
1302
1304 {
1313 },
1314 VT, Custom);
1316 Custom);
1317
1319
1320 // Operations below differ between mask vectors and other vectors.
1321 if (VT.getVectorElementType() == MVT::i1) {
1322 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1323 ISD::OR, ISD::XOR},
1324 VT, Custom);
1325
1326 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1327 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1328 VT, Custom);
1329
1330 setOperationAction(ISD::VP_MERGE, VT, Custom);
1331
1332 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1333 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1334 continue;
1335 }
1336
1337 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1338 // it before type legalization for i64 vectors on RV32. It will then be
1339 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1340 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1341 // improvements first.
1342 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1345
1346 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1348 }
1349
1352
1353 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1354 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1355 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1356 ISD::VP_SCATTER},
1357 VT, Custom);
1358 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1359
1363 VT, Custom);
1364
1367
1369
1370 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1371 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1373
1377 VT, Custom);
1378
1380
1383
1384 // Custom-lower reduction operations to set up the corresponding custom
1385 // nodes' operands.
1389 VT, Custom);
1390
1391 setOperationAction(IntegerVPOps, VT, Custom);
1392
1393 if (Subtarget.hasStdExtZvkb())
1395
1396 if (Subtarget.hasStdExtZvbb()) {
1399 VT, Custom);
1400 } else {
1401 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1402 // in the range of f32.
1403 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1404 if (isTypeLegal(FloatVT))
1407 Custom);
1408 }
1409
1411 }
1412
1414 // There are no extending loads or truncating stores.
1415 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1416 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1417 setTruncStoreAction(VT, InnerVT, Expand);
1418 }
1419
1420 if (!useRVVForFixedLengthVectorVT(VT))
1421 continue;
1422
1423 // By default everything must be expanded.
1424 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1426
1427 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1428 // expansion to a build_vector of 0s.
1430
1435 VT, Custom);
1436 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1437 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1438
1440 VT, Custom);
1441
1444 VT, Custom);
1445 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1446 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1447 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1448 VT, Custom);
1449 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1450
1453 Custom);
1454
1455 if (VT.getVectorElementType() == MVT::f16 &&
1456 !Subtarget.hasVInstructionsF16()) {
1458 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1460 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1461 Custom);
1462 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1463 Custom);
1466 if (Subtarget.hasStdExtZfhmin()) {
1468 } else {
1469 // We need to custom legalize f16 build vectors if Zfhmin isn't
1470 // available.
1472 }
1476 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1477 // Don't promote f16 vector operations to f32 if the f32 vector type is
1478 // not legal.
1479 // TODO: could split the f16 vector into two vectors and do promotion.
1480 if (!isTypeLegal(F32VecVT))
1481 continue;
1482 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1483 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1484 continue;
1485 }
1486
1487 if (VT.getVectorElementType() == MVT::bf16) {
1489 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1492 if (Subtarget.hasStdExtZfbfmin()) {
1494 } else {
1495 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1496 // available.
1498 }
1500 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1501 Custom);
1502 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1503 // Don't promote bf16 vector operations to f32 if the f32 vector type is
1504 // not legal.
1505 // TODO: could split the bf16 vector into two vectors and do promotion.
1506 if (!isTypeLegal(F32VecVT))
1507 continue;
1508 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1509 // TODO: Promote VP ops to fp32.
1510 continue;
1511 }
1512
1514 Custom);
1515
1521 VT, Custom);
1522
1527 VT, Custom);
1528
1529 setCondCodeAction(VFPCCToExpand, VT, Expand);
1530
1533
1535
1536 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1537
1538 setOperationAction(FloatingPointVPOps, VT, Custom);
1539
1546 VT, Custom);
1547 }
1548
1549 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1550 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1551 if (Subtarget.is64Bit())
1553 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1555 if (Subtarget.hasStdExtZfbfmin())
1557 if (Subtarget.hasStdExtFOrZfinx())
1559 if (Subtarget.hasStdExtDOrZdinx())
1561 }
1562 }
1563
1564 if (Subtarget.hasStdExtA())
1566
1567 if (Subtarget.hasForcedAtomics()) {
1568 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1574 XLenVT, LibCall);
1575 }
1576
1577 if (Subtarget.hasVendorXTHeadMemIdx()) {
1578 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1579 setIndexedLoadAction(im, MVT::i8, Legal);
1580 setIndexedStoreAction(im, MVT::i8, Legal);
1581 setIndexedLoadAction(im, MVT::i16, Legal);
1582 setIndexedStoreAction(im, MVT::i16, Legal);
1583 setIndexedLoadAction(im, MVT::i32, Legal);
1584 setIndexedStoreAction(im, MVT::i32, Legal);
1585
1586 if (Subtarget.is64Bit()) {
1587 setIndexedLoadAction(im, MVT::i64, Legal);
1588 setIndexedStoreAction(im, MVT::i64, Legal);
1589 }
1590 }
1591 }
1592
1593 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1597
1601 }
1602
1603 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1604 if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1605 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1608 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1609 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1610 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1611 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1612 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
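    // Each mapping pairs an i32 accumulator vector with an i8 source vector
    // of 4x the element count, i.e. four i8 products are summed into each
    // i32 lane; this is the shape the Zvqdotq dot-product instructions
    // (vqdot.vv, vqdotu.vv, ...) compute, hence the Custom lowering.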
1613
1614 if (Subtarget.useRVVForFixedLengthVectors()) {
1616 if (VT.getVectorElementType() != MVT::i32 ||
1617 !useRVVForFixedLengthVectorVT(VT))
1618 continue;
1619 ElementCount EC = VT.getVectorElementCount();
1620 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1621 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1622 }
1623 }
1624 }
1625
1626 // Customize load and store operations for bf16 if Zfh isn't enabled.
1627 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1628 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1630 }
1631
1632 // Function alignments.
1633 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1634 setMinFunctionAlignment(FunctionAlignment);
1635 // Set preferred alignments.
1638
1644
1645 if (Subtarget.hasStdExtFOrZfinx())
1647
1648 if (Subtarget.hasStdExtZbb())
1650
1651 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1652 Subtarget.hasVInstructions())
1654
1655 if (Subtarget.hasStdExtZbkb())
1657
1658 if (Subtarget.hasStdExtFOrZfinx())
1661 if (Subtarget.hasVInstructions())
1664 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1667 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1672
1673 if (Subtarget.hasVendorXTHeadMemPair())
1675 if (Subtarget.useRVVForFixedLengthVectors())
1677
1678 // Disable strict node mutation.
1679 IsStrictFPEnabled = true;
1680 EnableExtLdPromotion = true;
1681
1682 // Let the subtarget decide if a predictable select is more expensive than the
1683 // corresponding branch. This information is used in CGP/SelectOpt to decide
1684 // when to convert selects into branches.
1685 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1686
1687 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1688 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1689
1691 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1692 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1693
1695 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1696 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1697
1698 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1699 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1700}
1701
1703 LLVMContext &Context,
1704 EVT VT) const {
1705 if (!VT.isVector())
1706 return getPointerTy(DL);
1707 if (Subtarget.hasVInstructions() &&
1708 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1709 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1711}
1712
1713MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1714 return Subtarget.getXLenVT();
1715}
1716
1717// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1718bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1719 unsigned VF,
1720 bool IsScalable) const {
1721 if (!Subtarget.hasVInstructions())
1722 return true;
1723
1724 if (!IsScalable)
1725 return true;
1726
1727 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1728 return true;
1729
1730 // Don't allow VF=1 if those types aren't legal.
1731 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1732 return true;
1733
1734 // VLEN=32 support is incomplete.
1735 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1736 return true;
1737
1738 // The maximum VF is for the smallest element width with LMUL=8.
1739 // VF must be a power of 2.
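 // Concretely, MaxVF = RVVBytesPerBlock * 8 = 64 here (SEW=8 at LMUL=8), so
 // e.g. a scalable get.vector.length with VF=32 is lowered to vsetvli while
 // VF=128 or any non-power-of-two VF is expanded.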
1740 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
1741 return VF > MaxVF || !isPowerOf2_32(VF);
1742}
1743
1745 return !Subtarget.hasVInstructions() ||
1746 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1747}
1748
1750 const CallInst &I,
1751 MachineFunction &MF,
1752 unsigned Intrinsic) const {
1753 auto &DL = I.getDataLayout();
1754
1755 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1756 bool IsUnitStrided, bool UsePtrVal = false) {
1758 // We can't use ptrVal if the intrinsic can access memory before the
1759 // pointer. This means we can't use it for strided or indexed intrinsics.
1760 if (UsePtrVal)
1761 Info.ptrVal = I.getArgOperand(PtrOp);
1762 else
1763 Info.fallbackAddressSpace =
1764 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1765 Type *MemTy;
1766 if (IsStore) {
1767 // Store value is the first operand.
1768 MemTy = I.getArgOperand(0)->getType();
1769 } else {
1770 // Use the return type. If it's a segment load, the return type is a struct.
1771 MemTy = I.getType();
1772 if (MemTy->isStructTy())
1773 MemTy = MemTy->getStructElementType(0);
1774 }
1775 if (!IsUnitStrided)
1776 MemTy = MemTy->getScalarType();
1777
1778 Info.memVT = getValueType(DL, MemTy);
1779 if (MemTy->isTargetExtTy()) {
1780 // For RISC-V vector tuple types, alignment is derived from the element type.
1781 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1782 MemTy = Type::getIntNTy(
1783 MemTy->getContext(),
1784 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1785 ->getZExtValue());
1786 Info.align = DL.getABITypeAlign(MemTy);
1787 } else {
1788 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
1789 }
1791 Info.flags |=
1793 return true;
1794 };
1795
1796 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1798
1800 switch (Intrinsic) {
1801 default:
1802 return false;
1803 case Intrinsic::riscv_masked_atomicrmw_xchg:
1804 case Intrinsic::riscv_masked_atomicrmw_add:
1805 case Intrinsic::riscv_masked_atomicrmw_sub:
1806 case Intrinsic::riscv_masked_atomicrmw_nand:
1807 case Intrinsic::riscv_masked_atomicrmw_max:
1808 case Intrinsic::riscv_masked_atomicrmw_min:
1809 case Intrinsic::riscv_masked_atomicrmw_umax:
1810 case Intrinsic::riscv_masked_atomicrmw_umin:
1811 case Intrinsic::riscv_masked_cmpxchg:
1812 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
1813 // narrow atomic operation. These will be expanded to an LR/SC loop that
1814 // reads/writes to/from an aligned 4 byte location. And, or, shift, etc.
1815 // will be used to modify the appropriate part of the 4 byte data and
1816 // preserve the rest.
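 // For example, an i8 atomicrmw is widened to operate on the containing
 // aligned 32-bit word: the word is loaded with lr.w, the addressed byte is
 // updated under a mask, and the word is written back with sc.w, retrying on
 // failure; hence the i32 memVT and 4-byte alignment recorded below.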
1818 Info.memVT = MVT::i32;
1819 Info.ptrVal = I.getArgOperand(0);
1820 Info.offset = 0;
1821 Info.align = Align(4);
1824 return true;
1825 case Intrinsic::riscv_seg2_load_mask:
1826 case Intrinsic::riscv_seg3_load_mask:
1827 case Intrinsic::riscv_seg4_load_mask:
1828 case Intrinsic::riscv_seg5_load_mask:
1829 case Intrinsic::riscv_seg6_load_mask:
1830 case Intrinsic::riscv_seg7_load_mask:
1831 case Intrinsic::riscv_seg8_load_mask:
1832 case Intrinsic::riscv_sseg2_load_mask:
1833 case Intrinsic::riscv_sseg3_load_mask:
1834 case Intrinsic::riscv_sseg4_load_mask:
1835 case Intrinsic::riscv_sseg5_load_mask:
1836 case Intrinsic::riscv_sseg6_load_mask:
1837 case Intrinsic::riscv_sseg7_load_mask:
1838 case Intrinsic::riscv_sseg8_load_mask:
1839 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1840 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1841 case Intrinsic::riscv_seg2_store_mask:
1842 case Intrinsic::riscv_seg3_store_mask:
1843 case Intrinsic::riscv_seg4_store_mask:
1844 case Intrinsic::riscv_seg5_store_mask:
1845 case Intrinsic::riscv_seg6_store_mask:
1846 case Intrinsic::riscv_seg7_store_mask:
1847 case Intrinsic::riscv_seg8_store_mask:
1848 // Operands are (vec, ..., vec, ptr, mask, vl)
1849 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1850 /*IsStore*/ true,
1851 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1852 case Intrinsic::riscv_sseg2_store_mask:
1853 case Intrinsic::riscv_sseg3_store_mask:
1854 case Intrinsic::riscv_sseg4_store_mask:
1855 case Intrinsic::riscv_sseg5_store_mask:
1856 case Intrinsic::riscv_sseg6_store_mask:
1857 case Intrinsic::riscv_sseg7_store_mask:
1858 case Intrinsic::riscv_sseg8_store_mask:
1859 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
1860 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1861 /*IsStore*/ true,
1862 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1863 case Intrinsic::riscv_vlm:
1864 return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
1865 /*IsStore*/ false,
1866 /*IsUnitStrided*/ true,
1867 /*UsePtrVal*/ true);
1868 case Intrinsic::riscv_vle:
1869 case Intrinsic::riscv_vle_mask:
1870 case Intrinsic::riscv_vleff:
1871 case Intrinsic::riscv_vleff_mask:
1872 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1873 /*IsStore*/ false,
1874 /*IsUnitStrided*/ true,
1875 /*UsePtrVal*/ true);
1876 case Intrinsic::riscv_vsm:
1877 case Intrinsic::riscv_vse:
1878 case Intrinsic::riscv_vse_mask:
1879 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1880 /*IsStore*/ true,
1881 /*IsUnitStrided*/ true,
1882 /*UsePtrVal*/ true);
1883 case Intrinsic::riscv_vlse:
1884 case Intrinsic::riscv_vlse_mask:
1885 case Intrinsic::riscv_vloxei:
1886 case Intrinsic::riscv_vloxei_mask:
1887 case Intrinsic::riscv_vluxei:
1888 case Intrinsic::riscv_vluxei_mask:
1889 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1890 /*IsStore*/ false,
1891 /*IsUnitStrided*/ false);
1892 case Intrinsic::riscv_vsse:
1893 case Intrinsic::riscv_vsse_mask:
1894 case Intrinsic::riscv_vsoxei:
1895 case Intrinsic::riscv_vsoxei_mask:
1896 case Intrinsic::riscv_vsuxei:
1897 case Intrinsic::riscv_vsuxei_mask:
1898 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1899 /*IsStore*/ true,
1900 /*IsUnitStrided*/ false);
1901 case Intrinsic::riscv_vlseg2:
1902 case Intrinsic::riscv_vlseg3:
1903 case Intrinsic::riscv_vlseg4:
1904 case Intrinsic::riscv_vlseg5:
1905 case Intrinsic::riscv_vlseg6:
1906 case Intrinsic::riscv_vlseg7:
1907 case Intrinsic::riscv_vlseg8:
1908 case Intrinsic::riscv_vlseg2ff:
1909 case Intrinsic::riscv_vlseg3ff:
1910 case Intrinsic::riscv_vlseg4ff:
1911 case Intrinsic::riscv_vlseg5ff:
1912 case Intrinsic::riscv_vlseg6ff:
1913 case Intrinsic::riscv_vlseg7ff:
1914 case Intrinsic::riscv_vlseg8ff:
1915 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1916 /*IsStore*/ false,
1917 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1918 case Intrinsic::riscv_vlseg2_mask:
1919 case Intrinsic::riscv_vlseg3_mask:
1920 case Intrinsic::riscv_vlseg4_mask:
1921 case Intrinsic::riscv_vlseg5_mask:
1922 case Intrinsic::riscv_vlseg6_mask:
1923 case Intrinsic::riscv_vlseg7_mask:
1924 case Intrinsic::riscv_vlseg8_mask:
1925 case Intrinsic::riscv_vlseg2ff_mask:
1926 case Intrinsic::riscv_vlseg3ff_mask:
1927 case Intrinsic::riscv_vlseg4ff_mask:
1928 case Intrinsic::riscv_vlseg5ff_mask:
1929 case Intrinsic::riscv_vlseg6ff_mask:
1930 case Intrinsic::riscv_vlseg7ff_mask:
1931 case Intrinsic::riscv_vlseg8ff_mask:
1932 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1933 /*IsStore*/ false,
1934 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1935 case Intrinsic::riscv_vlsseg2:
1936 case Intrinsic::riscv_vlsseg3:
1937 case Intrinsic::riscv_vlsseg4:
1938 case Intrinsic::riscv_vlsseg5:
1939 case Intrinsic::riscv_vlsseg6:
1940 case Intrinsic::riscv_vlsseg7:
1941 case Intrinsic::riscv_vlsseg8:
1942 case Intrinsic::riscv_vloxseg2:
1943 case Intrinsic::riscv_vloxseg3:
1944 case Intrinsic::riscv_vloxseg4:
1945 case Intrinsic::riscv_vloxseg5:
1946 case Intrinsic::riscv_vloxseg6:
1947 case Intrinsic::riscv_vloxseg7:
1948 case Intrinsic::riscv_vloxseg8:
1949 case Intrinsic::riscv_vluxseg2:
1950 case Intrinsic::riscv_vluxseg3:
1951 case Intrinsic::riscv_vluxseg4:
1952 case Intrinsic::riscv_vluxseg5:
1953 case Intrinsic::riscv_vluxseg6:
1954 case Intrinsic::riscv_vluxseg7:
1955 case Intrinsic::riscv_vluxseg8:
1956 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1957 /*IsStore*/ false,
1958 /*IsUnitStrided*/ false);
1959 case Intrinsic::riscv_vlsseg2_mask:
1960 case Intrinsic::riscv_vlsseg3_mask:
1961 case Intrinsic::riscv_vlsseg4_mask:
1962 case Intrinsic::riscv_vlsseg5_mask:
1963 case Intrinsic::riscv_vlsseg6_mask:
1964 case Intrinsic::riscv_vlsseg7_mask:
1965 case Intrinsic::riscv_vlsseg8_mask:
1966 case Intrinsic::riscv_vloxseg2_mask:
1967 case Intrinsic::riscv_vloxseg3_mask:
1968 case Intrinsic::riscv_vloxseg4_mask:
1969 case Intrinsic::riscv_vloxseg5_mask:
1970 case Intrinsic::riscv_vloxseg6_mask:
1971 case Intrinsic::riscv_vloxseg7_mask:
1972 case Intrinsic::riscv_vloxseg8_mask:
1973 case Intrinsic::riscv_vluxseg2_mask:
1974 case Intrinsic::riscv_vluxseg3_mask:
1975 case Intrinsic::riscv_vluxseg4_mask:
1976 case Intrinsic::riscv_vluxseg5_mask:
1977 case Intrinsic::riscv_vluxseg6_mask:
1978 case Intrinsic::riscv_vluxseg7_mask:
1979 case Intrinsic::riscv_vluxseg8_mask:
1980 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1981 /*IsStore*/ false,
1982 /*IsUnitStrided*/ false);
1983 case Intrinsic::riscv_vsseg2:
1984 case Intrinsic::riscv_vsseg3:
1985 case Intrinsic::riscv_vsseg4:
1986 case Intrinsic::riscv_vsseg5:
1987 case Intrinsic::riscv_vsseg6:
1988 case Intrinsic::riscv_vsseg7:
1989 case Intrinsic::riscv_vsseg8:
1990 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1991 /*IsStore*/ true,
1992 /*IsUnitStrided*/ false);
1993 case Intrinsic::riscv_vsseg2_mask:
1994 case Intrinsic::riscv_vsseg3_mask:
1995 case Intrinsic::riscv_vsseg4_mask:
1996 case Intrinsic::riscv_vsseg5_mask:
1997 case Intrinsic::riscv_vsseg6_mask:
1998 case Intrinsic::riscv_vsseg7_mask:
1999 case Intrinsic::riscv_vsseg8_mask:
2000 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2001 /*IsStore*/ true,
2002 /*IsUnitStrided*/ false);
2003 case Intrinsic::riscv_vssseg2:
2004 case Intrinsic::riscv_vssseg3:
2005 case Intrinsic::riscv_vssseg4:
2006 case Intrinsic::riscv_vssseg5:
2007 case Intrinsic::riscv_vssseg6:
2008 case Intrinsic::riscv_vssseg7:
2009 case Intrinsic::riscv_vssseg8:
2010 case Intrinsic::riscv_vsoxseg2:
2011 case Intrinsic::riscv_vsoxseg3:
2012 case Intrinsic::riscv_vsoxseg4:
2013 case Intrinsic::riscv_vsoxseg5:
2014 case Intrinsic::riscv_vsoxseg6:
2015 case Intrinsic::riscv_vsoxseg7:
2016 case Intrinsic::riscv_vsoxseg8:
2017 case Intrinsic::riscv_vsuxseg2:
2018 case Intrinsic::riscv_vsuxseg3:
2019 case Intrinsic::riscv_vsuxseg4:
2020 case Intrinsic::riscv_vsuxseg5:
2021 case Intrinsic::riscv_vsuxseg6:
2022 case Intrinsic::riscv_vsuxseg7:
2023 case Intrinsic::riscv_vsuxseg8:
2024 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2025 /*IsStore*/ true,
2026 /*IsUnitStrided*/ false);
2027 case Intrinsic::riscv_vssseg2_mask:
2028 case Intrinsic::riscv_vssseg3_mask:
2029 case Intrinsic::riscv_vssseg4_mask:
2030 case Intrinsic::riscv_vssseg5_mask:
2031 case Intrinsic::riscv_vssseg6_mask:
2032 case Intrinsic::riscv_vssseg7_mask:
2033 case Intrinsic::riscv_vssseg8_mask:
2034 case Intrinsic::riscv_vsoxseg2_mask:
2035 case Intrinsic::riscv_vsoxseg3_mask:
2036 case Intrinsic::riscv_vsoxseg4_mask:
2037 case Intrinsic::riscv_vsoxseg5_mask:
2038 case Intrinsic::riscv_vsoxseg6_mask:
2039 case Intrinsic::riscv_vsoxseg7_mask:
2040 case Intrinsic::riscv_vsoxseg8_mask:
2041 case Intrinsic::riscv_vsuxseg2_mask:
2042 case Intrinsic::riscv_vsuxseg3_mask:
2043 case Intrinsic::riscv_vsuxseg4_mask:
2044 case Intrinsic::riscv_vsuxseg5_mask:
2045 case Intrinsic::riscv_vsuxseg6_mask:
2046 case Intrinsic::riscv_vsuxseg7_mask:
2047 case Intrinsic::riscv_vsuxseg8_mask:
2048 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2049 /*IsStore*/ true,
2050 /*IsUnitStrided*/ false);
2051 }
2052}
2053
2055 const AddrMode &AM, Type *Ty,
2056 unsigned AS,
2057 Instruction *I) const {
2058 // No global is ever allowed as a base.
2059 if (AM.BaseGV)
2060 return false;
2061
2062 // None of our addressing modes allows a scalable offset
2063 if (AM.ScalableOffset)
2064 return false;
2065
2066 // RVV instructions only support register addressing.
2067 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2068 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2069
2070 // Require a 12-bit signed offset.
2071 if (!isInt<12>(AM.BaseOffs))
2072 return false;
2073
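 // Taken together with the offset test above, the legal forms amount to a
 // single register plus a 12-bit signed immediate (e.g. the addressing in
 // "lw a0, -8(a1)") or the immediate alone; reg+reg and scaled indices are
 // rejected, and RVV memory accesses only accept a plain base register.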
2074 switch (AM.Scale) {
2075 case 0: // "r+i" or just "i", depending on HasBaseReg.
2076 break;
2077 case 1:
2078 if (!AM.HasBaseReg) // allow "r+i".
2079 break;
2080 return false; // disallow "r+r" or "r+r+i".
2081 default:
2082 return false;
2083 }
2084
2085 return true;
2086}
2087
2089 return isInt<12>(Imm);
2090}
2091
2093 return isInt<12>(Imm);
2094}
2095
2096// On RV32, 64-bit integers are split into their high and low parts and held
2097// in two different registers, so the trunc is free since the low register can
2098// just be used.
2099// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2100// isTruncateFree?
2102 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2103 return false;
2104 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2105 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2106 return (SrcBits == 64 && DestBits == 32);
2107}
2108
2110 // We consider i64->i32 free on RV64 since we have good selection of W
2111 // instructions that make promoting operations back to i64 free in many cases.
2112 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2113 !DstVT.isInteger())
2114 return false;
2115 unsigned SrcBits = SrcVT.getSizeInBits();
2116 unsigned DestBits = DstVT.getSizeInBits();
2117 return (SrcBits == 64 && DestBits == 32);
2118}
2119
2121 EVT SrcVT = Val.getValueType();
2122 // Truncation is free when it folds into a vnsrl/vnsra narrowing shift.
2123 if (Subtarget.hasVInstructions() &&
2124 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2125 SrcVT.isVector() && VT2.isVector()) {
2126 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2127 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2128 if (SrcBits == DestBits * 2) {
2129 return true;
2130 }
2131 }
2132 return TargetLowering::isTruncateFree(Val, VT2);
2133}
2134
2136 // Zexts are free if they can be combined with a load.
2137 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2138 // poorly with type legalization of compares preferring sext.
2139 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2140 EVT MemVT = LD->getMemoryVT();
2141 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2142 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2143 LD->getExtensionType() == ISD::ZEXTLOAD))
2144 return true;
2145 }
2146
2147 return TargetLowering::isZExtFree(Val, VT2);
2148}
2149
2151 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2152}
2153
2155 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2156}
2157
2159 return Subtarget.hasStdExtZbb() ||
2160 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2161}
2162
2164 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2165 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2166}
2167
2169 const Instruction &AndI) const {
2170 // We expect to be able to match a bit extraction instruction if the Zbs
2171 // extension is supported and the mask is a power of two. However, we
2172 // conservatively return false if the mask would fit in an ANDI instruction,
2173 // on the basis that it's possible the sinking+duplication of the AND in
2174 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2175 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
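 // For example, testing bit 20 (mask 0x100000) cannot be done with a single
 // ANDI, so keeping the and next to its compare lets isel form BEXTI+BNEZ
 // (or th.tst with XTheadBs); a mask like 0x8 already fits ANDI, so the hook
 // declines in that case.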
2176 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2177 return false;
2178 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2179 if (!Mask)
2180 return false;
2181 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2182}
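// Worked example for the hook above (illustrative, not from the upstream
// source): (x & 0x800) != 0 uses a power-of-two mask that does not fit
// ANDI's signed 12-bit immediate, so with Zbs it is better matched as
//   bexti t0, x, 11 ; bnez t0, ...
// whereas a mask such as 0x400 still fits ANDI, the hook returns false,
// and the plain "andi t0, x, 1024 ; bnez ..." sequence is kept.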
2183
2185 EVT VT = Y.getValueType();
2186
2187 if (VT.isVector())
2188 return false;
2189
2190 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2191 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2192}
2193
2195 EVT VT = Y.getValueType();
2196
2197 if (!VT.isVector())
2198 return hasAndNotCompare(Y);
2199
2200 return Subtarget.hasStdExtZvkb();
2201}
2202
2204 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2205 if (Subtarget.hasStdExtZbs())
2206 return X.getValueType().isScalarInteger();
2207 auto *C = dyn_cast<ConstantSDNode>(Y);
2208 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2209 if (Subtarget.hasVendorXTHeadBs())
2210 return C != nullptr;
2211 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2212 return C && C->getAPIntValue().ule(10);
2213}
2214
2216 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2217 SDValue Y) const {
2218 if (SelectOpcode != ISD::VSELECT)
2219 return false;
2220
2221 // Only enable for rvv.
2222 if (!VT.isVector() || !Subtarget.hasVInstructions())
2223 return false;
2224
2225 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2226 return false;
2227
2228 return true;
2229}
2230
2232 Type *Ty) const {
2233 assert(Ty->isIntegerTy());
2234
2235 unsigned BitSize = Ty->getIntegerBitWidth();
2236 if (BitSize > Subtarget.getXLen())
2237 return false;
2238
2239 // Fast path, assume 32-bit immediates are cheap.
2240 int64_t Val = Imm.getSExtValue();
2241 if (isInt<32>(Val))
2242 return true;
2243
2244 // A constant pool entry may be more aligned than the load we're trying to
2245 // replace. If we don't support unaligned scalar mem, prefer the constant
2246 // pool.
2247 // TODO: Can the caller pass down the alignment?
2248 if (!Subtarget.enableUnalignedScalarMem())
2249 return true;
2250
2251 // Prefer to keep the load if it would require many instructions.
2252 // This uses the same threshold we use for constant pools but doesn't
2253 // check useConstantPoolForLargeInts.
2254 // TODO: Should we keep the load only when we're definitely going to emit a
2255 // constant pool?
2256
2258 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2259}
2260
2264 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2265 SelectionDAG &DAG) const {
2266 // One interesting pattern that we'd want to form is 'bit extract':
2267 // ((1 >> Y) & 1) ==/!= 0
2268 // But we also need to be careful not to try to reverse that fold.
2269
2270 // Is this '((1 >> Y) & 1)'?
2271 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2272 return false; // Keep the 'bit extract' pattern.
2273
2274 // Will this be '((1 >> Y) & 1)' after the transform?
2275 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2276 return true; // Do form the 'bit extract' pattern.
2277
2278 // If 'X' is a constant, and we transform, then we will immediately
2279 // try to undo the fold, thus causing endless combine loop.
2280 // So only do the transform if X is not a constant. This matches the default
2281 // implementation of this function.
2282 return !XC;
2283}
2284
2286 unsigned Opc = VecOp.getOpcode();
2287
2288 // Assume target opcodes can't be scalarized.
2289 // TODO - do we have any exceptions?
2290 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2291 return false;
2292
2293 // If the vector op is not supported, try to convert to scalar.
2294 EVT VecVT = VecOp.getValueType();
2296 return true;
2297
2298 // If the vector op is supported, but the scalar op is not, the transform may
2299 // not be worthwhile.
2300 // Permit a vector binary operation to be converted to a scalar binary
2301 // operation which is custom lowered with an illegal type.
2302 EVT ScalarVT = VecVT.getScalarType();
2303 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2304 isOperationCustom(Opc, ScalarVT);
2305}
2306
2308 const GlobalAddressSDNode *GA) const {
2309 // In order to maximise the opportunity for common subexpression elimination,
2310 // keep a separate ADD node for the global address offset instead of folding
2311 // it in the global address node. Later peephole optimisations may choose to
2312 // fold it back in when profitable.
2313 return false;
2314}
2315
2316 // Returns 0-31 if the fli instruction is available for the type and this is
2317 // a legal FP immediate for the type. Returns -1 otherwise.
2319 if (!Subtarget.hasStdExtZfa())
2320 return -1;
2321
2322 bool IsSupportedVT = false;
2323 if (VT == MVT::f16) {
2324 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2325 } else if (VT == MVT::f32) {
2326 IsSupportedVT = true;
2327 } else if (VT == MVT::f64) {
2328 assert(Subtarget.hasStdExtD() && "Expect D extension");
2329 IsSupportedVT = true;
2330 }
2331
2332 if (!IsSupportedVT)
2333 return -1;
2334
2335 return RISCVLoadFPImm::getLoadFPImm(Imm);
2336}
2337
2339 bool ForCodeSize) const {
2340 bool IsLegalVT = false;
2341 if (VT == MVT::f16)
2342 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2343 else if (VT == MVT::f32)
2344 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2345 else if (VT == MVT::f64)
2346 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2347 else if (VT == MVT::bf16)
2348 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2349
2350 if (!IsLegalVT)
2351 return false;
2352
2353 if (getLegalZfaFPImm(Imm, VT) >= 0)
2354 return true;
2355
2356 // Some constants can be produced by fli+fneg.
2357 if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
2358 return true;
2359
2360 // Cannot create a 64 bit floating-point immediate value for rv32.
2361 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2362 // td can handle +0.0 or -0.0 already.
2363 // -0.0 can be created by fmv + fneg.
2364 return Imm.isZero();
2365 }
2366
2367 // Special case: fmv + fneg
2368 if (Imm.isNegZero())
2369 return true;
2370
2371 // Building an integer and then converting requires a fmv at the end of
2372 // the integer sequence. The fmv is not required for Zfinx.
2373 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2374 const int Cost =
2375 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2376 Subtarget.getXLen(), Subtarget);
2377 return Cost <= FPImmCost;
2378}
2379
2380// TODO: This is very conservative.
2382 unsigned Index) const {
2384 return false;
2385
2386 // Extracts from index 0 are just subreg extracts.
2387 if (Index == 0)
2388 return true;
2389
2390 // Only support extracting a fixed from a fixed vector for now.
2391 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2392 return false;
2393
2394 EVT EltVT = ResVT.getVectorElementType();
2395 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2396
2397 // The smallest type we can slide is i8.
2398 // TODO: We can extract index 0 from a mask vector without a slide.
2399 if (EltVT == MVT::i1)
2400 return false;
2401
2402 unsigned ResElts = ResVT.getVectorNumElements();
2403 unsigned SrcElts = SrcVT.getVectorNumElements();
2404
2405 unsigned MinVLen = Subtarget.getRealMinVLen();
2406 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2407
2408 // If we're extracting only data from the first VLEN bits of the source
2409 // then we can always do this with an m1 vslidedown.vx. Restricting the
2410 // Index ensures we can use a vslidedown.vi.
2411 // TODO: We can generalize this when the exact VLEN is known.
2412 if (Index + ResElts <= MinVLMAX && Index < 31)
2413 return true;
2414
2415 // Conservatively only handle extracting half of a vector.
2416 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2417 // the upper half of a vector until we have more test coverage.
2418 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2419 // a cheap extract. However, this case is important in practice for
2420 // shuffled extracts of longer vectors. How should we resolve this?
2421 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2422}
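// Worked examples (illustrative, assuming VLEN >= 128 so MinVLMAX is 4 for
// i32 elements):
//   v2i32 from v8i32 at index 2: Index + ResElts = 4 <= MinVLMAX, a single
//     m1 vslidedown.vi suffices, so the extract is reported as cheap.
//   v4i32 from v8i32 at index 4: exactly the upper half, also cheap.
//   v4i32 from v8i32 at index 3: neither case applies, not cheap.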
2423
2425 CallingConv::ID CC,
2426 EVT VT) const {
2427 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2428 // We might still end up using a GPR but that will be decided based on ABI.
2429 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2430 !Subtarget.hasStdExtZfhminOrZhinxmin())
2431 return MVT::f32;
2432
2434
2435 return PartVT;
2436}
2437
2438unsigned
2440 std::optional<MVT> RegisterVT) const {
2441 // Pair inline assembly operand
2442 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2443 *RegisterVT == MVT::Untyped)
2444 return 1;
2445
2446 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2447}
2448
2450 CallingConv::ID CC,
2451 EVT VT) const {
2452 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2453 // We might still end up using a GPR but that will be decided based on ABI.
2454 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2455 !Subtarget.hasStdExtZfhminOrZhinxmin())
2456 return 1;
2457
2459}
2460
2462 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2463 unsigned &NumIntermediates, MVT &RegisterVT) const {
2465 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2466
2467 return NumRegs;
2468}
2469
2470// Changes the condition code and swaps operands if necessary, so the SetCC
2471// operation matches one of the comparisons supported directly by branches
2472// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2473// with 1/-1.
2474static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2475 ISD::CondCode &CC, SelectionDAG &DAG,
2476 const RISCVSubtarget &Subtarget) {
2477 // If this is a single bit test that can't be handled by ANDI, shift the
2478 // bit to be tested to the MSB and perform a signed compare with 0.
2479 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2480 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2481 isa<ConstantSDNode>(LHS.getOperand(1)) &&
2482 // XAndesPerf supports branch on test bit.
2483 !Subtarget.hasVendorXAndesPerf()) {
2484 uint64_t Mask = LHS.getConstantOperandVal(1);
2485 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2486 unsigned ShAmt = 0;
2487 if (isPowerOf2_64(Mask)) {
2488 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2489 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2490 } else {
2491 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2492 }
2493
2494 LHS = LHS.getOperand(0);
2495 if (ShAmt != 0)
2496 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2497 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2498 return;
2499 }
2500 }
2501
2502 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2503 int64_t C = RHSC->getSExtValue();
2504 switch (CC) {
2505 default: break;
2506 case ISD::SETGT:
2507 // Convert X > -1 to X >= 0.
2508 if (C == -1) {
2509 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2510 CC = ISD::SETGE;
2511 return;
2512 }
2513 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
2514 // We have a branch immediate instruction for SETGE but not SETGT.
2515 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2516 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2517 CC = ISD::SETGE;
2518 return;
2519 }
2520 break;
2521 case ISD::SETLT:
2522 // Convert X < 1 to 0 >= X.
2523 if (C == 1) {
2524 RHS = LHS;
2525 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2526 CC = ISD::SETGE;
2527 return;
2528 }
2529 break;
2530 case ISD::SETUGT:
2531 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
2532 // We have a branch immediate instruction for SETUGE but not SETUGT.
2533 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
2534 // immediate.
2535 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2536 CC = ISD::SETUGE;
2537 return;
2538 }
2539 break;
2540 }
2541 }
2542
2543 switch (CC) {
2544 default:
2545 break;
2546 case ISD::SETGT:
2547 case ISD::SETLE:
2548 case ISD::SETUGT:
2549 case ISD::SETULE:
2551 std::swap(LHS, RHS);
2552 break;
2553 }
2554}
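// Worked example for translateSetCCForBranch (illustrative, not from the
// upstream source): on RV64 the single-bit test
//   seteq (and %x, 0x10000), 0      ; bit 16, mask too large for ANDI
// is rewritten by shifting bit 16 into the sign bit and comparing signed
// against zero, roughly
//   slli t0, x, 47                  ; 63 - 16
//   bgez t0, <target>               ; SETEQ has become SETGE vs. 0
// Similarly, "X > -1" becomes "X >= 0" and "X < 1" becomes "0 >= X" so the
// compare maps directly onto BGE.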
2555
2557 if (VT.isRISCVVectorTuple()) {
2558 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2559 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2560 return RISCVVType::LMUL_F8;
2561 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2562 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2563 return RISCVVType::LMUL_F4;
2564 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2565 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2566 return RISCVVType::LMUL_F2;
2567 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2568 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2569 return RISCVVType::LMUL_1;
2570 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2571 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2572 return RISCVVType::LMUL_2;
2573 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2574 return RISCVVType::LMUL_4;
2575 llvm_unreachable("Invalid vector tuple type LMUL.");
2576 }
2577
2578 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2579 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2580 if (VT.getVectorElementType() == MVT::i1)
2581 KnownSize *= 8;
2582
2583 switch (KnownSize) {
2584 default:
2585 llvm_unreachable("Invalid LMUL.");
2586 case 8:
2587 return RISCVVType::LMUL_F8;
2588 case 16:
2589 return RISCVVType::LMUL_F4;
2590 case 32:
2591 return RISCVVType::LMUL_F2;
2592 case 64:
2593 return RISCVVType::LMUL_1;
2594 case 128:
2595 return RISCVVType::LMUL_2;
2596 case 256:
2597 return RISCVVType::LMUL_4;
2598 case 512:
2599 return RISCVVType::LMUL_8;
2600 }
2601}
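// Illustrative getLMUL mappings (derived from the switch above, with
// RISCV::RVVBitsPerBlock == 64):
//   nxv1i16 -> known minimum size  16 bits -> LMUL_F4
//   nxv2i32 -> known minimum size  64 bits -> LMUL_1
//   nxv4i32 -> known minimum size 128 bits -> LMUL_2
//   nxv8i1  -> 8 x 1 bit, scaled by 8 for mask types -> 64 -> LMUL_1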
2602
2604 switch (LMul) {
2605 default:
2606 llvm_unreachable("Invalid LMUL.");
2610 case RISCVVType::LMUL_1:
2611 return RISCV::VRRegClassID;
2612 case RISCVVType::LMUL_2:
2613 return RISCV::VRM2RegClassID;
2614 case RISCVVType::LMUL_4:
2615 return RISCV::VRM4RegClassID;
2616 case RISCVVType::LMUL_8:
2617 return RISCV::VRM8RegClassID;
2618 }
2619}
2620
2621unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2622 RISCVVType::VLMUL LMUL = getLMUL(VT);
2623 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2624 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2625 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2626 "Unexpected subreg numbering");
2627 return RISCV::sub_vrm1_0 + Index;
2628 }
2629 if (LMUL == RISCVVType::LMUL_2) {
2630 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2631 "Unexpected subreg numbering");
2632 return RISCV::sub_vrm2_0 + Index;
2633 }
2634 if (LMUL == RISCVVType::LMUL_4) {
2635 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2636 "Unexpected subreg numbering");
2637 return RISCV::sub_vrm4_0 + Index;
2638 }
2639 llvm_unreachable("Invalid vector type.");
2640}
2641
2643 if (VT.isRISCVVectorTuple()) {
2644 unsigned NF = VT.getRISCVVectorTupleNumFields();
2645 unsigned RegsPerField =
2646 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2647 (NF * RISCV::RVVBitsPerBlock));
2648 switch (RegsPerField) {
2649 case 1:
2650 if (NF == 2)
2651 return RISCV::VRN2M1RegClassID;
2652 if (NF == 3)
2653 return RISCV::VRN3M1RegClassID;
2654 if (NF == 4)
2655 return RISCV::VRN4M1RegClassID;
2656 if (NF == 5)
2657 return RISCV::VRN5M1RegClassID;
2658 if (NF == 6)
2659 return RISCV::VRN6M1RegClassID;
2660 if (NF == 7)
2661 return RISCV::VRN7M1RegClassID;
2662 if (NF == 8)
2663 return RISCV::VRN8M1RegClassID;
2664 break;
2665 case 2:
2666 if (NF == 2)
2667 return RISCV::VRN2M2RegClassID;
2668 if (NF == 3)
2669 return RISCV::VRN3M2RegClassID;
2670 if (NF == 4)
2671 return RISCV::VRN4M2RegClassID;
2672 break;
2673 case 4:
2674 assert(NF == 2);
2675 return RISCV::VRN2M4RegClassID;
2676 default:
2677 break;
2678 }
2679 llvm_unreachable("Invalid vector tuple type RegClass.");
2680 }
2681
2682 if (VT.getVectorElementType() == MVT::i1)
2683 return RISCV::VRRegClassID;
2684 return getRegClassIDForLMUL(getLMUL(VT));
2685}
2686
2687// Attempt to decompose a subvector insert/extract between VecVT and
2688// SubVecVT via subregister indices. Returns the subregister index that
2689// can perform the subvector insert/extract with the given element index, as
2690// well as the index corresponding to any leftover subvectors that must be
2691// further inserted/extracted within the register class for SubVecVT.
2692std::pair<unsigned, unsigned>
2694 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2695 const RISCVRegisterInfo *TRI) {
2696 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2697 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2698 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2699 "Register classes not ordered");
2700 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2701 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2702
2703 // If VecVT is a vector tuple type, either it shares the same RegClass as
2704 // SubVecVT or SubVecVT is actually a subvector of VecVT.
2705 if (VecVT.isRISCVVectorTuple()) {
2706 if (VecRegClassID == SubRegClassID)
2707 return {RISCV::NoSubRegister, 0};
2708
2709 assert(SubVecVT.isScalableVector() &&
2710 "Only allow scalable vector subvector.");
2711 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2712 "Invalid vector tuple insert/extract for vector and subvector with "
2713 "different LMUL.");
2714 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2715 }
2716
2717 // Try to compose a subregister index that takes us from the incoming
2718 // LMUL>1 register class down to the outgoing one. At each step we halve
2719 // the LMUL:
2720 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2721 // Note that this is not guaranteed to find a subregister index, such as
2722 // when we are extracting from one VR type to another.
2723 unsigned SubRegIdx = RISCV::NoSubRegister;
2724 for (const unsigned RCID :
2725 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2726 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2727 VecVT = VecVT.getHalfNumVectorElementsVT();
2728 bool IsHi =
2729 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2730 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2731 getSubregIndexByMVT(VecVT, IsHi));
2732 if (IsHi)
2733 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2734 }
2735 return {SubRegIdx, InsertExtractIdx};
2736}
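// Step-by-step trace of the example above (illustrative): extracting
// nxv2i32 at element index 12 from nxv16i32 (VRM8 down to VR):
//   halve to nxv8i32: 12 >= 8 -> take sub_vrm4_1, remaining index 4
//   halve to nxv4i32:  4 >= 4 -> take sub_vrm2_1, remaining index 0
//   halve to nxv2i32:  0 <  2 -> take sub_vrm1_0, remaining index 0
// giving the composed index sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 and
// a leftover element index of 0.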
2737
2738// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2739// stores for those types.
2740bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2741 return !Subtarget.useRVVForFixedLengthVectors() ||
2742 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2743}
2744
2746 if (!ScalarTy.isSimple())
2747 return false;
2748 switch (ScalarTy.getSimpleVT().SimpleTy) {
2749 case MVT::iPTR:
2750 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2751 case MVT::i8:
2752 case MVT::i16:
2753 case MVT::i32:
2754 return true;
2755 case MVT::i64:
2756 return Subtarget.hasVInstructionsI64();
2757 case MVT::f16:
2758 return Subtarget.hasVInstructionsF16Minimal();
2759 case MVT::bf16:
2760 return Subtarget.hasVInstructionsBF16Minimal();
2761 case MVT::f32:
2762 return Subtarget.hasVInstructionsF32();
2763 case MVT::f64:
2764 return Subtarget.hasVInstructionsF64();
2765 default:
2766 return false;
2767 }
2768}
2769
2770
2771unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2772 return NumRepeatedDivisors;
2773}
2774
2776 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2777 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2778 "Unexpected opcode");
2779 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2780 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2782 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2783 if (!II)
2784 return SDValue();
2785 return Op.getOperand(II->VLOperand + 1 + HasChain);
2786}
2787
2789 const RISCVSubtarget &Subtarget) {
2790 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2791 if (!Subtarget.useRVVForFixedLengthVectors())
2792 return false;
2793
2794 // We only support a set of vector types with a consistent maximum fixed size
2795 // across all supported vector element types to avoid legalization issues.
2796 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2797 // fixed-length vector type we support is 1024 bytes.
2798 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
2799 return false;
2800
2801 unsigned MinVLen = Subtarget.getRealMinVLen();
2802
2803 MVT EltVT = VT.getVectorElementType();
2804
2805 // Don't use RVV for vectors we cannot scalarize if required.
2806 switch (EltVT.SimpleTy) {
2807 // i1 is supported but has different rules.
2808 default:
2809 return false;
2810 case MVT::i1:
2811 // Masks can only use a single register.
2812 if (VT.getVectorNumElements() > MinVLen)
2813 return false;
2814 MinVLen /= 8;
2815 break;
2816 case MVT::i8:
2817 case MVT::i16:
2818 case MVT::i32:
2819 break;
2820 case MVT::i64:
2821 if (!Subtarget.hasVInstructionsI64())
2822 return false;
2823 break;
2824 case MVT::f16:
2825 if (!Subtarget.hasVInstructionsF16Minimal())
2826 return false;
2827 break;
2828 case MVT::bf16:
2829 if (!Subtarget.hasVInstructionsBF16Minimal())
2830 return false;
2831 break;
2832 case MVT::f32:
2833 if (!Subtarget.hasVInstructionsF32())
2834 return false;
2835 break;
2836 case MVT::f64:
2837 if (!Subtarget.hasVInstructionsF64())
2838 return false;
2839 break;
2840 }
2841
2842 // Reject elements larger than ELEN.
2843 if (EltVT.getSizeInBits() > Subtarget.getELen())
2844 return false;
2845
2846 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2847 // Don't use RVV for types that don't fit.
2848 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2849 return false;
2850
2851 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2852 // the base fixed length RVV support in place.
2853 if (!VT.isPow2VectorType())
2854 return false;
2855
2856 return true;
2857}
2858
2859bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2860 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2861}
2862
2863// Return the largest legal scalable vector type that matches VT's element type.
2865 const RISCVSubtarget &Subtarget) {
2866 // This may be called before legal types are setup.
2867 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2868 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2869 "Expected legal fixed length vector!");
2870
2871 unsigned MinVLen = Subtarget.getRealMinVLen();
2872 unsigned MaxELen = Subtarget.getELen();
2873
2874 MVT EltVT = VT.getVectorElementType();
2875 switch (EltVT.SimpleTy) {
2876 default:
2877 llvm_unreachable("unexpected element type for RVV container");
2878 case MVT::i1:
2879 case MVT::i8:
2880 case MVT::i16:
2881 case MVT::i32:
2882 case MVT::i64:
2883 case MVT::bf16:
2884 case MVT::f16:
2885 case MVT::f32:
2886 case MVT::f64: {
2887 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2888 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2889 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2890 unsigned NumElts =
2892 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2893 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2894 return MVT::getScalableVectorVT(EltVT, NumElts);
2895 }
2896 }
2897}
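// Illustrative container choices (an assumption based on the LMUL rules
// described above, with RVVBitsPerBlock == 64, VLEN >= 128 and ELEN == 64):
//   v8i32 (256 bits) -> nxv4i32  (two registers at VLEN=128, i.e. LMUL=2)
//   v4i16 ( 64 bits) -> nxv2i16  (fractional LMUL=1/2)
//   v2i8  ( 16 bits) -> nxv1i8   (the smallest fractional LMUL, 8/ELEN=1/8)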
2898
2900 const RISCVSubtarget &Subtarget) {
2902 Subtarget);
2903}
2904
2906 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2907}
2908
2909// Grow V to consume an entire RVV register.
2911 const RISCVSubtarget &Subtarget) {
2912 assert(VT.isScalableVector() &&
2913 "Expected to convert into a scalable vector!");
2914 assert(V.getValueType().isFixedLengthVector() &&
2915 "Expected a fixed length vector operand!");
2916 SDLoc DL(V);
2917 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
2918}
2919
2920// Shrink V so it's just big enough to maintain a VT's worth of data.
2922 const RISCVSubtarget &Subtarget) {
2924 "Expected to convert into a fixed length vector!");
2925 assert(V.getValueType().isScalableVector() &&
2926 "Expected a scalable vector operand!");
2927 SDLoc DL(V);
2928 return DAG.getExtractSubvector(DL, VT, V, 0);
2929}
2930
2931 /// Return the mask type suitable for masking the provided vector type.
2932 /// This is simply an i1 element type vector of the same
2933/// (possibly scalable) length.
2934static MVT getMaskTypeFor(MVT VecVT) {
2935 assert(VecVT.isVector());
2937 return MVT::getVectorVT(MVT::i1, EC);
2938}
2939
2940 /// Creates an all-ones mask suitable for masking a vector of type VecVT with
2941 /// vector length VL.
2942static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2943 SelectionDAG &DAG) {
2944 MVT MaskVT = getMaskTypeFor(VecVT);
2945 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2946}
2947
2948static std::pair<SDValue, SDValue>
2950 const RISCVSubtarget &Subtarget) {
2951 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2952 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2953 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2954 return {Mask, VL};
2955}
2956
2957static std::pair<SDValue, SDValue>
2958getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2959 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2960 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2961 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2962 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2963 return {Mask, VL};
2964}
2965
2966// Gets the two common "VL" operands: an all-ones mask and the vector length.
2967 // VecVT is a vector type, either fixed-length or scalable. If VecVT is
2968 // fixed-length, ContainerVT is the scalable vector type that contains it;
2969 // otherwise ContainerVT should be the same as VecVT.
2970static std::pair<SDValue, SDValue>
2971getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2972 const RISCVSubtarget &Subtarget) {
2973 if (VecVT.isFixedLengthVector())
2974 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2975 Subtarget);
2976 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2977 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2978}
2979
2981 SelectionDAG &DAG) const {
2982 assert(VecVT.isScalableVector() && "Expected scalable vector");
2983 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2984 VecVT.getVectorElementCount());
2985}
2986
2987std::pair<unsigned, unsigned>
2989 const RISCVSubtarget &Subtarget) {
2990 assert(VecVT.isScalableVector() && "Expected scalable vector");
2991
2992 unsigned EltSize = VecVT.getScalarSizeInBits();
2993 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2994
2995 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2996 unsigned MaxVLMAX =
2997 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2998
2999 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
3000 unsigned MinVLMAX =
3001 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
3002
3003 return std::make_pair(MinVLMAX, MaxVLMAX);
3004}
3005
3006 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
3007 // cases of either are (currently) supported. This can get us into an infinite loop
3008// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
3009// as a ..., etc.
3010// Until either (or both) of these can reliably lower any node, reporting that
3011// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
3012// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
3013// which is not desirable.
3015 EVT VT, unsigned DefinedValues) const {
3016 return false;
3017}
3018
3020 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
3021 // implementation-defined.
3022 if (!VT.isVector())
3024 unsigned DLenFactor = Subtarget.getDLenFactor();
3025 unsigned Cost;
3026 if (VT.isScalableVector()) {
3027 unsigned LMul;
3028 bool Fractional;
3029 std::tie(LMul, Fractional) =
3031 if (Fractional)
3032 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
3033 else
3034 Cost = (LMul * DLenFactor);
3035 } else {
3036 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
3037 }
3038 return Cost;
3039}
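// Illustrative costs (assuming DLenFactor == 2, i.e. DLEN == VLEN/2, and a
// minimum VLEN of 128):
//   nxv8i32 (LMUL=4)   -> 4 * 2               = 8
//   nxv2i32 (LMUL=1)   -> 1 * 2               = 2
//   nxv1i32 (LMUL=1/2) -> DLenFactor / 2      = 1
//   v8i32   (256 bits) -> ceil(256 / (128/2)) = 4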
3040
3041
3042/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
3043 /// may be quadratic in the number of vregs implied by LMUL, and is assumed to
3044 /// be so by default. VRGatherCostModel reflects the available options. Note that
3045 /// the operands (index and possibly mask) are handled separately.
3047 auto LMULCost = getLMULCost(VT);
3048 bool Log2CostModel =
3050 if (Log2CostModel && LMULCost.isValid()) {
3051 unsigned Log = Log2_64(LMULCost.getValue());
3052 if (Log > 0)
3053 return LMULCost * Log;
3054 }
3055 return LMULCost * LMULCost;
3056}
3057
3058/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3059/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3060/// or may track the vrgather.vv cost. It is implementation-dependent.
3062 return getLMULCost(VT);
3063}
3064
3065/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3066/// for the type VT. (This does not cover the vslide1up or vslide1down
3067/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3068/// or may track the vrgather.vv cost. It is implementation-dependent.
3070 return getLMULCost(VT);
3071}
3072
3073/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3074/// for the type VT. (This does not cover the vslide1up or vslide1down
3075/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3076/// or may track the vrgather.vv cost. It is implementation-dependent.
3078 return getLMULCost(VT);
3079}
3080
3082 const RISCVSubtarget &Subtarget) {
3083 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3084 // bf16 conversions are always promoted to f32.
3085 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3086 Op.getValueType() == MVT::bf16) {
3087 bool IsStrict = Op->isStrictFPOpcode();
3088
3089 SDLoc DL(Op);
3090 if (IsStrict) {
3091 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
3092 {Op.getOperand(0), Op.getOperand(1)});
3093 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
3094 {Op.getValueType(), MVT::Other},
3095 {Val.getValue(1), Val.getValue(0),
3096 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
3097 }
3098 return DAG.getNode(
3099 ISD::FP_ROUND, DL, Op.getValueType(),
3100 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
3101 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3102 }
3103
3104 // Other operations are legal.
3105 return Op;
3106}
3107
3109 const RISCVSubtarget &Subtarget) {
3110 // RISC-V FP-to-int conversions saturate to the destination register size, but
3111 // don't produce 0 for nan. We can use a conversion instruction and fix the
3112 // nan case with a compare and a select.
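// For example (illustrative, not from the upstream source), a scalar
// fptosi.sat i32 from f32 typically ends up as something like
//   fcvt.w.s a0, fa0, rtz      ; saturating conversion
//   feq.s    a1, fa0, fa0      ; a1 == 0 only for NaN
//   neg      a1, a1
//   and      a0, a1, a0        ; force the NaN case to 0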
3113 SDValue Src = Op.getOperand(0);
3114
3115 MVT DstVT = Op.getSimpleValueType();
3116 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3117
3118 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3119
3120 if (!DstVT.isVector()) {
3121 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3122 // the result.
3123 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3124 Src.getValueType() == MVT::bf16) {
3125 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3126 }
3127
3128 unsigned Opc;
3129 if (SatVT == DstVT)
3130 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3131 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3132 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3133 else
3134 return SDValue();
3135 // FIXME: Support other SatVTs by clamping before or after the conversion.
3136
3137 SDLoc DL(Op);
3138 SDValue FpToInt = DAG.getNode(
3139 Opc, DL, DstVT, Src,
3141
3142 if (Opc == RISCVISD::FCVT_WU_RV64)
3143 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3144
3145 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3146 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3148 }
3149
3150 // Vectors.
3151
3152 MVT DstEltVT = DstVT.getVectorElementType();
3153 MVT SrcVT = Src.getSimpleValueType();
3154 MVT SrcEltVT = SrcVT.getVectorElementType();
3155 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3156 unsigned DstEltSize = DstEltVT.getSizeInBits();
3157
3158 // Only handle saturating to the destination type.
3159 if (SatVT != DstEltVT)
3160 return SDValue();
3161
3162 MVT DstContainerVT = DstVT;
3163 MVT SrcContainerVT = SrcVT;
3164 if (DstVT.isFixedLengthVector()) {
3165 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3166 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3167 assert(DstContainerVT.getVectorElementCount() ==
3168 SrcContainerVT.getVectorElementCount() &&
3169 "Expected same element count");
3170 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3171 }
3172
3173 SDLoc DL(Op);
3174
3175 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3176
3177 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3178 {Src, Src, DAG.getCondCode(ISD::SETNE),
3179 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3180
3181 // Need to widen by more than 1 step, promote the FP type, then do a widening
3182 // convert.
3183 if (DstEltSize > (2 * SrcEltSize)) {
3184 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3185 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3186 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3187 }
3188
3189 MVT CvtContainerVT = DstContainerVT;
3190 MVT CvtEltVT = DstEltVT;
3191 if (SrcEltSize > (2 * DstEltSize)) {
3192 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3193 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3194 }
3195
3196 unsigned RVVOpc =
3197 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3198 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3199
3200 while (CvtContainerVT != DstContainerVT) {
3201 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3202 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3203 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3204 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3205 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3206 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3207 }
3208
3209 SDValue SplatZero = DAG.getNode(
3210 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3211 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3212 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3213 Res, DAG.getUNDEF(DstContainerVT), VL);
3214
3215 if (DstVT.isFixedLengthVector())
3216 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3217
3218 return Res;
3219}
3220
3222 const RISCVSubtarget &Subtarget) {
3223 bool IsStrict = Op->isStrictFPOpcode();
3224 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3225
3226 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3227 // bf16 conversions are always promoted to f32.
3228 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3229 SrcVal.getValueType() == MVT::bf16) {
3230 SDLoc DL(Op);
3231 if (IsStrict) {
3232 SDValue Ext =
3233 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3234 {Op.getOperand(0), SrcVal});
3235 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3236 {Ext.getValue(1), Ext.getValue(0)});
3237 }
3238 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3239 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3240 }
3241
3242 // Other operations are legal.
3243 return Op;
3244}
3245
3247 switch (Opc) {
3248 case ISD::FROUNDEVEN:
3250 case ISD::VP_FROUNDEVEN:
3251 return RISCVFPRndMode::RNE;
3252 case ISD::FTRUNC:
3253 case ISD::STRICT_FTRUNC:
3254 case ISD::VP_FROUNDTOZERO:
3255 return RISCVFPRndMode::RTZ;
3256 case ISD::FFLOOR:
3257 case ISD::STRICT_FFLOOR:
3258 case ISD::VP_FFLOOR:
3259 return RISCVFPRndMode::RDN;
3260 case ISD::FCEIL:
3261 case ISD::STRICT_FCEIL:
3262 case ISD::VP_FCEIL:
3263 return RISCVFPRndMode::RUP;
3264 case ISD::FROUND:
3265 case ISD::LROUND:
3266 case ISD::LLROUND:
3267 case ISD::STRICT_FROUND:
3268 case ISD::STRICT_LROUND:
3270 case ISD::VP_FROUND:
3271 return RISCVFPRndMode::RMM;
3272 case ISD::FRINT:
3273 case ISD::LRINT:
3274 case ISD::LLRINT:
3275 case ISD::STRICT_FRINT:
3276 case ISD::STRICT_LRINT:
3277 case ISD::STRICT_LLRINT:
3278 case ISD::VP_FRINT:
3279 case ISD::VP_LRINT:
3280 case ISD::VP_LLRINT:
3281 return RISCVFPRndMode::DYN;
3282 }
3283
3285}
3286
3287 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
3288 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3289 // the integer domain and back, taking care to avoid converting values that are
3290 // NaN or already correct.
3291static SDValue
3293 const RISCVSubtarget &Subtarget) {
3294 MVT VT = Op.getSimpleValueType();
3295 assert(VT.isVector() && "Unexpected type");
3296
3297 SDLoc DL(Op);
3298
3299 SDValue Src = Op.getOperand(0);
3300
3301 // Freeze the source since we are increasing the number of uses.
3302 Src = DAG.getFreeze(Src);
3303
3304 MVT ContainerVT = VT;
3305 if (VT.isFixedLengthVector()) {
3306 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3307 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3308 }
3309
3310 SDValue Mask, VL;
3311 if (Op->isVPOpcode()) {
3312 Mask = Op.getOperand(1);
3313 if (VT.isFixedLengthVector())
3314 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3315 Subtarget);
3316 VL = Op.getOperand(2);
3317 } else {
3318 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3319 }
3320
3321 // We do the conversion on the absolute value and fix the sign at the end.
3322 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3323
3324 // Determine the largest integer that can be represented exactly. This and
3325 // values larger than it don't have any fractional bits so don't need to
3326 // be converted.
3327 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3328 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3329 APFloat MaxVal = APFloat(FltSem);
3330 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3331 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3332 SDValue MaxValNode =
3333 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3334 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3335 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3336
3337 // If abs(Src) was larger than MaxVal or nan, keep it.
3338 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3339 Mask =
3340 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3341 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3342 Mask, Mask, VL});
3343
3344 // Truncate to integer and convert back to FP.
3345 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3346 MVT XLenVT = Subtarget.getXLenVT();
3347 SDValue Truncated;
3348
3349 switch (Op.getOpcode()) {
3350 default:
3351 llvm_unreachable("Unexpected opcode");
3352 case ISD::FRINT:
3353 case ISD::VP_FRINT:
3354 case ISD::FCEIL:
3355 case ISD::VP_FCEIL:
3356 case ISD::FFLOOR:
3357 case ISD::VP_FFLOOR:
3358 case ISD::FROUND:
3359 case ISD::FROUNDEVEN:
3360 case ISD::VP_FROUND:
3361 case ISD::VP_FROUNDEVEN:
3362 case ISD::VP_FROUNDTOZERO: {
3365 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3366 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3367 break;
3368 }
3369 case ISD::FTRUNC:
3370 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3371 Mask, VL);
3372 break;
3373 case ISD::FNEARBYINT:
3374 case ISD::VP_FNEARBYINT:
3375 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3376 Mask, VL);
3377 break;
3378 }
3379
3380 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3381 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3382 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3383 Mask, VL);
3384
3385 // Restore the original sign so that -0.0 is preserved.
3386 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3387 Src, Src, Mask, VL);
3388
3389 if (!VT.isFixedLengthVector())
3390 return Truncated;
3391
3392 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3393}
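// High-level sketch of the lowering above for a vector floor on f32
// (illustrative, not from the upstream source):
//   abs   = |src|
//   small = abs <olt> 2^23         ; lanes that still have fraction bits;
//                                  ; SETOLT is false for NaN lanes
//   i     = vfcvt.x.f.v src (RDN)  ; only the "small" lanes are converted
//   r     = vfcvt.f.x.v i          ; back to FP, other lanes keep src
//   r     = copysign(r, src)       ; so that -0.0 survives the round trip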
3394
3395 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3396 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting any sNaN in the source
3397 // to qNaN and converting the new source to integer and back to FP.
3398static SDValue
3400 const RISCVSubtarget &Subtarget) {
3401 SDLoc DL(Op);
3402 MVT VT = Op.getSimpleValueType();
3403 SDValue Chain = Op.getOperand(0);
3404 SDValue Src = Op.getOperand(1);
3405
3406 MVT ContainerVT = VT;
3407 if (VT.isFixedLengthVector()) {
3408 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3409 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3410 }
3411
3412 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3413
3414 // Freeze the source since we are increasing the number of uses.
3415 Src = DAG.getFreeze(Src);
3416
3417 // Convert sNan to qNan by executing x + x for all unordered element x in Src.
3418 MVT MaskVT = Mask.getSimpleValueType();
3419 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3420 DAG.getVTList(MaskVT, MVT::Other),
3421 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3422 DAG.getUNDEF(MaskVT), Mask, VL});
3423 Chain = Unorder.getValue(1);
3424 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3425 DAG.getVTList(ContainerVT, MVT::Other),
3426 {Chain, Src, Src, Src, Unorder, VL});
3427 Chain = Src.getValue(1);
3428
3429 // We do the conversion on the absolute value and fix the sign at the end.
3430 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3431
3432 // Determine the largest integer that can be represented exactly. This and
3433 // values larger than it don't have any fractional bits so don't need to
3434 // be converted.
3435 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3436 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3437 APFloat MaxVal = APFloat(FltSem);
3438 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3439 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3440 SDValue MaxValNode =
3441 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3442 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3443 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3444
3445 // If abs(Src) was larger than MaxVal or nan, keep it.
3446 Mask = DAG.getNode(
3447 RISCVISD::SETCC_VL, DL, MaskVT,
3448 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3449
3450 // Truncate to integer and convert back to FP.
3451 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3452 MVT XLenVT = Subtarget.getXLenVT();
3453 SDValue Truncated;
3454
3455 switch (Op.getOpcode()) {
3456 default:
3457 llvm_unreachable("Unexpected opcode");
3458 case ISD::STRICT_FCEIL:
3459 case ISD::STRICT_FFLOOR:
3460 case ISD::STRICT_FROUND:
3464 Truncated = DAG.getNode(
3465 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3466 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3467 break;
3468 }
3469 case ISD::STRICT_FTRUNC:
3470 Truncated =
3471 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3472 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3473 break;
3475 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3476 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3477 Mask, VL);
3478 break;
3479 }
3480 Chain = Truncated.getValue(1);
3481
3482 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3483 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3484 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3485 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3486 Truncated, Mask, VL);
3487 Chain = Truncated.getValue(1);
3488 }
3489
3490 // Restore the original sign so that -0.0 is preserved.
3491 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3492 Src, Src, Mask, VL);
3493
3494 if (VT.isFixedLengthVector())
3495 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3496 return DAG.getMergeValues({Truncated, Chain}, DL);
3497}
3498
3499static SDValue
3501 const RISCVSubtarget &Subtarget) {
3502 MVT VT = Op.getSimpleValueType();
3503 if (VT.isVector())
3504 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3505
3506 if (DAG.shouldOptForSize())
3507 return SDValue();
3508
3509 SDLoc DL(Op);
3510 SDValue Src = Op.getOperand(0);
3511
3512 // Create an integer the size of the mantissa with the MSB set. This and all
3513 // values larger than it don't have any fractional bits so don't need to be
3514 // converted.
3515 const fltSemantics &FltSem = VT.getFltSemantics();
3516 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3517 APFloat MaxVal = APFloat(FltSem);
3518 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3519 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3520 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3521
3523 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3524 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3525}
3526
3527// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3529 const RISCVSubtarget &Subtarget) {
3530 SDLoc DL(Op);
3531 MVT DstVT = Op.getSimpleValueType();
3532 SDValue Src = Op.getOperand(0);
3533 MVT SrcVT = Src.getSimpleValueType();
3534 assert(SrcVT.isVector() && DstVT.isVector() &&
3535 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3536 "Unexpected type");
3537
3538 MVT DstContainerVT = DstVT;
3539 MVT SrcContainerVT = SrcVT;
3540
3541 if (DstVT.isFixedLengthVector()) {
3542 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3543 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3544 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3545 }
3546
3547 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3548
3549 // [b]f16 -> f32
3550 MVT SrcElemType = SrcVT.getVectorElementType();
3551 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3552 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3553 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3554 }
3555
3556 SDValue Res =
3557 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3558 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3559 Subtarget.getXLenVT()),
3560 VL);
3561
3562 if (!DstVT.isFixedLengthVector())
3563 return Res;
3564
3565 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3566}
3567
3568static SDValue
3570 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3571 SDValue Offset, SDValue Mask, SDValue VL,
3573 if (Passthru.isUndef())
3575 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3576 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3577 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3578}
3579
3580static SDValue
3581getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3582 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3583 SDValue VL,
3585 if (Passthru.isUndef())
3587 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3588 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3589 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3590}
3591
3595 int64_t Addend;
3596};
3597
3598static std::optional<APInt> getExactInteger(const APFloat &APF,
3600 // We will use a SINT_TO_FP to materialize this constant so we should use a
3601 // signed APSInt here.
3602 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3603 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3604 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3605 // the rounding mode changes the output value, then it is not an exact
3606 // integer.
3608 bool IsExact;
3609 // If it is out of signed integer range, it will return an invalid operation.
3610 // If it is not an exact integer, IsExact is false.
3611 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3613 !IsExact)
3614 return std::nullopt;
3615 return ValInt.extractBits(BitWidth, 0);
3616}
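// Examples (illustrative): getExactInteger(3.0, 32) yields APInt(32, 3) and
// getExactInteger(-1.0, 16) yields an all-ones 16-bit APInt, while 2.5
// (inexact) and 1e30 (outside the signed range) both yield std::nullopt.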
3617
3618// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3619// to the (non-zero) step S and start value X. This can be then lowered as the
3620// RVV sequence (VID * S) + X, for example.
3621// The step S is represented as an integer numerator divided by a positive
3622// denominator. Note that the implementation currently only identifies
3623// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3624// cannot detect 2/3, for example.
3625// Note that this method will also match potentially unappealing index
3626 // sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3627// determine whether this is worth generating code for.
3628//
3629// EltSizeInBits is the size of the type that the sequence will be calculated
3630// in, i.e. SEW for build_vectors or XLEN for address calculations.
3631static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3632 unsigned EltSizeInBits) {
3633 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3634 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3635 return std::nullopt;
3636 bool IsInteger = Op.getValueType().isInteger();
3637
3638 std::optional<unsigned> SeqStepDenom;
3639 std::optional<APInt> SeqStepNum;
3640 std::optional<APInt> SeqAddend;
3641 std::optional<std::pair<APInt, unsigned>> PrevElt;
3642 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3643
3644 // First extract the ops into a list of constant integer values. This may not
3645 // be possible for floats if they're not all representable as integers.
3647 const unsigned OpSize = Op.getScalarValueSizeInBits();
3648 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3649 if (Elt.isUndef()) {
3650 Elts[Idx] = std::nullopt;
3651 continue;
3652 }
3653 if (IsInteger) {
3654 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3655 } else {
3656 auto ExactInteger =
3657 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3658 if (!ExactInteger)
3659 return std::nullopt;
3660 Elts[Idx] = *ExactInteger;
3661 }
3662 }
3663
3664 for (auto [Idx, Elt] : enumerate(Elts)) {
3665 // Assume undef elements match the sequence; we just have to be careful
3666 // when interpolating across them.
3667 if (!Elt)
3668 continue;
3669
3670 if (PrevElt) {
3671 // Calculate the step since the last non-undef element, and ensure
3672 // it's consistent across the entire sequence.
3673 unsigned IdxDiff = Idx - PrevElt->second;
3674 APInt ValDiff = *Elt - PrevElt->first;
3675
3676 // A zero value difference means that we're somewhere in the middle
3677 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3678 // step change before evaluating the sequence.
3679 if (ValDiff == 0)
3680 continue;
3681
3682 int64_t Remainder = ValDiff.srem(IdxDiff);
3683 // Normalize the step if it's greater than 1.
3684 if (Remainder != ValDiff.getSExtValue()) {
3685 // The difference must cleanly divide the element span.
3686 if (Remainder != 0)
3687 return std::nullopt;
3688 ValDiff = ValDiff.sdiv(IdxDiff);
3689 IdxDiff = 1;
3690 }
3691
3692 if (!SeqStepNum)
3693 SeqStepNum = ValDiff;
3694 else if (ValDiff != SeqStepNum)
3695 return std::nullopt;
3696
3697 if (!SeqStepDenom)
3698 SeqStepDenom = IdxDiff;
3699 else if (IdxDiff != *SeqStepDenom)
3700 return std::nullopt;
3701 }
3702
3703 // Record this non-undef element for later.
3704 if (!PrevElt || PrevElt->first != *Elt)
3705 PrevElt = std::make_pair(*Elt, Idx);
3706 }
3707
3708 // We need to have logged a step for this to count as a legal index sequence.
3709 if (!SeqStepNum || !SeqStepDenom)
3710 return std::nullopt;
3711
3712 // Loop back through the sequence and validate elements we might have skipped
3713 // while waiting for a valid step. While doing this, log any sequence addend.
3714 for (auto [Idx, Elt] : enumerate(Elts)) {
3715 if (!Elt)
3716 continue;
3717 APInt ExpectedVal =
3718 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3719 *SeqStepNum)
3720 .sdiv(*SeqStepDenom);
3721
3722 APInt Addend = *Elt - ExpectedVal;
3723 if (!SeqAddend)
3724 SeqAddend = Addend;
3725 else if (Addend != SeqAddend)
3726 return std::nullopt;
3727 }
3728
3729 assert(SeqAddend && "Must have an addend if we have a step");
3730
3731 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3732 SeqAddend->getSExtValue()};
3733}
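// Examples (illustrative): the constant vector <1, 3, 5, 7> is recognised
// as StepNumerator = 2, StepDenominator = 1, Addend = 1, and <0, 0, 1, 1>
// as StepNumerator = 1, StepDenominator = 2, Addend = 0. A vector such as
// <0, 1, 3, 4> has no consistent step and yields std::nullopt.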
3734
3735// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3736// and lower it as a VRGATHER_VX_VL from the source vector.
3737static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3738 SelectionDAG &DAG,
3739 const RISCVSubtarget &Subtarget) {
3740 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3741 return SDValue();
3742 SDValue Src = SplatVal.getOperand(0);
3743 // Don't perform this optimization for i1 vectors, or if the element types are
3744 // different
3745 // FIXME: Support i1 vectors, maybe by promoting to i8?
3746 MVT EltTy = VT.getVectorElementType();
3747 MVT SrcVT = Src.getSimpleValueType();
3748 if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType() ||
3749 !DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
3750 return SDValue();
3751 SDValue Idx = SplatVal.getOperand(1);
3752 // The index must be a legal type.
3753 if (Idx.getValueType() != Subtarget.getXLenVT())
3754 return SDValue();
3755
3756 // Check that we know Idx lies within VT
3757 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3758 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3759 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3760 return SDValue();
3761 }
3762
3763 // Convert fixed length vectors to scalable
3764 MVT ContainerVT = VT;
3765 if (VT.isFixedLengthVector())
3766 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3767
3768 MVT SrcContainerVT = SrcVT;
3769 if (SrcVT.isFixedLengthVector()) {
3770 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3771 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3772 }
3773
3774 // Put Src in a VT-sized vector
3775 if (SrcContainerVT.getVectorMinNumElements() <
3776 ContainerVT.getVectorMinNumElements())
3777 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
3778 else
3779 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
3780
3781 // We checked that Idx fits inside VT earlier
3782 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3783 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3784 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3785 if (VT.isFixedLengthVector())
3786 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3787 return Gather;
3788}
3789
3791 const RISCVSubtarget &Subtarget) {
3792 MVT VT = Op.getSimpleValueType();
3793 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3794
3795 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3796
3797 SDLoc DL(Op);
3798 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3799
3800 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3801 int64_t StepNumerator = SimpleVID->StepNumerator;
3802 unsigned StepDenominator = SimpleVID->StepDenominator;
3803 int64_t Addend = SimpleVID->Addend;
3804
3805 assert(StepNumerator != 0 && "Invalid step");
3806 bool Negate = false;
3807 int64_t SplatStepVal = StepNumerator;
3808 unsigned StepOpcode = ISD::MUL;
3809 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3810 // anyway as the shift of 63 won't fit in uimm5.
3811 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3812 isPowerOf2_64(std::abs(StepNumerator))) {
3813 Negate = StepNumerator < 0;
3814 StepOpcode = ISD::SHL;
3815 SplatStepVal = Log2_64(std::abs(StepNumerator));
3816 }
3817
3818 // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
3819 // since it's the immediate value many RVV instructions accept. There is
3820 // no vmul.vi instruction, so ensure the multiply constant can fit in a
3821 // single addi instruction. For the addend, we allow up to 32 bits.
3822 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3823 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3824 isPowerOf2_32(StepDenominator) &&
3825 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
3826 MVT VIDVT =
3827 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3828 MVT VIDContainerVT =
3829 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3830 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3831 // Convert right out of the scalable type so we can use standard ISD
3832 // nodes for the rest of the computation. If we used scalable types with
3833 // these, we'd lose the fixed-length vector info and generate worse
3834 // vsetvli code.
3835 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3836 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3837 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3838 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3839 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3840 }
3841 if (StepDenominator != 1) {
3842 SDValue SplatStep =
3843 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3844 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3845 }
3846 if (Addend != 0 || Negate) {
3847 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3848 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3849 VID);
3850 }
3851 if (VT.isFloatingPoint()) {
3852 // TODO: Use vfwcvt to reduce register pressure.
3853 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3854 }
3855 return VID;
3856 }
3857 }
3858
3859 return SDValue();
3860}
3861
3862/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3863/// which constitute a large proportion of the elements. In such cases we can
3864/// splat a vector with the dominant element and make up the shortfall with
3865 /// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3866/// Note that this includes vectors of 2 elements by association. The
3867/// upper-most element is the "dominant" one, allowing us to use a splat to
3868/// "insert" the upper element, and an insert of the lower element at position
3869/// 0, which improves codegen.
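/// For example, <x, y, x, x, x, x, x, x> is lowered as a splat of x plus a
/// single insert (or vslide1down) of y.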
3870 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3871 const RISCVSubtarget &Subtarget) {
3872 MVT VT = Op.getSimpleValueType();
3873 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3874
3875 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3876
3877 SDLoc DL(Op);
3878 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3879
3880 MVT XLenVT = Subtarget.getXLenVT();
3881 unsigned NumElts = Op.getNumOperands();
3882
3883 SDValue DominantValue;
3884 unsigned MostCommonCount = 0;
3885 DenseMap<SDValue, unsigned> ValueCounts;
3886 unsigned NumUndefElts =
3887 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3888
3889 // Track the number of scalar loads we know we'd be inserting, estimated as
3890 // any non-zero floating-point constant. Other kinds of element are either
3891 // already in registers or are materialized on demand. The threshold at which
3892 // a vector load is more desirable than several scalar materialization and
3893 // vector-insertion instructions is not known.
3894 unsigned NumScalarLoads = 0;
3895
3896 for (SDValue V : Op->op_values()) {
3897 if (V.isUndef())
3898 continue;
3899
3900 unsigned &Count = ValueCounts[V];
3901 if (0 == Count)
3902 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3903 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3904
3905 // Is this value dominant? In case of a tie, prefer the highest element as
3906 // it's cheaper to insert near the beginning of a vector than it is at the
3907 // end.
3908 if (++Count >= MostCommonCount) {
3909 DominantValue = V;
3910 MostCommonCount = Count;
3911 }
3912 }
3913
3914 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3915 unsigned NumDefElts = NumElts - NumUndefElts;
3916 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3917
3918 // Don't perform this optimization when optimizing for size, since
3919 // materializing elements and inserting them tends to cause code bloat.
3920 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3921 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3922 ((MostCommonCount > DominantValueCountThreshold) ||
3923 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3924 // Start by splatting the most common element.
3925 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3926
3927 DenseSet<SDValue> Processed{DominantValue};
3928
3929 // We can handle an insert into the last element (of a splat) via
3930 // v(f)slide1down. This is slightly better than the vslideup insert
3931 // lowering as it avoids the need for a vector group temporary. It
3932 // is also better than using vmerge.vx as it avoids the need to
3933 // materialize the mask in a vector register.
3934 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3935 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3936 LastOp != DominantValue) {
3937 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3938 auto OpCode =
3939 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3940 if (!VT.isFloatingPoint())
3941 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3942 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3943 LastOp, Mask, VL);
3944 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3945 Processed.insert(LastOp);
3946 }
3947
3948 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3949 for (const auto &OpIdx : enumerate(Op->ops())) {
3950 const SDValue &V = OpIdx.value();
3951 if (V.isUndef() || !Processed.insert(V).second)
3952 continue;
3953 if (ValueCounts[V] == 1) {
3954 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
3955 } else {
3956 // Blend in all instances of this value using a VSELECT, using a
3957 // mask where each bit signals whether that element is the one
3958 // we're after.
3959 SmallVector<SDValue> Ops;
3960 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3961 return DAG.getConstant(V == V1, DL, XLenVT);
3962 });
3963 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3964 DAG.getBuildVector(SelMaskTy, DL, Ops),
3965 DAG.getSplatBuildVector(VT, DL, V), Vec);
3966 }
3967 }
3968
3969 return Vec;
3970 }
3971
3972 return SDValue();
3973}
3974
3975 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3976 const RISCVSubtarget &Subtarget) {
3977 MVT VT = Op.getSimpleValueType();
3978 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3979
3980 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3981
3982 SDLoc DL(Op);
3983 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3984
3985 MVT XLenVT = Subtarget.getXLenVT();
3986 unsigned NumElts = Op.getNumOperands();
3987
3988 if (VT.getVectorElementType() == MVT::i1) {
3989 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3990 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3991 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3992 }
3993
3994 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3995 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3996 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3997 }
3998
3999 // Lower constant mask BUILD_VECTORs via an integer vector type, in
4000 // scalar integer chunks whose bit-width depends on the number of mask
4001 // bits and XLEN.
4002 // First, determine the most appropriate scalar integer type to use. This
4003 // is at most XLenVT, but may be shrunk to a smaller vector element type
4004 // according to the size of the final vector - use i8 chunks rather than
4005 // XLenVT if we're producing a v8i1. This results in more consistent
4006 // codegen across RV32 and RV64.
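// For example, with XLEN=ELEN=64 a constant v128i1 mask is built as a
// v2i64 integer vector and bitcast back, while a constant v16i1 mask is
// built from a single i16 element (v1i16).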
4007 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4008 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4009 // If we have to use more than one INSERT_VECTOR_ELT then this
4010 // optimization is likely to increase code size; avoid performing it in
4011 // such a case. We can use a load from a constant pool in this case.
4012 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4013 return SDValue();
4014 // Now we can create our integer vector type. Note that it may be larger
4015 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4016 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4017 MVT IntegerViaVecVT =
4018 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4019 IntegerViaVecElts);
4020
4021 uint64_t Bits = 0;
4022 unsigned BitPos = 0, IntegerEltIdx = 0;
4023 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4024
4025 for (unsigned I = 0; I < NumElts;) {
4026 SDValue V = Op.getOperand(I);
4027 bool BitValue = !V.isUndef() && V->getAsZExtVal();
4028 Bits |= ((uint64_t)BitValue << BitPos);
4029 ++BitPos;
4030 ++I;
4031
4032 // Once we accumulate enough bits to fill our scalar type or process the
4033 // last element, insert into our vector and clear our accumulated data.
4034 if (I % NumViaIntegerBits == 0 || I == NumElts) {
4035 if (NumViaIntegerBits <= 32)
4036 Bits = SignExtend64<32>(Bits);
4037 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4038 Elts[IntegerEltIdx] = Elt;
4039 Bits = 0;
4040 BitPos = 0;
4041 IntegerEltIdx++;
4042 }
4043 }
4044
4045 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4046
4047 if (NumElts < NumViaIntegerBits) {
4048 // If we're producing a smaller vector than our minimum legal integer
4049 // type, bitcast to the equivalent (known-legal) mask type, and extract
4050 // our final mask.
4051 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4052 Vec = DAG.getBitcast(MVT::v8i1, Vec);
4053 Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4054 } else {
4055 // Else we must have produced an integer type with the same size as the
4056 // mask type; bitcast for the final result.
4057 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4058 Vec = DAG.getBitcast(VT, Vec);
4059 }
4060
4061 return Vec;
4062 }
4063
4064 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4065 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4066 : RISCVISD::VMV_V_X_VL;
4067 if (!VT.isFloatingPoint())
4068 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4069 Splat =
4070 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4071 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4072 }
4073
4074 // Try and match index sequences, which we can lower to the vid instruction
4075 // with optional modifications. An all-undef vector is matched by
4076 // getSplatValue, above.
4077 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4078 return Res;
4079
4080 // For very small build_vectors, use a single scalar insert of a constant.
4081 // TODO: Base this on constant rematerialization cost, not size.
4082 const unsigned EltBitSize = VT.getScalarSizeInBits();
4083 if (VT.getSizeInBits() <= 32 &&
4084 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
4085 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4086 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4087 "Unexpected sequence type");
4088 // If we can use the original VL with the modified element type, this
4089 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4090 // be moved into InsertVSETVLI?
4091 unsigned ViaVecLen =
4092 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4093 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4094
4095 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4096 uint64_t SplatValue = 0;
4097 // Construct the amalgamated value at this larger vector type.
4098 for (const auto &OpIdx : enumerate(Op->op_values())) {
4099 const auto &SeqV = OpIdx.value();
4100 if (!SeqV.isUndef())
4101 SplatValue |=
4102 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4103 }
4104
4105 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4106 // achieve better constant materialization.
4107 // On RV32, we need to sign-extend to use getSignedConstant.
4108 if (ViaIntVT == MVT::i32)
4109 SplatValue = SignExtend64<32>(SplatValue);
4110
4111 SDValue Vec = DAG.getInsertVectorElt(
4112 DL, DAG.getUNDEF(ViaVecVT),
4113 DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4114 if (ViaVecLen != 1)
4115 Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4116 return DAG.getBitcast(VT, Vec);
4117 }
4118
4119
4120 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4121 // when re-interpreted as a vector with a larger element type. For example,
4122 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4123 // could be instead splat as
4124 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4125 // TODO: This optimization could also work on non-constant splats, but it
4126 // would require bit-manipulation instructions to construct the splat value.
4127 SmallVector<SDValue> Sequence;
4128 const auto *BV = cast<BuildVectorSDNode>(Op);
4129 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4130 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
4131 BV->getRepeatedSequence(Sequence) &&
4132 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4133 unsigned SeqLen = Sequence.size();
4134 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4135 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4136 ViaIntVT == MVT::i64) &&
4137 "Unexpected sequence type");
4138
4139 // If we can use the original VL with the modified element type, this
4140 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4141 // be moved into InsertVSETVLI?
4142 const unsigned RequiredVL = NumElts / SeqLen;
4143 const unsigned ViaVecLen =
4144 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4145 NumElts : RequiredVL;
4146 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4147
4148 unsigned EltIdx = 0;
4149 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4150 uint64_t SplatValue = 0;
4151 // Construct the amalgamated value which can be splatted as this larger
4152 // vector type.
4153 for (const auto &SeqV : Sequence) {
4154 if (!SeqV.isUndef())
4155 SplatValue |=
4156 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4157 EltIdx++;
4158 }
4159
4160 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4161 // achieve better constant materialization.
4162 // On RV32, we need to sign-extend to use getSignedConstant.
4163 if (ViaIntVT == MVT::i32)
4164 SplatValue = SignExtend64<32>(SplatValue);
4165
4166 // Since we can't introduce illegal i64 types at this stage, we can only
4167 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4168 // way we can use RVV instructions to splat.
4169 assert((ViaIntVT.bitsLE(XLenVT) ||
4170 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4171 "Unexpected bitcast sequence");
4172 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4173 SDValue ViaVL =
4174 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4175 MVT ViaContainerVT =
4176 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4177 SDValue Splat =
4178 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4179 DAG.getUNDEF(ViaContainerVT),
4180 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4181 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4182 if (ViaVecLen != RequiredVL)
4183 Splat = DAG.getExtractSubvector(
4184 DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4185 return DAG.getBitcast(VT, Splat);
4186 }
4187 }
4188
4189 // If the number of signbits allows, see if we can lower as a <N x i8>.
4190 // Our main goal here is to reduce LMUL (and thus work) required to
4191 // build the constant, but we will also narrow if the resulting
4192 // narrow vector is known to materialize cheaply.
4193 // TODO: We really should be costing the smaller vector. There are
4194 // profitable cases this misses.
4195 if (EltBitSize > 8 && VT.isInteger() &&
4196 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4197 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4198 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4199 DL, Op->ops());
4200 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4201 Source, DAG, Subtarget);
4202 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4203 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4204 }
4205
4206 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4207 return Res;
4208
4209 // For constant vectors, use generic constant pool lowering. Otherwise,
4210 // we'd have to materialize constants in GPRs just to move them into the
4211 // vector.
4212 return SDValue();
4213}
4214
4215static unsigned getPACKOpcode(unsigned DestBW,
4216 const RISCVSubtarget &Subtarget) {
4217 switch (DestBW) {
4218 default:
4219 llvm_unreachable("Unsupported pack size");
4220 case 16:
4221 return RISCV::PACKH;
4222 case 32:
4223 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4224 case 64:
4225 assert(Subtarget.is64Bit());
4226 return RISCV::PACK;
4227 }
4228}
4229
4230/// Double the element size of the build vector to reduce the number
4231/// of vslide1down in the build vector chain. In the worst case, this
4232/// trades three scalar operations for 1 vector operation. Scalar
4233/// operations are generally lower latency, and for out-of-order cores
4234/// we also benefit from additional parallelism.
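/// For example, a v8i16 build_vector becomes a v4i32 build_vector whose
/// elements are pack(e0,e1), pack(e2,e3), ..., halving the number of
/// vslide1down steps required.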
4235 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4236 const RISCVSubtarget &Subtarget) {
4237 SDLoc DL(Op);
4238 MVT VT = Op.getSimpleValueType();
4239 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4240 MVT ElemVT = VT.getVectorElementType();
4241 if (!ElemVT.isInteger())
4242 return SDValue();
4243
4244 // TODO: Relax these architectural restrictions, possibly with costing
4245 // of the actual instructions required.
4246 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4247 return SDValue();
4248
4249 unsigned NumElts = VT.getVectorNumElements();
4250 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4251 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4252 NumElts % 2 != 0)
4253 return SDValue();
4254
4255 // Produce [B,A] packed into a type twice as wide. Note that all
4256 // scalars are XLenVT, possibly masked (see below).
4257 MVT XLenVT = Subtarget.getXLenVT();
4258 SDValue Mask = DAG.getConstant(
4259 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4260 auto pack = [&](SDValue A, SDValue B) {
4261 // Bias the scheduling of the inserted operations to near the
4262 // definition of the element - this tends to reduce register
4263 // pressure overall.
4264 SDLoc ElemDL(B);
4265 if (Subtarget.hasStdExtZbkb())
4266 // Note that we're relying on the high bits of the result being
4267 // don't care. For PACKW, the result is *sign* extended.
4268 return SDValue(
4269 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4270 ElemDL, XLenVT, A, B),
4271 0);
4272
4273 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4274 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4275 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4276 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4277 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4278 SDNodeFlags::Disjoint);
4279 };
4280
4281 SmallVector<SDValue> NewOperands;
4282 NewOperands.reserve(NumElts / 2);
4283 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4284 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4285 assert(NumElts == NewOperands.size() * 2);
4286 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4287 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4288 return DAG.getNode(ISD::BITCAST, DL, VT,
4289 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4290}
4291
4292 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4293 const RISCVSubtarget &Subtarget) {
4294 MVT VT = Op.getSimpleValueType();
4295 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4296
4297 MVT EltVT = VT.getVectorElementType();
4298 MVT XLenVT = Subtarget.getXLenVT();
4299
4300 SDLoc DL(Op);
4301
4302 // Proper support for f16 requires Zvfh. bf16 always requires special
4303 // handling. We need to cast the scalar to integer and create an integer
4304 // build_vector.
4305 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4306 MVT IVT = VT.changeVectorElementType(MVT::i16);
4307 SmallVector<SDValue> NewOps(Op.getNumOperands());
4308 for (const auto &[I, U] : enumerate(Op->ops())) {
4309 SDValue Elem = U.get();
4310 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4311 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4312 // Called by LegalizeDAG, we need to use XLenVT operations since we
4313 // can't create illegal types.
4314 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4315 // Manually constant fold so the integer build_vector can be lowered
4316 // better. Waiting for DAGCombine will be too late.
4317 APInt V =
4318 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4319 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4320 } else {
4321 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4322 }
4323 } else {
4324 // Called by scalar type legalizer, we can use i16.
4325 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4326 }
4327 }
4328 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4329 return DAG.getBitcast(VT, Res);
4330 }
4331
4332 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4333 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4334 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4335
4336 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4337
4338 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4339
4340 if (VT.getVectorElementType() == MVT::i1) {
4341 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4342 // vector type, we have a legal equivalently-sized i8 type, so we can use
4343 // that.
4344 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4345 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4346
4347 SDValue WideVec;
4348 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4349 // For a splat, perform a scalar truncate before creating the wider
4350 // vector.
4351 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4352 DAG.getConstant(1, DL, Splat.getValueType()));
4353 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4354 } else {
4355 SmallVector<SDValue, 8> Ops(Op->op_values());
4356 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4357 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4358 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4359 }
4360
4361 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4362 }
4363
4364 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4365 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4366 return Gather;
4367
4368 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4369 // pressure at high LMUL.
4370 if (all_of(Op->ops().drop_front(),
4371 [](const SDUse &U) { return U.get().isUndef(); })) {
4372 unsigned Opc =
4373 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4374 if (!VT.isFloatingPoint())
4375 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4376 Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4377 Splat, VL);
4378 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4379 }
4380
4381 unsigned Opc =
4382 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4383 if (!VT.isFloatingPoint())
4384 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4385 Splat =
4386 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4387 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4388 }
4389
4390 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4391 return Res;
4392
4393 // If we're compiling for an exact VLEN value, we can split our work per
4394 // register in the register group.
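// For example, with VLEN=128 a v8i64 build_vector (an m4 value) is built as
// four v2i64 (m1) build_vectors, each inserted into its own register of the
// group.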
4395 if (const auto VLen = Subtarget.getRealVLen();
4396 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4397 MVT ElemVT = VT.getVectorElementType();
4398 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4399 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4400 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4401 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4402 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4403
4404 // The following semantically builds up a fixed length concat_vector
4405 // of the component build_vectors. We eagerly lower to scalable and
4406 // insert_subvector here to avoid DAG combining it back to a large
4407 // build_vector.
4408 SmallVector<SDValue> BuildVectorOps(Op->ops());
4409 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4410 SDValue Vec = DAG.getUNDEF(ContainerVT);
4411 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4412 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4413 SDValue SubBV =
4414 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4415 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4416 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4417 Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4418 }
4419 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4420 }
4421
4422 // If we're about to resort to vslide1down (or stack usage), pack our
4423 // elements into the widest scalar type we can. This will force a VL/VTYPE
4424 // toggle, but reduces the critical path, the number of vslide1down ops
4425 // required, and possibly enables scalar folds of the values.
4426 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4427 return Res;
4428
4429 // For m1 vectors, if we have non-undef values in both halves of our vector,
4430 // split the vector into low and high halves, build them separately, then
4431 // use a vselect to combine them. For long vectors, this cuts the critical
4432 // path of the vslide1down sequence in half, and gives us an opportunity
4433 // to special case each half independently. Note that we don't change the
4434 // length of the sub-vectors here, so if both fallback to the generic
4435 // vslide1down path, we should be able to fold the vselect into the final
4436 // vslidedown (for the undef tail) for the first half w/ masking.
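// For example, v8i32 {a,b,c,d,e,f,g,h} becomes
// vselect(<1,1,1,1,0,0,0,0>, build_vector{a,b,c,d,u,u,u,u},
// build_vector{u,u,u,u,e,f,g,h}) where u is undef.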
4437 unsigned NumElts = VT.getVectorNumElements();
4438 unsigned NumUndefElts =
4439 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4440 unsigned NumDefElts = NumElts - NumUndefElts;
4441 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4442 ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4443 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4444 SmallVector<SDValue> MaskVals;
4445 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4446 SubVecAOps.reserve(NumElts);
4447 SubVecBOps.reserve(NumElts);
4448 for (const auto &[Idx, U] : enumerate(Op->ops())) {
4449 SDValue Elem = U.get();
4450 if (Idx < NumElts / 2) {
4451 SubVecAOps.push_back(Elem);
4452 SubVecBOps.push_back(UndefElem);
4453 } else {
4454 SubVecAOps.push_back(UndefElem);
4455 SubVecBOps.push_back(Elem);
4456 }
4457 bool SelectMaskVal = (Idx < NumElts / 2);
4458 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4459 }
4460 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4461 MaskVals.size() == NumElts);
4462
4463 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4464 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4465 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4466 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4467 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4468 }
4469
4470 // Cap the cost at a value linear to the number of elements in the vector.
4471 // The default lowering is to use the stack. The vector store + scalar loads
4472 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4473 // being (at least) linear in LMUL. As a result, using the vslidedown
4474 // lowering for every element ends up being VL*LMUL.
4475 // TODO: Should we be directly costing the stack alternative? Doing so might
4476 // give us a more accurate upper bound.
4477 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4478
4479 // TODO: unify with TTI getSlideCost.
4480 InstructionCost PerSlideCost = 1;
4481 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4482 default: break;
4483 case RISCVVType::LMUL_2:
4484 PerSlideCost = 2;
4485 break;
4486 case RISCVVType::LMUL_4:
4487 PerSlideCost = 4;
4488 break;
4489 case RISCVVType::LMUL_8:
4490 PerSlideCost = 8;
4491 break;
4492 }
4493
4494 // TODO: Should we be using the build instseq then cost + evaluate scheme
4495 // we use for integer constants here?
4496 unsigned UndefCount = 0;
4497 for (const SDValue &V : Op->ops()) {
4498 if (V.isUndef()) {
4499 UndefCount++;
4500 continue;
4501 }
4502 if (UndefCount) {
4503 LinearBudget -= PerSlideCost;
4504 UndefCount = 0;
4505 }
4506 LinearBudget -= PerSlideCost;
4507 }
4508 if (UndefCount) {
4509 LinearBudget -= PerSlideCost;
4510 }
4511
4512 if (LinearBudget < 0)
4513 return SDValue();
4514
4515 assert((!VT.isFloatingPoint() ||
4516 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4517 "Illegal type which will result in reserved encoding");
4518
4519 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4520
4521 // General case: splat the first operand and slide other operands down one
4522 // by one to form a vector. Alternatively, if every operand is an
4523 // extraction from element 0 of a vector, we use that vector from the last
4524 // extraction as the start value and slide up instead of slide down, so that
4525 // (1) we can avoid the initial splat and (2) we can turn those vslide1up into
4526 // a vslideup of 1 later and eliminate the vector-to-scalar movement, which is
4527 // something we cannot do with vslide1down/vslidedown.
4528 // Of course, using vslide1up/vslideup might increase the register pressure,
4529 // and that's why we conservatively limit to cases where every operand is an
4530 // extraction from the first element.
4531 SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
4532 SDValue EVec;
4533 bool SlideUp = false;
4534 auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
4535 SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
4536 if (SlideUp)
4537 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4538 Mask, VL, Policy);
4539 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4540 Mask, VL, Policy);
4541 };
4542
4543 // The reason we don't use all_of here is because we're also capturing EVec
4544 // from the last non-undef operand. If the std::execution_policy of the
4545 // underlying std::all_of is anything but std::sequenced_policy we might
4546 // capture the wrong EVec.
4547 for (SDValue V : Operands) {
4548 using namespace SDPatternMatch;
4549 SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
4550 if (!SlideUp)
4551 break;
4552 }
4553
4554 if (SlideUp) {
4555 MVT EVecContainerVT = EVec.getSimpleValueType();
4556 // Make sure the original vector has scalable vector type.
4557 if (EVecContainerVT.isFixedLengthVector()) {
4558 EVecContainerVT =
4559 getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
4560 EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
4561 }
4562
4563 // Adapt EVec's type into ContainerVT.
4564 if (EVecContainerVT.getVectorMinNumElements() <
4565 ContainerVT.getVectorMinNumElements())
4566 EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
4567 else
4568 EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
4569
4570 // Reverse the elements as we're going to slide up from the last element.
4571 std::reverse(Operands.begin(), Operands.end());
4572 }
4573
4574 SDValue Vec;
4575 UndefCount = 0;
4576 for (SDValue V : Operands) {
4577 if (V.isUndef()) {
4578 UndefCount++;
4579 continue;
4580 }
4581
4582 // Start our sequence with either a TA splat or extract source in the
4583 // hopes that hardware is able to recognize there's no dependency on the
4584 // prior value of our temporary register.
4585 if (!Vec) {
4586 if (SlideUp) {
4587 Vec = EVec;
4588 } else {
4589 Vec = DAG.getSplatVector(VT, DL, V);
4590 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4591 }
4592
4593 UndefCount = 0;
4594 continue;
4595 }
4596
4597 if (UndefCount) {
4598 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4599 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4600 VL);
4601 UndefCount = 0;
4602 }
4603
4604 unsigned Opcode;
4605 if (VT.isFloatingPoint())
4606 Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
4607 else
4608 Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
4609
4610 if (!VT.isFloatingPoint())
4611 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4612 Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4613 V, Mask, VL);
4614 }
4615 if (UndefCount) {
4616 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4617 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4618 VL);
4619 }
4620 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4621}
4622
4623static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4624 SDValue Lo, SDValue Hi, SDValue VL,
4625 SelectionDAG &DAG) {
4626 if (!Passthru)
4627 Passthru = DAG.getUNDEF(VT);
4628 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4629 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4630 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4631 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4632 // node in order to try and match RVV vector/scalar instructions.
4633 if ((LoC >> 31) == HiC)
4634 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4635
4636 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
4637 // VL. This can temporarily increase VL if VL is less than VLMAX.
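// For example, splatting the i64 value 0x0000000500000005 with VL=2 can be
// done as a vmv.v.x of 5 into an i32 vector with VL=4, then bitcasting back
// to the i64 vector type.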
4638 if (LoC == HiC) {
4639 SDValue NewVL;
4640 if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4641 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4642 else
4643 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4644 MVT InterVT =
4645 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4646 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4647 DAG.getUNDEF(InterVT), Lo, NewVL);
4648 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4649 }
4650 }
4651
4652 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4653 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4654 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4655 Hi.getConstantOperandVal(1) == 31)
4656 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4657
4658 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4659 // even if it might be sign extended.
4660 if (Hi.isUndef())
4661 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4662
4663 // Fall back to a stack store and stride x0 vector load.
4664 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4665 Hi, VL);
4666}
4667
4668// Called by type legalization to handle splat of i64 on RV32.
4669// FIXME: We can optimize this when the type has sign or zero bits in one
4670// of the halves.
4671static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4672 SDValue Scalar, SDValue VL,
4673 SelectionDAG &DAG) {
4674 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4675 SDValue Lo, Hi;
4676 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4677 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4678}
4679
4680// This function lowers a splat of a scalar operand Splat with the vector
4681// length VL. It ensures the final sequence is type legal, which is useful when
4682// lowering a splat after type legalization.
4683static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4684 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4685 const RISCVSubtarget &Subtarget) {
4686 bool HasPassthru = Passthru && !Passthru.isUndef();
4687 if (!HasPassthru && !Passthru)
4688 Passthru = DAG.getUNDEF(VT);
4689
4690 MVT EltVT = VT.getVectorElementType();
4691 MVT XLenVT = Subtarget.getXLenVT();
4692
4693 if (VT.isFloatingPoint()) {
4694 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4695 EltVT == MVT::bf16) {
4696 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4697 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4698 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4699 else
4700 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4701 MVT IVT = VT.changeVectorElementType(MVT::i16);
4702 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4703 SDValue Splat =
4704 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4705 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4706 }
4707 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4708 }
4709
4710 // Simplest case is that the operand needs to be promoted to XLenVT.
4711 if (Scalar.getValueType().bitsLE(XLenVT)) {
4712 // If the operand is a constant, sign extend to increase our chances
4713 // of being able to use a .vi instruction. ANY_EXTEND would become
4714 // a zero extend and the simm5 check in isel would fail.
4715 // FIXME: Should we ignore the upper bits in isel instead?
4716 unsigned ExtOpc =
4717 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4718 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4719 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4720 }
4721
4722 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4723 "Unexpected scalar for splat lowering!");
4724
4725 if (isOneConstant(VL) && isNullConstant(Scalar))
4726 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4727 DAG.getConstant(0, DL, XLenVT), VL);
4728
4729 // Otherwise use the more complicated splatting algorithm.
4730 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4731}
4732
4733// This function lowers an insert of a scalar operand Scalar into lane
4734// 0 of the vector regardless of the value of VL. The contents of the
4735// remaining lanes of the result vector are unspecified. VL is assumed
4736// to be non-zero.
4737 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4738 const SDLoc &DL, SelectionDAG &DAG,
4739 const RISCVSubtarget &Subtarget) {
4740 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4741
4742 const MVT XLenVT = Subtarget.getXLenVT();
4743 SDValue Passthru = DAG.getUNDEF(VT);
4744
4745 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4746 isNullConstant(Scalar.getOperand(1))) {
4747 SDValue ExtractedVal = Scalar.getOperand(0);
4748 // The element types must be the same.
4749 if (ExtractedVal.getValueType().getVectorElementType() ==
4750 VT.getVectorElementType()) {
4751 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4752 MVT ExtractedContainerVT = ExtractedVT;
4753 if (ExtractedContainerVT.isFixedLengthVector()) {
4754 ExtractedContainerVT = getContainerForFixedLengthVector(
4755 DAG, ExtractedContainerVT, Subtarget);
4756 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4757 ExtractedVal, DAG, Subtarget);
4758 }
4759 if (ExtractedContainerVT.bitsLE(VT))
4760 return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
4761 return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
4762 }
4763 }
4764
4765 if (VT.isFloatingPoint())
4766 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4767 VL);
4768
4769 // Avoid the tricky legalization cases by falling back to using the
4770 // splat code which already handles it gracefully.
4771 if (!Scalar.getValueType().bitsLE(XLenVT))
4772 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4773 DAG.getConstant(1, DL, XLenVT),
4774 VT, DL, DAG, Subtarget);
4775
4776 // If the operand is a constant, sign extend to increase our chances
4777 // of being able to use a .vi instruction. ANY_EXTEND would become
4778 // a zero extend and the simm5 check in isel would fail.
4779 // FIXME: Should we ignore the upper bits in isel instead?
4780 unsigned ExtOpc =
4781 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4782 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4783 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4784 VL);
4785}
4786
4787/// If concat_vector(V1,V2) could be folded away to some existing
4788/// vector source, return it. Note that the source may be larger
4789 /// than the requested concat_vector (i.e. an extract_subvector
4790/// might be required.)
4791 static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4792 EVT VT = V1.getValueType();
4793 assert(VT == V2.getValueType() && "argument types must match");
4794 // Both inputs must be extracts.
4795 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4796 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4797 return SDValue();
4798
4799 // Extracting from the same source.
4800 SDValue Src = V1.getOperand(0);
4801 if (Src != V2.getOperand(0) ||
4802 VT.isScalableVector() != Src.getValueType().isScalableVector())
4803 return SDValue();
4804
4805 // The extracts must extract the two halves of the source.
4806 if (V1.getConstantOperandVal(1) != 0 ||
4807 V2.getConstantOperandVal(1) != VT.getVectorMinNumElements())
4808 return SDValue();
4809
4810 return Src;
4811}
4812
4813// Can this shuffle be performed on exactly one (possibly larger) input?
4814 static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
4815
4816 if (V2.isUndef())
4817 return V1;
4818
4819 unsigned NumElts = VT.getVectorNumElements();
4820 // Src needs to have twice the number of elements.
4821 // TODO: Update shuffle lowering to add the extract subvector
4822 if (SDValue Src = foldConcatVector(V1, V2);
4823 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4824 return Src;
4825
4826 return SDValue();
4827}
4828
4829/// Is this shuffle interleaving contiguous elements from one vector into the
4830/// even elements and contiguous elements from another vector into the odd
4831/// elements. \p EvenSrc will contain the element that should be in the first
4832/// even element. \p OddSrc will contain the element that should be in the first
4833/// odd element. These can be the first element in a source or the element half
4834/// way through the source.
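/// For example, with two v8 sources the mask <0, 8, 1, 9, 2, 10, 3, 11>
/// interleaves the low halves of both sources (EvenSrc=0, OddSrc=8), while
/// <0, 4, 1, 5, 2, 6, 3, 7> is a unary interleave of the two halves of the
/// first source (EvenSrc=0, OddSrc=4).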
4835static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4836 int &OddSrc, const RISCVSubtarget &Subtarget) {
4837 // We need to be able to widen elements to the next larger integer type or
4838 // use the zip2a instruction at e64.
4839 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
4840 !Subtarget.hasVendorXRivosVizip())
4841 return false;
4842
4843 int Size = Mask.size();
4844 int NumElts = VT.getVectorNumElements();
4845 assert(Size == (int)NumElts && "Unexpected mask size");
4846
4847 SmallVector<unsigned, 2> StartIndexes;
4848 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4849 return false;
4850
4851 EvenSrc = StartIndexes[0];
4852 OddSrc = StartIndexes[1];
4853
4854 // One source should be low half of first vector.
4855 if (EvenSrc != 0 && OddSrc != 0)
4856 return false;
4857
4858 // Subvectors will be extracted either at the start of the two input
4859 // vectors, or at the start and middle of the first vector if it's a unary
4860 // interleave.
4861 // In both cases, HalfNumElts will be extracted.
4862 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4863 // we'll create an illegal extract_subvector.
4864 // FIXME: We could support other values using a slidedown first.
4865 int HalfNumElts = NumElts / 2;
4866 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4867}
4868
4869/// Is this mask representing a masked combination of two slides?
4870 static bool isMaskedSlidePair(ArrayRef<int> Mask,
4871 std::array<std::pair<int, int>, 2> &SrcInfo) {
4872 if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
4873 return false;
4874
4875 // Avoid matching vselect idioms
4876 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
4877 return false;
4878 // Prefer vslideup as the second instruction, and identity
4879 // only as the initial instruction.
4880 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
4881 SrcInfo[1].second == 0)
4882 std::swap(SrcInfo[0], SrcInfo[1]);
4883 assert(SrcInfo[0].first != -1 && "Must find one slide");
4884 return true;
4885}
4886
4887// Exactly matches the semantics of a previously existing custom matcher
4888// to allow migration to new matcher without changing output.
4889static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
4890 unsigned NumElts) {
4891 if (SrcInfo[1].first == -1)
4892 return true;
4893 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
4894 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
4895}
4896
4897static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
4898 ArrayRef<int> Mask, unsigned Factor,
4899 bool RequiredPolarity) {
4900 int NumElts = Mask.size();
4901 for (const auto &[Idx, M] : enumerate(Mask)) {
4902 if (M < 0)
4903 continue;
4904 int Src = M >= NumElts;
4905 int Diff = (int)Idx - (M % NumElts);
4906 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
4907 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
4908 "Must match exactly one of the two slides");
4909 if (RequiredPolarity != (C == (Idx / Factor) % 2))
4910 return false;
4911 }
4912 return true;
4913}
4914
4915/// Given a shuffle which can be represented as a pair of two slides,
4916/// see if it is a zipeven idiom. Zipeven is:
4917/// vs2: a0 a1 a2 a3
4918/// vs1: b0 b1 b2 b3
4919/// vd: a0 b0 a2 b2
4920static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
4921 ArrayRef<int> Mask, unsigned &Factor) {
4922 Factor = SrcInfo[1].second;
4923 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4924 Mask.size() % Factor == 0 &&
4925 isAlternating(SrcInfo, Mask, Factor, true);
4926}
4927
4928/// Given a shuffle which can be represented as a pair of two slides,
4929/// see if it is a zipodd idiom. Zipodd is:
4930/// vs2: a0 a1 a2 a3
4931/// vs1: b0 b1 b2 b3
4932/// vd: a1 b1 a3 b3
4933/// Note that the operand order is swapped due to the way we canonicalize
4934 /// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
4935static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
4936 ArrayRef<int> Mask, unsigned &Factor) {
4937 Factor = -SrcInfo[1].second;
4938 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4939 Mask.size() % Factor == 0 &&
4940 isAlternating(SrcInfo, Mask, Factor, false);
4941}
4942
4943// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4944// 2, 4, 8 and the integer type Factor-times larger than VT's
4945// element type must be a legal element type.
4946// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4947// -> [p, q, r, s] (Factor=2, Index=1)
4948 static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4949 SDValue Src, unsigned Factor,
4950 unsigned Index, SelectionDAG &DAG) {
4951 unsigned EltBits = VT.getScalarSizeInBits();
4952 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4953 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4954 SrcEC.divideCoefficientBy(Factor));
4955 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4956 SrcEC.divideCoefficientBy(Factor));
4957 Src = DAG.getBitcast(WideSrcVT, Src);
4958
4959 unsigned Shift = Index * EltBits;
4960 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4961 DAG.getConstant(Shift, DL, WideSrcVT));
4962 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4963 MVT CastVT = ResVT.changeVectorElementType(VT.getVectorElementType());
4964 Res = DAG.getBitcast(CastVT, Res);
4965 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
4966}
4967
4968/// Match a single source shuffle which is an identity except that some
4969/// particular element is repeated. This can be lowered as a masked
4970/// vrgather.vi/vx. Note that the two source form of this is handled
4971/// by the recursive splitting logic and doesn't need special handling.
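/// For example, the single-source mask <0, 3, 2, 3> is an identity except
/// that element 1 repeats element 3; it is lowered as a vselect between the
/// source and a splat of element 3 under the select mask <0, 1, 0, 1>.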
4973 const RISCVSubtarget &Subtarget,
4974 SelectionDAG &DAG) {
4975
4976 SDLoc DL(SVN);
4977 MVT VT = SVN->getSimpleValueType(0);
4978 SDValue V1 = SVN->getOperand(0);
4979 assert(SVN->getOperand(1).isUndef());
4980 ArrayRef<int> Mask = SVN->getMask();
4981 const unsigned NumElts = VT.getVectorNumElements();
4982 MVT XLenVT = Subtarget.getXLenVT();
4983
4984 std::optional<int> SplatIdx;
4985 for (auto [I, M] : enumerate(Mask)) {
4986 if (M == -1 || I == (unsigned)M)
4987 continue;
4988 if (SplatIdx && *SplatIdx != M)
4989 return SDValue();
4990 SplatIdx = M;
4991 }
4992
4993 if (!SplatIdx)
4994 return SDValue();
4995
4996 SmallVector<SDValue> MaskVals;
4997 for (int MaskIndex : Mask) {
4998 bool SelectMaskVal = MaskIndex == *SplatIdx;
4999 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5000 }
5001 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5002 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5003 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5004 SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
5005 SmallVector<int>(NumElts, *SplatIdx));
5006 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
5007}
5008
5009// Lower the following shuffle to vslidedown.
5010// a)
5011// t49: v8i8 = extract_subvector t13, Constant:i64<0>
5012// t109: v8i8 = extract_subvector t13, Constant:i64<8>
5013// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
5014// b)
5015// t69: v16i16 = extract_subvector t68, Constant:i64<0>
5016// t23: v8i16 = extract_subvector t69, Constant:i64<0>
5017// t29: v4i16 = extract_subvector t23, Constant:i64<4>
5018// t26: v8i16 = extract_subvector t69, Constant:i64<8>
5019// t30: v4i16 = extract_subvector t26, Constant:i64<0>
5020// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
5021 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
5022 SDValue V1, SDValue V2,
5023 ArrayRef<int> Mask,
5024 const RISCVSubtarget &Subtarget,
5025 SelectionDAG &DAG) {
5026 auto findNonEXTRACT_SUBVECTORParent =
5027 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
5028 uint64_t Offset = 0;
5029 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5030 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
5031 // a scalable vector. But we don't want to match the case.
5032 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
5033 Offset += Parent.getConstantOperandVal(1);
5034 Parent = Parent.getOperand(0);
5035 }
5036 return std::make_pair(Parent, Offset);
5037 };
5038
5039 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
5040 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
5041
5042 // Extracting from the same source.
5043 SDValue Src = V1Src;
5044 if (Src != V2Src)
5045 return SDValue();
5046
5047 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
5048 SmallVector<int, 16> NewMask(Mask);
5049 for (size_t i = 0; i != NewMask.size(); ++i) {
5050 if (NewMask[i] == -1)
5051 continue;
5052
5053 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
5054 NewMask[i] = NewMask[i] + V1IndexOffset;
5055 } else {
5056 // Minus NewMask.size() is needed. Otherwise, the b case would be
5057 // <5,6,7,12> instead of <5,6,7,8>.
5058 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
5059 }
5060 }
5061
5062 // First index must be known and non-zero. It will be used as the slidedown
5063 // amount.
5064 if (NewMask[0] <= 0)
5065 return SDValue();
5066
5067 // NewMask is also continuous.
5068 for (unsigned i = 1; i != NewMask.size(); ++i)
5069 if (NewMask[i - 1] + 1 != NewMask[i])
5070 return SDValue();
5071
5072 MVT XLenVT = Subtarget.getXLenVT();
5073 MVT SrcVT = Src.getSimpleValueType();
5074 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
5075 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
5076 SDValue Slidedown =
5077 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5078 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
5079 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
5080 return DAG.getExtractSubvector(
5081 DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
5082}
5083
5084// Because vslideup leaves the destination elements at the start intact, we can
5085// use it to perform shuffles that insert subvectors:
5086//
5087// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5088// ->
5089// vsetvli zero, 8, e8, mf2, ta, ma
5090// vslideup.vi v8, v9, 4
5091//
5092// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5093// ->
5094// vsetvli zero, 5, e8, mf2, tu, ma
5095 // vslideup.vi v8, v9, 2
5096 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
5097 SDValue V1, SDValue V2,
5098 ArrayRef<int> Mask,
5099 const RISCVSubtarget &Subtarget,
5100 SelectionDAG &DAG) {
5101 unsigned NumElts = VT.getVectorNumElements();
5102 int NumSubElts, Index;
5103 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
5104 Index))
5105 return SDValue();
5106
5107 bool OpsSwapped = Mask[Index] < (int)NumElts;
5108 SDValue InPlace = OpsSwapped ? V2 : V1;
5109 SDValue ToInsert = OpsSwapped ? V1 : V2;
5110
5111 MVT XLenVT = Subtarget.getXLenVT();
5112 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5113 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
5114 // We slide up by the index that the subvector is being inserted at, and set
5115 // VL to the index + the number of elements being inserted.
5116 unsigned Policy =
5118 // If we're adding a suffix to the in-place vector, i.e. inserting right
5119 // up to the very end of it, then we don't actually care about the tail.
5120 if (NumSubElts + Index >= (int)NumElts)
5121 Policy |= RISCVVType::TAIL_AGNOSTIC;
5122
5123 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
5124 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
5125 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
5126
5127 SDValue Res;
5128 // If we're inserting into the lowest elements, use a tail undisturbed
5129 // vmv.v.v.
5130 if (Index == 0)
5131 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
5132 VL);
5133 else
5134 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
5135 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
5136 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5137}
5138
5139/// Match v(f)slide1up/down idioms. These operations involve sliding
5140/// N-1 elements to make room for an inserted scalar at one end.
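/// For example, with V1 a splat of s and V2 = v (4 elements each), the mask
/// <0, 4, 5, 6> produces [s, v0, v1, v2] and matches vslide1up, while
/// <5, 6, 7, 3> produces [v1, v2, v3, s] and matches vslide1down.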
5141 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
5142 SDValue V1, SDValue V2,
5143 ArrayRef<int> Mask,
5144 const RISCVSubtarget &Subtarget,
5145 SelectionDAG &DAG) {
5146 bool OpsSwapped = false;
5147 if (!isa<BuildVectorSDNode>(V1)) {
5148 if (!isa<BuildVectorSDNode>(V2))
5149 return SDValue();
5150 std::swap(V1, V2);
5151 OpsSwapped = true;
5152 }
5153 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
5154 if (!Splat)
5155 return SDValue();
5156
5157 // Return true if the mask could describe a slide of Mask.size() - 1
5158 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
5159 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5160 const unsigned S = (Offset > 0) ? 0 : -Offset;
5161 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5162 for (unsigned i = S; i != E; ++i)
5163 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5164 return false;
5165 return true;
5166 };
5167
5168 const unsigned NumElts = VT.getVectorNumElements();
5169 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5170 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5171 return SDValue();
5172
5173 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
5174 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
5175 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5176 return SDValue();
5177
5178 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5179 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5180
5181 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5182 // vslide1{down,up}.vx instead.
5183 if (VT.getVectorElementType() == MVT::bf16 ||
5184 (VT.getVectorElementType() == MVT::f16 &&
5185 !Subtarget.hasVInstructionsF16())) {
5186 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5187 Splat =
5188 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
5189 V2 = DAG.getBitcast(
5190 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
5191 SDValue Vec = DAG.getNode(
5192 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5193 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
5194 Vec = DAG.getBitcast(ContainerVT, Vec);
5195 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5196 }
5197
5198 auto OpCode = IsVSlidedown ?
5199 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5200 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5201 if (!VT.isFloatingPoint())
5202 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
5203 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
5204 DAG.getUNDEF(ContainerVT),
5205 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
5206 Splat, TrueMask, VL);
5207 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5208}
5209
5210/// Match a mask which "spreads" the leading elements of a vector evenly
5211/// across the result. Factor is the spread amount, and Index is the
5212/// offset applied. (on success, Index < Factor) This is the inverse
5213/// of a deinterleave with the same Factor and Index. This is analogous
5214/// to an interleave, except that all but one lane is undef.
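/// E.g. with Factor=2 and Index=1 on an 8-element result, the mask
/// <-1, 0, -1, 1, -1, 2, -1, 3> spreads the first four source elements into
/// the odd lanes.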
5215 bool RISCVTargetLowering::isSpreadMask(ArrayRef<int> Mask, unsigned Factor,
5216 unsigned &Index) {
5217 SmallVector<bool> LaneIsUndef(Factor, true);
5218 for (unsigned i = 0; i < Mask.size(); i++)
5219 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5220
5221 bool Found = false;
5222 for (unsigned i = 0; i < Factor; i++) {
5223 if (LaneIsUndef[i])
5224 continue;
5225 if (Found)
5226 return false;
5227 Index = i;
5228 Found = true;
5229 }
5230 if (!Found)
5231 return false;
5232
5233 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5234 unsigned j = i * Factor + Index;
5235 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5236 return false;
5237 }
5238 return true;
5239}
5240
5241static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5242 const SDLoc &DL, SelectionDAG &DAG,
5243 const RISCVSubtarget &Subtarget) {
5244 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5245 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5246 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5248
5249 MVT VT = Op0.getSimpleValueType();
5250 MVT IntVT = VT.changeVectorElementTypeToInteger();
5251 Op0 = DAG.getBitcast(IntVT, Op0);
5252 Op1 = DAG.getBitcast(IntVT, Op1);
5253
5254 MVT ContainerVT = IntVT;
5255 if (VT.isFixedLengthVector()) {
5256 ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
5257 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
5258 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5259 }
5260
5261 MVT InnerVT = ContainerVT;
5262 auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
5263 if (Op1.isUndef() &&
5264 ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
5265 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5266 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5267 VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
5268 Subtarget.getXLenVT());
5269 Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
5270 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5271 Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
5272 Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
5273 }
5274
5275 SDValue Passthru = DAG.getUNDEF(InnerVT);
5276 SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
5277 if (InnerVT.bitsLT(ContainerVT))
5278 Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
5279 if (IntVT.isFixedLengthVector())
5280 Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
5281 Res = DAG.getBitcast(VT, Res);
5282 return Res;
5283}
5284
5285 // Given a vector a, b, c, d, return a vector Factor times longer
5286 // with Factor-1 undefs between elements. Ex:
5287// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5288// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
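// This is done arithmetically: each element is zero-extended into a lane that
// is Factor times as wide, shifted left by EltBits * Index, and the result is
// bitcast back to the narrow element type (the gap lanes become zeros, which
// is fine where undef is expected).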
5289static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5290 const SDLoc &DL, SelectionDAG &DAG) {
5291
5292 MVT VT = V.getSimpleValueType();
5293 unsigned EltBits = VT.getScalarSizeInBits();
5294 ElementCount EC = VT.getVectorElementCount();
5295 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
5296
5297 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
5298
5299 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
5300 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5301 // allow the SHL to fold away if Index is 0.
5302 if (Index != 0)
5303 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
5304 DAG.getConstant(EltBits * Index, DL, WideVT));
5305 // Make sure to use original element type
5306 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
5307 EC.multiplyCoefficientBy(Factor));
5308 return DAG.getBitcast(ResultVT, Result);
5309}
5310
5311// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5312// to create an interleaved vector of <[vscale x] n*2 x ty>.
5313// This requires that the size of ty is less than the subtarget's maximum ELEN.
5314 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
5315 const SDLoc &DL, SelectionDAG &DAG,
5316 const RISCVSubtarget &Subtarget) {
5317
5318 // FIXME: Not only does this optimize the code, it fixes some correctness
5319 // issues because MIR does not have freeze.
5320 if (EvenV.isUndef())
5321 return getWideningSpread(OddV, 2, 1, DL, DAG);
5322 if (OddV.isUndef())
5323 return getWideningSpread(EvenV, 2, 0, DL, DAG);
5324
5325 MVT VecVT = EvenV.getSimpleValueType();
5326 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5327 // Convert fixed vectors to scalable if needed
5328 if (VecContainerVT.isFixedLengthVector()) {
5329 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
5330 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
5331 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
5332 }
5333
5334 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5335
5336 // We're working with a vector of the same size as the resulting
5337 // interleaved vector, but with half the number of elements and
5338 // twice the SEW (Hence the restriction on not using the maximum
5339 // ELEN)
5340 MVT WideVT =
5341 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
5342 VecVT.getVectorElementCount());
5343 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5344 if (WideContainerVT.isFixedLengthVector())
5345 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
5346
5347 // Bitcast the input vectors to integers in case they are FP
5348 VecContainerVT = VecContainerVT.changeTypeToInteger();
5349 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
5350 OddV = DAG.getBitcast(VecContainerVT, OddV);
5351
5352 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
5353 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
5354
5355 SDValue Interleaved;
5356 if (Subtarget.hasStdExtZvbb()) {
5357 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5358 SDValue OffsetVec =
5359 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5360 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5361 OffsetVec, Passthru, Mask, VL);
5362 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5363 Interleaved, EvenV, Passthru, Mask, VL);
5364 } else {
5365 // FIXME: We should freeze the odd vector here. We already handled the case
5366 // of provably undef/poison above.
5367
5368 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5369 // vwaddu.vv
5370 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5371 OddV, Passthru, Mask, VL);
5372
5373 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all-ones
5374 SDValue AllOnesVec = DAG.getSplatVector(
5375 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5376 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5377 OddV, AllOnesVec, Passthru, Mask, VL);
5378
5379 // Add the two together so we get
5380 // (OddV * 0xff...ff) + (OddV + EvenV)
5381 // = (OddV * 0x100...00) + EvenV
5382 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5383 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
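// Worked example with 8-bit elements, Even = 5, Odd = 3:
//   vwaddu: 3 + 5 = 8; vwmulu: 3 * 255 = 765; add: 765 + 8 = 773 = 0x0305,
//   i.e. (3 << 8) | 5, the interleaved pair.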
5384 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5385 Interleaved, OddsMul, Passthru, Mask, VL);
5386 }
5387
5388 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
5389 MVT ResultContainerVT = MVT::getVectorVT(
5390 VecVT.getVectorElementType(), // Make sure to use original type
5391 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5392 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5393
5394 // Convert back to a fixed vector if needed
5395 MVT ResultVT =
5396 MVT::getVectorVT(VecVT.getVectorElementType(),
5397 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5398 if (ResultVT.isFixedLengthVector())
5399 Interleaved =
5400 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5401
5402 return Interleaved;
5403}
5404
5405// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5406// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5407 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5408 SelectionDAG &DAG,
5409 const RISCVSubtarget &Subtarget) {
5410 SDLoc DL(SVN);
5411 MVT VT = SVN->getSimpleValueType(0);
5412 SDValue V = SVN->getOperand(0);
5413 unsigned NumElts = VT.getVectorNumElements();
5414
5415 assert(VT.getVectorElementType() == MVT::i1);
5416
5417 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5418 SVN->getMask().size()) ||
5419 !SVN->getOperand(1).isUndef())
5420 return SDValue();
5421
5422 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5423 EVT ViaVT = EVT::getVectorVT(
5424 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5425 EVT ViaBitVT =
5426 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5427
5428 // If we don't have zvbb or the larger element type > ELEN, the operation will
5429 // be illegal.
5430 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5431 ViaVT) ||
5432 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5433 return SDValue();
5434
5435 // If the bit vector doesn't fit exactly into the larger element type, we need
5436 // to insert it into the larger vector and then shift up the reversed bits
5437 // afterwards to get rid of the gap introduced.
5438 if (ViaEltSize > NumElts)
5439 V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);
5440
5441 SDValue Res =
5442 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5443
5444 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5445 // element type.
5446 if (ViaEltSize > NumElts)
5447 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5448 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5449
5450 Res = DAG.getBitcast(ViaBitVT, Res);
5451
5452 if (ViaEltSize > NumElts)
5453 Res = DAG.getExtractSubvector(DL, VT, Res, 0);
5454 return Res;
5455}
5456
5457 static bool isLegalBitRotate(ArrayRef<int> Mask, EVT VT,
5458 const RISCVSubtarget &Subtarget,
5459 MVT &RotateVT, unsigned &RotateAmt) {
5460 unsigned NumElts = VT.getVectorNumElements();
5461 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5462 unsigned NumSubElts;
5463 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
5464 NumElts, NumSubElts, RotateAmt))
5465 return false;
5466 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5467 NumElts / NumSubElts);
5468
5469 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5470 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5471}
5472
5473// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5474// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5475// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5476 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5477 SelectionDAG &DAG,
5478 const RISCVSubtarget &Subtarget) {
5479 SDLoc DL(SVN);
5480
5481 EVT VT = SVN->getValueType(0);
5482 unsigned RotateAmt;
5483 MVT RotateVT;
5484 if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5485 return SDValue();
5486
5487 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5488
5489 SDValue Rotate;
5490 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5491 // so canonicalize to vrev8.
5492 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5493 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5494 else
5495 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5496 DAG.getConstant(RotateAmt, DL, RotateVT));
5497
5498 return DAG.getBitcast(VT, Rotate);
5499}
5500
5501// If compiling with an exactly known VLEN, see if we can split a
5502// shuffle on m2 or larger into a small number of m1 sized shuffles
5503 // which write each destination register exactly once.
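// For example, with VLEN=128 an 8 x i64 shuffle (m4) is decomposed into the
// per-destination-register 2 x i64 (m1) shuffles implied by the mask, rather
// than a single large vrgather (which is often O(LMUL^2), see the fallback in
// the main shuffle lowering).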
5504 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5505 SelectionDAG &DAG,
5506 const RISCVSubtarget &Subtarget) {
5507 SDLoc DL(SVN);
5508 MVT VT = SVN->getSimpleValueType(0);
5509 SDValue V1 = SVN->getOperand(0);
5510 SDValue V2 = SVN->getOperand(1);
5511 ArrayRef<int> Mask = SVN->getMask();
5512
5513 // If we don't know exact data layout, not much we can do. If this
5514 // is already m1 or smaller, no point in splitting further.
5515 const auto VLen = Subtarget.getRealVLen();
5516 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5517 return SDValue();
5518
5519 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5520 // expansion for.
5521 unsigned RotateAmt;
5522 MVT RotateVT;
5523 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5524 return SDValue();
5525
5526 MVT ElemVT = VT.getVectorElementType();
5527 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5528
5529 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5530 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5531 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5532 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5533 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5534 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5535 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5536 unsigned NumOfDestRegs = NumElts / NumOpElts;
5537 // The following semantically builds up a fixed length concat_vector
5538 // of the component shuffle_vectors. We eagerly lower to scalable here
5539 // to avoid DAG combining it back to a large shuffle_vector again.
5540 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5541 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5542 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5543 Operands;
5544 processShuffleMasks(
5545 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5546 [&]() { Operands.emplace_back(); },
5547 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5548 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
5549 SmallVector<int>(SrcSubMask));
5550 },
5551 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5552 if (NewReg)
5553 Operands.emplace_back();
5554 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
5555 });
5556 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5557 // Note: check that we do not emit too many shuffles here to prevent code
5558 // size explosion.
5559 // TODO: investigate if this can be improved by extra analysis of the masks
5560 // to check whether the resulting code is more profitable.
5561 unsigned NumShuffles = std::accumulate(
5562 Operands.begin(), Operands.end(), 0u,
5563 [&](unsigned N,
5564 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5565 if (Data.empty())
5566 return N;
5567 N += Data.size();
5568 for (const auto &P : Data) {
5569 unsigned Idx2 = std::get<1>(P);
5570 ArrayRef<int> Mask = std::get<2>(P);
5571 if (Idx2 != UINT_MAX)
5572 ++N;
5573 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5574 --N;
5575 }
5576 return N;
5577 });
5578 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5579 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5580 return SDValue();
5581 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5582 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
5583 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5584 return SubVec;
5585 };
5586 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5587 ArrayRef<int> Mask) {
5588 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5589 return SubVec;
5590 };
5591 SDValue Vec = DAG.getUNDEF(ContainerVT);
5592 for (auto [I, Data] : enumerate(Operands)) {
5593 if (Data.empty())
5594 continue;
5595 SmallDenseMap<unsigned, SDValue, 4> Values;
5596 for (unsigned I : seq<unsigned>(Data.size())) {
5597 const auto &[Idx1, Idx2, _] = Data[I];
5598 // If the shuffle contains permutation of odd number of elements,
5599 // Idx1 might be used already in the first iteration.
5600 //
5601 // Idx1 = shuffle Idx1, Idx2
5602 // Idx1 = shuffle Idx1, Idx3
5603 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
5604 if (!V)
5605 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5606 (Idx1 % NumOfSrcRegs) * NumOpElts);
5607 if (Idx2 != UINT_MAX) {
5608 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
5609 if (!V)
5610 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5611 (Idx2 % NumOfSrcRegs) * NumOpElts);
5612 }
5613 }
5614 SDValue V;
5615 for (const auto &[Idx1, Idx2, Mask] : Data) {
5616 SDValue V1 = Values.at(Idx1);
5617 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5618 V = PerformShuffle(V1, V2, Mask);
5619 Values[Idx1] = V;
5620 }
5621
5622 unsigned InsertIdx = I * NumOpElts;
5623 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5624 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
5625 }
5626 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5627}
5628
5629// Matches a subset of compress masks with a contiguous prefix of output
5630// elements. This could be extended to allow gaps by deciding which
5631// source elements to spuriously demand.
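// E.g. <0, 2, 5, -1> is accepted: the defined indices strictly increase, each
// output position Idx satisfies Idx <= Mask[Idx], and undefs only appear as a
// suffix. <0, -1, 2, 3> is rejected because a defined element follows an
// undef.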
5632 static bool isCompressMask(ArrayRef<int> Mask) {
5633 int Last = -1;
5634 bool SawUndef = false;
5635 for (const auto &[Idx, M] : enumerate(Mask)) {
5636 if (M == -1) {
5637 SawUndef = true;
5638 continue;
5639 }
5640 if (SawUndef)
5641 return false;
5642 if (Idx > (unsigned)M)
5643 return false;
5644 if (M <= Last)
5645 return false;
5646 Last = M;
5647 }
5648 return true;
5649}
5650
5651/// Given a shuffle where the indices are disjoint between the two sources,
5652/// e.g.:
5653///
5654/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5655///
5656/// Merge the two sources into one and do a single source shuffle:
5657///
5658/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5659/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5660///
5661/// A vselect will either be merged into a masked instruction or be lowered as a
5662/// vmerge.vvm, which is cheaper than a vrgather.vv.
5663 static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5664 SelectionDAG &DAG,
5665 const RISCVSubtarget &Subtarget) {
5666 MVT VT = SVN->getSimpleValueType(0);
5667 MVT XLenVT = Subtarget.getXLenVT();
5668 SDLoc DL(SVN);
5669
5670 const ArrayRef<int> Mask = SVN->getMask();
5671
5672 // Work out which source each lane will come from.
5673 SmallVector<int, 16> Srcs(Mask.size(), -1);
5674
5675 for (int Idx : Mask) {
5676 if (Idx == -1)
5677 continue;
5678 unsigned SrcIdx = Idx % Mask.size();
5679 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5680 if (Srcs[SrcIdx] == -1)
5681 // Mark this source as using this lane.
5682 Srcs[SrcIdx] = Src;
5683 else if (Srcs[SrcIdx] != Src)
5684 // The other source is using this lane: not disjoint.
5685 return SDValue();
5686 }
5687
5688 SmallVector<SDValue> SelectMaskVals;
5689 for (int Lane : Srcs) {
5690 if (Lane == -1)
5691 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5692 else
5693 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5694 }
5695 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5696 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5697 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5698 SVN->getOperand(0), SVN->getOperand(1));
5699
5700 // Move all indices relative to the first source.
5701 SmallVector<int> NewMask(Mask.size());
5702 for (unsigned I = 0; I < Mask.size(); I++) {
5703 if (Mask[I] == -1)
5704 NewMask[I] = -1;
5705 else
5706 NewMask[I] = Mask[I] % Mask.size();
5707 }
5708
5709 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5710}
5711
5712/// Is this mask local (i.e. elements only move within their local span), and
5713/// repeating (that is, the same rearrangement is being done within each span)?
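/// E.g. with Span=4, <1, 0, 3, 2, 5, 4, 7, 6> is local and repeating (the same
/// swap-adjacent pattern in each span), while <1, 0, 3, 2, 4, 5, 6, 7> is not,
/// since the second span uses a different rearrangement.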
5714static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5715 // Require a prefix from the original mask until the consumer code
5716 // is adjusted to rewrite the mask instead of just taking a prefix.
5717 for (auto [I, M] : enumerate(Mask)) {
5718 if (M == -1)
5719 continue;
5720 if ((M / Span) != (int)(I / Span))
5721 return false;
5722 int SpanIdx = I % Span;
5723 int Expected = M % Span;
5724 if (Mask[SpanIdx] != Expected)
5725 return false;
5726 }
5727 return true;
5728}
5729
5730/// Is this mask only using elements from the first span of the input?
5731static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
5732 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
5733}
5734
5735/// Return true for a mask which performs an arbitrary shuffle within the first
5736/// span, and then repeats that same result across all remaining spans. Note
5737/// that this doesn't check if all the inputs come from a single span!
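/// E.g. with Span=4, <3, 0, 2, 1, 3, 0, 2, 1> repeats the first span's result
/// across every span.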
5738static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
5739 // Require a prefix from the original mask until the consumer code
5740 // is adjusted to rewrite the mask instead of just taking a prefix.
5741 for (auto [I, M] : enumerate(Mask)) {
5742 if (M == -1)
5743 continue;
5744 int SpanIdx = I % Span;
5745 if (Mask[SpanIdx] != M)
5746 return false;
5747 }
5748 return true;
5749}
5750
5751/// Try to widen element type to get a new mask value for a better permutation
5752/// sequence. This doesn't try to inspect the widened mask for profitability;
5753/// we speculate the widened form is equal or better. This has the effect of
5754/// reducing mask constant sizes - allowing cheaper materialization sequences
5755/// - and index sequence sizes - reducing register pressure and materialization
5756/// cost, at the cost of (possibly) an extra VTYPE toggle.
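/// E.g. the v8i8 mask <2, 3, 0, 1, 6, 7, 4, 5> widens to the v4i16 mask
/// <1, 0, 3, 2>, halving the number of indices that must be materialized.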
5757 static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5758 SDLoc DL(Op);
5759 MVT VT = Op.getSimpleValueType();
5760 MVT ScalarVT = VT.getVectorElementType();
5761 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5762 SDValue V0 = Op.getOperand(0);
5763 SDValue V1 = Op.getOperand(1);
5764 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5765
5766 // Avoid wasted work leading to isTypeLegal check failing below
5767 if (ElementSize > 32)
5768 return SDValue();
5769
5770 SmallVector<int, 8> NewMask;
5771 if (!widenShuffleMaskElts(Mask, NewMask))
5772 return SDValue();
5773
5774 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5775 : MVT::getIntegerVT(ElementSize * 2);
5776 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5777 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5778 return SDValue();
5779 V0 = DAG.getBitcast(NewVT, V0);
5780 V1 = DAG.getBitcast(NewVT, V1);
5781 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5782}
5783
5784 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5785 const RISCVSubtarget &Subtarget) {
5786 SDValue V1 = Op.getOperand(0);
5787 SDValue V2 = Op.getOperand(1);
5788 SDLoc DL(Op);
5789 MVT XLenVT = Subtarget.getXLenVT();
5790 MVT VT = Op.getSimpleValueType();
5791 unsigned NumElts = VT.getVectorNumElements();
5792 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5793
5794 if (VT.getVectorElementType() == MVT::i1) {
5795 // Lower to a vror.vi of a larger element type if possible before we promote
5796 // i1s to i8s.
5797 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5798 return V;
5799 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5800 return V;
5801
5802 // Promote i1 shuffle to i8 shuffle.
5803 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5804 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5805 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5806 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5807 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5808 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5809 ISD::SETNE);
5810 }
5811
5812 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5813
5814 // Store the return value in a single variable instead of structured bindings
5815 // so that we can pass it to GetSlide below, which cannot capture structured
5816 // bindings until C++20.
5817 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5818 auto [TrueMask, VL] = TrueMaskVL;
5819
5820 if (SVN->isSplat()) {
5821 const int Lane = SVN->getSplatIndex();
5822 if (Lane >= 0) {
5823 MVT SVT = VT.getVectorElementType();
5824
5825 // Turn splatted vector load into a strided load with an X0 stride.
5826 SDValue V = V1;
5827 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5828 // with undef.
5829 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5830 int Offset = Lane;
5831 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5832 int OpElements =
5833 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5834 V = V.getOperand(Offset / OpElements);
5835 Offset %= OpElements;
5836 }
5837
5838 // We need to ensure the load isn't atomic or volatile.
5839 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5840 auto *Ld = cast<LoadSDNode>(V);
5841 Offset *= SVT.getStoreSize();
5842 SDValue NewAddr = DAG.getMemBasePlusOffset(
5843 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5844
5845 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5846 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5847 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5848 SDValue IntID =
5849 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5850 SDValue Ops[] = {Ld->getChain(),
5851 IntID,
5852 DAG.getUNDEF(ContainerVT),
5853 NewAddr,
5854 DAG.getRegister(RISCV::X0, XLenVT),
5855 VL};
5856 SDValue NewLoad = DAG.getMemIntrinsicNode(
5857 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5858 DAG.getMachineFunction().getMachineMemOperand(
5859 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5860 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5861 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5862 }
5863
5864 MVT SplatVT = ContainerVT;
5865
5866 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5867 if (SVT == MVT::bf16 ||
5868 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5869 SVT = MVT::i16;
5870 SplatVT = ContainerVT.changeVectorElementType(SVT);
5871 }
5872
5873 // Otherwise use a scalar load and splat. This will give the best
5874 // opportunity to fold a splat into the operation. ISel can turn it into
5875 // the x0 strided load if we aren't able to fold away the select.
5876 if (SVT.isFloatingPoint())
5877 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5878 Ld->getPointerInfo().getWithOffset(Offset),
5879 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
5880 else
5881 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5882 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5883 Ld->getBaseAlign(),
5884 Ld->getMemOperand()->getFlags());
5885 DAG.makeEquivalentMemoryOrdering(Ld, V);
5886
5887 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5888 : RISCVISD::VMV_V_X_VL;
5889 SDValue Splat =
5890 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5891 Splat = DAG.getBitcast(ContainerVT, Splat);
5892 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5893 }
5894
5895 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5896 assert(Lane < (int)NumElts && "Unexpected lane!");
5897 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5898 V1, DAG.getConstant(Lane, DL, XLenVT),
5899 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5900 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5901 }
5902 }
5903
5904 // For exact VLEN m2 or greater, try to split to m1 operations if we
5905 // can split cleanly.
5906 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5907 return V;
5908
5909 ArrayRef<int> Mask = SVN->getMask();
5910
5911 if (SDValue V =
5912 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5913 return V;
5914
5915 if (SDValue V =
5916 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5917 return V;
5918
5919 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5920 // available.
5921 if (Subtarget.hasStdExtZvkb())
5922 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5923 return V;
5924
5925 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
5926 NumElts != 2)
5927 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5928
5929 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5930 // use shift and truncate to perform the shuffle.
5931 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5932 // shift-and-trunc reducing total cost for everything except an mf8 result.
5933 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5934 // to do the entire operation.
5935 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5936 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5937 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5938 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5939 unsigned Index = 0;
5940 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5941 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5942 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
5943 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5944 if (1 < count_if(Mask,
5945 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5946 1 < count_if(Mask, [&Mask](int Idx) {
5947 return Idx >= (int)Mask.size();
5948 })) {
5949 // Narrow each source and concatenate them.
5950 // FIXME: For small LMUL it is better to concatenate first.
5951 MVT EltVT = VT.getVectorElementType();
5952 auto EltCnt = VT.getVectorElementCount();
5953 MVT SubVT =
5954 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
5955
5956 SDValue Lo =
5957 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
5958 SDValue Hi =
5959 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
5960
5961 SDValue Concat =
5962 DAG.getNode(ISD::CONCAT_VECTORS, DL,
5963 SubVT.getDoubleNumVectorElementsVT(), Lo, Hi);
5964 if (Factor == 2)
5965 return Concat;
5966
5967 SDValue Vec = DAG.getUNDEF(VT);
5968 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
5969 }
5970 }
5971 }
5972 }
5973
5974 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
5975 // e64 which can't match above.
5976 unsigned Index = 0;
5977 if (Subtarget.hasVendorXRivosVizip() &&
5978 ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
5979 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5980 unsigned Opc =
5981 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
5982 if (V2.isUndef())
5983 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
5984 if (auto VLEN = Subtarget.getRealVLen();
5985 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
5986 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
5987 if (SDValue Src = foldConcatVector(V1, V2)) {
5988 EVT NewVT = VT.getDoubleNumVectorElementsVT();
5989 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
5990 SDValue Res =
5991 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
5992 return DAG.getExtractSubvector(DL, VT, Res, 0);
5993 }
5994 // Deinterleave each source and concatenate them, or concat first, then
5995 // deinterleave.
5996 if (1 < count_if(Mask,
5997 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5998 1 < count_if(Mask,
5999 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
6000
6001 const unsigned EltSize = VT.getScalarSizeInBits();
6002 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
6003 if (NumElts < MinVLMAX) {
6004 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
6005 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
6006 SDValue Res =
6007 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
6008 return DAG.getExtractSubvector(DL, VT, Res, 0);
6009 }
6010
6011 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6012 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6013
6014 MVT SubVT = VT.getHalfNumVectorElementsVT();
6015 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
6016 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
6017 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
6018 }
6019 }
6020
6021 if (SDValue V =
6022 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
6023 return V;
6024
6025 // Detect an interleave shuffle and lower to
6026 // (vwmaccu.vx (vwaddu.vv lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
6027 int EvenSrc, OddSrc;
6028 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
6029 !(NumElts == 2 &&
6030 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
6031 // Extract the halves of the vectors.
6032 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6033
6034 // Recognize if one half is actually undef; the matching above will
6035 // otherwise reuse the even stream for the undef one. This improves
6036 // spread(2) shuffles.
6037 bool LaneIsUndef[2] = { true, true};
6038 for (const auto &[Idx, M] : enumerate(Mask))
6039 LaneIsUndef[Idx % 2] &= (M == -1);
6040
6041 int Size = Mask.size();
6042 SDValue EvenV, OddV;
6043 if (LaneIsUndef[0]) {
6044 EvenV = DAG.getUNDEF(HalfVT);
6045 } else {
6046 assert(EvenSrc >= 0 && "Undef source?");
6047 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
6048 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
6049 }
6050
6051 if (LaneIsUndef[1]) {
6052 OddV = DAG.getUNDEF(HalfVT);
6053 } else {
6054 assert(OddSrc >= 0 && "Undef source?");
6055 OddV = (OddSrc / Size) == 0 ? V1 : V2;
6056 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
6057 }
6058
6059 // Prefer vzip2a if available.
6060 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
6061 if (Subtarget.hasVendorXRivosVizip()) {
6062 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6063 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6064 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6065 }
6066 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6067 }
6068
6069 // Recognize a pattern which can be handled via a pair of vslideup/vslidedown
6070 // instructions (in any combination) with masking on the second instruction.
6071 // Also handles masked slides into an identity source, and single slides
6072 // without masking. Avoid matching bit rotates (which are not also element
6073 // rotates) as slide pairs. This is a performance heuristic, not a
6074 // functional check.
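// For example, the v4i32 mask <2, 3, 4, 5> (an element rotate) becomes a
// vslidedown of V1 by 2 followed by a vslideup of V2 by 2 that is masked to
// write only the upper two lanes.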
6075 std::array<std::pair<int, int>, 2> SrcInfo;
6076 unsigned RotateAmt;
6077 MVT RotateVT;
6078 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6079 (isElementRotate(SrcInfo, NumElts) ||
6080 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6081 SDValue Sources[2];
6082 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6083 int SrcIdx = Info.first;
6084 assert(SrcIdx == 0 || SrcIdx == 1);
6085 SDValue &Src = Sources[SrcIdx];
6086 if (!Src) {
6087 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6088 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6089 }
6090 return Src;
6091 };
6092 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6093 SDValue Passthru) {
6094 auto [TrueMask, VL] = TrueMaskVL;
6095 SDValue SrcV = GetSourceFor(Src);
6096 int SlideAmt = Src.second;
6097 if (SlideAmt == 0) {
6098 // Should never be second operation
6099 assert(Mask == TrueMask);
6100 return SrcV;
6101 }
6102 if (SlideAmt < 0)
6103 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6104 DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6105 RISCVVType::TAIL_AGNOSTIC);
6106 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6107 DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6108 RISCVVType::TAIL_AGNOSTIC);
6109 };
6110
6111 if (SrcInfo[1].first == -1) {
6112 SDValue Res = DAG.getUNDEF(ContainerVT);
6113 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6114 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6115 }
6116
6117 if (Subtarget.hasVendorXRivosVizip()) {
6118 bool TryWiden = false;
6119 unsigned Factor;
6120 if (isZipEven(SrcInfo, Mask, Factor)) {
6121 if (Factor == 1) {
6122 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6123 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6124 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6125 Subtarget);
6126 }
6127 TryWiden = true;
6128 }
6129 if (isZipOdd(SrcInfo, Mask, Factor)) {
6130 if (Factor == 1) {
6131 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6132 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6133 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6134 Subtarget);
6135 }
6136 TryWiden = true;
6137 }
6138 // If we found a widening opportunity which would let us form a
6139 // zipeven or zipodd, use the generic code to widen the shuffle
6140 // and recurse through this logic.
6141 if (TryWiden)
6142 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6143 return V;
6144 }
6145
6146 // Build the mask. Note that vslideup unconditionally preserves elements
6147 // below the slide amount in the destination, and thus those elements are
6148 // undefined in the mask. If the mask ends up all true (or undef), it
6149 // will be folded away by general logic.
6150 SmallVector<SDValue> MaskVals;
6151 for (const auto &[Idx, M] : enumerate(Mask)) {
6152 if (M < 0 ||
6153 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6154 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6155 continue;
6156 }
6157 int Src = M >= (int)NumElts;
6158 int Diff = (int)Idx - (M % NumElts);
6159 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6160 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6161 "Must match exactly one of the two slides");
6162 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6163 }
6164 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6165 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6166 SDValue SelectMask = convertToScalableVector(
6167 ContainerVT.changeVectorElementType(MVT::i1),
6168 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6169
6170 SDValue Res = DAG.getUNDEF(ContainerVT);
6171 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6172 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6173 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6174 }
6175
6176 // Handle any remaining single source shuffles
6177 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6178 if (V2.isUndef()) {
6179 // We might be able to express the shuffle as a bitrotate. But even if we
6180 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6181 // shifts and a vor will have a higher throughput than a vrgather.
6182 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6183 return V;
6184
6185 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6186 return V;
6187
6188 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6189 // is fully covered in interleave(2) above, so it is ignored here.
6190 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6191 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6192 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6193 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6194 unsigned Index;
6195 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6196 MVT NarrowVT =
6197 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6198 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6199 return getWideningSpread(Src, Factor, Index, DL, DAG);
6200 }
6201 }
6202 }
6203
6204 // If only a prefix of the source elements influences a prefix of the
6205 // destination elements, try to see if we can reduce the required LMUL.
6206 unsigned MinVLen = Subtarget.getRealMinVLen();
6207 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6208 if (NumElts > MinVLMAX) {
6209 unsigned MaxIdx = 0;
6210 for (auto [I, M] : enumerate(Mask)) {
6211 if (M == -1)
6212 continue;
6213 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6214 }
6215 unsigned NewNumElts =
6216 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6217 if (NewNumElts != NumElts) {
6218 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6219 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6220 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6221 Mask.take_front(NewNumElts));
6222 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6223 }
6224 }
6225
6226 // Before hitting generic lowering fallbacks, try to widen the mask
6227 // to a wider SEW.
6228 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6229 return V;
6230
6231 // Can we generate a vcompress instead of a vrgather? These scale better
6232 // at high LMUL, at the cost of not being able to fold a following select
6233 // into them. The mask constants are also smaller than the index vector
6234 // constants, and thus easier to materialize.
6235 if (isCompressMask(Mask)) {
6236 SmallVector<SDValue> MaskVals(NumElts,
6237 DAG.getConstant(false, DL, XLenVT));
6238 for (auto Idx : Mask) {
6239 if (Idx == -1)
6240 break;
6241 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6242 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6243 }
6244 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6245 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6246 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6247 DAG.getUNDEF(VT));
6248 }
6249
6250 if (VT.getScalarSizeInBits() == 8 &&
6251 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6252 // On such a vector we're unable to use i8 as the index type.
6253 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6254 // may involve vector splitting if we're already at LMUL=8, or our
6255 // user-supplied maximum fixed-length LMUL.
6256 return SDValue();
6257 }
6258
6259 // Base case for the two operand recursion below - handle the worst case
6260 // single source shuffle.
6261 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6262 MVT IndexVT = VT.changeTypeToInteger();
6263 // Since we can't introduce illegal index types at this stage, use i16 and
6264 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6265 // than XLenVT.
6266 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6267 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6268 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6269 }
6270
6271 // If the mask allows, we can do all the index computation in 16 bits. This
6272 // requires less work and less register pressure at high LMUL, and creates
6273 // smaller constants which may be cheaper to materialize.
6274 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6275 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6276 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6277 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6278 }
6279
6280 MVT IndexContainerVT =
6281 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6282
6283 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6284 SmallVector<SDValue> GatherIndicesLHS;
6285 for (int MaskIndex : Mask) {
6286 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6287 GatherIndicesLHS.push_back(IsLHSIndex
6288 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6289 : DAG.getUNDEF(XLenVT));
6290 }
6291 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6292 LHSIndices =
6293 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6294 // At m1 and less, there's no point trying any of the high LMUL splitting
6295 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6296 if (NumElts <= MinVLMAX) {
6297 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6298 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6299 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6300 }
6301
6302 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6303 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6304 auto [InnerTrueMask, InnerVL] =
6305 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6306 int N =
6307 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6308 assert(isPowerOf2_32(N) && N <= 8);
6309
6310 // If we have a locally repeating mask, then we can reuse the first
6311 // register in the index register group for all registers within the
6312 // source register group. TODO: This generalizes to m2, and m4.
6313 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6314 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6315 SDValue Gather = DAG.getUNDEF(ContainerVT);
6316 for (int i = 0; i < N; i++) {
6317 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6318 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6319 SDValue SubVec =
6320 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6321 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6322 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6323 }
6324 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6325 }
6326
6327 // If we have a shuffle which only uses the first register in our source
6328 // register group, and repeats the same index across all spans, we can
6329 // use a single vrgather (and possibly some register moves).
6330 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6331 // which we can do a linear number of shuffles to form an m1 which
6332 // contains all the output elements.
6333 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6334 isSpanSplatShuffle(Mask, MinVLMAX)) {
6335 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6336 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6337 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6338 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6339 SDValue Gather = DAG.getUNDEF(ContainerVT);
6340 for (int i = 0; i < N; i++)
6341 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6342 M1VT.getVectorMinNumElements() * i);
6343 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6344 }
6345
6346 // If we have a shuffle which only uses the first register in our
6347 // source register group, we can do a linear number of m1 vrgathers
6348 // reusing the same source register (but with different indices)
6349 // TODO: This can be generalized for m2 or m4, or for any shuffle
6350 // for which we can do a vslidedown followed by this expansion.
6351 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6352 SDValue SlideAmt =
6353 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6354 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6355 SDValue Gather = DAG.getUNDEF(ContainerVT);
6356 for (int i = 0; i < N; i++) {
6357 if (i != 0)
6358 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6359 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6360 SlideAmt, TrueMask, VL);
6361 SDValue SubIndex =
6362 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6363 SDValue SubVec =
6364 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6365 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6366 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6367 M1VT.getVectorMinNumElements() * i);
6368 }
6369 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6370 }
6371
6372 // Fallback to generic vrgather if we can't find anything better.
6373 // On many machines, this will be O(LMUL^2)
6374 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6375 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6376 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6377 }
6378
6379 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6380 // merged with a second vrgather.
6381 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6382
6383 // Now construct the mask that will be used by the blended vrgather operation.
6384 // Construct the appropriate indices into each vector.
6385 for (int MaskIndex : Mask) {
6386 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6387 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6388 ? MaskIndex : -1);
6389 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6390 }
6391
6392 // If the mask indices are disjoint between the two sources, we can lower it
6393 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6394 // operands may end up being lowered to something cheaper than a vrgather.vv.
6395 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6396 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6397 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6398 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6399 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6400 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6401 return V;
6402
6403 // Before hitting generic lowering fallbacks, try to widen the mask
6404 // to a wider SEW.
6405 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6406 return V;
6407
6408 // Try to pick a profitable operand order.
6409 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6410 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6411
6412 // Recursively invoke lowering for each operand if we had two
6413 // independent single source shuffles, and then combine the result via a
6414 // vselect. Note that the vselect will likely be folded back into the
6415 // second permute (vrgather, or other) by the post-isel combine.
6416 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6417 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6418
6419 SmallVector<SDValue> MaskVals;
6420 for (int MaskIndex : Mask) {
6421 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6422 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6423 }
6424
6425 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6426 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6427 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6428
6429 if (SwapOps)
6430 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6431 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6432}
6433
6434 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6435 // Only support legal VTs for other shuffles for now.
6436 if (!isTypeLegal(VT))
6437 return false;
6438
6439 // Support splats for any type. These should type legalize well.
6440 if (ShuffleVectorSDNode::isSplatMask(M))
6441 return true;
6442
6443 const unsigned NumElts = M.size();
6444 MVT SVT = VT.getSimpleVT();
6445
6446 // Not for i1 vectors.
6447 if (SVT.getScalarType() == MVT::i1)
6448 return false;
6449
6450 std::array<std::pair<int, int>, 2> SrcInfo;
6451 int Dummy1, Dummy2;
6452 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6453 (::isMaskedSlidePair(M, SrcInfo) &&
6454 isElementRotate(SrcInfo, NumElts)) ||
6455 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6456}
6457
6458// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
6459// the exponent.
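// Worked example on i32: for x = 72, x & -x = 8 = 2^3, whose f32 exponent
// field is 3 + 127 = 130, giving cttz = 130 - 127 = 3. For ctlz, 72 converts
// to an exponent field of 6 + 127 = 133, and (127 + 31) - 133 = 25 leading
// zeros.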
6460SDValue
6461RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
6462 SelectionDAG &DAG) const {
6463 MVT VT = Op.getSimpleValueType();
6464 unsigned EltSize = VT.getScalarSizeInBits();
6465 SDValue Src = Op.getOperand(0);
6466 SDLoc DL(Op);
6467 MVT ContainerVT = VT;
6468
6469 SDValue Mask, VL;
6470 if (Op->isVPOpcode()) {
6471 Mask = Op.getOperand(1);
6472 if (VT.isFixedLengthVector())
6473 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6474 Subtarget);
6475 VL = Op.getOperand(2);
6476 }
6477
6478 // We choose an FP type that can represent the value if possible. Otherwise, we
6479 // use a round-towards-zero conversion to get the correct exponent in the result.
6480 // TODO: Use f16 for i8 when possible?
6481 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
6482 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
6483 FloatEltVT = MVT::f32;
6484 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
6485
6486 // Legal types should have been checked in the RISCVTargetLowering
6487 // constructor.
6488 // TODO: Splitting may make sense in some cases.
6489 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
6490 "Expected legal float type!");
6491
6492 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
6493 // The trailing zero count is equal to log2 of this single bit value.
6494 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
6495 SDValue Neg = DAG.getNegative(Src, DL, VT);
6496 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
6497 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
6498 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
6499 Src, Mask, VL);
6500 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
6501 }
6502
6503 // We have a legal FP type, convert to it.
6504 SDValue FloatVal;
6505 if (FloatVT.bitsGT(VT)) {
6506 if (Op->isVPOpcode())
6507 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
6508 else
6509 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
6510 } else {
6511 // Use RTZ to avoid rounding influencing exponent of FloatVal.
6512 if (VT.isFixedLengthVector()) {
6513 ContainerVT = getContainerForFixedLengthVector(VT);
6514 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6515 }
6516 if (!Op->isVPOpcode())
6517 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6518 SDValue RTZRM =
6519 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
6520 MVT ContainerFloatVT =
6521 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
6522 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
6523 Src, Mask, RTZRM, VL);
6524 if (VT.isFixedLengthVector())
6525 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
6526 }
6527 // Bitcast to integer and shift the exponent to the LSB.
6528 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
6529 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
6530 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
6531
6532 SDValue Exp;
6533 // Restore back to original type. Truncation after SRL is to generate vnsrl.
6534 if (Op->isVPOpcode()) {
6535 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
6536 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
6537 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
6538 } else {
6539 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
6540 DAG.getConstant(ShiftAmt, DL, IntVT));
6541 if (IntVT.bitsLT(VT))
6542 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
6543 else if (IntVT.bitsGT(VT))
6544 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
6545 }
6546
6547 // The exponent contains log2 of the value in biased form.
6548 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
6549 // For trailing zeros, we just need to subtract the bias.
6550 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
6551 return DAG.getNode(ISD::SUB, DL, VT, Exp,
6552 DAG.getConstant(ExponentBias, DL, VT));
6553 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
6554 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
6555 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
6556
6557 // For leading zeros, we need to remove the bias and convert from log2 to
6558 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
6559 unsigned Adjust = ExponentBias + (EltSize - 1);
6560 SDValue Res;
6561 if (Op->isVPOpcode())
6562 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
6563 Mask, VL);
6564 else
6565 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
6566
6567 // The above result with a zero input equals Adjust, which is greater than
6568 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
6569 if (Op.getOpcode() == ISD::CTLZ)
6570 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
6571 else if (Op.getOpcode() == ISD::VP_CTLZ)
6572 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
6573 DAG.getConstant(EltSize, DL, VT), Mask, VL);
6574 return Res;
6575}
6576
6577SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6578 SelectionDAG &DAG) const {
6579 SDLoc DL(Op);
6580 MVT XLenVT = Subtarget.getXLenVT();
6581 SDValue Source = Op->getOperand(0);
6582 MVT SrcVT = Source.getSimpleValueType();
6583 SDValue Mask = Op->getOperand(1);
6584 SDValue EVL = Op->getOperand(2);
6585
6586 if (SrcVT.isFixedLengthVector()) {
6587 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
6588 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
6589 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6590 Subtarget);
6591 SrcVT = ContainerVT;
6592 }
6593
6594 // Convert to boolean vector.
6595 if (SrcVT.getScalarType() != MVT::i1) {
6596 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
6597 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
6598 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
6599 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
6600 DAG.getUNDEF(SrcVT), Mask, EVL});
6601 }
6602
6603 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
6604 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6605 // In this case, we can interpret poison as -1, so there is nothing further to do.
6606 return Res;
6607
6608 // Convert -1 to VL.
6609 SDValue SetCC =
6610 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
6611 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
6612 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
6613}
6614
6615// While RVV has alignment restrictions, we should always be able to load as a
6616// legal equivalently-sized byte-typed vector instead. This method is
6617 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
6618 // the load is already correctly aligned, it returns SDValue().
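// For example, a 1-byte-aligned load of <vscale x 2 x i32> can instead be
// emitted as a <vscale x 8 x i8> load followed by a bitcast back to the
// original type.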
6619SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6620 SelectionDAG &DAG) const {
6621 auto *Load = cast<LoadSDNode>(Op);
6622 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6623
6624 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6625 Load->getMemoryVT(),
6626 *Load->getMemOperand()))
6627 return SDValue();
6628
6629 SDLoc DL(Op);
6630 MVT VT = Op.getSimpleValueType();
6631 unsigned EltSizeBits = VT.getScalarSizeInBits();
6632 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6633 "Unexpected unaligned RVV load type");
6634 MVT NewVT =
6635 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6636 assert(NewVT.isValid() &&
6637 "Expecting equally-sized RVV vector types to be legal");
6638 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6639 Load->getPointerInfo(), Load->getBaseAlign(),
6640 Load->getMemOperand()->getFlags());
6641 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6642}
6643
6644// While RVV has alignment restrictions, we should always be able to store as a
6645// legal equivalently-sized byte-typed vector instead. This method is
6646 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
6647// returns SDValue() if the store is already correctly aligned.
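// For example, an under-aligned <vscale x 4 x i16> store becomes a bitcast to
// <vscale x 8 x i8> followed by a byte-vector store.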
6648SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6649 SelectionDAG &DAG) const {
6650 auto *Store = cast<StoreSDNode>(Op);
6651 assert(Store && Store->getValue().getValueType().isVector() &&
6652 "Expected vector store");
6653
6654 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6655 Store->getMemoryVT(),
6656 *Store->getMemOperand()))
6657 return SDValue();
6658
6659 SDLoc DL(Op);
6660 SDValue StoredVal = Store->getValue();
6661 MVT VT = StoredVal.getSimpleValueType();
6662 unsigned EltSizeBits = VT.getScalarSizeInBits();
6663 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6664 "Unexpected unaligned RVV store type");
6665 MVT NewVT =
6666 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6667 assert(NewVT.isValid() &&
6668 "Expecting equally-sized RVV vector types to be legal");
6669 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6670 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
6671 Store->getPointerInfo(), Store->getBaseAlign(),
6672 Store->getMemOperand()->getFlags());
6673}
6674
6675 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
6676 const RISCVSubtarget &Subtarget) {
6677 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
6678
6679 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
6680
6681 // All simm32 constants should be handled by isel.
6682 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
6683 // this check redundant, but small immediates are common so this check
6684 // should have better compile time.
6685 if (isInt<32>(Imm))
6686 return Op;
6687
6688 // We only need to cost the immediate if constant pool lowering is enabled.
6689 if (!Subtarget.useConstantPoolForLargeInts())
6690 return Op;
6691
6692 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
6693 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6694 return Op;
6695
6696 // Optimizations below are disabled for opt size. If we're optimizing for
6697 // size, use a constant pool.
6698 if (DAG.shouldOptForSize())
6699 return SDValue();
6700
6701 // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
6702 // do that if it will avoid a constant pool. It will require an extra
6703 // temporary register though.
6704 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6705 // the low and high 32 bits are the same and bits 31 and 63 are set.
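// Illustrative example: 0x1234567812345678 matches this pattern with
// X = 0x12345678 (LUI+ADDI) and (ADD (SLLI X, 32), X); whether that is
// preferred over a constant pool load is decided by the cost check below.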
6706 unsigned ShiftAmt, AddOpc;
6707 RISCVMatInt::InstSeq SeqLo =
6708 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6709 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6710 return Op;
6711
6712 return SDValue();
6713}
6714
6715SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6716 SelectionDAG &DAG) const {
6717 MVT VT = Op.getSimpleValueType();
6718 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6719
6720 // Can this constant be selected by a Zfa FLI instruction?
6721 bool Negate = false;
6722 int Index = getLegalZfaFPImm(Imm, VT);
6723
6724 // If the constant is negative, try negating.
6725 if (Index < 0 && Imm.isNegative()) {
6726 Index = getLegalZfaFPImm(-Imm, VT);
6727 Negate = true;
6728 }
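// For example, -0.5 is not itself an FLI immediate, but 0.5 is, so it can be
// emitted as FLI 0.5 followed by an FNEG.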
6729
6730 // If we couldn't find a FLI lowering, fall back to generic code.
6731 if (Index < 0)
6732 return SDValue();
6733
6734 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6735 SDLoc DL(Op);
6736 SDValue Const =
6737 DAG.getNode(RISCVISD::FLI, DL, VT,
6738 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6739 if (!Negate)
6740 return Const;
6741
6742 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6743}
6744
6745 static SDValue LowerPREFETCH(SDValue Op, const RISCVSubtarget &Subtarget,
6746 SelectionDAG &DAG) {
6747
6748 unsigned IsData = Op.getConstantOperandVal(4);
6749
6750 // The mips-p8700 (XMipsCbop) only supports data prefetch for now; drop instruction prefetches.
6751 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
6752 return Op.getOperand(0);
6753 return Op;
6754}
6755
6756 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6757 const RISCVSubtarget &Subtarget) {
6758 SDLoc dl(Op);
6759 AtomicOrdering FenceOrdering =
6760 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6761 SyncScope::ID FenceSSID =
6762 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6763
6764 if (Subtarget.hasStdExtZtso()) {
6765 // The only fence that needs an instruction is a sequentially-consistent
6766 // cross-thread fence.
6767 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6768 FenceSSID == SyncScope::System)
6769 return Op;
6770
6771 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6772 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6773 }
6774
6775 // singlethread fences only synchronize with signal handlers on the same
6776 // thread and thus only need to preserve instruction order, not actually
6777 // enforce memory ordering.
6778 if (FenceSSID == SyncScope::SingleThread)
6779 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6780 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6781
6782 return Op;
6783}
6784
6785SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6786 SelectionDAG &DAG) const {
6787 SDLoc DL(Op);
6788 MVT VT = Op.getSimpleValueType();
6789 MVT XLenVT = Subtarget.getXLenVT();
6790 unsigned Check = Op.getConstantOperandVal(1);
6791 unsigned TDCMask = 0;
6792 if (Check & fcSNan)
6793 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6794 if (Check & fcQNan)
6795 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6796 if (Check & fcPosInf)
6797 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6798 if (Check & fcNegInf)
6799 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6800 if (Check & fcPosNormal)
6801 TDCMask |= RISCV::FPMASK_Positive_Normal;
6802 if (Check & fcNegNormal)
6803 TDCMask |= RISCV::FPMASK_Negative_Normal;
6804 if (Check & fcPosSubnormal)
6805 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6806 if (Check & fcNegSubnormal)
6807 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6808 if (Check & fcPosZero)
6809 TDCMask |= RISCV::FPMASK_Positive_Zero;
6810 if (Check & fcNegZero)
6811 TDCMask |= RISCV::FPMASK_Negative_Zero;
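// For example, a query for fcNan (fcSNan | fcQNan) yields
// TDCMask = FPMASK_Signaling_NaN | FPMASK_Quiet_NaN, matching the two NaN
// bits reported by the fclass instruction.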
6812
6813 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6814
6815 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6816
6817 if (VT.isVector()) {
6818 SDValue Op0 = Op.getOperand(0);
6819 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6820
6821 if (VT.isScalableVector()) {
6822 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6823 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6824 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6825 Mask = Op.getOperand(2);
6826 VL = Op.getOperand(3);
6827 }
6828 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6829 VL, Op->getFlags());
6830 if (IsOneBitMask)
6831 return DAG.getSetCC(DL, VT, FPCLASS,
6832 DAG.getConstant(TDCMask, DL, DstVT),
6833 ISD::SETEQ);
6834 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6835 DAG.getConstant(TDCMask, DL, DstVT));
6836 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6837 ISD::SETNE);
6838 }
6839
6840 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6841 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6842 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6843 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6844 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6845 Mask = Op.getOperand(2);
6846 MVT MaskContainerVT =
6847 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6848 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6849 VL = Op.getOperand(3);
6850 }
6851 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6852
6853 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6854 Mask, VL, Op->getFlags());
6855
6856 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6857 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6858 if (IsOneBitMask) {
6859 SDValue VMSEQ =
6860 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6861 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6862 DAG.getUNDEF(ContainerVT), Mask, VL});
6863 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6864 }
6865 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6866 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6867
6868 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6869 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6870 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6871
6872 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6873 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6874 DAG.getUNDEF(ContainerVT), Mask, VL});
6875 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6876 }
6877
6878 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6879 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6880 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6881 ISD::SETNE);
6882 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6883}
6884
6885// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6886// operations propagate nans.
6887 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6888 const RISCVSubtarget &Subtarget) {
6889 SDLoc DL(Op);
6890 MVT VT = Op.getSimpleValueType();
6891
6892 SDValue X = Op.getOperand(0);
6893 SDValue Y = Op.getOperand(1);
6894
6895 if (!VT.isVector()) {
6896 MVT XLenVT = Subtarget.getXLenVT();
6897
6898 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6899 // ensures that when one input is a nan, the other will also be a nan
6900 // allowing the nan to propagate. If both inputs are nan, this will swap the
6901 // inputs which is harmless.
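// For example, for fmaximum(NaN, 1.0): X is a NaN, so NewY becomes X (NaN),
// and the underlying fmax then sees two NaN operands and returns a NaN, as
// fmaximum requires.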
6902
6903 SDValue NewY = Y;
6904 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6905 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6906 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6907 }
6908
6909 SDValue NewX = X;
6910 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6911 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6912 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6913 }
6914
6915 unsigned Opc =
6916 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6917 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6918 }
6919
6920 // Check for no-NaN inputs before converting the fixed-length vectors to scalable.
6921 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6922 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6923
6924 MVT ContainerVT = VT;
6925 if (VT.isFixedLengthVector()) {
6926 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6927 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6928 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6929 }
6930
6931 SDValue Mask, VL;
6932 if (Op->isVPOpcode()) {
6933 Mask = Op.getOperand(2);
6934 if (VT.isFixedLengthVector())
6935 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6936 Subtarget);
6937 VL = Op.getOperand(3);
6938 } else {
6939 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6940 }
6941
6942 SDValue NewY = Y;
6943 if (!XIsNeverNan) {
6944 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6945 {X, X, DAG.getCondCode(ISD::SETOEQ),
6946 DAG.getUNDEF(ContainerVT), Mask, VL});
6947 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6948 DAG.getUNDEF(ContainerVT), VL);
6949 }
6950
6951 SDValue NewX = X;
6952 if (!YIsNeverNan) {
6953 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6954 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6955 DAG.getUNDEF(ContainerVT), Mask, VL});
6956 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6957 DAG.getUNDEF(ContainerVT), VL);
6958 }
6959
6960 unsigned Opc =
6961 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6962 ? RISCVISD::VFMAX_VL
6963 : RISCVISD::VFMIN_VL;
6964 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6965 DAG.getUNDEF(ContainerVT), Mask, VL);
6966 if (VT.isFixedLengthVector())
6967 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6968 return Res;
6969}
6970
6971 static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6972 const RISCVSubtarget &Subtarget) {
6973 bool IsFABS = Op.getOpcode() == ISD::FABS;
6974 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6975 "Wrong opcode for lowering FABS or FNEG.");
6976
6977 MVT XLenVT = Subtarget.getXLenVT();
6978 MVT VT = Op.getSimpleValueType();
6979 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6980
6981 SDLoc DL(Op);
6982 SDValue Fmv =
6983 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6984
6985 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6986 Mask = Mask.sext(Subtarget.getXLen());
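// For the 16-bit payload this is 0x7fff for FABS (clear the sign bit via AND)
// and 0x8000, sign-extended to XLEN, for FNEG (flip the sign bit via XOR).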
6987
6988 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6989 SDValue Logic =
6990 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6991 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6992}
6993
6994 static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
6995 const RISCVSubtarget &Subtarget) {
6996 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6997
6998 MVT XLenVT = Subtarget.getXLenVT();
6999 MVT VT = Op.getSimpleValueType();
7000 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7001
7002 SDValue Mag = Op.getOperand(0);
7003 SDValue Sign = Op.getOperand(1);
7004
7005 SDLoc DL(Op);
7006
7007 // Get sign bit into an integer value.
7008 unsigned SignSize = Sign.getValueSizeInBits();
7009 SDValue SignAsInt = [&]() {
7010 if (SignSize == Subtarget.getXLen())
7011 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
7012 switch (SignSize) {
7013 case 16:
7014 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
7015 case 32:
7016 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
7017 case 64: {
7018 assert(XLenVT == MVT::i32 && "Unexpected type");
7019 // Copy the upper word to integer.
7020 SignSize = 32;
7021 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
7022 .getValue(1);
7023 }
7024 default:
7025 llvm_unreachable("Unexpected sign size");
7026 }
7027 }();
7028
7029 // Get the signbit at the right position for MagAsInt.
7030 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
7031 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
7032 SignAsInt,
7033 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
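// For example, when the sign comes from an f32 (SignSize 32) and Mag is f16,
// the sign word is shifted right by 16 so the sign ends up in bit 15.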
7034
7035 // Mask the sign bit and any bits above it. The extra bits will be dropped
7036 // when we convert back to FP.
7037 SDValue SignMask = DAG.getConstant(
7038 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
7039 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
7040
7041 // Transform Mag value to integer, and clear the sign bit.
7042 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
7043 SDValue ClearSignMask = DAG.getConstant(
7044 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
7045 SDValue ClearedSign =
7046 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
7047
7048 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
7049 SDNodeFlags::Disjoint);
7050
7051 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
7052}
7053
7054/// Get a RISC-V target specified VL op for a given SDNode.
7055static unsigned getRISCVVLOp(SDValue Op) {
7056#define OP_CASE(NODE) \
7057 case ISD::NODE: \
7058 return RISCVISD::NODE##_VL;
7059#define VP_CASE(NODE) \
7060 case ISD::VP_##NODE: \
7061 return RISCVISD::NODE##_VL;
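// For example, OP_CASE(ADD) expands to 'case ISD::ADD: return RISCVISD::ADD_VL;'
// and VP_CASE(ADD) to 'case ISD::VP_ADD: return RISCVISD::ADD_VL;'.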
7062 // clang-format off
7063 switch (Op.getOpcode()) {
7064 default:
7065 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7066 OP_CASE(ADD)
7067 OP_CASE(SUB)
7068 OP_CASE(MUL)
7069 OP_CASE(MULHS)
7070 OP_CASE(MULHU)
7071 OP_CASE(SDIV)
7072 OP_CASE(SREM)
7073 OP_CASE(UDIV)
7074 OP_CASE(UREM)
7075 OP_CASE(SHL)
7076 OP_CASE(SRA)
7077 OP_CASE(SRL)
7078 OP_CASE(ROTL)
7079 OP_CASE(ROTR)
7080 OP_CASE(BSWAP)
7081 OP_CASE(CTTZ)
7082 OP_CASE(CTLZ)
7083 OP_CASE(CTPOP)
7084 OP_CASE(BITREVERSE)
7085 OP_CASE(SADDSAT)
7086 OP_CASE(UADDSAT)
7087 OP_CASE(SSUBSAT)
7088 OP_CASE(USUBSAT)
7089 OP_CASE(AVGFLOORS)
7090 OP_CASE(AVGFLOORU)
7091 OP_CASE(AVGCEILS)
7092 OP_CASE(AVGCEILU)
7093 OP_CASE(FADD)
7094 OP_CASE(FSUB)
7095 OP_CASE(FMUL)
7096 OP_CASE(FDIV)
7097 OP_CASE(FNEG)
7098 OP_CASE(FABS)
7099 OP_CASE(FCOPYSIGN)
7100 OP_CASE(FSQRT)
7101 OP_CASE(SMIN)
7102 OP_CASE(SMAX)
7103 OP_CASE(UMIN)
7104 OP_CASE(UMAX)
7105 OP_CASE(STRICT_FADD)
7106 OP_CASE(STRICT_FSUB)
7107 OP_CASE(STRICT_FMUL)
7108 OP_CASE(STRICT_FDIV)
7109 OP_CASE(STRICT_FSQRT)
7110 VP_CASE(ADD) // VP_ADD
7111 VP_CASE(SUB) // VP_SUB
7112 VP_CASE(MUL) // VP_MUL
7113 VP_CASE(SDIV) // VP_SDIV
7114 VP_CASE(SREM) // VP_SREM
7115 VP_CASE(UDIV) // VP_UDIV
7116 VP_CASE(UREM) // VP_UREM
7117 VP_CASE(SHL) // VP_SHL
7118 VP_CASE(FADD) // VP_FADD
7119 VP_CASE(FSUB) // VP_FSUB
7120 VP_CASE(FMUL) // VP_FMUL
7121 VP_CASE(FDIV) // VP_FDIV
7122 VP_CASE(FNEG) // VP_FNEG
7123 VP_CASE(FABS) // VP_FABS
7124 VP_CASE(SMIN) // VP_SMIN
7125 VP_CASE(SMAX) // VP_SMAX
7126 VP_CASE(UMIN) // VP_UMIN
7127 VP_CASE(UMAX) // VP_UMAX
7128 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7129 VP_CASE(SETCC) // VP_SETCC
7130 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7131 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7132 VP_CASE(BITREVERSE) // VP_BITREVERSE
7133 VP_CASE(SADDSAT) // VP_SADDSAT
7134 VP_CASE(UADDSAT) // VP_UADDSAT
7135 VP_CASE(SSUBSAT) // VP_SSUBSAT
7136 VP_CASE(USUBSAT) // VP_USUBSAT
7137 VP_CASE(BSWAP) // VP_BSWAP
7138 VP_CASE(CTLZ) // VP_CTLZ
7139 VP_CASE(CTTZ) // VP_CTTZ
7140 VP_CASE(CTPOP) // VP_CTPOP
7141 case ISD::CTLZ_ZERO_UNDEF:
7142 case ISD::VP_CTLZ_ZERO_UNDEF:
7143 return RISCVISD::CTLZ_VL;
7144 case ISD::CTTZ_ZERO_UNDEF:
7145 case ISD::VP_CTTZ_ZERO_UNDEF:
7146 return RISCVISD::CTTZ_VL;
7147 case ISD::FMA:
7148 case ISD::VP_FMA:
7149 return RISCVISD::VFMADD_VL;
7150 case ISD::STRICT_FMA:
7151 return RISCVISD::STRICT_VFMADD_VL;
7152 case ISD::AND:
7153 case ISD::VP_AND:
7154 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7155 return RISCVISD::VMAND_VL;
7156 return RISCVISD::AND_VL;
7157 case ISD::OR:
7158 case ISD::VP_OR:
7159 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7160 return RISCVISD::VMOR_VL;
7161 return RISCVISD::OR_VL;
7162 case ISD::XOR:
7163 case ISD::VP_XOR:
7164 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7165 return RISCVISD::VMXOR_VL;
7166 return RISCVISD::XOR_VL;
7167 case ISD::ANY_EXTEND:
7168 case ISD::ZERO_EXTEND:
7169 return RISCVISD::VZEXT_VL;
7170 case ISD::SIGN_EXTEND:
7171 return RISCVISD::VSEXT_VL;
7172 case ISD::SETCC:
7173 return RISCVISD::SETCC_VL;
7174 case ISD::VSELECT:
7175 return RISCVISD::VMERGE_VL;
7176 case ISD::VP_SELECT:
7177 case ISD::VP_MERGE:
7178 return RISCVISD::VMERGE_VL;
7179 case ISD::VP_SRA:
7180 return RISCVISD::SRA_VL;
7181 case ISD::VP_SRL:
7182 return RISCVISD::SRL_VL;
7183 case ISD::VP_SQRT:
7184 return RISCVISD::FSQRT_VL;
7185 case ISD::VP_SIGN_EXTEND:
7186 return RISCVISD::VSEXT_VL;
7187 case ISD::VP_ZERO_EXTEND:
7188 return RISCVISD::VZEXT_VL;
7189 case ISD::VP_FP_TO_SINT:
7190 return RISCVISD::VFCVT_RTZ_X_F_VL;
7191 case ISD::VP_FP_TO_UINT:
7192 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7193 case ISD::FMINNUM:
7194 case ISD::FMINIMUMNUM:
7195 case ISD::VP_FMINNUM:
7196 return RISCVISD::VFMIN_VL;
7197 case ISD::FMAXNUM:
7198 case ISD::FMAXIMUMNUM:
7199 case ISD::VP_FMAXNUM:
7200 return RISCVISD::VFMAX_VL;
7201 case ISD::LRINT:
7202 case ISD::VP_LRINT:
7203 case ISD::LLRINT:
7204 case ISD::VP_LLRINT:
7205 return RISCVISD::VFCVT_RM_X_F_VL;
7206 }
7207 // clang-format on
7208#undef OP_CASE
7209#undef VP_CASE
7210}
7211
7212 static bool isPromotedOpNeedingSplit(SDValue Op,
7213 const RISCVSubtarget &Subtarget) {
7214 return (Op.getValueType() == MVT::nxv32f16 &&
7215 (Subtarget.hasVInstructionsF16Minimal() &&
7216 !Subtarget.hasVInstructionsF16())) ||
7217 Op.getValueType() == MVT::nxv32bf16;
7218}
7219
7220 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
7221 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7222 SDLoc DL(Op);
7223
7224 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7225 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7226
7227 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7228 if (!Op.getOperand(j).getValueType().isVector()) {
7229 LoOperands[j] = Op.getOperand(j);
7230 HiOperands[j] = Op.getOperand(j);
7231 continue;
7232 }
7233 std::tie(LoOperands[j], HiOperands[j]) =
7234 DAG.SplitVector(Op.getOperand(j), DL);
7235 }
7236
7237 SDValue LoRes =
7238 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7239 SDValue HiRes =
7240 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7241
7242 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7243}
7244
7245 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
7246 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7247 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7248 SDLoc DL(Op);
7249
7250 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7251 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7252
7253 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7254 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7255 std::tie(LoOperands[j], HiOperands[j]) =
7256 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7257 continue;
7258 }
7259 if (!Op.getOperand(j).getValueType().isVector()) {
7260 LoOperands[j] = Op.getOperand(j);
7261 HiOperands[j] = Op.getOperand(j);
7262 continue;
7263 }
7264 std::tie(LoOperands[j], HiOperands[j]) =
7265 DAG.SplitVector(Op.getOperand(j), DL);
7266 }
7267
7268 SDValue LoRes =
7269 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7270 SDValue HiRes =
7271 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7272
7273 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7274}
7275
7276 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
7277 SDLoc DL(Op);
7278
7279 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7280 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7281 auto [EVLLo, EVLHi] =
7282 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7283
7284 SDValue ResLo =
7285 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7286 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
7287 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7288 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7289}
7290
7291 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
7292
7293 assert(Op->isStrictFPOpcode());
7294
7295 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7296
7297 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7298 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7299
7300 SDLoc DL(Op);
7301
7302 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7303 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7304
7305 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7306 if (!Op.getOperand(j).getValueType().isVector()) {
7307 LoOperands[j] = Op.getOperand(j);
7308 HiOperands[j] = Op.getOperand(j);
7309 continue;
7310 }
7311 std::tie(LoOperands[j], HiOperands[j]) =
7312 DAG.SplitVector(Op.getOperand(j), DL);
7313 }
7314
7315 SDValue LoRes =
7316 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
7317 HiOperands[0] = LoRes.getValue(1);
7318 SDValue HiRes =
7319 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7320
7321 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7322 LoRes.getValue(0), HiRes.getValue(0));
7323 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7324}
7325
7326SDValue
7327RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7328 SelectionDAG &DAG) const {
7329 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7330 "Unexpected bfloat16 load lowering");
7331
7332 SDLoc DL(Op);
7333 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7334 EVT MemVT = LD->getMemoryVT();
7335 SDValue Load = DAG.getExtLoad(
7336 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7337 LD->getBasePtr(),
7338 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
7339 LD->getMemOperand());
7340 // OR in a mask so the bf16 value is NaN-boxed when we don't have the flh
7341 // instruction. -65536 (0xffff0000) has all-zero low 12 bits, so it can be
7342 // materialized directly with a single lui.
7343 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7344 SDValue OrSixteenOne =
7345 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7346 SDValue ConvertedResult =
7347 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7348 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7349}
7350
7351SDValue
7352RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7353 SelectionDAG &DAG) const {
7354 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7355 "Unexpected bfloat16 store lowering");
7356
7357 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7358 SDLoc DL(Op);
7359 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7360 Subtarget.getXLenVT(), ST->getValue());
7361 return DAG.getTruncStore(
7362 ST->getChain(), DL, FMV, ST->getBasePtr(),
7363 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7364 ST->getMemOperand());
7365}
7366
7367 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
7368 SelectionDAG &DAG) const {
7369 switch (Op.getOpcode()) {
7370 default:
7371 reportFatalInternalError(
7372 "Unimplemented RISCVTargetLowering::LowerOperation Case");
7373 case ISD::PREFETCH:
7374 return LowerPREFETCH(Op, Subtarget, DAG);
7375 case ISD::ATOMIC_FENCE:
7376 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7377 case ISD::GlobalAddress:
7378 return lowerGlobalAddress(Op, DAG);
7379 case ISD::BlockAddress:
7380 return lowerBlockAddress(Op, DAG);
7381 case ISD::ConstantPool:
7382 return lowerConstantPool(Op, DAG);
7383 case ISD::JumpTable:
7384 return lowerJumpTable(Op, DAG);
7385 case ISD::GlobalTLSAddress:
7386 return lowerGlobalTLSAddress(Op, DAG);
7387 case ISD::Constant:
7388 return lowerConstant(Op, DAG, Subtarget);
7389 case ISD::ConstantFP:
7390 return lowerConstantFP(Op, DAG);
7391 case ISD::SELECT:
7392 return lowerSELECT(Op, DAG);
7393 case ISD::BRCOND:
7394 return lowerBRCOND(Op, DAG);
7395 case ISD::VASTART:
7396 return lowerVASTART(Op, DAG);
7397 case ISD::FRAMEADDR:
7398 return lowerFRAMEADDR(Op, DAG);
7399 case ISD::RETURNADDR:
7400 return lowerRETURNADDR(Op, DAG);
7401 case ISD::SHL_PARTS:
7402 return lowerShiftLeftParts(Op, DAG);
7403 case ISD::SRA_PARTS:
7404 return lowerShiftRightParts(Op, DAG, true);
7405 case ISD::SRL_PARTS:
7406 return lowerShiftRightParts(Op, DAG, false);
7407 case ISD::ROTL:
7408 case ISD::ROTR:
7409 if (Op.getValueType().isFixedLengthVector()) {
7410 assert(Subtarget.hasStdExtZvkb());
7411 return lowerToScalableOp(Op, DAG);
7412 }
7413 assert(Subtarget.hasVendorXTHeadBb() &&
7414 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7415 "Unexpected custom legalization");
7416 // XTHeadBb only supports rotate by constant.
7417 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7418 return SDValue();
7419 return Op;
7420 case ISD::BITCAST: {
7421 SDLoc DL(Op);
7422 EVT VT = Op.getValueType();
7423 SDValue Op0 = Op.getOperand(0);
7424 EVT Op0VT = Op0.getValueType();
7425 MVT XLenVT = Subtarget.getXLenVT();
7426 if (Op0VT == MVT::i16 &&
7427 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7428 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7429 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7430 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7431 }
7432 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7433 Subtarget.hasStdExtFOrZfinx()) {
7434 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7435 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7436 }
7437 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7438 Subtarget.hasStdExtDOrZdinx()) {
7439 SDValue Lo, Hi;
7440 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7441 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7442 }
7443
7444 // Consider other scalar<->scalar casts as legal if the types are legal.
7445 // Otherwise expand them.
7446 if (!VT.isVector() && !Op0VT.isVector()) {
7447 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7448 return Op;
7449 return SDValue();
7450 }
7451
7452 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7453 "Unexpected types");
7454
7455 if (VT.isFixedLengthVector()) {
7456 // We can handle fixed length vector bitcasts with a simple replacement
7457 // in isel.
7458 if (Op0VT.isFixedLengthVector())
7459 return Op;
7460 // When bitcasting from scalar to fixed-length vector, insert the scalar
7461 // into a one-element vector of the result type, and perform a vector
7462 // bitcast.
7463 if (!Op0VT.isVector()) {
7464 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7465 if (!isTypeLegal(BVT))
7466 return SDValue();
7467 return DAG.getBitcast(
7468 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7469 }
7470 return SDValue();
7471 }
7472 // Custom-legalize bitcasts from fixed-length vector types to scalar types
7473 // thus: bitcast the vector to a one-element vector type whose element type
7474 // is the same as the result type, and extract the first element.
7475 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
7476 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
7477 if (!isTypeLegal(BVT))
7478 return SDValue();
7479 SDValue BVec = DAG.getBitcast(BVT, Op0);
7480 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
7481 }
7482 return SDValue();
7483 }
7484 case ISD::INTRINSIC_WO_CHAIN:
7485 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7486 case ISD::INTRINSIC_W_CHAIN:
7487 return LowerINTRINSIC_W_CHAIN(Op, DAG);
7488 case ISD::INTRINSIC_VOID:
7489 return LowerINTRINSIC_VOID(Op, DAG);
7490 case ISD::IS_FPCLASS:
7491 return LowerIS_FPCLASS(Op, DAG);
7492 case ISD::BITREVERSE: {
7493 MVT VT = Op.getSimpleValueType();
7494 if (VT.isFixedLengthVector()) {
7495 assert(Subtarget.hasStdExtZvbb());
7496 return lowerToScalableOp(Op, DAG);
7497 }
7498 SDLoc DL(Op);
7499 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
7500 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
7501 // Expand bitreverse to a bswap(rev8) followed by brev8.
7502 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
7503 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
7504 }
7505 case ISD::TRUNCATE:
7506 case ISD::TRUNCATE_SSAT_S:
7507 case ISD::TRUNCATE_USAT_U:
7508 // Only custom-lower vector truncates
7509 if (!Op.getSimpleValueType().isVector())
7510 return Op;
7511 return lowerVectorTruncLike(Op, DAG);
7512 case ISD::ANY_EXTEND:
7513 case ISD::ZERO_EXTEND:
7514 if (Op.getOperand(0).getValueType().isVector() &&
7515 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7516 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
7517 if (Op.getValueType().isScalableVector())
7518 return Op;
7519 return lowerToScalableOp(Op, DAG);
7520 case ISD::SIGN_EXTEND:
7521 if (Op.getOperand(0).getValueType().isVector() &&
7522 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7523 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
7524 if (Op.getValueType().isScalableVector())
7525 return Op;
7526 return lowerToScalableOp(Op, DAG);
7527 case ISD::SPLAT_VECTOR_PARTS:
7528 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
7529 case ISD::INSERT_VECTOR_ELT:
7530 return lowerINSERT_VECTOR_ELT(Op, DAG);
7531 case ISD::EXTRACT_VECTOR_ELT:
7532 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7533 case ISD::SCALAR_TO_VECTOR: {
7534 MVT VT = Op.getSimpleValueType();
7535 SDLoc DL(Op);
7536 SDValue Scalar = Op.getOperand(0);
7537 if (VT.getVectorElementType() == MVT::i1) {
7538 MVT WideVT = VT.changeVectorElementType(MVT::i8);
7539 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
7540 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
7541 }
7542 MVT ContainerVT = VT;
7543 if (VT.isFixedLengthVector())
7544 ContainerVT = getContainerForFixedLengthVector(VT);
7545 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7546
7547 SDValue V;
7548 if (VT.isFloatingPoint()) {
7549 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
7550 DAG.getUNDEF(ContainerVT), Scalar, VL);
7551 } else {
7552 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
7553 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
7554 DAG.getUNDEF(ContainerVT), Scalar, VL);
7555 }
7556 if (VT.isFixedLengthVector())
7557 V = convertFromScalableVector(VT, V, DAG, Subtarget);
7558 return V;
7559 }
7560 case ISD::VSCALE: {
7561 MVT XLenVT = Subtarget.getXLenVT();
7562 MVT VT = Op.getSimpleValueType();
7563 SDLoc DL(Op);
7564 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7565 // We define our scalable vector types for lmul=1 to use a 64 bit known
7566 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
7567 // vscale as VLENB / 8.
7568 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
7569 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
7570 reportFatalInternalError("Support for VLEN==32 is incomplete.");
7571 // We assume VLENB is a multiple of 8. We manually choose the best shift
7572 // here because SimplifyDemandedBits isn't always able to simplify it.
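// For example, vscale * 4 becomes (VLENB >> 1), vscale * 16 becomes
// (VLENB << 1), and vscale * 24 becomes VLENB * 3 (the divide by 8 is folded
// into the scaled multiplier).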
7573 uint64_t Val = Op.getConstantOperandVal(0);
7574 if (isPowerOf2_64(Val)) {
7575 uint64_t Log2 = Log2_64(Val);
7576 if (Log2 < 3) {
7577 SDNodeFlags Flags;
7578 Flags.setExact(true);
7579 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7580 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
7581 } else if (Log2 > 3) {
7582 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
7583 DAG.getConstant(Log2 - 3, DL, XLenVT));
7584 }
7585 } else if ((Val % 8) == 0) {
7586 // If the multiplier is a multiple of 8, scale it down to avoid needing
7587 // to shift the VLENB value.
7588 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
7589 DAG.getConstant(Val / 8, DL, XLenVT));
7590 } else {
7591 SDNodeFlags Flags;
7592 Flags.setExact(true);
7593 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7594 DAG.getConstant(3, DL, XLenVT), Flags);
7595 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
7596 DAG.getConstant(Val, DL, XLenVT));
7597 }
7598 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7599 }
7600 case ISD::FPOWI: {
7601 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
7602 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
7603 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
7604 Op.getOperand(1).getValueType() == MVT::i32) {
7605 SDLoc DL(Op);
7606 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7607 SDValue Powi =
7608 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
7609 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
7610 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7611 }
7612 return SDValue();
7613 }
7614 case ISD::FMAXIMUM:
7615 case ISD::FMINIMUM:
7616 if (isPromotedOpNeedingSplit(Op, Subtarget))
7617 return SplitVectorOp(Op, DAG);
7618 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7619 case ISD::FP_EXTEND:
7620 case ISD::FP_ROUND:
7621 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7622 case ISD::STRICT_FP_EXTEND:
7623 case ISD::STRICT_FP_ROUND:
7624 return lowerStrictFPExtendOrRoundLike(Op, DAG);
7625 case ISD::SINT_TO_FP:
7626 case ISD::UINT_TO_FP:
7627 if (Op.getValueType().isVector() &&
7628 ((Op.getValueType().getScalarType() == MVT::f16 &&
7629 (Subtarget.hasVInstructionsF16Minimal() &&
7630 !Subtarget.hasVInstructionsF16())) ||
7631 Op.getValueType().getScalarType() == MVT::bf16)) {
7632 if (isPromotedOpNeedingSplit(Op, Subtarget))
7633 return SplitVectorOp(Op, DAG);
7634 // int -> f32
7635 SDLoc DL(Op);
7636 MVT NVT =
7637 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7638 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7639 // f32 -> [b]f16
7640 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7641 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7642 }
7643 [[fallthrough]];
7644 case ISD::FP_TO_SINT:
7645 case ISD::FP_TO_UINT:
7646 if (SDValue Op1 = Op.getOperand(0);
7647 Op1.getValueType().isVector() &&
7648 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7649 (Subtarget.hasVInstructionsF16Minimal() &&
7650 !Subtarget.hasVInstructionsF16())) ||
7651 Op1.getValueType().getScalarType() == MVT::bf16)) {
7652 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7653 return SplitVectorOp(Op, DAG);
7654 // [b]f16 -> f32
7655 SDLoc DL(Op);
7656 MVT NVT = MVT::getVectorVT(MVT::f32,
7657 Op1.getValueType().getVectorElementCount());
7658 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7659 // f32 -> int
7660 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
7661 }
7662 [[fallthrough]];
7663 case ISD::STRICT_FP_TO_SINT:
7664 case ISD::STRICT_FP_TO_UINT:
7665 case ISD::STRICT_SINT_TO_FP:
7666 case ISD::STRICT_UINT_TO_FP: {
7667 // RVV can only do fp<->int conversions to types half or double the size of
7668 // the source. We custom-lower any conversion that needs two hops into a
7669 // sequence.
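// For example, nxv2f16 -> nxv2i64 needs more than one halving/doubling step,
// so it is emitted as an fp_extend to nxv2f32 followed by the (widening)
// conversion to i64.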
7670 MVT VT = Op.getSimpleValueType();
7671 if (VT.isScalarInteger())
7672 return lowerFP_TO_INT(Op, DAG, Subtarget);
7673 bool IsStrict = Op->isStrictFPOpcode();
7674 SDValue Src = Op.getOperand(0 + IsStrict);
7675 MVT SrcVT = Src.getSimpleValueType();
7676 if (SrcVT.isScalarInteger())
7677 return lowerINT_TO_FP(Op, DAG, Subtarget);
7678 if (!VT.isVector())
7679 return Op;
7680 SDLoc DL(Op);
7681 MVT EltVT = VT.getVectorElementType();
7682 MVT SrcEltVT = SrcVT.getVectorElementType();
7683 unsigned EltSize = EltVT.getSizeInBits();
7684 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
7685 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
7686 "Unexpected vector element types");
7687
7688 bool IsInt2FP = SrcEltVT.isInteger();
7689 // Widening conversions
7690 if (EltSize > (2 * SrcEltSize)) {
7691 if (IsInt2FP) {
7692 // Do a regular integer sign/zero extension then convert to float.
7693 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
7694 VT.getVectorElementCount());
7695 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
7696 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
7697 ? ISD::ZERO_EXTEND
7698 : ISD::SIGN_EXTEND;
7699 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
7700 if (IsStrict)
7701 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
7702 Op.getOperand(0), Ext);
7703 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
7704 }
7705 // FP2Int
7706 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
7707 // Do one doubling fp_extend then complete the operation by converting
7708 // to int.
7709 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7710 if (IsStrict) {
7711 auto [FExt, Chain] =
7712 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
7713 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
7714 }
7715 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7716 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7717 }
7718
7719 // Narrowing conversions
7720 if (SrcEltSize > (2 * EltSize)) {
7721 if (IsInt2FP) {
7722 // One narrowing int_to_fp, then an fp_round.
7723 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7724 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7725 if (IsStrict) {
7726 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7727 DAG.getVTList(InterimFVT, MVT::Other),
7728 Op.getOperand(0), Src);
7729 SDValue Chain = Int2FP.getValue(1);
7730 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7731 }
7732 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7733 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7734 }
7735 // FP2Int
7736 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7737 // representable by the integer, the result is poison.
7738 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7739 VT.getVectorElementCount());
7740 if (IsStrict) {
7741 SDValue FP2Int =
7742 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7743 Op.getOperand(0), Src);
7744 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7745 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7746 }
7747 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7748 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7749 }
7750
7751 // Scalable vectors can exit here. Patterns will handle equally-sized
7752 // conversions as well as halving/doubling ones.
7753 if (!VT.isFixedLengthVector())
7754 return Op;
7755
7756 // For fixed-length vectors we lower to a custom "VL" node.
7757 unsigned RVVOpc = 0;
7758 switch (Op.getOpcode()) {
7759 default:
7760 llvm_unreachable("Impossible opcode");
7761 case ISD::FP_TO_SINT:
7762 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7763 break;
7764 case ISD::FP_TO_UINT:
7765 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7766 break;
7767 case ISD::SINT_TO_FP:
7768 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7769 break;
7770 case ISD::UINT_TO_FP:
7771 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7772 break;
7773 case ISD::STRICT_FP_TO_SINT:
7774 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7775 break;
7776 case ISD::STRICT_FP_TO_UINT:
7777 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7778 break;
7779 case ISD::STRICT_SINT_TO_FP:
7780 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7781 break;
7782 case ISD::STRICT_UINT_TO_FP:
7783 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7784 break;
7785 }
7786
7787 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7788 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7789 assert(ContainerVT.getVectorElementCount() ==
7790 SrcContainerVT.getVectorElementCount() && "Expected same element count");
7791
7792 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7793
7794 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7795 if (IsStrict) {
7796 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7797 Op.getOperand(0), Src, Mask, VL);
7798 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7799 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7800 }
7801 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7802 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7803 }
7804 case ISD::FP_TO_SINT_SAT:
7805 case ISD::FP_TO_UINT_SAT:
7806 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7807 case ISD::FP_TO_BF16: {
7808 // Custom lower to ensure the libcall return is passed in an FPR on hard
7809 // float ABIs.
7810 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7811 SDLoc DL(Op);
7812 MakeLibCallOptions CallOptions;
7813 RTLIB::Libcall LC =
7814 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7815 SDValue Res =
7816 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7817 if (Subtarget.is64Bit())
7818 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7819 return DAG.getBitcast(MVT::i32, Res);
7820 }
7821 case ISD::BF16_TO_FP: {
7822 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7823 MVT VT = Op.getSimpleValueType();
7824 SDLoc DL(Op);
7825 Op = DAG.getNode(
7826 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7827 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7828 SDValue Res = Subtarget.is64Bit()
7829 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7830 : DAG.getBitcast(MVT::f32, Op);
7831 // fp_extend if the target VT is bigger than f32.
7832 if (VT != MVT::f32)
7833 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7834 return Res;
7835 }
7836 case ISD::STRICT_FP_TO_FP16:
7837 case ISD::FP_TO_FP16: {
7838 // Custom lower to ensure the libcall return is passed in an FPR on hard
7839 // float ABIs.
7840 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7841 SDLoc DL(Op);
7842 MakeLibCallOptions CallOptions;
7843 bool IsStrict = Op->isStrictFPOpcode();
7844 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7845 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7846 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7847 SDValue Res;
7848 std::tie(Res, Chain) =
7849 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7850 if (Subtarget.is64Bit())
7851 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7852 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7853 if (IsStrict)
7854 return DAG.getMergeValues({Result, Chain}, DL);
7855 return Result;
7856 }
7857 case ISD::STRICT_FP16_TO_FP:
7858 case ISD::FP16_TO_FP: {
7859 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7860 // float ABIs.
7861 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7862 SDLoc DL(Op);
7863 MakeLibCallOptions CallOptions;
7864 bool IsStrict = Op->isStrictFPOpcode();
7865 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7866 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7867 SDValue Arg = Subtarget.is64Bit()
7868 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7869 : DAG.getBitcast(MVT::f32, Op0);
7870 SDValue Res;
7871 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7872 CallOptions, DL, Chain);
7873 if (IsStrict)
7874 return DAG.getMergeValues({Res, Chain}, DL);
7875 return Res;
7876 }
7877 case ISD::FTRUNC:
7878 case ISD::FCEIL:
7879 case ISD::FFLOOR:
7880 case ISD::FNEARBYINT:
7881 case ISD::FRINT:
7882 case ISD::FROUND:
7883 case ISD::FROUNDEVEN:
7884 if (isPromotedOpNeedingSplit(Op, Subtarget))
7885 return SplitVectorOp(Op, DAG);
7886 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7887 case ISD::LRINT:
7888 case ISD::LLRINT:
7889 case ISD::LROUND:
7890 case ISD::LLROUND: {
7891 if (Op.getValueType().isVector())
7892 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
7893 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7894 "Unexpected custom legalisation");
7895 SDLoc DL(Op);
7896 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7897 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7898 }
7899 case ISD::STRICT_LRINT:
7900 case ISD::STRICT_LLRINT:
7901 case ISD::STRICT_LROUND:
7902 case ISD::STRICT_LLROUND: {
7903 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7904 "Unexpected custom legalisation");
7905 SDLoc DL(Op);
7906 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7907 {Op.getOperand(0), Op.getOperand(1)});
7908 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7909 {Ext.getValue(1), Ext.getValue(0)});
7910 }
7911 case ISD::VECREDUCE_ADD:
7912 case ISD::VECREDUCE_UMAX:
7913 case ISD::VECREDUCE_SMAX:
7914 case ISD::VECREDUCE_UMIN:
7915 case ISD::VECREDUCE_SMIN:
7916 return lowerVECREDUCE(Op, DAG);
7917 case ISD::VECREDUCE_AND:
7918 case ISD::VECREDUCE_OR:
7919 case ISD::VECREDUCE_XOR:
7920 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7921 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7922 return lowerVECREDUCE(Op, DAG);
7923 case ISD::VECREDUCE_FADD:
7924 case ISD::VECREDUCE_SEQ_FADD:
7925 case ISD::VECREDUCE_FMIN:
7926 case ISD::VECREDUCE_FMAX:
7927 case ISD::VECREDUCE_FMAXIMUM:
7928 case ISD::VECREDUCE_FMINIMUM:
7929 return lowerFPVECREDUCE(Op, DAG);
7930 case ISD::VP_REDUCE_ADD:
7931 case ISD::VP_REDUCE_UMAX:
7932 case ISD::VP_REDUCE_SMAX:
7933 case ISD::VP_REDUCE_UMIN:
7934 case ISD::VP_REDUCE_SMIN:
7935 case ISD::VP_REDUCE_FADD:
7936 case ISD::VP_REDUCE_SEQ_FADD:
7937 case ISD::VP_REDUCE_FMIN:
7938 case ISD::VP_REDUCE_FMAX:
7939 case ISD::VP_REDUCE_FMINIMUM:
7940 case ISD::VP_REDUCE_FMAXIMUM:
7941 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7942 return SplitVectorReductionOp(Op, DAG);
7943 return lowerVPREDUCE(Op, DAG);
7944 case ISD::VP_REDUCE_AND:
7945 case ISD::VP_REDUCE_OR:
7946 case ISD::VP_REDUCE_XOR:
7947 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7948 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7949 return lowerVPREDUCE(Op, DAG);
7950 case ISD::VP_CTTZ_ELTS:
7951 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7952 return lowerVPCttzElements(Op, DAG);
7953 case ISD::UNDEF: {
7954 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7955 return convertFromScalableVector(Op.getSimpleValueType(),
7956 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7957 }
7958 case ISD::INSERT_SUBVECTOR:
7959 return lowerINSERT_SUBVECTOR(Op, DAG);
7960 case ISD::EXTRACT_SUBVECTOR:
7961 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7962 case ISD::VECTOR_DEINTERLEAVE:
7963 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7964 case ISD::VECTOR_INTERLEAVE:
7965 return lowerVECTOR_INTERLEAVE(Op, DAG);
7966 case ISD::STEP_VECTOR:
7967 return lowerSTEP_VECTOR(Op, DAG);
7968 case ISD::VECTOR_REVERSE:
7969 return lowerVECTOR_REVERSE(Op, DAG);
7970 case ISD::VECTOR_SPLICE:
7971 return lowerVECTOR_SPLICE(Op, DAG);
7972 case ISD::BUILD_VECTOR: {
7973 MVT VT = Op.getSimpleValueType();
7974 MVT EltVT = VT.getVectorElementType();
7975 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
7976 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
7977 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7978 }
7979 case ISD::SPLAT_VECTOR: {
7980 MVT VT = Op.getSimpleValueType();
7981 MVT EltVT = VT.getVectorElementType();
7982 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7983 EltVT == MVT::bf16) {
7984 SDLoc DL(Op);
7985 SDValue Elt;
7986 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7987 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7988 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7989 Op.getOperand(0));
7990 else
7991 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7992 MVT IVT = VT.changeVectorElementType(MVT::i16);
7993 return DAG.getNode(ISD::BITCAST, DL, VT,
7994 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7995 }
7996
7997 if (EltVT == MVT::i1)
7998 return lowerVectorMaskSplat(Op, DAG);
7999 return SDValue();
8000 }
8001 case ISD::VECTOR_SHUFFLE:
8002 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8003 case ISD::CONCAT_VECTORS: {
8004 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
8005 // better than going through the stack, as the default expansion does.
8006 SDLoc DL(Op);
8007 MVT VT = Op.getSimpleValueType();
8008 MVT ContainerVT = VT;
8009 if (VT.isFixedLengthVector())
8010 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
8011
8012 // Recursively split concat_vectors with more than 2 operands:
8013 //
8014 // concat_vector op1, op2, op3, op4
8015 // ->
8016 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
8017 //
8018 // This reduces the length of the chain of vslideups and allows us to
8019 // perform the vslideups at a smaller LMUL, limited to MF2.
8020 if (Op.getNumOperands() > 2 &&
8021 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
8022 MVT HalfVT = VT.getHalfNumVectorElementsVT();
8023 assert(isPowerOf2_32(Op.getNumOperands()));
8024 size_t HalfNumOps = Op.getNumOperands() / 2;
8025 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8026 Op->ops().take_front(HalfNumOps));
8027 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8028 Op->ops().drop_front(HalfNumOps));
8029 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8030 }
8031
8032 unsigned NumOpElts =
8033 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
8034 SDValue Vec = DAG.getUNDEF(VT);
8035 for (const auto &OpIdx : enumerate(Op->ops())) {
8036 SDValue SubVec = OpIdx.value();
8037 // Don't insert undef subvectors.
8038 if (SubVec.isUndef())
8039 continue;
8040 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
8041 }
8042 return Vec;
8043 }
8044 case ISD::LOAD: {
8045 auto *Load = cast<LoadSDNode>(Op);
8046 EVT VT = Load->getValueType(0);
8047 if (VT == MVT::f64) {
8048 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8049 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8050
8051 // Replace a double precision load with two i32 loads and a BuildPairF64.
8052 SDLoc DL(Op);
8053 SDValue BasePtr = Load->getBasePtr();
8054 SDValue Chain = Load->getChain();
8055
8056 SDValue Lo =
8057 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
8058 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8059 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8060 SDValue Hi = DAG.getLoad(
8061 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
8062 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8063 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8064 Hi.getValue(1));
8065
8066 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8067 return DAG.getMergeValues({Pair, Chain}, DL);
8068 }
8069
8070 if (VT == MVT::bf16)
8071 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8072
8073 // Handle normal vector tuple load.
8074 if (VT.isRISCVVectorTuple()) {
8075 SDLoc DL(Op);
8076 MVT XLenVT = Subtarget.getXLenVT();
8077 unsigned NF = VT.getRISCVVectorTupleNumFields();
8078 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8079 unsigned NumElts = Sz / (NF * 8);
8080 int Log2LMUL = Log2_64(NumElts) - 3;
8081
8082 auto Flag = SDNodeFlags();
8083 Flag.setNoUnsignedWrap(true);
8084 SDValue Ret = DAG.getUNDEF(VT);
8085 SDValue BasePtr = Load->getBasePtr();
8086 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8087 VROffset =
8088 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8089 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8090 SmallVector<SDValue, 8> OutChains;
8091
8092 // Load NF vector registers and combine them to a vector tuple.
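// For example, a riscv_nxv8i8x2 tuple (NF = 2, LMUL = 1) is loaded as two
// <vscale x 8 x i8> values, at BasePtr and BasePtr + VLENB.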
8093 for (unsigned i = 0; i < NF; ++i) {
8094 SDValue LoadVal = DAG.getLoad(
8095 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8096 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8097 OutChains.push_back(LoadVal.getValue(1));
8098 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8099 DAG.getTargetConstant(i, DL, MVT::i32));
8100 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8101 }
8102 return DAG.getMergeValues(
8103 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8104 }
8105
8106 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8107 return V;
8108 if (Op.getValueType().isFixedLengthVector())
8109 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8110 return Op;
8111 }
8112 case ISD::STORE: {
8113 auto *Store = cast<StoreSDNode>(Op);
8114 SDValue StoredVal = Store->getValue();
8115 EVT VT = StoredVal.getValueType();
8116 if (VT == MVT::f64) {
8117 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8118 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8119
8120 // Replace a double precision store with a SplitF64 and i32 stores.
8121 SDLoc DL(Op);
8122 SDValue BasePtr = Store->getBasePtr();
8123 SDValue Chain = Store->getChain();
8124 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8125 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8126
8127 SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
8128 Store->getPointerInfo(), Store->getBaseAlign(),
8129 Store->getMemOperand()->getFlags());
8130 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8131 SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
8132 Store->getPointerInfo().getWithOffset(4),
8133 Store->getBaseAlign(),
8134 Store->getMemOperand()->getFlags());
8135 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8136 }
8137 if (VT == MVT::i64) {
8138 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8139 "Unexpected custom legalisation");
8140 if (Store->isTruncatingStore())
8141 return SDValue();
8142
8143 if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
8144 return SDValue();
8145
8146 SDLoc DL(Op);
8147 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8148 DAG.getTargetConstant(0, DL, MVT::i32));
8149 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8150 DAG.getTargetConstant(1, DL, MVT::i32));
8151
8152 return DAG.getMemIntrinsicNode(
8153 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8154 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8155 Store->getMemOperand());
8156 }
8157
8158 if (VT == MVT::bf16)
8159 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8160
8161 // Handle normal vector tuple store.
8162 if (VT.isRISCVVectorTuple()) {
8163 SDLoc DL(Op);
8164 MVT XLenVT = Subtarget.getXLenVT();
8165 unsigned NF = VT.getRISCVVectorTupleNumFields();
8166 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8167 unsigned NumElts = Sz / (NF * 8);
8168 int Log2LMUL = Log2_64(NumElts) - 3;
8169
8170 auto Flag = SDNodeFlags();
8171 Flag.setNoUnsignedWrap(true);
8172 SDValue Ret;
8173 SDValue Chain = Store->getChain();
8174 SDValue BasePtr = Store->getBasePtr();
8175 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8176 VROffset =
8177 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8178 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8179
8180 // Extract subregisters in a vector tuple and store them individually.
8181 for (unsigned i = 0; i < NF; ++i) {
8182 auto Extract =
8183 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8184 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8185 DAG.getTargetConstant(i, DL, MVT::i32));
8186 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8187 MachinePointerInfo(Store->getAddressSpace()),
8188 Store->getBaseAlign(),
8189 Store->getMemOperand()->getFlags());
8190 Chain = Ret.getValue(0);
8191 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8192 }
8193 return Ret;
8194 }
8195
8196 if (auto V = expandUnalignedRVVStore(Op, DAG))
8197 return V;
8198 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8199 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8200 return Op;
8201 }
8202 case ISD::MLOAD:
8203 case ISD::VP_LOAD:
8204 return lowerMaskedLoad(Op, DAG);
8205 case ISD::VP_LOAD_FF:
8206 return lowerLoadFF(Op, DAG);
8207 case ISD::MSTORE:
8208 case ISD::VP_STORE:
8209 return lowerMaskedStore(Op, DAG);
8210 case ISD::VECTOR_COMPRESS:
8211 return lowerVectorCompress(Op, DAG);
8212 case ISD::SELECT_CC: {
8213 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8214 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8215 // into separate SETCC+SELECT just like LegalizeDAG.
8216 SDValue Tmp1 = Op.getOperand(0);
8217 SDValue Tmp2 = Op.getOperand(1);
8218 SDValue True = Op.getOperand(2);
8219 SDValue False = Op.getOperand(3);
8220 EVT VT = Op.getValueType();
8221 SDValue CC = Op.getOperand(4);
8222 EVT CmpVT = Tmp1.getValueType();
8223 EVT CCVT =
8224 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8225 SDLoc DL(Op);
8226 SDValue Cond =
8227 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8228 return DAG.getSelect(DL, VT, Cond, True, False);
8229 }
8230 case ISD::SETCC: {
8231 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8232 if (OpVT.isScalarInteger()) {
8233 MVT VT = Op.getSimpleValueType();
8234 SDValue LHS = Op.getOperand(0);
8235 SDValue RHS = Op.getOperand(1);
8236 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8237 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8238 "Unexpected CondCode");
8239
8240 SDLoc DL(Op);
8241
8242 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8243 // convert this to the equivalent of (set(u)ge X, C+1) by using
8244 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8245 // in a register.
8246 if (isa<ConstantSDNode>(RHS)) {
8247 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8248 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8249 // If this is an unsigned compare and the constant is -1, incrementing
8250 // the constant would change behavior. The result should be false.
8251 if (CCVal == ISD::SETUGT && Imm == -1)
8252 return DAG.getConstant(0, DL, VT);
8253 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8254 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8255 SDValue SetCC = DAG.getSetCC(
8256 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8257 return DAG.getLogicalNOT(DL, SetCC, VT);
8258 }
8259 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
8260 if (CCVal == ISD::SETUGT && Imm == 2047) {
8261 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8262 DAG.getShiftAmountConstant(11, OpVT, DL));
8263 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8264 ISD::SETNE);
8265 }
8266 }
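// A minimal sketch of the immediate folds above, assuming XLenVT == i64:
//   (setgt  X, 5)    -> (xori (slti  X, 6), 1)   ; X > 5  <=>  !(X < 6)
//   (setugt X, 5)    -> (xori (sltiu X, 6), 1)
//   (setugt X, 2047) -> (setne (srl X, 11), 0)
// so the comparison reuses the 12-bit immediate of slti/sltiu instead of
// materializing the constant in a register first.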
8267
8268 // Not a constant we could handle, swap the operands and condition code to
8269 // SETLT/SETULT.
8270 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8271 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8272 }
8273
8274 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8275 return SplitVectorOp(Op, DAG);
8276
8277 return lowerToScalableOp(Op, DAG);
8278 }
8279 case ISD::ADD:
8280 case ISD::SUB:
8281 case ISD::MUL:
8282 case ISD::MULHS:
8283 case ISD::MULHU:
8284 case ISD::AND:
8285 case ISD::OR:
8286 case ISD::XOR:
8287 case ISD::SDIV:
8288 case ISD::SREM:
8289 case ISD::UDIV:
8290 case ISD::UREM:
8291 case ISD::BSWAP:
8292 case ISD::CTPOP:
8293 case ISD::VSELECT:
8294 return lowerToScalableOp(Op, DAG);
8295 case ISD::SHL:
8296 case ISD::SRA:
8297 case ISD::SRL:
8298 if (Op.getSimpleValueType().isFixedLengthVector())
8299 return lowerToScalableOp(Op, DAG);
8300 // This can be called for an i32 shift amount that needs to be promoted.
8301 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8302 "Unexpected custom legalisation");
8303 return SDValue();
8304 case ISD::FABS:
8305 case ISD::FNEG:
8306 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8307 return lowerFABSorFNEG(Op, DAG, Subtarget);
8308 [[fallthrough]];
8309 case ISD::FADD:
8310 case ISD::FSUB:
8311 case ISD::FMUL:
8312 case ISD::FDIV:
8313 case ISD::FSQRT:
8314 case ISD::FMA:
8315 case ISD::FMINNUM:
8316 case ISD::FMAXNUM:
8317 case ISD::FMINIMUMNUM:
8318 case ISD::FMAXIMUMNUM:
8319 if (isPromotedOpNeedingSplit(Op, Subtarget))
8320 return SplitVectorOp(Op, DAG);
8321 [[fallthrough]];
8322 case ISD::AVGFLOORS:
8323 case ISD::AVGFLOORU:
8324 case ISD::AVGCEILS:
8325 case ISD::AVGCEILU:
8326 case ISD::SMIN:
8327 case ISD::SMAX:
8328 case ISD::UMIN:
8329 case ISD::UMAX:
8330 case ISD::UADDSAT:
8331 case ISD::USUBSAT:
8332 case ISD::SADDSAT:
8333 case ISD::SSUBSAT:
8334 return lowerToScalableOp(Op, DAG);
8335 case ISD::ABDS:
8336 case ISD::ABDU: {
8337 SDLoc dl(Op);
8338 EVT VT = Op->getValueType(0);
8339 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8340 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8341 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8342
8343 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8344 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
8345 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8346 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8347 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8348 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8349 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8350 }
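// A minimal worked example of the expansion above: abds(3, -5) becomes
// sub(smax(3, -5), smin(3, -5)) = 3 - (-5) = 8 = |3 - (-5)|, and abdu(1, 4)
// becomes sub(umax(1, 4), umin(1, 4)) = 4 - 1 = 3.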
8351 case ISD::ABS:
8352 case ISD::VP_ABS:
8353 return lowerABS(Op, DAG);
8354 case ISD::CTLZ:
8355 case ISD::CTLZ_ZERO_UNDEF:
8356 case ISD::CTTZ:
8357 case ISD::CTTZ_ZERO_UNDEF:
8358 if (Subtarget.hasStdExtZvbb())
8359 return lowerToScalableOp(Op, DAG);
8360 assert(Op.getOpcode() != ISD::CTTZ);
8361 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8362 case ISD::FCOPYSIGN:
8363 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8364 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8365 if (isPromotedOpNeedingSplit(Op, Subtarget))
8366 return SplitVectorOp(Op, DAG);
8367 return lowerToScalableOp(Op, DAG);
8368 case ISD::STRICT_FADD:
8369 case ISD::STRICT_FSUB:
8370 case ISD::STRICT_FMUL:
8371 case ISD::STRICT_FDIV:
8372 case ISD::STRICT_FSQRT:
8373 case ISD::STRICT_FMA:
8374 if (isPromotedOpNeedingSplit(Op, Subtarget))
8375 return SplitStrictFPVectorOp(Op, DAG);
8376 return lowerToScalableOp(Op, DAG);
8377 case ISD::STRICT_FSETCC:
8378 case ISD::STRICT_FSETCCS:
8379 return lowerVectorStrictFSetcc(Op, DAG);
8380 case ISD::STRICT_FCEIL:
8381 case ISD::STRICT_FRINT:
8382 case ISD::STRICT_FFLOOR:
8383 case ISD::STRICT_FTRUNC:
8384 case ISD::STRICT_FNEARBYINT:
8385 case ISD::STRICT_FROUND:
8386 case ISD::STRICT_FROUNDEVEN:
8387 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8388 case ISD::MGATHER:
8389 case ISD::VP_GATHER:
8390 return lowerMaskedGather(Op, DAG);
8391 case ISD::MSCATTER:
8392 case ISD::VP_SCATTER:
8393 return lowerMaskedScatter(Op, DAG);
8394 case ISD::GET_ROUNDING:
8395 return lowerGET_ROUNDING(Op, DAG);
8396 case ISD::SET_ROUNDING:
8397 return lowerSET_ROUNDING(Op, DAG);
8398 case ISD::GET_FPENV:
8399 return lowerGET_FPENV(Op, DAG);
8400 case ISD::SET_FPENV:
8401 return lowerSET_FPENV(Op, DAG);
8402 case ISD::RESET_FPENV:
8403 return lowerRESET_FPENV(Op, DAG);
8404 case ISD::GET_FPMODE:
8405 return lowerGET_FPMODE(Op, DAG);
8406 case ISD::SET_FPMODE:
8407 return lowerSET_FPMODE(Op, DAG);
8408 case ISD::RESET_FPMODE:
8409 return lowerRESET_FPMODE(Op, DAG);
8410 case ISD::EH_DWARF_CFA:
8411 return lowerEH_DWARF_CFA(Op, DAG);
8412 case ISD::VP_MERGE:
8413 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
8414 return lowerVPMergeMask(Op, DAG);
8415 [[fallthrough]];
8416 case ISD::VP_SELECT:
8417 case ISD::VP_ADD:
8418 case ISD::VP_SUB:
8419 case ISD::VP_MUL:
8420 case ISD::VP_SDIV:
8421 case ISD::VP_UDIV:
8422 case ISD::VP_SREM:
8423 case ISD::VP_UREM:
8424 case ISD::VP_UADDSAT:
8425 case ISD::VP_USUBSAT:
8426 case ISD::VP_SADDSAT:
8427 case ISD::VP_SSUBSAT:
8428 case ISD::VP_LRINT:
8429 case ISD::VP_LLRINT:
8430 return lowerVPOp(Op, DAG);
8431 case ISD::VP_AND:
8432 case ISD::VP_OR:
8433 case ISD::VP_XOR:
8434 return lowerLogicVPOp(Op, DAG);
8435 case ISD::VP_FADD:
8436 case ISD::VP_FSUB:
8437 case ISD::VP_FMUL:
8438 case ISD::VP_FDIV:
8439 case ISD::VP_FNEG:
8440 case ISD::VP_FABS:
8441 case ISD::VP_SQRT:
8442 case ISD::VP_FMA:
8443 case ISD::VP_FMINNUM:
8444 case ISD::VP_FMAXNUM:
8445 case ISD::VP_FCOPYSIGN:
8446 if (isPromotedOpNeedingSplit(Op, Subtarget))
8447 return SplitVPOp(Op, DAG);
8448 [[fallthrough]];
8449 case ISD::VP_SRA:
8450 case ISD::VP_SRL:
8451 case ISD::VP_SHL:
8452 return lowerVPOp(Op, DAG);
8453 case ISD::VP_IS_FPCLASS:
8454 return LowerIS_FPCLASS(Op, DAG);
8455 case ISD::VP_SIGN_EXTEND:
8456 case ISD::VP_ZERO_EXTEND:
8457 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8458 return lowerVPExtMaskOp(Op, DAG);
8459 return lowerVPOp(Op, DAG);
8460 case ISD::VP_TRUNCATE:
8461 return lowerVectorTruncLike(Op, DAG);
8462 case ISD::VP_FP_EXTEND:
8463 case ISD::VP_FP_ROUND:
8464 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8465 case ISD::VP_SINT_TO_FP:
8466 case ISD::VP_UINT_TO_FP:
8467 if (Op.getValueType().isVector() &&
8468 ((Op.getValueType().getScalarType() == MVT::f16 &&
8469 (Subtarget.hasVInstructionsF16Minimal() &&
8470 !Subtarget.hasVInstructionsF16())) ||
8471 Op.getValueType().getScalarType() == MVT::bf16)) {
8472 if (isPromotedOpNeedingSplit(Op, Subtarget))
8473 return SplitVectorOp(Op, DAG);
8474 // int -> f32
8475 SDLoc DL(Op);
8476 MVT NVT =
8477 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8478 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8479 // f32 -> [b]f16
8480 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8481 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8482 }
8483 [[fallthrough]];
8484 case ISD::VP_FP_TO_SINT:
8485 case ISD::VP_FP_TO_UINT:
8486 if (SDValue Op1 = Op.getOperand(0);
8487 Op1.getValueType().isVector() &&
8488 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8489 (Subtarget.hasVInstructionsF16Minimal() &&
8490 !Subtarget.hasVInstructionsF16())) ||
8491 Op1.getValueType().getScalarType() == MVT::bf16)) {
8492 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8493 return SplitVectorOp(Op, DAG);
8494 // [b]f16 -> f32
8495 SDLoc DL(Op);
8496 MVT NVT = MVT::getVectorVT(MVT::f32,
8497 Op1.getValueType().getVectorElementCount());
8498 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8499 // f32 -> int
8500 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
8501 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
8502 }
8503 return lowerVPFPIntConvOp(Op, DAG);
8504 case ISD::VP_SETCC:
8505 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8506 return SplitVPOp(Op, DAG);
8507 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8508 return lowerVPSetCCMaskOp(Op, DAG);
8509 [[fallthrough]];
8510 case ISD::VP_SMIN:
8511 case ISD::VP_SMAX:
8512 case ISD::VP_UMIN:
8513 case ISD::VP_UMAX:
8514 case ISD::VP_BITREVERSE:
8515 case ISD::VP_BSWAP:
8516 return lowerVPOp(Op, DAG);
8517 case ISD::VP_CTLZ:
8518 case ISD::VP_CTLZ_ZERO_UNDEF:
8519 if (Subtarget.hasStdExtZvbb())
8520 return lowerVPOp(Op, DAG);
8521 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8522 case ISD::VP_CTTZ:
8523 case ISD::VP_CTTZ_ZERO_UNDEF:
8524 if (Subtarget.hasStdExtZvbb())
8525 return lowerVPOp(Op, DAG);
8526 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8527 case ISD::VP_CTPOP:
8528 return lowerVPOp(Op, DAG);
8529 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8530 return lowerVPStridedLoad(Op, DAG);
8531 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8532 return lowerVPStridedStore(Op, DAG);
8533 case ISD::VP_FCEIL:
8534 case ISD::VP_FFLOOR:
8535 case ISD::VP_FRINT:
8536 case ISD::VP_FNEARBYINT:
8537 case ISD::VP_FROUND:
8538 case ISD::VP_FROUNDEVEN:
8539 case ISD::VP_FROUNDTOZERO:
8540 if (isPromotedOpNeedingSplit(Op, Subtarget))
8541 return SplitVPOp(Op, DAG);
8542 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8543 case ISD::VP_FMAXIMUM:
8544 case ISD::VP_FMINIMUM:
8545 if (isPromotedOpNeedingSplit(Op, Subtarget))
8546 return SplitVPOp(Op, DAG);
8547 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8548 case ISD::EXPERIMENTAL_VP_SPLICE:
8549 return lowerVPSpliceExperimental(Op, DAG);
8550 case ISD::EXPERIMENTAL_VP_REVERSE:
8551 return lowerVPReverseExperimental(Op, DAG);
8552 case ISD::EXPERIMENTAL_VP_SPLAT:
8553 return lowerVPSplatExperimental(Op, DAG);
8554 case ISD::CLEAR_CACHE: {
8555 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
8556 "llvm.clear_cache only needs custom lower on Linux targets");
8557 SDLoc DL(Op);
8558 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8559 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
8560 Op.getOperand(2), Flags, DL);
8561 }
8562 case ISD::DYNAMIC_STACKALLOC:
8563 return lowerDYNAMIC_STACKALLOC(Op, DAG);
8564 case ISD::INIT_TRAMPOLINE:
8565 return lowerINIT_TRAMPOLINE(Op, DAG);
8566 case ISD::ADJUST_TRAMPOLINE:
8567 return lowerADJUST_TRAMPOLINE(Op, DAG);
8568 case ISD::PARTIAL_REDUCE_SMLA:
8569 case ISD::PARTIAL_REDUCE_UMLA:
8570 case ISD::PARTIAL_REDUCE_SUMLA:
8571 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
8572 }
8573}
8574
8575SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
8576 SDValue Start, SDValue End,
8577 SDValue Flags, SDLoc DL) const {
8578 MakeLibCallOptions CallOptions;
8579 std::pair<SDValue, SDValue> CallResult =
8580 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
8581 {Start, End, Flags}, CallOptions, DL, InChain);
8582
8583 // This function returns void so only the out chain matters.
8584 return CallResult.second;
8585}
8586
8587SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
8588 SelectionDAG &DAG) const {
8589 if (!Subtarget.is64Bit())
8590 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8591
8592 // Create an MCCodeEmitter to encode instructions.
8593 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
8594 assert(TLO);
8595 MCContext &MCCtx = TLO->getContext();
8596
8597 std::unique_ptr<MCCodeEmitter> CodeEmitter(
8598 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
8599
8600 SDValue Root = Op.getOperand(0);
8601 SDValue Trmp = Op.getOperand(1); // trampoline
8602 SDLoc dl(Op);
8603
8604 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
8605
8606 // We store in the trampoline buffer the following instructions and data.
8607 // Offset:
8608 // 0: auipc t2, 0
8609 // 4: ld t0, 24(t2)
8610 // 8: ld t2, 16(t2)
8611 // 12: jalr t0
8612 // 16: <StaticChainOffset>
8613 // 24: <FunctionAddressOffset>
8614 // 32:
8615 // Offset with branch control flow protection enabled:
8616 // 0: lpad <imm20>
8617 // 4: auipc t3, 0
8618 // 8: ld t2, 28(t3)
8619 // 12: ld t3, 20(t3)
8620 // 16: jalr t2
8621 // 20: <StaticChainOffset>
8622 // 28: <FunctionAddressOffset>
8623 // 36:
8624
8625 const bool HasCFBranch =
8626 Subtarget.hasStdExtZicfilp() &&
8627 DAG.getMachineFunction().getFunction().getParent()->getModuleFlag(
8628 "cf-protection-branch");
8629 const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
8630 const unsigned StaticChainOffset = StaticChainIdx * 4;
8631 const unsigned FunctionAddressOffset = StaticChainOffset + 8;
8632
8633 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
8634 assert(STI);
8635 auto GetEncoding = [&](const MCInst &MC) {
8636 SmallVector<char, 32> CB;
8637 SmallVector<MCFixup> Fixups;
8638 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
8639 uint32_t Encoding = support::endian::read32le(CB.data());
8640 return Encoding;
8641 };
8642
8643 SmallVector<SDValue> OutChains;
8644
8645 SmallVector<uint32_t> Encodings;
8646 if (!HasCFBranch) {
8647 Encodings.append(
8648 {// auipc t2, 0
8649 // Loads the current PC into t2.
8650 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
8651 // ld t0, 24(t2)
8652 // Loads the function address into t0. Note that we are using offsets
8653 // pc-relative to the first instruction of the trampoline.
8654 GetEncoding(MCInstBuilder(RISCV::LD)
8655 .addReg(RISCV::X5)
8656 .addReg(RISCV::X7)
8657 .addImm(FunctionAddressOffset)),
8658 // ld t2, 16(t2)
8659 // Load the value of the static chain.
8660 GetEncoding(MCInstBuilder(RISCV::LD)
8661 .addReg(RISCV::X7)
8662 .addReg(RISCV::X7)
8663 .addImm(StaticChainOffset)),
8664 // jalr t0
8665 // Jump to the function.
8666 GetEncoding(MCInstBuilder(RISCV::JALR)
8667 .addReg(RISCV::X0)
8668 .addReg(RISCV::X5)
8669 .addImm(0))});
8670 } else {
8671 Encodings.append(
8672 {// auipc x0, <imm20> (lpad <imm20>)
8673 // Landing pad.
8674 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
8675 // auipc t3, 0
8676 // Loads the current PC into t3.
8677 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
8678 // ld t2, (FunctionAddressOffset - 4)(t3)
8679 // Loads the function address into t2. Note that we are using offsets
8680 // pc-relative to the SECOND instruction of the trampoline.
8681 GetEncoding(MCInstBuilder(RISCV::LD)
8682 .addReg(RISCV::X7)
8683 .addReg(RISCV::X28)
8684 .addImm(FunctionAddressOffset - 4)),
8685 // ld t3, (StaticChainOffset - 4)(t3)
8686 // Load the value of the static chain.
8687 GetEncoding(MCInstBuilder(RISCV::LD)
8688 .addReg(RISCV::X28)
8689 .addReg(RISCV::X28)
8690 .addImm(StaticChainOffset - 4)),
8691 // jalr t2
8692 // Software-guarded jump to the function.
8693 GetEncoding(MCInstBuilder(RISCV::JALR)
8694 .addReg(RISCV::X0)
8695 .addReg(RISCV::X7)
8696 .addImm(0))});
8697 }
8698
8699 // Store encoded instructions.
8700 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
8701 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8702 DAG.getConstant(Idx * 4, dl, MVT::i64))
8703 : Trmp;
8704 OutChains.push_back(DAG.getTruncStore(
8705 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
8706 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
8707 }
8708
8709 // Now store the variable part of the trampoline.
8710 SDValue FunctionAddress = Op.getOperand(2);
8711 SDValue StaticChain = Op.getOperand(3);
8712
8713 // Store the given static chain and function pointer in the trampoline buffer.
8714 struct OffsetValuePair {
8715 const unsigned Offset;
8716 const SDValue Value;
8717 SDValue Addr = SDValue(); // Used to cache the address.
8718 } OffsetValues[] = {
8719 {StaticChainOffset, StaticChain},
8720 {FunctionAddressOffset, FunctionAddress},
8721 };
8722 for (auto &OffsetValue : OffsetValues) {
8723 SDValue Addr =
8724 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8725 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
8726 OffsetValue.Addr = Addr;
8727 OutChains.push_back(
8728 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
8729 MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
8730 }
8731
8732 assert(OutChains.size() == StaticChainIdx + 2 &&
8733 "Size of OutChains mismatch");
8734 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
8735
8736 // The end of instructions of trampoline is the same as the static chain
8737 // address that we computed earlier.
8738 SDValue EndOfTrmp = OffsetValues[0].Addr;
8739
8740 // Call clear cache on the trampoline instructions.
8741 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
8742 Trmp, EndOfTrmp);
8743
8744 return Chain;
8745}
8746
8747SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
8748 SelectionDAG &DAG) const {
8749 if (!Subtarget.is64Bit())
8750 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8751
8752 return Op.getOperand(0);
8753}
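// A minimal sketch of the IR this pair of hooks consumes (the names %tramp,
// %chain and @nested are illustrative placeholders):
//   call void @llvm.init.trampoline(ptr %tramp, ptr @nested, ptr %chain)
//   %fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
// INIT_TRAMPOLINE writes the instruction/data blob described above into
// %tramp and then clears the icache over it; ADJUST_TRAMPOLINE simply returns
// the trampoline address unchanged.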
8754
8755SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
8756 SelectionDAG &DAG) const {
8757 // Currently, only the vqdot and vqdotu case (from zvqdotq) should be legal.
8758 // TODO: There are many other sub-cases we could potentially lower, are
8759 // any of them worthwhile? Ex: via vredsum, vwredsum, vwwmaccu, etc..
8760 SDLoc DL(Op);
8761 MVT VT = Op.getSimpleValueType();
8762 SDValue Accum = Op.getOperand(0);
8763 assert(Accum.getSimpleValueType() == VT &&
8764 VT.getVectorElementType() == MVT::i32);
8765 SDValue A = Op.getOperand(1);
8766 SDValue B = Op.getOperand(2);
8767 MVT ArgVT = A.getSimpleValueType();
8768 assert(ArgVT == B.getSimpleValueType() &&
8769 ArgVT.getVectorElementType() == MVT::i8);
8770 (void)ArgVT;
8771
8772 // The zvqdotq pseudos are defined with sources and destination both
8773 // being i32. This cast is needed for correctness to avoid incorrect
8774 // .vx matching of i8 splats.
8775 A = DAG.getBitcast(VT, A);
8776 B = DAG.getBitcast(VT, B);
8777
8778 MVT ContainerVT = VT;
8779 if (VT.isFixedLengthVector()) {
8780 ContainerVT = getContainerForFixedLengthVector(VT);
8781 Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
8782 A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
8783 B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
8784 }
8785
8786 unsigned Opc;
8787 switch (Op.getOpcode()) {
8788 case ISD::PARTIAL_REDUCE_SMLA:
8789 Opc = RISCVISD::VQDOT_VL;
8790 break;
8791 case ISD::PARTIAL_REDUCE_UMLA:
8792 Opc = RISCVISD::VQDOTU_VL;
8793 break;
8794 case ISD::PARTIAL_REDUCE_SUMLA:
8795 Opc = RISCVISD::VQDOTSU_VL;
8796 break;
8797 default:
8798 llvm_unreachable("Unexpected opcode");
8799 }
8800 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8801 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
8802 if (VT.isFixedLengthVector())
8803 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
8804 return Res;
8805}
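// A minimal sketch of the mapping above, assuming zvqdotq: a PARTIAL_REDUCE
// multiply-accumulate with i8 sources and an i32 accumulator (e.g. nxv16i8
// inputs feeding an nxv4i32 accumulator) is emitted as vqdot.vv for the
// signed form, vqdotu.vv for the unsigned form and vqdotsu.vv for the
// mixed-sign form, each i32 lane accumulating four i8 x i8 products.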
8806
8807 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
8808 SelectionDAG &DAG, unsigned Flags) {
8809 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
8810}
8811
8812 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
8813 SelectionDAG &DAG, unsigned Flags) {
8814 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
8815 Flags);
8816}
8817
8818 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
8819 SelectionDAG &DAG, unsigned Flags) {
8820 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
8821 N->getOffset(), Flags);
8822}
8823
8824 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
8825 SelectionDAG &DAG, unsigned Flags) {
8826 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
8827}
8828
8829 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
8830 EVT Ty, SelectionDAG &DAG) {
8831 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
8832 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8833 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8834 return DAG.getLoad(
8835 Ty, DL, DAG.getEntryNode(), LC,
8836 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8837}
8838
8839 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
8840 EVT Ty, SelectionDAG &DAG) {
8841 RISCVConstantPoolValue *CPV =
8842 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
8843 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8844 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8845 return DAG.getLoad(
8846 Ty, DL, DAG.getEntryNode(), LC,
8847 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8848}
8849
8850template <class NodeTy>
8851SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
8852 bool IsLocal, bool IsExternWeak) const {
8853 SDLoc DL(N);
8854 EVT Ty = getPointerTy(DAG.getDataLayout());
8855
8856 // When HWASAN is used and tagging of global variables is enabled
8857 // they should be accessed via the GOT, since the tagged address of a global
8858 // is incompatible with existing code models. This also applies to non-pic
8859 // mode.
8860 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
8861 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8862 if (IsLocal && !Subtarget.allowTaggedGlobals())
8863 // Use PC-relative addressing to access the symbol. This generates the
8864 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
8865 // %pcrel_lo(auipc)).
8866 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8867
8868 // Use PC-relative addressing to access the GOT for this symbol, then load
8869 // the address from the GOT. This generates the pattern (PseudoLGA sym),
8870 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8871 SDValue Load =
8872 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8873 MachineFunction &MF = DAG.getMachineFunction();
8874 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8875 MachinePointerInfo::getGOT(MF),
8876 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8877 MachineMemOperand::MOInvariant,
8878 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8879 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8880 return Load;
8881 }
8882
8883 switch (getTargetMachine().getCodeModel()) {
8884 default:
8885 reportFatalUsageError("Unsupported code model for lowering");
8886 case CodeModel::Small: {
8887 // Generate a sequence for accessing addresses within the first 2 GiB of
8888 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
8889 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
8890 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
8891 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8892 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
8893 }
8894 case CodeModel::Medium: {
8895 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8896 if (IsExternWeak) {
8897 // An extern weak symbol may be undefined, i.e. have value 0, which may
8898 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
8899 // symbol. This generates the pattern (PseudoLGA sym), which expands to
8900 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8901 SDValue Load =
8902 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8903 MachineFunction &MF = DAG.getMachineFunction();
8904 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8905 MachinePointerInfo::getGOT(MF),
8906 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8907 MachineMemOperand::MOInvariant,
8908 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8909 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8910 return Load;
8911 }
8912
8913 // Generate a sequence for accessing addresses within any 2GiB range within
8914 // the address space. This generates the pattern (PseudoLLA sym), which
8915 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
8916 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8917 }
8918 case CodeModel::Large: {
8919 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8920 return getLargeGlobalAddress(G, DL, Ty, DAG);
8921
8922 // Using pc-relative mode for other node type.
8923 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8924 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8925 }
8926 }
8927}
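// A minimal sketch of the sequences chosen above for a symbol `sym`
// (register choice illustrative):
//   Small code model:  lui a0, %hi(sym); addi a0, a0, %lo(sym)
//   Medium code model: auipc a0, %pcrel_hi(sym); addi a0, a0, %pcrel_lo(label)
//   PIC / extern weak: auipc a0, %got_pcrel_hi(sym);
//                      ld a0, %pcrel_lo(label)(a0)   (lw on RV32)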
8928
8929SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8930 SelectionDAG &DAG) const {
8931 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8932 assert(N->getOffset() == 0 && "unexpected offset in global node");
8933 const GlobalValue *GV = N->getGlobal();
8934 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8935}
8936
8937SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8938 SelectionDAG &DAG) const {
8939 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8940
8941 return getAddr(N, DAG);
8942}
8943
8944SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8945 SelectionDAG &DAG) const {
8946 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8947
8948 return getAddr(N, DAG);
8949}
8950
8951SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8952 SelectionDAG &DAG) const {
8953 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8954
8955 return getAddr(N, DAG);
8956}
8957
8958SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8959 SelectionDAG &DAG,
8960 bool UseGOT) const {
8961 SDLoc DL(N);
8962 EVT Ty = getPointerTy(DAG.getDataLayout());
8963 const GlobalValue *GV = N->getGlobal();
8964 MVT XLenVT = Subtarget.getXLenVT();
8965
8966 if (UseGOT) {
8967 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8968 // load the address from the GOT and add the thread pointer. This generates
8969 // the pattern (PseudoLA_TLS_IE sym), which expands to
8970 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
8971 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8972 SDValue Load =
8973 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
8974 MachineFunction &MF = DAG.getMachineFunction();
8975 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8976 MachinePointerInfo::getGOT(MF),
8977 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8978 MachineMemOperand::MOInvariant,
8979 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8980 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8981
8982 // Add the thread pointer.
8983 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8984 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
8985 }
8986
8987 // Generate a sequence for accessing the address relative to the thread
8988 // pointer, with the appropriate adjustment for the thread pointer offset.
8989 // This generates the pattern
8990 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
8991 SDValue AddrHi =
8992 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
8993 SDValue AddrAdd =
8994 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
8995 SDValue AddrLo =
8996 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
8997
8998 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8999 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9000 SDValue MNAdd =
9001 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
9002 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
9003}
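// A minimal sketch of the two sequences built above (registers illustrative):
//   local-exec:   lui a0, %tprel_hi(sym)
//                 add a0, a0, tp, %tprel_add(sym)
//                 addi a0, a0, %tprel_lo(sym)
//   initial-exec: auipc a0, %tls_ie_pcrel_hi(sym)
//                 ld a0, %pcrel_lo(label)(a0)
//                 add a0, a0, tp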
9004
9005SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
9006 SelectionDAG &DAG) const {
9007 SDLoc DL(N);
9008 EVT Ty = getPointerTy(DAG.getDataLayout());
9009 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
9010 const GlobalValue *GV = N->getGlobal();
9011
9012 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9013 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
9014 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
9015 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9016 SDValue Load =
9017 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
9018
9019 // Prepare argument list to generate call.
9020 ArgListTy Args;
9021 Args.emplace_back(Load, CallTy);
9022
9023 // Setup call to __tls_get_addr.
9024 TargetLowering::CallLoweringInfo CLI(DAG);
9025 CLI.setDebugLoc(DL)
9026 .setChain(DAG.getEntryNode())
9027 .setLibCallee(CallingConv::C, CallTy,
9028 DAG.getExternalSymbol("__tls_get_addr", Ty),
9029 std::move(Args));
9030
9031 return LowerCallTo(CLI).first;
9032}
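// A minimal sketch of the general-dynamic sequence produced above
// (registers illustrative):
//   auipc a0, %tls_gd_pcrel_hi(sym)
//   addi  a0, a0, %pcrel_lo(label)
//   call  __tls_get_addr
// with the call returning the address of the TLS object in a0.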
9033
9034SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
9035 SelectionDAG &DAG) const {
9036 SDLoc DL(N);
9037 EVT Ty = getPointerTy(DAG.getDataLayout());
9038 const GlobalValue *GV = N->getGlobal();
9039
9040 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9041 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
9042 //
9043 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
9044 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
9045 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
9046 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
9047 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9048 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
9049}
9050
9051SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
9052 SelectionDAG &DAG) const {
9053 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9054 assert(N->getOffset() == 0 && "unexpected offset in global node");
9055
9056 if (DAG.getTarget().useEmulatedTLS())
9057 return LowerToTLSEmulatedModel(N, DAG);
9058
9059 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
9060
9061 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
9062 CallingConv::GHC)
9063 reportFatalUsageError("In GHC calling convention TLS is not supported");
9064
9065 SDValue Addr;
9066 switch (Model) {
9067 case TLSModel::LocalExec:
9068 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9069 break;
9070 case TLSModel::InitialExec:
9071 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9072 break;
9073 case TLSModel::LocalDynamic:
9074 case TLSModel::GeneralDynamic:
9075 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9076 : getDynamicTLSAddr(N, DAG);
9077 break;
9078 }
9079
9080 return Addr;
9081}
9082
9083// Return true if Val is equal to (setcc LHS, RHS, CC).
9084// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9085// Otherwise, return std::nullopt.
9086static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9087 ISD::CondCode CC, SDValue Val) {
9088 assert(Val->getOpcode() == ISD::SETCC);
9089 SDValue LHS2 = Val.getOperand(0);
9090 SDValue RHS2 = Val.getOperand(1);
9091 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9092
9093 if (LHS == LHS2 && RHS == RHS2) {
9094 if (CC == CC2)
9095 return true;
9096 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9097 return false;
9098 } else if (LHS == RHS2 && RHS == LHS2) {
9099 CC2 = ISD::getSetCCSwappedOperands(CC2);
9100 if (CC == CC2)
9101 return true;
9102 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9103 return false;
9104 }
9105
9106 return std::nullopt;
9107}
9108
9109 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
9110 const RISCVSubtarget &Subtarget) {
9111 SDValue CondV = N->getOperand(0);
9112 SDValue TrueV = N->getOperand(1);
9113 SDValue FalseV = N->getOperand(2);
9114 MVT VT = N->getSimpleValueType(0);
9115 SDLoc DL(N);
9116
9117 if (!Subtarget.hasConditionalMoveFusion()) {
9118 // (select c, -1, y) -> -c | y
9119 if (isAllOnesConstant(TrueV)) {
9120 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9121 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9122 }
9123 // (select c, y, -1) -> (c-1) | y
9124 if (isAllOnesConstant(FalseV)) {
9125 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9126 DAG.getAllOnesConstant(DL, VT));
9127 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9128 }
9129
9130 // (select c, 0, y) -> (c-1) & y
9131 if (isNullConstant(TrueV)) {
9132 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9133 DAG.getAllOnesConstant(DL, VT));
9134 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9135 }
9136 // (select c, y, 0) -> -c & y
9137 if (isNullConstant(FalseV)) {
9138 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9139 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9140 }
9141 }
9142
9143 // select c, ~x, x --> xor -c, x
9144 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9145 const APInt &TrueVal = TrueV->getAsAPIntVal();
9146 const APInt &FalseVal = FalseV->getAsAPIntVal();
9147 if (~TrueVal == FalseVal) {
9148 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9149 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9150 }
9151 }
9152
9153 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9154 // when both truev and falsev are also setcc.
9155 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9156 FalseV.getOpcode() == ISD::SETCC) {
9157 SDValue LHS = CondV.getOperand(0);
9158 SDValue RHS = CondV.getOperand(1);
9159 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9160
9161 // (select x, x, y) -> x | y
9162 // (select !x, x, y) -> x & y
9163 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9164 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9165 DAG.getFreeze(FalseV));
9166 }
9167 // (select x, y, x) -> x & y
9168 // (select !x, y, x) -> x | y
9169 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9170 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9171 DAG.getFreeze(TrueV), FalseV);
9172 }
9173 }
9174
9175 return SDValue();
9176}
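// A minimal worked check of the folds above, assuming the condition c is a
// 0/1 setcc result: for (select c, -1, y), -c is 0 when c == 0 and all-ones
// when c == 1, so (-c | y) yields y or -1 respectively; similarly
// (select c, 0, y) becomes ((c - 1) & y), since c - 1 is all-ones for c == 0
// and 0 for c == 1.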
9177
9178// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9179// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9180// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
9181// being `0` or `-1`. In such cases we can replace `select` with `and`.
9182// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9183// than `c0`?
9184static SDValue
9185 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
9186 const RISCVSubtarget &Subtarget) {
9187 if (Subtarget.hasShortForwardBranchOpt())
9188 return SDValue();
9189
9190 unsigned SelOpNo = 0;
9191 SDValue Sel = BO->getOperand(0);
9192 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9193 SelOpNo = 1;
9194 Sel = BO->getOperand(1);
9195 }
9196
9197 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9198 return SDValue();
9199
9200 unsigned ConstSelOpNo = 1;
9201 unsigned OtherSelOpNo = 2;
9202 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9203 ConstSelOpNo = 2;
9204 OtherSelOpNo = 1;
9205 }
9206 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9207 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9208 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9209 return SDValue();
9210
9211 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9212 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9213 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9214 return SDValue();
9215
9216 SDLoc DL(Sel);
9217 EVT VT = BO->getValueType(0);
9218
9219 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9220 if (SelOpNo == 1)
9221 std::swap(NewConstOps[0], NewConstOps[1]);
9222
9223 SDValue NewConstOp =
9224 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9225 if (!NewConstOp)
9226 return SDValue();
9227
9228 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9229 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9230 return SDValue();
9231
9232 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9233 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9234 if (SelOpNo == 1)
9235 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9236 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9237
9238 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9239 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9240 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9241}
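// A minimal sketch of the transform above with hypothetical constants: for
// (and (select cond, x, 7), 8) the constant arm folds to (and 7, 8) == 0, so
// the expression becomes (select cond, (and x, 8), 0), which
// combineSelectToBinOp can then lower as a simple mask rather than a select.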
9242
9243SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9244 SDValue CondV = Op.getOperand(0);
9245 SDValue TrueV = Op.getOperand(1);
9246 SDValue FalseV = Op.getOperand(2);
9247 SDLoc DL(Op);
9248 MVT VT = Op.getSimpleValueType();
9249 MVT XLenVT = Subtarget.getXLenVT();
9250
9251 // Lower vector SELECTs to VSELECTs by splatting the condition.
9252 if (VT.isVector()) {
9253 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9254 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9255 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9256 }
9257
9258 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9259 // nodes to implement the SELECT. Performing the lowering here allows for
9260 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9261 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9262 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
9263 VT.isScalarInteger()) {
9264 // (select c, t, 0) -> (czero_eqz t, c)
9265 if (isNullConstant(FalseV))
9266 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9267 // (select c, 0, f) -> (czero_nez f, c)
9268 if (isNullConstant(TrueV))
9269 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9270
9271 // Check to see if a given operation is a 'NOT', if so return the negated
9272 // operand
9273 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9274 using namespace llvm::SDPatternMatch;
9275 SDValue Xor;
9276 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9277 return Xor;
9278 }
9279 return std::nullopt;
9280 };
9281 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9282 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9283 if (TrueV.getOpcode() == ISD::AND &&
9284 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9285 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9286 ? getNotOperand(TrueV.getOperand(1))
9287 : getNotOperand(TrueV.getOperand(0));
9288 if (NotOperand) {
9289 SDValue CMOV =
9290 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9291 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9292 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
9293 }
9294 return DAG.getNode(
9295 ISD::OR, DL, VT, TrueV,
9296 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
9297 }
9298
9299 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
9300 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
9301 if (FalseV.getOpcode() == ISD::AND &&
9302 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
9303 auto NotOperand = (FalseV.getOperand(0) == TrueV)
9304 ? getNotOperand(FalseV.getOperand(1))
9305 : getNotOperand(FalseV.getOperand(0));
9306 if (NotOperand) {
9307 SDValue CMOV =
9308 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
9309 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9310 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
9311 }
9312 return DAG.getNode(
9313 ISD::OR, DL, VT, FalseV,
9314 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
9315 }
9316
9317 // Try some other optimizations before falling back to generic lowering.
9318 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
9319 return V;
9320
9321 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
9322 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
9323 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9324 const APInt &TrueVal = TrueV->getAsAPIntVal();
9325 const APInt &FalseVal = FalseV->getAsAPIntVal();
9326
9327 // Prefer these over Zicond to avoid materializing an immediate:
9328 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
9329 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
9330 if (CondV.getOpcode() == ISD::SETCC &&
9331 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
9332 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9333 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
9334 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
9335 int64_t TrueImm = TrueVal.getSExtValue();
9336 int64_t FalseImm = FalseVal.getSExtValue();
9337 if (CCVal == ISD::SETGT)
9338 std::swap(TrueImm, FalseImm);
9339 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
9340 isInt<12>(TrueImm - FalseImm)) {
9341 SDValue SRA =
9342 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
9343 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
9344 SDValue AND =
9345 DAG.getNode(ISD::AND, DL, VT, SRA,
9346 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
9347 return DAG.getNode(ISD::ADD, DL, VT, AND,
9348 DAG.getSignedConstant(FalseImm, DL, VT));
9349 }
9350 }
9351 }
9352
9353 // Use SHL/ADDI (and possible XORI) to avoid having to materialize
9354 // a constant in register
9355 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
9356 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
9357 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9358 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
9359 }
9360 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
9361 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
9362 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
9363 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9364 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
9365 }
9366
9367 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
9368 const int DeltaCost = RISCVMatInt::getIntMatCost(
9369 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9370 // Does the addend fold into an ADDI
9371 if (Addend.isSignedIntN(12))
9372 return DeltaCost;
9373 const int AddendCost = RISCVMatInt::getIntMatCost(
9374 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9375 return AddendCost + DeltaCost;
9376 };
9377 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
9378 getCost(TrueVal - FalseVal, FalseVal);
9379 SDValue LHSVal = DAG.getConstant(
9380 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
9381 SDValue CMOV =
9382 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9383 DL, VT, LHSVal, CondV);
9384 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
9385 }
9386
9387 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
9388 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
9389 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
9390 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
9391 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
9392 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
9393 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
9394 // Fall back to XORI if Const == -0x800
9395 if (RawConstVal == -0x800) {
9396 SDValue XorOp = DAG.getNode(ISD::XOR, DL, VT, RegV, ConstVal);
9397 SDValue CMOV =
9398 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9399 DL, VT, XorOp, CondV);
9400 return DAG.getNode(ISD::XOR, DL, VT, CMOV, ConstVal);
9401 }
9402 // Efficient only if the constant and its negation fit into `ADDI`
9403 // Prefer Add/Sub over Xor since can be compressed for small immediates
9404 if (isInt<12>(RawConstVal)) {
9405 SDValue SubOp = DAG.getNode(ISD::SUB, DL, VT, RegV, ConstVal);
9406 SDValue CMOV =
9407 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9408 DL, VT, SubOp, CondV);
9409 return DAG.getNode(ISD::ADD, DL, VT, CMOV, ConstVal);
9410 }
9411 }
9412
9413 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
9414 // Unless we have the short forward branch optimization.
9415 if (!Subtarget.hasConditionalMoveFusion())
9416 return DAG.getNode(
9417 ISD::OR, DL, VT,
9418 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
9419 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
9420 SDNodeFlags::Disjoint);
9421 }
9422
9423 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
9424 return V;
9425
9426 if (Op.hasOneUse()) {
9427 unsigned UseOpc = Op->user_begin()->getOpcode();
9428 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
9429 SDNode *BinOp = *Op->user_begin();
9430 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
9431 DAG, Subtarget)) {
9432 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
9433 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
9434 // may return a constant node and cause crash in lowerSELECT.
9435 if (NewSel.getOpcode() == ISD::SELECT)
9436 return lowerSELECT(NewSel, DAG);
9437 return NewSel;
9438 }
9439 }
9440 }
9441
9442 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
9443 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
9444 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
9445 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
9446 if (FPTV && FPFV) {
9447 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
9448 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
9449 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
9450 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
9451 DAG.getConstant(1, DL, XLenVT));
9452 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
9453 }
9454 }
9455
9456 // If the condition is not an integer SETCC which operates on XLenVT, we need
9457 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
9458 // (select condv, truev, falsev)
9459 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
9460 if (CondV.getOpcode() != ISD::SETCC ||
9461 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
9462 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9463 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
9464
9465 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
9466
9467 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9468 }
9469
9470 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
9471 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
9472 // advantage of the integer compare+branch instructions. i.e.:
9473 // (select (setcc lhs, rhs, cc), truev, falsev)
9474 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
9475 SDValue LHS = CondV.getOperand(0);
9476 SDValue RHS = CondV.getOperand(1);
9477 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9478
9479 // Special case for a select of 2 constants that have a difference of 1.
9480 // Normally this is done by DAGCombine, but if the select is introduced by
9481 // type legalization or op legalization, we miss it. Restricting to SETLT
9482 // case for now because that is what signed saturating add/sub need.
9483 // FIXME: We don't need the condition to be SETLT or even a SETCC,
9484 // but we would probably want to swap the true/false values if the condition
9485 // is SETGE/SETLE to avoid an XORI.
9486 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
9487 CCVal == ISD::SETLT) {
9488 const APInt &TrueVal = TrueV->getAsAPIntVal();
9489 const APInt &FalseVal = FalseV->getAsAPIntVal();
9490 if (TrueVal - 1 == FalseVal)
9491 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
9492 if (TrueVal + 1 == FalseVal)
9493 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
9494 }
9495
9496 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9497 // 1 < x ? x : 1 -> 0 < x ? x : 1
9498 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
9499 RHS == TrueV && LHS == FalseV) {
9500 LHS = DAG.getConstant(0, DL, VT);
9501 // 0 <u x is the same as x != 0.
9502 if (CCVal == ISD::SETULT) {
9503 std::swap(LHS, RHS);
9504 CCVal = ISD::SETNE;
9505 }
9506 }
9507
9508 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
9509 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
9510 RHS == FalseV) {
9511 RHS = DAG.getConstant(0, DL, VT);
9512 }
9513
9514 SDValue TargetCC = DAG.getCondCode(CCVal);
9515
9516 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
9517 // (select (setcc lhs, rhs, CC), constant, falsev)
9518 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
9519 std::swap(TrueV, FalseV);
9520 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
9521 }
9522
9523 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
9524 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9525}
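// A minimal sketch of the generic Zicond lowering above, with the condition
// in a2, the true value in a1 and the false value in a3 (registers
// illustrative):
//   czero.eqz a0, a1, a2   # a0 = (a2 == 0) ? 0 : a1
//   czero.nez a3, a3, a2   # a3 = (a2 != 0) ? 0 : a3
//   or        a0, a0, a3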
9526
9527SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
9528 SDValue CondV = Op.getOperand(1);
9529 SDLoc DL(Op);
9530 MVT XLenVT = Subtarget.getXLenVT();
9531
9532 if (CondV.getOpcode() == ISD::SETCC &&
9533 CondV.getOperand(0).getValueType() == XLenVT) {
9534 SDValue LHS = CondV.getOperand(0);
9535 SDValue RHS = CondV.getOperand(1);
9536 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9537
9538 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9539
9540 SDValue TargetCC = DAG.getCondCode(CCVal);
9541 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9542 LHS, RHS, TargetCC, Op.getOperand(2));
9543 }
9544
9545 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9546 CondV, DAG.getConstant(0, DL, XLenVT),
9547 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
9548}
9549
9550SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
9551 MachineFunction &MF = DAG.getMachineFunction();
9552 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
9553
9554 SDLoc DL(Op);
9555 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
9556 getPointerTy(MF.getDataLayout()));
9557
9558 // vastart just stores the address of the VarArgsFrameIndex slot into the
9559 // memory location argument.
9560 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
9561 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
9562 MachinePointerInfo(SV));
9563}
9564
9565SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
9566 SelectionDAG &DAG) const {
9567 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9568 MachineFunction &MF = DAG.getMachineFunction();
9569 MachineFrameInfo &MFI = MF.getFrameInfo();
9570 MFI.setFrameAddressIsTaken(true);
9571 Register FrameReg = RI.getFrameRegister(MF);
9572 int XLenInBytes = Subtarget.getXLen() / 8;
9573
9574 EVT VT = Op.getValueType();
9575 SDLoc DL(Op);
9576 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
9577 unsigned Depth = Op.getConstantOperandVal(0);
9578 while (Depth--) {
9579 int Offset = -(XLenInBytes * 2);
9580 SDValue Ptr = DAG.getNode(
9581 ISD::ADD, DL, VT, FrameAddr,
9582 DAG.getSignedConstant(Offset, DL, VT));
9583 FrameAddr =
9584 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
9585 }
9586 return FrameAddr;
9587}
9588
9589SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
9590 SelectionDAG &DAG) const {
9591 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9592 MachineFunction &MF = DAG.getMachineFunction();
9593 MachineFrameInfo &MFI = MF.getFrameInfo();
9594 MFI.setReturnAddressIsTaken(true);
9595 MVT XLenVT = Subtarget.getXLenVT();
9596 int XLenInBytes = Subtarget.getXLen() / 8;
9597
9598 EVT VT = Op.getValueType();
9599 SDLoc DL(Op);
9600 unsigned Depth = Op.getConstantOperandVal(0);
9601 if (Depth) {
9602 int Off = -XLenInBytes;
9603 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
9604 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
9605 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
9606 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
9607 MachinePointerInfo());
9608 }
9609
9610 // Return the value of the return address register, marking it an implicit
9611 // live-in.
9612 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
9613 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
9614}
9615
9616SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
9617 SelectionDAG &DAG) const {
9618 SDLoc DL(Op);
9619 SDValue Lo = Op.getOperand(0);
9620 SDValue Hi = Op.getOperand(1);
9621 SDValue Shamt = Op.getOperand(2);
9622 EVT VT = Lo.getValueType();
9623
9624 // if Shamt-XLEN < 0: // Shamt < XLEN
9625 // Lo = Lo << Shamt
9626 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
9627 // else:
9628 // Lo = 0
9629 // Hi = Lo << (Shamt-XLEN)
9630
9631 SDValue Zero = DAG.getConstant(0, DL, VT);
9632 SDValue One = DAG.getConstant(1, DL, VT);
9633 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9634 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9635 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9636 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9637
9638 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
9639 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
9640 SDValue ShiftRightLo =
9641 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
9642 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
9643 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
9644 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
9645
9646 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9647
9648 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
9649 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9650
9651 SDValue Parts[2] = {Lo, Hi};
9652 return DAG.getMergeValues(Parts, DL);
9653}
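// A minimal worked check of the expansion above on RV32 (XLEN == 32): for
// Shamt == 40, Shamt - XLEN == 8 >= 0, so Lo == 0 and Hi == Lo << 8; for
// Shamt == 4, Lo == Lo << 4 and Hi == (Hi << 4) | (Lo >> 28), i.e. the four
// bits shifted out of Lo move into the low bits of Hi.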
9654
9655SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
9656 bool IsSRA) const {
9657 SDLoc DL(Op);
9658 SDValue Lo = Op.getOperand(0);
9659 SDValue Hi = Op.getOperand(1);
9660 SDValue Shamt = Op.getOperand(2);
9661 EVT VT = Lo.getValueType();
9662
9663 // SRA expansion:
9664 // if Shamt-XLEN < 0: // Shamt < XLEN
9665 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9666 // Hi = Hi >>s Shamt
9667 // else:
9668 // Lo = Hi >>s (Shamt-XLEN);
9669 // Hi = Hi >>s (XLEN-1)
9670 //
9671 // SRL expansion:
9672 // if Shamt-XLEN < 0: // Shamt < XLEN
9673 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9674 // Hi = Hi >>u Shamt
9675 // else:
9676 // Lo = Hi >>u (Shamt-XLEN);
9677 // Hi = 0;
9678
9679 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
9680
9681 SDValue Zero = DAG.getConstant(0, DL, VT);
9682 SDValue One = DAG.getConstant(1, DL, VT);
9683 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9684 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9685 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9686 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9687
9688 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
9689 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
9690 SDValue ShiftLeftHi =
9691 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
9692 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
9693 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
9694 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
9695 SDValue HiFalse =
9696 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
9697
9698 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9699
9700 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
9701 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9702
9703 SDValue Parts[2] = {Lo, Hi};
9704 return DAG.getMergeValues(Parts, DL);
9705}
9706
9707// Lower splats of i1 types to SETCC. For each mask vector type, we have a
9708// legal equivalently-sized i8 type, so we can use that as a go-between.
9709SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
9710 SelectionDAG &DAG) const {
9711 SDLoc DL(Op);
9712 MVT VT = Op.getSimpleValueType();
9713 SDValue SplatVal = Op.getOperand(0);
9714 // All-zeros or all-ones splats are handled specially.
9715 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
9716 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9717 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
9718 }
9719 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
9720 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9721 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
9722 }
9723 MVT InterVT = VT.changeVectorElementType(MVT::i8);
9724 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
9725 DAG.getConstant(1, DL, SplatVal.getValueType()));
9726 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
9727 SDValue Zero = DAG.getConstant(0, DL, InterVT);
9728 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
9729}
9730
9731// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
9732// illegal (currently only vXi64 RV32).
9733// FIXME: We could also catch non-constant sign-extended i32 values and lower
9734// them to VMV_V_X_VL.
9735SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
9736 SelectionDAG &DAG) const {
9737 SDLoc DL(Op);
9738 MVT VecVT = Op.getSimpleValueType();
9739 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
9740 "Unexpected SPLAT_VECTOR_PARTS lowering");
9741
9742 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
9743 SDValue Lo = Op.getOperand(0);
9744 SDValue Hi = Op.getOperand(1);
9745
9746 MVT ContainerVT = VecVT;
9747 if (VecVT.isFixedLengthVector())
9748 ContainerVT = getContainerForFixedLengthVector(VecVT);
9749
9750 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9751
9752 SDValue Res =
9753 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
9754
9755 if (VecVT.isFixedLengthVector())
9756 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
9757
9758 return Res;
9759}
9760
9761// Custom-lower extensions from mask vectors by using a vselect either with 1
9762// for zero/any-extension or -1 for sign-extension:
9763// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
9764// Note that any-extension is lowered identically to zero-extension.
9765SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
9766 int64_t ExtTrueVal) const {
9767 SDLoc DL(Op);
9768 MVT VecVT = Op.getSimpleValueType();
9769 SDValue Src = Op.getOperand(0);
9770 // Only custom-lower extensions from mask types
9771 assert(Src.getValueType().isVector() &&
9772 Src.getValueType().getVectorElementType() == MVT::i1);
9773
9774 if (VecVT.isScalableVector()) {
9775 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
9776 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
9777 if (Src.getOpcode() == ISD::XOR &&
9778 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
9779 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
9780 SplatTrueVal);
9781 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
9782 }
9783
9784 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
9785 MVT I1ContainerVT =
9786 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
9787
9788 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
9789
9790 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9791
9792 MVT XLenVT = Subtarget.getXLenVT();
9793 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
9794 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
9795
9796 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9797 SDValue Xor = Src.getOperand(0);
9798 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
9799 SDValue ScalableOnes = Xor.getOperand(1);
9800 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
9801 ScalableOnes.getOperand(0).isUndef() &&
9802 ISD::isConstantSplatVectorAllOnes(
9803 ScalableOnes.getOperand(1).getNode())) {
9804 CC = Xor.getOperand(0);
9805 std::swap(SplatZero, SplatTrueVal);
9806 }
9807 }
9808 }
9809
9810 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9811 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9812 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9813 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
9814 SDValue Select =
9815 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
9816 SplatZero, DAG.getUNDEF(ContainerVT), VL);
9817
9818 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
9819}
9820
9821// Custom-lower truncations from vectors to mask vectors by using a mask and a
9822// setcc operation:
9823// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
9824SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
9825 SelectionDAG &DAG) const {
9826 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
9827 SDLoc DL(Op);
9828 EVT MaskVT = Op.getValueType();
9829 // Only expect to custom-lower truncations to mask types
9830 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
9831 "Unexpected type for vector mask lowering");
9832 SDValue Src = Op.getOperand(0);
9833 MVT VecVT = Src.getSimpleValueType();
9834 SDValue Mask, VL;
9835 if (IsVPTrunc) {
9836 Mask = Op.getOperand(1);
9837 VL = Op.getOperand(2);
9838 }
9839 // If this is a fixed vector, we need to convert it to a scalable vector.
9840 MVT ContainerVT = VecVT;
9841
9842 if (VecVT.isFixedLengthVector()) {
9843 ContainerVT = getContainerForFixedLengthVector(VecVT);
9844 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9845 if (IsVPTrunc) {
9846 MVT MaskContainerVT =
9847 getContainerForFixedLengthVector(Mask.getSimpleValueType());
9848 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
9849 }
9850 }
9851
9852 if (!IsVPTrunc) {
9853 std::tie(Mask, VL) =
9854 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9855 }
9856
9857 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
9858 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
9859
9860 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9861 DAG.getUNDEF(ContainerVT), SplatOne, VL);
9862 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9863 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9864
9865 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
9866 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
9867 DAG.getUNDEF(ContainerVT), Mask, VL);
9868 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
9869 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
9870 DAG.getUNDEF(MaskContainerVT), Mask, VL});
9871 if (MaskVT.isFixedLengthVector())
9872 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
9873 return Trunc;
9874}
9875
9876SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
9877 SelectionDAG &DAG) const {
9878 unsigned Opc = Op.getOpcode();
9879 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
9880 SDLoc DL(Op);
9881
9882 MVT VT = Op.getSimpleValueType();
9883 // Only custom-lower vector truncates
9884 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9885
9886 // Truncates to mask types are handled differently
9887 if (VT.getVectorElementType() == MVT::i1)
9888 return lowerVectorMaskTruncLike(Op, DAG);
9889
9890 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
9891 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
9892 // truncate by one power of two at a time.
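// For example, a v4i64 -> v4i8 truncate is conceptually emitted as the chain
// v4i64 -> v4i32 -> v4i16 -> v4i8, one TRUNCATE_VECTOR_VL node per halving
// (illustrative; the loop below performs exactly this SEW-halving iteration).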
9893 MVT DstEltVT = VT.getVectorElementType();
9894
9895 SDValue Src = Op.getOperand(0);
9896 MVT SrcVT = Src.getSimpleValueType();
9897 MVT SrcEltVT = SrcVT.getVectorElementType();
9898
9899 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
9900 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
9901 "Unexpected vector truncate lowering");
9902
9903 MVT ContainerVT = SrcVT;
9904 SDValue Mask, VL;
9905 if (IsVPTrunc) {
9906 Mask = Op.getOperand(1);
9907 VL = Op.getOperand(2);
9908 }
9909 if (SrcVT.isFixedLengthVector()) {
9910 ContainerVT = getContainerForFixedLengthVector(SrcVT);
9911 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9912 if (IsVPTrunc) {
9913 MVT MaskVT = getMaskTypeFor(ContainerVT);
9914 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9915 }
9916 }
9917
9918 SDValue Result = Src;
9919 if (!IsVPTrunc) {
9920 std::tie(Mask, VL) =
9921 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9922 }
9923
9924 unsigned NewOpc;
9925 if (Opc == ISD::TRUNCATE_SSAT_S)
9926 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
9927 else if (Opc == ISD::TRUNCATE_USAT_U)
9928 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
9929 else
9930 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
9931
9932 do {
9933 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
9934 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
9935 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
9936 } while (SrcEltVT != DstEltVT);
9937
9938 if (SrcVT.isFixedLengthVector())
9939 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9940
9941 return Result;
9942}
9943
9944SDValue
9945RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
9946 SelectionDAG &DAG) const {
9947 SDLoc DL(Op);
9948 SDValue Chain = Op.getOperand(0);
9949 SDValue Src = Op.getOperand(1);
9950 MVT VT = Op.getSimpleValueType();
9951 MVT SrcVT = Src.getSimpleValueType();
9952 MVT ContainerVT = VT;
9953 if (VT.isFixedLengthVector()) {
9954 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9955 ContainerVT =
9956 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9957 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9958 }
9959
9960 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9961
9962 // RVV can only widen/truncate fp to types double/half the size of the source.
9963 if ((VT.getVectorElementType() == MVT::f64 &&
9964 (SrcVT.getVectorElementType() == MVT::f16 ||
9965 SrcVT.getVectorElementType() == MVT::bf16)) ||
9966 ((VT.getVectorElementType() == MVT::f16 ||
9967 VT.getVectorElementType() == MVT::bf16) &&
9968 SrcVT.getVectorElementType() == MVT::f64)) {
9969 // To avoid double rounding, the intermediate rounding should be round-to-odd.
9970 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
9971 ? RISCVISD::STRICT_FP_EXTEND_VL
9972 : RISCVISD::STRICT_VFNCVT_ROD_VL;
9973 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9974 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
9975 Chain, Src, Mask, VL);
9976 Chain = Src.getValue(1);
9977 }
9978
9979 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
9980 ? RISCVISD::STRICT_FP_EXTEND_VL
9981 : RISCVISD::STRICT_FP_ROUND_VL;
9982 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
9983 Chain, Src, Mask, VL);
9984 if (VT.isFixedLengthVector()) {
9985 // StrictFP operations have two result values. Their lowered result should
9986 // have the same result count.
9987 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
9988 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
9989 }
9990 return Res;
9991}
9992
9993SDValue
9994RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
9995 SelectionDAG &DAG) const {
9996 bool IsVP =
9997 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
9998 bool IsExtend =
9999 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
10000 // RVV can only truncate fp to types half the size of the source. We
10001 // custom-lower f64->f16 rounds via RVV's round-to-odd float
10002 // conversion instruction.
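// Illustrative sequence for a (VP_)FP_ROUND from f64 to f16 element types:
// first vfncvt.rod.f.f.w narrows f64 -> f32 with round-to-odd, then a regular
// vfncvt.f.f.w narrows f32 -> f16; the round-to-odd intermediate step avoids
// double-rounding errors (sketch only, register operands omitted).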
10003 SDLoc DL(Op);
10004 MVT VT = Op.getSimpleValueType();
10005
10006 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10007
10008 SDValue Src = Op.getOperand(0);
10009 MVT SrcVT = Src.getSimpleValueType();
10010
10011 bool IsDirectExtend =
10012 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
10013 (SrcVT.getVectorElementType() != MVT::f16 &&
10014 SrcVT.getVectorElementType() != MVT::bf16));
10015 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
10016 VT.getVectorElementType() != MVT::bf16) ||
10017 SrcVT.getVectorElementType() != MVT::f64);
10018
10019 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
10020
10021 // We have regular SD node patterns for direct non-VL extends.
10022 if (VT.isScalableVector() && IsDirectConv && !IsVP)
10023 return Op;
10024
10025 // Prepare any fixed-length vector operands.
10026 MVT ContainerVT = VT;
10027 SDValue Mask, VL;
10028 if (IsVP) {
10029 Mask = Op.getOperand(1);
10030 VL = Op.getOperand(2);
10031 }
10032 if (VT.isFixedLengthVector()) {
10033 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
10034 ContainerVT =
10035 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10036 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10037 if (IsVP) {
10038 MVT MaskVT = getMaskTypeFor(ContainerVT);
10039 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10040 }
10041 }
10042
10043 if (!IsVP)
10044 std::tie(Mask, VL) =
10045 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10046
10047 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
10048
10049 if (IsDirectConv) {
10050 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
10051 if (VT.isFixedLengthVector())
10052 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
10053 return Src;
10054 }
10055
10056 unsigned InterConvOpc =
10057 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
10058
10059 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10060 SDValue IntermediateConv =
10061 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
10062 SDValue Result =
10063 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
10064 if (VT.isFixedLengthVector())
10065 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10066 return Result;
10067}
10068
10069// Given a scalable vector type and an index into it, returns the type for the
10070// smallest subvector that the index fits in. This can be used to reduce LMUL
10071// for operations like vslidedown.
10072//
10073// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10074static std::optional<MVT>
10075getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10076 const RISCVSubtarget &Subtarget) {
10077 assert(VecVT.isScalableVector());
10078 const unsigned EltSize = VecVT.getScalarSizeInBits();
10079 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10080 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10081 MVT SmallerVT;
10082 if (MaxIdx < MinVLMAX)
10083 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10084 else if (MaxIdx < MinVLMAX * 2)
10085 SmallerVT =
10086 RISCVTargetLowering::getM1VT(VecVT).getDoubleNumVectorElementsVT();
10087 else if (MaxIdx < MinVLMAX * 4)
10088 SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10089 .getDoubleNumVectorElementsVT()
10090 .getDoubleNumVectorElementsVT();
10091 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10092 return std::nullopt;
10093 return SmallerVT;
10094}
10095
10096 static bool isValidVisniInsertExtractIndex(SDValue Idx) {
10097 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10098 if (!IdxC || isNullConstant(Idx))
10099 return false;
10100 return isUInt<5>(IdxC->getZExtValue());
10101}
10102
10103// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10104// first position of a vector, and that vector is slid up to the insert index.
10105// By limiting the active vector length to index+1 and merging with the
10106// original vector (with an undisturbed tail policy for elements >= VL), we
10107// achieve the desired result of leaving all elements untouched except the one
10108// at VL-1, which is replaced with the desired value.
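// Illustrative sketch for inserting a scalar at index 2 of a v4i32 (register
// names assumed):
//   vsetivli zero, 3, e32, m1, tu, ma   ; VL = index + 1, tail undisturbed
//   vmv.s.x  v9, a0                     ; scalar into element 0 of a temporary
//   vslideup.vi v8, v9, 2               ; slide it up to index 2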
10109SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10110 SelectionDAG &DAG) const {
10111 SDLoc DL(Op);
10112 MVT VecVT = Op.getSimpleValueType();
10113 MVT XLenVT = Subtarget.getXLenVT();
10114 SDValue Vec = Op.getOperand(0);
10115 SDValue Val = Op.getOperand(1);
10116 MVT ValVT = Val.getSimpleValueType();
10117 SDValue Idx = Op.getOperand(2);
10118
10119 if (VecVT.getVectorElementType() == MVT::i1) {
10120 // FIXME: For now we just promote to an i8 vector and insert into that,
10121 // but this is probably not optimal.
10122 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10123 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10124 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10125 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10126 }
10127
10128 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10129 ValVT == MVT::bf16) {
10130 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10131 MVT IntVT = VecVT.changeTypeToInteger();
10132 SDValue IntInsert = DAG.getNode(
10133 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10134 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10135 return DAG.getBitcast(VecVT, IntInsert);
10136 }
10137
10138 MVT ContainerVT = VecVT;
10139 // If the operand is a fixed-length vector, convert to a scalable one.
10140 if (VecVT.isFixedLengthVector()) {
10141 ContainerVT = getContainerForFixedLengthVector(VecVT);
10142 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10143 }
10144
10145 // If we know the index we're going to insert at, we can shrink Vec so that
10146 // we're performing the scalar inserts and slideup on a smaller LMUL.
10147 SDValue OrigVec = Vec;
10148 std::optional<unsigned> AlignedIdx;
10149 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10150 const unsigned OrigIdx = IdxC->getZExtValue();
10151 // Do we know an upper bound on LMUL?
10152 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10153 DL, DAG, Subtarget)) {
10154 ContainerVT = *ShrunkVT;
10155 AlignedIdx = 0;
10156 }
10157
10158 // If we're compiling for an exact VLEN value, we can always perform
10159 // the insert in m1 as we can determine the register corresponding to
10160 // the index in the register group.
10161 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10162 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10163 EVT ElemVT = VecVT.getVectorElementType();
10164 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10165 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10166 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10167 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10168 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10169 ContainerVT = M1VT;
10170 }
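// Worked example (assuming VLEN=128): inserting at index 5 of a v8i64 (an m4
// register group) gives ElemsPerVReg = 128/64 = 2, so SubRegIdx = 5/2 = 2 and
// RemIdx = 5%2 = 1; the insert is then performed at index 1 of the third m1
// register of the group, and the result is re-inserted into the full group.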
10171
10172 if (AlignedIdx)
10173 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
10174 }
10175
10176 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
10177 // Even i64-element vectors on RV32 can be lowered without scalar
10178 // legalization if the most-significant 32 bits of the value are not affected
10179 // by the sign-extension of the lower 32 bits.
10180 // TODO: We could also catch sign extensions of a 32-bit value.
10181 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
10182 const auto *CVal = cast<ConstantSDNode>(Val);
10183 if (isInt<32>(CVal->getSExtValue())) {
10184 IsLegalInsert = true;
10185 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
10186 }
10187 }
10188
10189 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10190
10191 SDValue ValInVec;
10192
10193 if (IsLegalInsert) {
10194 unsigned Opc =
10195 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
10196 if (isNullConstant(Idx)) {
10197 if (!VecVT.isFloatingPoint())
10198 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10199 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
10200
10201 if (AlignedIdx)
10202 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10203 if (!VecVT.isFixedLengthVector())
10204 return Vec;
10205 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10206 }
10207
10208 // Use ri.vinsert.v.x if available.
10209 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
10210 isValidVisniInsertExtractIndex(Idx)) {
10211 // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
10212 SDValue PolicyOp =
10213 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
10214 Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
10215 VL, PolicyOp);
10216 if (AlignedIdx)
10217 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10218 if (!VecVT.isFixedLengthVector())
10219 return Vec;
10220 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10221 }
10222
10223 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
10224 } else {
10225 // On RV32, i64-element vectors must be specially handled to place the
10226 // value at element 0, by using two vslide1down instructions in sequence on
10227 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
10228 // this.
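// Illustrative sketch of the sequence below (register names assumed):
//   vsetivli zero, 2, e32, m1, ta, ma   ; two 32-bit halves
//   vslide1down.vx v9, v8, a0           ; shift in the low word
//   vslide1down.vx v9, v9, a1           ; shift in the high word
// followed by a bitcast back to the i64-element container type.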
10229 SDValue ValLo, ValHi;
10230 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
10231 MVT I32ContainerVT =
10232 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
10233 SDValue I32Mask =
10234 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
10235 // Limit the active VL to two.
10236 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
10237 // If the Idx is 0 we can insert directly into the vector.
10238 if (isNullConstant(Idx)) {
10239 // First slide in the lo value, then the hi in above it. We use slide1down
10240 // to avoid the register group overlap constraint of vslide1up.
10241 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10242 Vec, Vec, ValLo, I32Mask, InsertI64VL);
10243 // If the source vector is undef don't pass along the tail elements from
10244 // the previous slide1down.
10245 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
10246 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10247 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
10248 // Bitcast back to the right container type.
10249 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10250
10251 if (AlignedIdx)
10252 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
10253 if (!VecVT.isFixedLengthVector())
10254 return ValInVec;
10255 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
10256 }
10257
10258 // First slide in the lo value, then the hi in above it. We use slide1down
10259 // to avoid the register group overlap constraint of vslide1up.
10260 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10261 DAG.getUNDEF(I32ContainerVT),
10262 DAG.getUNDEF(I32ContainerVT), ValLo,
10263 I32Mask, InsertI64VL);
10264 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10265 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
10266 I32Mask, InsertI64VL);
10267 // Bitcast back to the right container type.
10268 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10269 }
10270
10271 // Now that the value is in a vector, slide it into position.
10272 SDValue InsertVL =
10273 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
10274
10275 // Use tail agnostic policy if Idx is the last index of Vec.
10276 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10277 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
10278 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
10279 Policy = RISCVVType::TAIL_AGNOSTIC;
10280 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
10281 Idx, Mask, InsertVL, Policy);
10282
10283 if (AlignedIdx)
10284 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
10285 if (!VecVT.isFixedLengthVector())
10286 return Slideup;
10287 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
10288}
10289
10290// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
10291// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
10292// types this is done using VMV_X_S to allow us to glean information about the
10293// sign bits of the result.
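// Illustrative sketch for extracting index 3 of an integer vector (register
// names assumed; the exact VL/LMUL are narrowed by the code below):
//   vsetivli zero, 1, e32, m1, ta, ma
//   vslidedown.vi v9, v8, 3
//   vmv.x.s a0, v9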
10294SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
10295 SelectionDAG &DAG) const {
10296 SDLoc DL(Op);
10297 SDValue Idx = Op.getOperand(1);
10298 SDValue Vec = Op.getOperand(0);
10299 EVT EltVT = Op.getValueType();
10300 MVT VecVT = Vec.getSimpleValueType();
10301 MVT XLenVT = Subtarget.getXLenVT();
10302
10303 if (VecVT.getVectorElementType() == MVT::i1) {
10304 // Use vfirst.m to extract the first bit.
10305 if (isNullConstant(Idx)) {
10306 MVT ContainerVT = VecVT;
10307 if (VecVT.isFixedLengthVector()) {
10308 ContainerVT = getContainerForFixedLengthVector(VecVT);
10309 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10310 }
10311 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10312 SDValue Vfirst =
10313 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
10314 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
10315 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10316 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10317 }
10318 if (VecVT.isFixedLengthVector()) {
10319 unsigned NumElts = VecVT.getVectorNumElements();
10320 if (NumElts >= 8) {
10321 MVT WideEltVT;
10322 unsigned WidenVecLen;
10323 SDValue ExtractElementIdx;
10324 SDValue ExtractBitIdx;
10325 unsigned MaxEEW = Subtarget.getELen();
10326 MVT LargestEltVT = MVT::getIntegerVT(
10327 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
10328 if (NumElts <= LargestEltVT.getSizeInBits()) {
10329 assert(isPowerOf2_32(NumElts) &&
10330 "the number of elements should be power of 2");
10331 WideEltVT = MVT::getIntegerVT(NumElts);
10332 WidenVecLen = 1;
10333 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
10334 ExtractBitIdx = Idx;
10335 } else {
10336 WideEltVT = LargestEltVT;
10337 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
10338 // extract element index = index / element width
10339 ExtractElementIdx = DAG.getNode(
10340 ISD::SRL, DL, XLenVT, Idx,
10341 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
10342 // mask bit index = index % element width
10343 ExtractBitIdx = DAG.getNode(
10344 ISD::AND, DL, XLenVT, Idx,
10345 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
10346 }
10347 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
10348 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
10349 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
10350 Vec, ExtractElementIdx);
10351 // Extract the bit from GPR.
10352 SDValue ShiftRight =
10353 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
10354 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
10355 DAG.getConstant(1, DL, XLenVT));
10356 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10357 }
10358 }
10359 // Otherwise, promote to an i8 vector and extract from that.
10360 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10361 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10362 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
10363 }
10364
10365 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10366 EltVT == MVT::bf16) {
10367 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
10368 MVT IntVT = VecVT.changeTypeToInteger();
10369 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
10370 SDValue IntExtract =
10371 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
10372 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
10373 }
10374
10375 // If this is a fixed vector, we need to convert it to a scalable vector.
10376 MVT ContainerVT = VecVT;
10377 if (VecVT.isFixedLengthVector()) {
10378 ContainerVT = getContainerForFixedLengthVector(VecVT);
10379 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10380 }
10381
10382 // If we're compiling for an exact VLEN value and we have a known
10383 // constant index, we can always perform the extract in m1 (or
10384 // smaller) as we can determine the register corresponding to
10385 // the index in the register group.
10386 const auto VLen = Subtarget.getRealVLen();
10387 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10388 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
10389 MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10390 unsigned OrigIdx = IdxC->getZExtValue();
10391 EVT ElemVT = VecVT.getVectorElementType();
10392 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
10393 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10394 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10395 unsigned ExtractIdx =
10396 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10397 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
10398 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10399 ContainerVT = M1VT;
10400 }
10401
10402 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
10403 // contains our index.
10404 std::optional<uint64_t> MaxIdx;
10405 if (VecVT.isFixedLengthVector())
10406 MaxIdx = VecVT.getVectorNumElements() - 1;
10407 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
10408 MaxIdx = IdxC->getZExtValue();
10409 if (MaxIdx) {
10410 if (auto SmallerVT =
10411 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
10412 ContainerVT = *SmallerVT;
10413 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
10414 }
10415 }
10416
10417 // Use ri.vextract.x.v if available.
10418 // TODO: Avoid index 0 and just use the vmv.x.s
10419 if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
10420 isValidVisniInsertExtractIndex(Idx)) {
10421 SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
10422 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
10423 }
10424
10425 // If after narrowing, the required slide is still greater than LMUL2,
10426 // fallback to generic expansion and go through the stack. This is done
10427 // for a subtle reason: extracting *all* elements out of a vector is
10428 // widely expected to be linear in vector size, but because vslidedown
10429 // is linear in LMUL, performing N extracts using vslidedown becomes
10430 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
10431 // seems to have the same problem (the store is linear in LMUL), but the
10432 // generic expansion *memoizes* the store, and thus for many extracts of
10433 // the same vector we end up with one store and a bunch of loads.
10434 // TODO: We don't have the same code for insert_vector_elt because we
10435 // have BUILD_VECTOR and handle the degenerate case there. Should we
10436 // consider adding an inverse BUILD_VECTOR node?
10437 MVT LMUL2VT =
10438 RISCVTargetLowering::getM1VT(ContainerVT).getDoubleNumVectorElementsVT();
10439 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
10440 return SDValue();
10441
10442 // If the index is 0, the vector is already in the right position.
10443 if (!isNullConstant(Idx)) {
10444 // Use a VL of 1 to avoid processing more elements than we need.
10445 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10446 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10447 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10448 }
10449
10450 if (!EltVT.isInteger()) {
10451 // Floating-point extracts are handled in TableGen.
10452 return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
10453 }
10454
10455 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10456 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
10457}
10458
10459// Some RVV intrinsics may claim that they want an integer operand to be
10460// promoted or expanded.
10461 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
10462 const RISCVSubtarget &Subtarget) {
10463 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
10464 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
10465 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
10466 "Unexpected opcode");
10467
10468 if (!Subtarget.hasVInstructions())
10469 return SDValue();
10470
10471 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10472 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10473 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10474
10475 SDLoc DL(Op);
10476
10477 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10478 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10479 if (!II || !II->hasScalarOperand())
10480 return SDValue();
10481
10482 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
10483 assert(SplatOp < Op.getNumOperands());
10484
10485 SmallVector<SDValue> Operands{Op->op_values()};
10486 SDValue &ScalarOp = Operands[SplatOp];
10487 MVT OpVT = ScalarOp.getSimpleValueType();
10488 MVT XLenVT = Subtarget.getXLenVT();
10489
10490 // If this isn't a scalar, or its type is XLenVT we're done.
10491 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10492 return SDValue();
10493
10494 // Simplest case is that the operand needs to be promoted to XLenVT.
10495 if (OpVT.bitsLT(XLenVT)) {
10496 // If the operand is a constant, sign extend to increase our chances
10497 // of being able to use a .vi instruction. ANY_EXTEND would become a
10498 // zero extend and the simm5 check in isel would fail.
10499 // FIXME: Should we ignore the upper bits in isel instead?
10500 unsigned ExtOpc =
10501 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10502 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10503 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10504 }
10505
10506 // Use the previous operand to get the vXi64 VT. The result might be a mask
10507 // VT for compares. Using the previous operand assumes that the previous
10508 // operand will never have a smaller element size than a scalar operand and
10509 // that a widening operation never uses SEW=64.
10510 // NOTE: If this fails the below assert, we can probably just find the
10511 // element count from any operand or result and use it to construct the VT.
10512 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
10513 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
10514
10515 // The more complex case is when the scalar is larger than XLenVT.
10516 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
10517 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
10518
10519 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
10520 // instruction to sign-extend since SEW>XLEN.
10521 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
10522 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
10523 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10524 }
10525
10526 switch (IntNo) {
10527 case Intrinsic::riscv_vslide1up:
10528 case Intrinsic::riscv_vslide1down:
10529 case Intrinsic::riscv_vslide1up_mask:
10530 case Intrinsic::riscv_vslide1down_mask: {
10531 // We need to special case these when the scalar is larger than XLen.
10532 unsigned NumOps = Op.getNumOperands();
10533 bool IsMasked = NumOps == 7;
10534
10535 // Convert the vector source to the equivalent nxvXi32 vector.
10536 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
10537 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
10538 SDValue ScalarLo, ScalarHi;
10539 std::tie(ScalarLo, ScalarHi) =
10540 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
10541
10542 // Double the VL since we halved SEW.
10543 SDValue AVL = getVLOperand(Op);
10544 SDValue I32VL;
10545
10546 // Optimize for constant AVL
10547 if (isa<ConstantSDNode>(AVL)) {
10548 const auto [MinVLMAX, MaxVLMAX] =
10549 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
10550
10551 uint64_t AVLInt = AVL->getAsZExtVal();
10552 if (AVLInt <= MinVLMAX) {
10553 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
10554 } else if (AVLInt >= 2 * MaxVLMAX) {
10555 // Just set vl to VLMAX in this situation
10556 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
10557 } else {
10558 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
10559 // is related to the hardware implementation.
10560 // So let the following code handle
10561 // So let the following code handle it.
10562 }
10563 if (!I32VL) {
10564 RISCVVType::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
10565 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
10566 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
10567 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
10568 SDValue SETVL =
10569 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
10570 // Use the vsetvli instruction to get the actually-used length, which
10571 // depends on the hardware implementation.
10572 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
10573 SEW, LMUL);
10574 I32VL =
10575 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
10576 }
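// Worked example: for an SEW=64 slide with a non-constant AVL, the vsetvli
// above returns the VL the hardware grants for e64 at this LMUL, and shifting
// it left by one gives the doubled VL used by the SEW=32 slides below
// (illustrative).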
10577
10578 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
10579
10580 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
10581 // instructions.
10582 SDValue Passthru;
10583 if (IsMasked)
10584 Passthru = DAG.getUNDEF(I32VT);
10585 else
10586 Passthru = DAG.getBitcast(I32VT, Operands[1]);
10587
10588 if (IntNo == Intrinsic::riscv_vslide1up ||
10589 IntNo == Intrinsic::riscv_vslide1up_mask) {
10590 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10591 ScalarHi, I32Mask, I32VL);
10592 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10593 ScalarLo, I32Mask, I32VL);
10594 } else {
10595 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10596 ScalarLo, I32Mask, I32VL);
10597 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10598 ScalarHi, I32Mask, I32VL);
10599 }
10600
10601 // Convert back to nxvXi64.
10602 Vec = DAG.getBitcast(VT, Vec);
10603
10604 if (!IsMasked)
10605 return Vec;
10606 // Apply mask after the operation.
10607 SDValue Mask = Operands[NumOps - 3];
10608 SDValue MaskedOff = Operands[1];
10609 // Assume Policy operand is the last operand.
10610 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
10611 // We don't need to select maskedoff if it's undef.
10612 if (MaskedOff.isUndef())
10613 return Vec;
10614 // TAMU
10615 if (Policy == RISCVVType::TAIL_AGNOSTIC)
10616 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10617 DAG.getUNDEF(VT), AVL);
10618 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
10619 // It's fine because vmerge does not care about mask policy.
10620 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10621 MaskedOff, AVL);
10622 }
10623 }
10624
10625 // We need to convert the scalar to a splat vector.
10626 SDValue VL = getVLOperand(Op);
10627 assert(VL.getValueType() == XLenVT);
10628 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
10629 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10630}
10631
10632// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
10633// scalable vector llvm.get.vector.length for now.
10634//
10635// We need to convert from a scalable VF to a vsetvli with VLMax equal to
10636// (vscale * VF). The vscale and VF are independent of element width. We use
10637// SEW=8 for the vsetvli because it is the only element width that supports all
10638// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
10639// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
10640// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
10641// SEW and LMUL are better for the surrounding vector instructions.
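// Worked example (assuming RVVBitsPerBlock = 64): LMul1VF = 64/8 = 8, so a
// scalable VF of 2 is fractional with LMulVal = 8/2 = 4, i.e. a vsetvli with
// e8/mf4, whose VLMAX is VLEN/32 = vscale * 2 as required.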
10642 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
10643 const RISCVSubtarget &Subtarget) {
10644 MVT XLenVT = Subtarget.getXLenVT();
10645
10646 // The smallest LMUL is only valid for the smallest element width.
10647 const unsigned ElementWidth = 8;
10648
10649 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
10650 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
10651 // We don't support VF==1 with ELEN==32.
10652 [[maybe_unused]] unsigned MinVF =
10653 RISCV::RVVBitsPerBlock / Subtarget.getELen();
10654
10655 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
10656 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
10657 "Unexpected VF");
10658
10659 bool Fractional = VF < LMul1VF;
10660 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
10661 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
10662 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
10663
10664 SDLoc DL(N);
10665
10666 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
10667 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
10668
10669 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
10670
10671 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
10672 SDValue Res =
10673 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
10674 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
10675}
10676
10677 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
10678 const RISCVSubtarget &Subtarget) {
10679 SDValue Op0 = N->getOperand(1);
10680 MVT OpVT = Op0.getSimpleValueType();
10681 MVT ContainerVT = OpVT;
10682 if (OpVT.isFixedLengthVector()) {
10683 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
10684 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
10685 }
10686 MVT XLenVT = Subtarget.getXLenVT();
10687 SDLoc DL(N);
10688 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
10689 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
10690 if (isOneConstant(N->getOperand(2)))
10691 return Res;
10692
10693 // Convert -1 to VL.
10694 SDValue Setcc =
10695 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
10696 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
10697 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
10698}
10699
10700static inline void promoteVCIXScalar(const SDValue &Op,
10701 SmallVectorImpl<SDValue> &Operands,
10702 SelectionDAG &DAG) {
10703 const RISCVSubtarget &Subtarget =
10704 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10705
10706 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10707 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10708 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10709 SDLoc DL(Op);
10710
10711 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10712 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10713 if (!II || !II->hasScalarOperand())
10714 return;
10715
10716 unsigned SplatOp = II->ScalarOperand + 1;
10717 assert(SplatOp < Op.getNumOperands());
10718
10719 SDValue &ScalarOp = Operands[SplatOp];
10720 MVT OpVT = ScalarOp.getSimpleValueType();
10721 MVT XLenVT = Subtarget.getXLenVT();
10722
10723 // The code below is partially copied from lowerVectorIntrinsicScalars.
10724 // If this isn't a scalar, or its type is XLenVT we're done.
10725 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10726 return;
10727
10728 // Manually promote the scalar operand if it is narrower than XLenVT.
10729 if (OpVT.bitsLT(XLenVT)) {
10730 unsigned ExtOpc =
10731 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10732 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10733 }
10734}
10735
10736static void processVCIXOperands(SDValue &OrigOp,
10737 SmallVectorImpl<SDValue> &Operands,
10738 SelectionDAG &DAG) {
10739 promoteVCIXScalar(OrigOp, Operands, DAG);
10740 const RISCVSubtarget &Subtarget =
10741 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10742 for (SDValue &V : Operands) {
10743 EVT ValType = V.getValueType();
10744 if (ValType.isVector() && ValType.isFloatingPoint()) {
10745 MVT InterimIVT =
10746 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
10747 ValType.getVectorElementCount());
10748 V = DAG.getBitcast(InterimIVT, V);
10749 }
10750 if (ValType.isFixedLengthVector()) {
10751 MVT OpContainerVT = getContainerForFixedLengthVector(
10752 DAG, V.getSimpleValueType(), Subtarget);
10753 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
10754 }
10755 }
10756}
10757
10758// LMUL * VLEN should be greater than or equal to EGS * SEW
10759static inline bool isValidEGW(int EGS, EVT VT,
10760 const RISCVSubtarget &Subtarget) {
10761 return (Subtarget.getRealMinVLen() *
10762 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
10763 EGS * VT.getScalarSizeInBits();
10764}
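// Worked example: for the EGS=4 checks below on a SEW=32 type, the element
// group width is 4 * 32 = 128 bits, so the type's LMUL * VLEN (its known
// minimum size in bits) must be at least 128 for the intrinsic to be legal.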
10765
10766SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10767 SelectionDAG &DAG) const {
10768 unsigned IntNo = Op.getConstantOperandVal(0);
10769 SDLoc DL(Op);
10770 MVT XLenVT = Subtarget.getXLenVT();
10771
10772 switch (IntNo) {
10773 default:
10774 break; // Don't custom lower most intrinsics.
10775 case Intrinsic::riscv_tuple_insert: {
10776 SDValue Vec = Op.getOperand(1);
10777 SDValue SubVec = Op.getOperand(2);
10778 SDValue Index = Op.getOperand(3);
10779
10780 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
10781 SubVec, Index);
10782 }
10783 case Intrinsic::riscv_tuple_extract: {
10784 SDValue Vec = Op.getOperand(1);
10785 SDValue Index = Op.getOperand(2);
10786
10787 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
10788 Index);
10789 }
10790 case Intrinsic::thread_pointer: {
10791 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10792 return DAG.getRegister(RISCV::X4, PtrVT);
10793 }
10794 case Intrinsic::riscv_orc_b:
10795 case Intrinsic::riscv_brev8:
10796 case Intrinsic::riscv_sha256sig0:
10797 case Intrinsic::riscv_sha256sig1:
10798 case Intrinsic::riscv_sha256sum0:
10799 case Intrinsic::riscv_sha256sum1:
10800 case Intrinsic::riscv_sm3p0:
10801 case Intrinsic::riscv_sm3p1: {
10802 unsigned Opc;
10803 switch (IntNo) {
10804 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
10805 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
10806 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
10807 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
10808 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
10809 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
10810 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
10811 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
10812 }
10813
10814 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10815 }
10816 case Intrinsic::riscv_sm4ks:
10817 case Intrinsic::riscv_sm4ed: {
10818 unsigned Opc =
10819 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
10820
10821 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
10822 Op.getOperand(3));
10823 }
10824 case Intrinsic::riscv_zip:
10825 case Intrinsic::riscv_unzip: {
10826 unsigned Opc =
10827 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
10828 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10829 }
10830 case Intrinsic::riscv_mopr:
10831 return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
10832 Op.getOperand(2));
10833
10834 case Intrinsic::riscv_moprr: {
10835 return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
10836 Op.getOperand(2), Op.getOperand(3));
10837 }
10838 case Intrinsic::riscv_clmul:
10839 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
10840 Op.getOperand(2));
10841 case Intrinsic::riscv_clmulh:
10842 case Intrinsic::riscv_clmulr: {
10843 unsigned Opc =
10844 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
10845 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
10846 }
10847 case Intrinsic::experimental_get_vector_length:
10848 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
10849 case Intrinsic::experimental_cttz_elts:
10850 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
10851 case Intrinsic::riscv_vmv_x_s: {
10852 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
10853 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
10854 }
10855 case Intrinsic::riscv_vfmv_f_s:
10856 return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
10857 case Intrinsic::riscv_vmv_v_x:
10858 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
10859 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
10860 Subtarget);
10861 case Intrinsic::riscv_vfmv_v_f:
10862 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
10863 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10864 case Intrinsic::riscv_vmv_s_x: {
10865 SDValue Scalar = Op.getOperand(2);
10866
10867 if (Scalar.getValueType().bitsLE(XLenVT)) {
10868 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
10869 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
10870 Op.getOperand(1), Scalar, Op.getOperand(3));
10871 }
10872
10873 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
10874
10875 // This is an i64 value that lives in two scalar registers. We have to
10876 // insert this in a convoluted way. First we build a vXi64 splat containing
10877 // the value, assembled from the two halves using some bit math. Next we'll use
10878 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
10879 // to merge element 0 from our splat into the source vector.
10880 // FIXME: This is probably not the best way to do this, but it is
10881 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
10882 // point.
10883 // sw lo, (a0)
10884 // sw hi, 4(a0)
10885 // vlse vX, (a0)
10886 //
10887 // vid.v vVid
10888 // vmseq.vx mMask, vVid, 0
10889 // vmerge.vvm vDest, vSrc, vVal, mMask
10890 MVT VT = Op.getSimpleValueType();
10891 SDValue Vec = Op.getOperand(1);
10892 SDValue VL = getVLOperand(Op);
10893
10894 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
10895 if (Op.getOperand(1).isUndef())
10896 return SplattedVal;
10897 SDValue SplattedIdx =
10898 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10899 DAG.getConstant(0, DL, MVT::i32), VL);
10900
10901 MVT MaskVT = getMaskTypeFor(VT);
10902 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
10903 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10904 SDValue SelectCond =
10905 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10906 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
10907 DAG.getUNDEF(MaskVT), Mask, VL});
10908 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
10909 Vec, DAG.getUNDEF(VT), VL);
10910 }
10911 case Intrinsic::riscv_vfmv_s_f:
10912 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
10913 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10914 // EGS * EEW >= 128 bits
10915 case Intrinsic::riscv_vaesdf_vv:
10916 case Intrinsic::riscv_vaesdf_vs:
10917 case Intrinsic::riscv_vaesdm_vv:
10918 case Intrinsic::riscv_vaesdm_vs:
10919 case Intrinsic::riscv_vaesef_vv:
10920 case Intrinsic::riscv_vaesef_vs:
10921 case Intrinsic::riscv_vaesem_vv:
10922 case Intrinsic::riscv_vaesem_vs:
10923 case Intrinsic::riscv_vaeskf1:
10924 case Intrinsic::riscv_vaeskf2:
10925 case Intrinsic::riscv_vaesz_vs:
10926 case Intrinsic::riscv_vsm4k:
10927 case Intrinsic::riscv_vsm4r_vv:
10928 case Intrinsic::riscv_vsm4r_vs: {
10929 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10930 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10931 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10932 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10933 return Op;
10934 }
10935 // EGS * EEW >= 256 bits
10936 case Intrinsic::riscv_vsm3c:
10937 case Intrinsic::riscv_vsm3me: {
10938 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
10939 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
10940 reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
10941 return Op;
10942 }
10943 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
10944 case Intrinsic::riscv_vsha2ch:
10945 case Intrinsic::riscv_vsha2cl:
10946 case Intrinsic::riscv_vsha2ms: {
10947 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
10948 !Subtarget.hasStdExtZvknhb())
10949 reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
10950 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10951 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10952 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10953 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10954 return Op;
10955 }
10956 case Intrinsic::riscv_sf_vc_v_x:
10957 case Intrinsic::riscv_sf_vc_v_i:
10958 case Intrinsic::riscv_sf_vc_v_xv:
10959 case Intrinsic::riscv_sf_vc_v_iv:
10960 case Intrinsic::riscv_sf_vc_v_vv:
10961 case Intrinsic::riscv_sf_vc_v_fv:
10962 case Intrinsic::riscv_sf_vc_v_xvv:
10963 case Intrinsic::riscv_sf_vc_v_ivv:
10964 case Intrinsic::riscv_sf_vc_v_vvv:
10965 case Intrinsic::riscv_sf_vc_v_fvv:
10966 case Intrinsic::riscv_sf_vc_v_xvw:
10967 case Intrinsic::riscv_sf_vc_v_ivw:
10968 case Intrinsic::riscv_sf_vc_v_vvw:
10969 case Intrinsic::riscv_sf_vc_v_fvw: {
10970 MVT VT = Op.getSimpleValueType();
10971
10972 SmallVector<SDValue> Operands{Op->op_values()};
10973 processVCIXOperands(Op, Operands, DAG);
10974
10975 MVT RetVT = VT;
10976 if (VT.isFixedLengthVector())
10977 RetVT = getContainerForFixedLengthVector(VT);
10978 else if (VT.isFloatingPoint())
10979 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
10980 VT.getVectorElementCount());
10981
10982 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
10983
10984 if (VT.isFixedLengthVector())
10985 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
10986 else if (VT.isFloatingPoint())
10987 NewNode = DAG.getBitcast(VT, NewNode);
10988
10989 if (Op == NewNode)
10990 break;
10991
10992 return NewNode;
10993 }
10994 }
10995
10996 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10997}
10998
10999 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
11000 unsigned Type) {
11001 SDLoc DL(Op);
11002 SmallVector<SDValue> Operands{Op->op_values()};
11003 Operands.erase(Operands.begin() + 1);
11004
11005 const RISCVSubtarget &Subtarget =
11006 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
11007 MVT VT = Op.getSimpleValueType();
11008 MVT RetVT = VT;
11009 MVT FloatVT = VT;
11010
11011 if (VT.isFloatingPoint()) {
11012 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11013 VT.getVectorElementCount());
11014 FloatVT = RetVT;
11015 }
11016 if (VT.isFixedLengthVector())
11017 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
11018 Subtarget);
11019
11020 processVCIXOperands(Op, Operands, DAG);
11021
11022 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
11023 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
11024 SDValue Chain = NewNode.getValue(1);
11025
11026 if (VT.isFixedLengthVector())
11027 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
11028 if (VT.isFloatingPoint())
11029 NewNode = DAG.getBitcast(VT, NewNode);
11030
11031 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
11032
11033 return NewNode;
11034}
11035
11036 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
11037 unsigned Type) {
11038 SmallVector<SDValue> Operands{Op->op_values()};
11039 Operands.erase(Operands.begin() + 1);
11040 processVCIXOperands(Op, Operands, DAG);
11041
11042 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
11043}
11044
11045static SDValue
11046 lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op,
11047 const RISCVSubtarget &Subtarget,
11048 SelectionDAG &DAG) {
11049 bool IsStrided;
11050 switch (IntNo) {
11051 case Intrinsic::riscv_seg2_load_mask:
11052 case Intrinsic::riscv_seg3_load_mask:
11053 case Intrinsic::riscv_seg4_load_mask:
11054 case Intrinsic::riscv_seg5_load_mask:
11055 case Intrinsic::riscv_seg6_load_mask:
11056 case Intrinsic::riscv_seg7_load_mask:
11057 case Intrinsic::riscv_seg8_load_mask:
11058 IsStrided = false;
11059 break;
11060 case Intrinsic::riscv_sseg2_load_mask:
11061 case Intrinsic::riscv_sseg3_load_mask:
11062 case Intrinsic::riscv_sseg4_load_mask:
11063 case Intrinsic::riscv_sseg5_load_mask:
11064 case Intrinsic::riscv_sseg6_load_mask:
11065 case Intrinsic::riscv_sseg7_load_mask:
11066 case Intrinsic::riscv_sseg8_load_mask:
11067 IsStrided = true;
11068 break;
11069 default:
11070 llvm_unreachable("unexpected intrinsic ID");
11071 };
11072
11073 static const Intrinsic::ID VlsegInts[7] = {
11074 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11075 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11076 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11077 Intrinsic::riscv_vlseg8_mask};
11078 static const Intrinsic::ID VlssegInts[7] = {
11079 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11080 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11081 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11082 Intrinsic::riscv_vlsseg8_mask};
11083
11084 SDLoc DL(Op);
11085 unsigned NF = Op->getNumValues() - 1;
11086 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11087 MVT XLenVT = Subtarget.getXLenVT();
11088 MVT VT = Op->getSimpleValueType(0);
11089 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11090 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11091 ContainerVT.getScalarSizeInBits();
11092 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11093
11094 // Operands: (chain, int_id, pointer, mask, vl) or
11095 // (chain, int_id, pointer, offset, mask, vl)
11096 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11097 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11098 MVT MaskVT = Mask.getSimpleValueType();
11099 MVT MaskContainerVT =
11100 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11101 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11102
11103 SDValue IntID = DAG.getTargetConstant(
11104 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11105 auto *Load = cast<MemIntrinsicSDNode>(Op);
11106
11107 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11108 SmallVector<SDValue, 9> Ops = {
11109 Load->getChain(),
11110 IntID,
11111 DAG.getUNDEF(VecTupTy),
11112 Op.getOperand(2),
11113 Mask,
11114 VL,
11115 DAG.getTargetConstant(
11116 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
11117 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11118 // Insert the stride operand.
11119 if (IsStrided)
11120 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11121
11122 SDValue Result =
11123 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11124 Load->getMemoryVT(), Load->getMemOperand());
11125 SmallVector<SDValue, 9> Results;
11126 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11127 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11128 Result.getValue(0),
11129 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11130 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11131 }
11132 Results.push_back(Result.getValue(1));
11133 return DAG.getMergeValues(Results, DL);
11134}
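// Rough illustration of what the path above produces (register choices and a
// 128-bit minimum VLEN are assumptions, not taken from this file): a call
// such as
//   { <4 x i32>, <4 x i32> } @llvm.riscv.seg2.load.mask(ptr %p, <4 x i1> %m, i64 4)
// is rebuilt as a riscv_vlseg2_mask operation on a vector-tuple type and would
// typically select to something like
//   vsetivli zero, 4, e32, m1, ta, ma
//   vlseg2e32.v v8, (a0), v0.t
// before the tuple fields are extracted and converted back to fixed vectors.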
11135
11136SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11137 SelectionDAG &DAG) const {
11138 unsigned IntNo = Op.getConstantOperandVal(1);
11139 switch (IntNo) {
11140 default:
11141 break;
11142 case Intrinsic::riscv_seg2_load_mask:
11143 case Intrinsic::riscv_seg3_load_mask:
11144 case Intrinsic::riscv_seg4_load_mask:
11145 case Intrinsic::riscv_seg5_load_mask:
11146 case Intrinsic::riscv_seg6_load_mask:
11147 case Intrinsic::riscv_seg7_load_mask:
11148 case Intrinsic::riscv_seg8_load_mask:
11149 case Intrinsic::riscv_sseg2_load_mask:
11150 case Intrinsic::riscv_sseg3_load_mask:
11151 case Intrinsic::riscv_sseg4_load_mask:
11152 case Intrinsic::riscv_sseg5_load_mask:
11153 case Intrinsic::riscv_sseg6_load_mask:
11154 case Intrinsic::riscv_sseg7_load_mask:
11155 case Intrinsic::riscv_sseg8_load_mask:
11156 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
11157
11158 case Intrinsic::riscv_sf_vc_v_x_se:
11159 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
11160 case Intrinsic::riscv_sf_vc_v_i_se:
11161 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
11162 case Intrinsic::riscv_sf_vc_v_xv_se:
11163 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
11164 case Intrinsic::riscv_sf_vc_v_iv_se:
11165 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
11166 case Intrinsic::riscv_sf_vc_v_vv_se:
11167 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
11168 case Intrinsic::riscv_sf_vc_v_fv_se:
11169 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
11170 case Intrinsic::riscv_sf_vc_v_xvv_se:
11171 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
11172 case Intrinsic::riscv_sf_vc_v_ivv_se:
11173 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
11174 case Intrinsic::riscv_sf_vc_v_vvv_se:
11175 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
11176 case Intrinsic::riscv_sf_vc_v_fvv_se:
11177 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
11178 case Intrinsic::riscv_sf_vc_v_xvw_se:
11179 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
11180 case Intrinsic::riscv_sf_vc_v_ivw_se:
11181 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
11182 case Intrinsic::riscv_sf_vc_v_vvw_se:
11183 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
11184 case Intrinsic::riscv_sf_vc_v_fvw_se:
11185 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
11186 }
11187
11188 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11189}
11190
11191static SDValue
11192lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
11193 const RISCVSubtarget &Subtarget,
11194 SelectionDAG &DAG) {
11195 bool IsStrided;
11196 switch (IntNo) {
11197 case Intrinsic::riscv_seg2_store_mask:
11198 case Intrinsic::riscv_seg3_store_mask:
11199 case Intrinsic::riscv_seg4_store_mask:
11200 case Intrinsic::riscv_seg5_store_mask:
11201 case Intrinsic::riscv_seg6_store_mask:
11202 case Intrinsic::riscv_seg7_store_mask:
11203 case Intrinsic::riscv_seg8_store_mask:
11204 IsStrided = false;
11205 break;
11206 case Intrinsic::riscv_sseg2_store_mask:
11207 case Intrinsic::riscv_sseg3_store_mask:
11208 case Intrinsic::riscv_sseg4_store_mask:
11209 case Intrinsic::riscv_sseg5_store_mask:
11210 case Intrinsic::riscv_sseg6_store_mask:
11211 case Intrinsic::riscv_sseg7_store_mask:
11212 case Intrinsic::riscv_sseg8_store_mask:
11213 IsStrided = true;
11214 break;
11215 default:
11216 llvm_unreachable("unexpected intrinsic ID");
11217 }
11218
11219 SDLoc DL(Op);
11220 static const Intrinsic::ID VssegInts[] = {
11221 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
11222 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
11223 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
11224 Intrinsic::riscv_vsseg8_mask};
11225 static const Intrinsic::ID VsssegInts[] = {
11226 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
11227 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
11228 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
11229 Intrinsic::riscv_vssseg8_mask};
11230
11231 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
11232 // (chain, int_id, vec*, ptr, stride, mask, vl)
11233 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
11234 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11235 MVT XLenVT = Subtarget.getXLenVT();
11236 MVT VT = Op->getOperand(2).getSimpleValueType();
11237 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11238 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11239 ContainerVT.getScalarSizeInBits();
11240 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11241
11242 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11243 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11244 MVT MaskVT = Mask.getSimpleValueType();
11245 MVT MaskContainerVT =
11246 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11247 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11248
11249 SDValue IntID = DAG.getTargetConstant(
11250 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
11251 SDValue Ptr = Op->getOperand(NF + 2);
11252
11253 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
11254
11255 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
11256 for (unsigned i = 0; i < NF; i++)
11257 StoredVal = DAG.getNode(
11258 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
11259 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
11260 DAG, Subtarget),
11261 DAG.getTargetConstant(i, DL, MVT::i32));
11262
11263 SmallVector<SDValue, 8> Ops = {
11264 FixedIntrinsic->getChain(),
11265 IntID,
11266 StoredVal,
11267 Ptr,
11268 Mask,
11269 VL,
11270 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11271 // Insert the stride operand.
11272 if (IsStrided)
11273 Ops.insert(std::next(Ops.begin(), 4),
11274 Op.getOperand(Op.getNumOperands() - 3));
11275
11276 return DAG.getMemIntrinsicNode(
11277 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
11278 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
11279}
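// As a sketch of the matching store path (again assuming a 128-bit minimum
// VLEN and arbitrary register numbers), a call like
//   @llvm.riscv.seg2.store.mask(<4 x i32> %a, <4 x i32> %b, ptr %p, <4 x i1> %m, i64 4)
// is rebuilt as a riscv_vsseg2_mask of a two-field vector tuple and would
// normally select to
//   vsetivli zero, 4, e32, m1, ta, ma
//   vsseg2e32.v v8, (a0), v0.t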
11280
11281SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11282 SelectionDAG &DAG) const {
11283 unsigned IntNo = Op.getConstantOperandVal(1);
11284 switch (IntNo) {
11285 default:
11286 break;
11287 case Intrinsic::riscv_seg2_store_mask:
11288 case Intrinsic::riscv_seg3_store_mask:
11289 case Intrinsic::riscv_seg4_store_mask:
11290 case Intrinsic::riscv_seg5_store_mask:
11291 case Intrinsic::riscv_seg6_store_mask:
11292 case Intrinsic::riscv_seg7_store_mask:
11293 case Intrinsic::riscv_seg8_store_mask:
11294 case Intrinsic::riscv_sseg2_store_mask:
11295 case Intrinsic::riscv_sseg3_store_mask:
11296 case Intrinsic::riscv_sseg4_store_mask:
11297 case Intrinsic::riscv_sseg5_store_mask:
11298 case Intrinsic::riscv_sseg6_store_mask:
11299 case Intrinsic::riscv_sseg7_store_mask:
11300 case Intrinsic::riscv_sseg8_store_mask:
11301 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
11302
11303 case Intrinsic::riscv_sf_vc_xv_se:
11304 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
11305 case Intrinsic::riscv_sf_vc_iv_se:
11306 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
11307 case Intrinsic::riscv_sf_vc_vv_se:
11308 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
11309 case Intrinsic::riscv_sf_vc_fv_se:
11310 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
11311 case Intrinsic::riscv_sf_vc_xvv_se:
11312 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
11313 case Intrinsic::riscv_sf_vc_ivv_se:
11314 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
11315 case Intrinsic::riscv_sf_vc_vvv_se:
11316 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
11317 case Intrinsic::riscv_sf_vc_fvv_se:
11318 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
11319 case Intrinsic::riscv_sf_vc_xvw_se:
11320 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
11321 case Intrinsic::riscv_sf_vc_ivw_se:
11322 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
11323 case Intrinsic::riscv_sf_vc_vvw_se:
11324 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
11325 case Intrinsic::riscv_sf_vc_fvw_se:
11326 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
11327 }
11328
11329 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11330}
11331
11332static unsigned getRVVReductionOp(unsigned ISDOpcode) {
11333 switch (ISDOpcode) {
11334 default:
11335 llvm_unreachable("Unhandled reduction");
11336 case ISD::VP_REDUCE_ADD:
11337 case ISD::VECREDUCE_ADD:
11338 return RISCVISD::VECREDUCE_ADD_VL;
11339 case ISD::VP_REDUCE_UMAX:
11340 case ISD::VECREDUCE_UMAX:
11341 return RISCVISD::VECREDUCE_UMAX_VL;
11342 case ISD::VP_REDUCE_SMAX:
11343 case ISD::VECREDUCE_SMAX:
11344 return RISCVISD::VECREDUCE_SMAX_VL;
11345 case ISD::VP_REDUCE_UMIN:
11346 case ISD::VECREDUCE_UMIN:
11347 return RISCVISD::VECREDUCE_UMIN_VL;
11348 case ISD::VP_REDUCE_SMIN:
11349 case ISD::VECREDUCE_SMIN:
11350 return RISCVISD::VECREDUCE_SMIN_VL;
11351 case ISD::VP_REDUCE_AND:
11352 case ISD::VECREDUCE_AND:
11353 return RISCVISD::VECREDUCE_AND_VL;
11354 case ISD::VP_REDUCE_OR:
11355 case ISD::VECREDUCE_OR:
11356 return RISCVISD::VECREDUCE_OR_VL;
11357 case ISD::VP_REDUCE_XOR:
11358 case ISD::VECREDUCE_XOR:
11359 return RISCVISD::VECREDUCE_XOR_VL;
11360 case ISD::VP_REDUCE_FADD:
11361 return RISCVISD::VECREDUCE_FADD_VL;
11362 case ISD::VP_REDUCE_SEQ_FADD:
11363 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
11364 case ISD::VP_REDUCE_FMAX:
11365 case ISD::VP_REDUCE_FMAXIMUM:
11366 return RISCVISD::VECREDUCE_FMAX_VL;
11367 case ISD::VP_REDUCE_FMIN:
11368 case ISD::VP_REDUCE_FMINIMUM:
11369 return RISCVISD::VECREDUCE_FMIN_VL;
11370 }
11371
11372}
11373
11374SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
11375 SelectionDAG &DAG,
11376 bool IsVP) const {
11377 SDLoc DL(Op);
11378 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
11379 MVT VecVT = Vec.getSimpleValueType();
11380 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
11381 Op.getOpcode() == ISD::VECREDUCE_OR ||
11382 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11383 Op.getOpcode() == ISD::VP_REDUCE_AND ||
11384 Op.getOpcode() == ISD::VP_REDUCE_OR ||
11385 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
11386 "Unexpected reduction lowering");
11387
11388 MVT XLenVT = Subtarget.getXLenVT();
11389
11390 MVT ContainerVT = VecVT;
11391 if (VecVT.isFixedLengthVector()) {
11392 ContainerVT = getContainerForFixedLengthVector(VecVT);
11393 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11394 }
11395
11396 SDValue Mask, VL;
11397 if (IsVP) {
11398 Mask = Op.getOperand(2);
11399 VL = Op.getOperand(3);
11400 } else {
11401 std::tie(Mask, VL) =
11402 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11403 }
11404
11405 ISD::CondCode CC;
11406 switch (Op.getOpcode()) {
11407 default:
11408 llvm_unreachable("Unhandled reduction");
11409 case ISD::VECREDUCE_AND:
11410 case ISD::VP_REDUCE_AND: {
11411 // vcpop ~x == 0
11412 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11413 if (IsVP || VecVT.isFixedLengthVector())
11414 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
11415 else
11416 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
11417 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11418 CC = ISD::SETEQ;
11419 break;
11420 }
11421 case ISD::VECREDUCE_OR:
11422 case ISD::VP_REDUCE_OR:
11423 // vcpop x != 0
11424 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11425 CC = ISD::SETNE;
11426 break;
11427 case ISD::VECREDUCE_XOR:
11428 case ISD::VP_REDUCE_XOR: {
11429 // ((vcpop x) & 1) != 0
11430 SDValue One = DAG.getConstant(1, DL, XLenVT);
11431 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11432 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
11433 CC = ISD::SETNE;
11434 break;
11435 }
11436 }
11437
11438 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11439 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
11440 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
11441
11442 if (!IsVP)
11443 return SetCC;
11444
11445 // Now include the start value in the operation.
11446 // Note that we must return the start value when no elements are operated
11447 // upon. The vcpop instructions we've emitted in each case above will return
11448 // 0 for an inactive vector, and so we've already received the neutral value:
11449 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
11450 // can simply include the start value.
11451 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11452 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
11453}
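// For a flavour of the scalarization above (illustrative instruction
// sequences, not emitted verbatim by this function): a vecreduce_or of an i1
// vector maps to "vcpop.m a0, v8 ; snez a0, a0", vecreduce_and complements
// the mask first and checks "vcpop == 0" (seqz), and vecreduce_xor tests the
// low bit of the population count with "andi a0, a0, 1 ; snez a0, a0".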
11454
11455static bool isNonZeroAVL(SDValue AVL) {
11456 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
11457 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
11458 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
11459 (ImmAVL && ImmAVL->getZExtValue() >= 1);
11460}
11461
11462/// Helper to lower a reduction sequence of the form:
11463/// scalar = reduce_op vec, scalar_start
11464static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
11465 SDValue StartValue, SDValue Vec, SDValue Mask,
11466 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
11467 const RISCVSubtarget &Subtarget) {
11468 const MVT VecVT = Vec.getSimpleValueType();
11469 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
11470 const MVT XLenVT = Subtarget.getXLenVT();
11471 const bool NonZeroAVL = isNonZeroAVL(VL);
11472
11473 // The reduction needs an LMUL1 input; do the splat at either LMUL1
11474 // or the original VT if fractional.
11475 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
11476 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
11477 // prove it is non-zero. For the AVL=0 case, we need the scalar to
11478 // be the result of the reduction operation.
11479 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
11480 SDValue InitialValue =
11481 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
11482 if (M1VT != InnerVT)
11483 InitialValue =
11484 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
11485 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
11486 SDValue Policy = DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
11487 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
11488 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
11489 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
11490}
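// As a concrete (illustrative) use of lowerReductionSeq, an i32 vecreduce_add
// over an LMUL=2 source ends up as roughly
//   vmv.s.x v12, zero        ; neutral start value in an LMUL=1 register
//   vredsum.vs v12, v8, v12  ; v12[0] = v12[0] + sum of the active elements
//   vmv.x.s a0, v12          ; read the scalar result back out
// with the scalar insert and the final element extract going through the
// helpers used above.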
11491
11492SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
11493 SelectionDAG &DAG) const {
11494 SDLoc DL(Op);
11495 SDValue Vec = Op.getOperand(0);
11496 EVT VecEVT = Vec.getValueType();
11497
11498 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11499
11500 // Due to ordering in legalize types we may have a vector type that needs to
11501 // be split. Do that manually so we can get down to a legal type.
11502 while (getTypeAction(*DAG.getContext(), VecEVT) ==
11503 TargetLowering::TypeSplitVector) {
11504 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11505 VecEVT = Lo.getValueType();
11506 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
11507 }
11508
11509 // TODO: The type may need to be widened rather than split. Or widened before
11510 // it can be split.
11511 if (!isTypeLegal(VecEVT))
11512 return SDValue();
11513
11514 MVT VecVT = VecEVT.getSimpleVT();
11515 MVT VecEltVT = VecVT.getVectorElementType();
11516 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
11517
11518 MVT ContainerVT = VecVT;
11519 if (VecVT.isFixedLengthVector()) {
11520 ContainerVT = getContainerForFixedLengthVector(VecVT);
11521 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11522 }
11523
11524 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11525
11526 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
11527 switch (BaseOpc) {
11528 case ISD::AND:
11529 case ISD::OR:
11530 case ISD::UMAX:
11531 case ISD::UMIN:
11532 case ISD::SMAX:
11533 case ISD::SMIN:
11534 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
11535 }
11536 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
11537 Mask, VL, DL, DAG, Subtarget);
11538}
11539
11540// Given a reduction op, this function returns the matching reduction opcode,
11541// the vector SDValue and the scalar SDValue required to lower this to a
11542// RISCVISD node.
11543static std::tuple<unsigned, SDValue, SDValue>
11545 const RISCVSubtarget &Subtarget) {
11546 SDLoc DL(Op);
11547 auto Flags = Op->getFlags();
11548 unsigned Opcode = Op.getOpcode();
11549 switch (Opcode) {
11550 default:
11551 llvm_unreachable("Unhandled reduction");
11552 case ISD::VECREDUCE_FADD: {
11553 // Use positive zero if we can. It is cheaper to materialize.
11554 SDValue Zero =
11555 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
11556 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
11557 }
11558 case ISD::VECREDUCE_SEQ_FADD:
11559 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
11560 Op.getOperand(0));
11561 case ISD::VECREDUCE_FMINIMUM:
11562 case ISD::VECREDUCE_FMAXIMUM:
11563 case ISD::VECREDUCE_FMIN:
11564 case ISD::VECREDUCE_FMAX: {
11565 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
11566 unsigned RVVOpc =
11567 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
11568 ? RISCVISD::VECREDUCE_FMIN_VL
11569 : RISCVISD::VECREDUCE_FMAX_VL;
11570 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
11571 }
11572 }
11573}
11574
11575SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
11576 SelectionDAG &DAG) const {
11577 SDLoc DL(Op);
11578 MVT VecEltVT = Op.getSimpleValueType();
11579
11580 unsigned RVVOpcode;
11581 SDValue VectorVal, ScalarVal;
11582 std::tie(RVVOpcode, VectorVal, ScalarVal) =
11583 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
11584 MVT VecVT = VectorVal.getSimpleValueType();
11585
11586 MVT ContainerVT = VecVT;
11587 if (VecVT.isFixedLengthVector()) {
11588 ContainerVT = getContainerForFixedLengthVector(VecVT);
11589 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
11590 }
11591
11592 MVT ResVT = Op.getSimpleValueType();
11593 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11594 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
11595 VL, DL, DAG, Subtarget);
11596 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
11597 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
11598 return Res;
11599
11600 if (Op->getFlags().hasNoNaNs())
11601 return Res;
11602
11603 // Force output to NaN if any element is NaN.
11604 SDValue IsNan =
11605 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
11606 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
11607 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
11608 MVT XLenVT = Subtarget.getXLenVT();
11609 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
11610 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
11611 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11612 return DAG.getSelect(
11613 DL, ResVT, NoNaNs, Res,
11614 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11615}
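// The NaN forcing above corresponds to roughly the following sequence for
// vecreduce_fmaximum (illustrative only):
//   vmfne.vv v0, v8, v8   ; lanes where the source is NaN (x != x)
//   vcpop.m a0, v0        ; count the NaN lanes
// followed by a scalar select of a canonical NaN when the count is non-zero.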
11616
11617SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
11618 SelectionDAG &DAG) const {
11619 SDLoc DL(Op);
11620 unsigned Opc = Op.getOpcode();
11621 SDValue Start = Op.getOperand(0);
11622 SDValue Vec = Op.getOperand(1);
11623 EVT VecEVT = Vec.getValueType();
11624 MVT XLenVT = Subtarget.getXLenVT();
11625
11626 // TODO: The type may need to be widened rather than split. Or widened before
11627 // it can be split.
11628 if (!isTypeLegal(VecEVT))
11629 return SDValue();
11630
11631 MVT VecVT = VecEVT.getSimpleVT();
11632 unsigned RVVOpcode = getRVVReductionOp(Opc);
11633
11634 if (VecVT.isFixedLengthVector()) {
11635 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
11636 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11637 }
11638
11639 SDValue VL = Op.getOperand(3);
11640 SDValue Mask = Op.getOperand(2);
11641 SDValue Res =
11642 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
11643 Vec, Mask, VL, DL, DAG, Subtarget);
11644 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
11645 Op->getFlags().hasNoNaNs())
11646 return Res;
11647
11648 // Propagate NaNs.
11649 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
11650 // Check if any of the elements in Vec is NaN.
11651 SDValue IsNaN = DAG.getNode(
11652 RISCVISD::SETCC_VL, DL, PredVT,
11653 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
11654 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
11655 // Check if the start value is NaN.
11656 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
11657 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
11658 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
11659 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11660 MVT ResVT = Res.getSimpleValueType();
11661 return DAG.getSelect(
11662 DL, ResVT, NoNaNs, Res,
11663 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11664}
11665
11666SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
11667 SelectionDAG &DAG) const {
11668 SDValue Vec = Op.getOperand(0);
11669 SDValue SubVec = Op.getOperand(1);
11670 MVT VecVT = Vec.getSimpleValueType();
11671 MVT SubVecVT = SubVec.getSimpleValueType();
11672
11673 SDLoc DL(Op);
11674 MVT XLenVT = Subtarget.getXLenVT();
11675 unsigned OrigIdx = Op.getConstantOperandVal(2);
11676 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11677
11678 if (OrigIdx == 0 && Vec.isUndef())
11679 return Op;
11680
11681 // We don't have the ability to slide mask vectors up indexed by their i1
11682 // elements; the smallest we can do is i8. Often we are able to bitcast to
11683 // equivalent i8 vectors. Note that when inserting a fixed-length vector
11684 // into a scalable one, we might not necessarily have enough scalable
11685 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
11686 if (SubVecVT.getVectorElementType() == MVT::i1) {
11687 if (VecVT.getVectorMinNumElements() >= 8 &&
11688 SubVecVT.getVectorMinNumElements() >= 8) {
11689 assert(OrigIdx % 8 == 0 && "Invalid index");
11690 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11691 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11692 "Unexpected mask vector lowering");
11693 OrigIdx /= 8;
11694 SubVecVT =
11695 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11696 SubVecVT.isScalableVector());
11697 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11698 VecVT.isScalableVector());
11699 Vec = DAG.getBitcast(VecVT, Vec);
11700 SubVec = DAG.getBitcast(SubVecVT, SubVec);
11701 } else {
11702 // We can't slide this mask vector up indexed by its i1 elements.
11703 // This poses a problem when we wish to insert a scalable vector which
11704 // can't be re-expressed as a larger type. Just choose the slow path and
11705 // extend to a larger type, then truncate back down.
11706 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11707 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11708 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11709 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
11710 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
11711 Op.getOperand(2));
11712 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
11713 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
11714 }
11715 }
11716
11717 // If the subvector is a fixed-length type and we don't know VLEN
11718 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11719 // don't know which register of a LMUL group contains the specific subvector
11720 // as we only know the minimum register size. Therefore we must slide the
11721 // vector group up the full amount.
11722 const auto VLen = Subtarget.getRealVLen();
11723 if (SubVecVT.isFixedLengthVector() && !VLen) {
11724 MVT ContainerVT = VecVT;
11725 if (VecVT.isFixedLengthVector()) {
11726 ContainerVT = getContainerForFixedLengthVector(VecVT);
11727 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11728 }
11729
11730 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
11731
11732 SDValue Mask =
11733 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11734 // Set the vector length to only the number of elements we care about. Note
11735 // that for slideup this includes the offset.
11736 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
11737 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
11738
11739 // Use tail agnostic policy if we're inserting over Vec's tail.
11740 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_AGNOSTIC;
11741 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
11742 Policy = RISCVVType::TAIL_AGNOSTIC;
11743
11744 // If we're inserting into the lowest elements, use a tail undisturbed
11745 // vmv.v.v.
11746 if (OrigIdx == 0) {
11747 SubVec =
11748 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
11749 } else {
11750 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11751 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
11752 SlideupAmt, Mask, VL, Policy);
11753 }
11754
11755 if (VecVT.isFixedLengthVector())
11756 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11757 return DAG.getBitcast(Op.getValueType(), SubVec);
11758 }
11759
11760 MVT ContainerVecVT = VecVT;
11761 if (VecVT.isFixedLengthVector()) {
11762 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
11763 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
11764 }
11765
11766 MVT ContainerSubVecVT = SubVecVT;
11767 if (SubVecVT.isFixedLengthVector()) {
11768 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
11769 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
11770 }
11771
11772 unsigned SubRegIdx;
11773 ElementCount RemIdx;
11774 // insert_subvector scales the index by vscale if the subvector is scalable,
11775 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11776 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11777 if (SubVecVT.isFixedLengthVector()) {
11778 assert(VLen);
11779 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11780 auto Decompose =
11781 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11782 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
11783 SubRegIdx = Decompose.first;
11784 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
11785 (OrigIdx % Vscale));
11786 } else {
11787 auto Decompose =
11788 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11789 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
11790 SubRegIdx = Decompose.first;
11791 RemIdx = ElementCount::getScalable(Decompose.second);
11792 }
11793
11794 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
11795 assert(isPowerOf2_64(
11796 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
11797 bool ExactlyVecRegSized =
11798 Subtarget.expandVScale(SubVecVT.getSizeInBits())
11799 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
11800
11801 // 1. If the Idx has been completely eliminated and this subvector's size is
11802 // a vector register or a multiple thereof, or the surrounding elements are
11803 // undef, then this is a subvector insert which naturally aligns to a vector
11804 // register. These can easily be handled using subregister manipulation.
11805 // 2. If the subvector isn't an exact multiple of a valid register group size,
11806 // then the insertion must preserve the undisturbed elements of the register.
11807 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
11808 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
11809 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
11810 // of that LMUL=1 type back into the larger vector (resolving to another
11811 // subregister operation). See below for how our VSLIDEUP works. We go via a
11812 // LMUL=1 type to avoid allocating a large register group to hold our
11813 // subvector.
11814 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
11815 if (SubVecVT.isFixedLengthVector()) {
11816 // We may get NoSubRegister if inserting at index 0 and the subvec
11817 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
11818 if (SubRegIdx == RISCV::NoSubRegister) {
11819 assert(OrigIdx == 0);
11820 return Op;
11821 }
11822
11823 // Use a insert_subvector that will resolve to an insert subreg.
11824 assert(VLen);
11825 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11826 SDValue Insert =
11827 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
11828 if (VecVT.isFixedLengthVector())
11829 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
11830 return Insert;
11831 }
11832 return Op;
11833 }
11834
11835 // VSLIDEUP works by leaving elements 0 <= i < OFFSET undisturbed, elements
11836 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
11837 // (in our case undisturbed). This means we can set up a subvector insertion
11838 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
11839 // size of the subvector.
11840 MVT InterSubVT = ContainerVecVT;
11841 SDValue AlignedExtract = Vec;
11842 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
11843 if (SubVecVT.isFixedLengthVector()) {
11844 assert(VLen);
11845 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
11846 }
11847 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
11848 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
11849 // Extract a subvector equal to the nearest full vector register type. This
11850 // should resolve to a EXTRACT_SUBREG instruction.
11851 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
11852 }
11853
11854 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
11855
11856 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
11857
11858 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
11859 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
11860
11861 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
11862 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_AGNOSTIC;
11863 if (Subtarget.expandVScale(EndIndex) ==
11864 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
11865 Policy = RISCVVType::TAIL_AGNOSTIC;
11866
11867 // If we're inserting into the lowest elements, use a tail undisturbed
11868 // vmv.v.v.
11869 if (RemIdx.isZero()) {
11870 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
11871 SubVec, VL);
11872 } else {
11873 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
11874
11875 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
11876 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
11877
11878 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
11879 SlideupAmt, Mask, VL, Policy);
11880 }
11881
11882 // If required, insert this subvector back into the correct vector register.
11883 // This should resolve to an INSERT_SUBREG instruction.
11884 if (ContainerVecVT.bitsGT(InterSubVT))
11885 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
11886
11887 if (VecVT.isFixedLengthVector())
11888 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11889
11890 // We might have bitcast from a mask type: cast back to the original type if
11891 // required.
11892 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
11893}
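// For example (register numbers and VLEN >= 128 are assumptions), inserting a
// fixed v2i32 into a v8i32 at index 2 when VLEN is not known exactly takes the
// slide-up path above and becomes roughly
//   vsetivli zero, 4, e32, m2, tu, ma   ; VL = index + subvector length
//   vslideup.vi v8, v10, 2              ; elements 0 and 1 stay undisturbed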
11894
11895SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
11896 SelectionDAG &DAG) const {
11897 SDValue Vec = Op.getOperand(0);
11898 MVT SubVecVT = Op.getSimpleValueType();
11899 MVT VecVT = Vec.getSimpleValueType();
11900
11901 SDLoc DL(Op);
11902 MVT XLenVT = Subtarget.getXLenVT();
11903 unsigned OrigIdx = Op.getConstantOperandVal(1);
11904 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11905
11906 // With an index of 0 this is a cast-like subvector, which can be performed
11907 // with subregister operations.
11908 if (OrigIdx == 0)
11909 return Op;
11910
11911 // We don't have the ability to slide mask vectors down indexed by their i1
11912 // elements; the smallest we can do is i8. Often we are able to bitcast to
11913 // equivalent i8 vectors. Note that when extracting a fixed-length vector
11914 // from a scalable one, we might not necessarily have enough scalable
11915 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
11916 if (SubVecVT.getVectorElementType() == MVT::i1) {
11917 if (VecVT.getVectorMinNumElements() >= 8 &&
11918 SubVecVT.getVectorMinNumElements() >= 8) {
11919 assert(OrigIdx % 8 == 0 && "Invalid index");
11920 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11921 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11922 "Unexpected mask vector lowering");
11923 OrigIdx /= 8;
11924 SubVecVT =
11925 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11926 SubVecVT.isScalableVector());
11927 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11928 VecVT.isScalableVector());
11929 Vec = DAG.getBitcast(VecVT, Vec);
11930 } else {
11931 // We can't slide this mask vector down, indexed by its i1 elements.
11932 // This poses a problem when we wish to extract a scalable vector which
11933 // can't be re-expressed as a larger type. Just choose the slow path and
11934 // extend to a larger type, then truncate back down.
11935 // TODO: We could probably improve this when extracting a fixed-length
11936 // vector from a fixed-length vector, where we could extract as i8 and
11937 // shift the correct element right to reach the desired subvector.
11938 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11939 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11940 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11941 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
11942 Op.getOperand(1));
11943 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
11944 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
11945 }
11946 }
11947
11948 const auto VLen = Subtarget.getRealVLen();
11949
11950 // If the subvector is a fixed-length type and we don't know VLEN
11951 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11952 // don't know which register of a LMUL group contains the specific subvector
11953 // as we only know the minimum register size. Therefore we must slide the
11954 // vector group down the full amount.
11955 if (SubVecVT.isFixedLengthVector() && !VLen) {
11956 MVT ContainerVT = VecVT;
11957 if (VecVT.isFixedLengthVector()) {
11958 ContainerVT = getContainerForFixedLengthVector(VecVT);
11959 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11960 }
11961
11962 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
11963 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
11964 if (auto ShrunkVT =
11965 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
11966 ContainerVT = *ShrunkVT;
11967 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
11968 }
11969
11970 SDValue Mask =
11971 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11972 // Set the vector length to only the number of elements we care about. This
11973 // avoids sliding down elements we're going to discard straight away.
11974 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
11975 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11976 SDValue Slidedown =
11977 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11978 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
11979 // Now we can use a cast-like subvector extract to get the result.
11980 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
11981 return DAG.getBitcast(Op.getValueType(), Slidedown);
11982 }
11983
11984 if (VecVT.isFixedLengthVector()) {
11985 VecVT = getContainerForFixedLengthVector(VecVT);
11986 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
11987 }
11988
11989 MVT ContainerSubVecVT = SubVecVT;
11990 if (SubVecVT.isFixedLengthVector())
11991 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
11992
11993 unsigned SubRegIdx;
11994 ElementCount RemIdx;
11995 // extract_subvector scales the index by vscale if the subvector is scalable,
11996 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11997 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11998 if (SubVecVT.isFixedLengthVector()) {
11999 assert(VLen);
12000 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12001 auto Decompose =
12002 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12003 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12004 SubRegIdx = Decompose.first;
12005 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12006 (OrigIdx % Vscale));
12007 } else {
12008 auto Decompose =
12009 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12010 VecVT, ContainerSubVecVT, OrigIdx, TRI);
12011 SubRegIdx = Decompose.first;
12012 RemIdx = ElementCount::getScalable(Decompose.second);
12013 }
12014
12015 // If the Idx has been completely eliminated then this is a subvector extract
12016 // which naturally aligns to a vector register. These can easily be handled
12017 // using subregister manipulation. We use an extract_subvector that will
12018 // resolve to an extract subreg.
12019 if (RemIdx.isZero()) {
12020 if (SubVecVT.isFixedLengthVector()) {
12021 assert(VLen);
12022 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12023 Vec =
12024 DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
12025 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
12026 }
12027 return Op;
12028 }
12029
12030 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
12031 // was > M1 then the index would need to be a multiple of VLMAX, and so would
12032 // divide exactly.
12033 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
12034 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
12035
12036 // If the vector type is an LMUL-group type, extract a subvector equal to the
12037 // nearest full vector register type.
12038 MVT InterSubVT = VecVT;
12039 if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
12040 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
12041 // we should have successfully decomposed the extract into a subregister.
12042 // We use an extract_subvector that will resolve to a subreg extract.
12043 assert(SubRegIdx != RISCV::NoSubRegister);
12044 (void)SubRegIdx;
12045 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
12046 if (SubVecVT.isFixedLengthVector()) {
12047 assert(VLen);
12048 Idx /= *VLen / RISCV::RVVBitsPerBlock;
12049 }
12050 InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
12051 Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
12052 }
12053
12054 // Slide this vector register down by the desired number of elements in order
12055 // to place the desired subvector starting at element 0.
12056 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12057 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
12058 if (SubVecVT.isFixedLengthVector())
12059 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12060 SDValue Slidedown =
12061 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
12062 Vec, SlidedownAmt, Mask, VL);
12063
12064 // Now the vector is in the right position, extract our final subvector. This
12065 // should resolve to a COPY.
12066 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12067
12068 // We might have bitcast from a mask type: cast back to the original type if
12069 // required.
12070 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12071}
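// For example (illustrative, assuming only a 128-bit minimum VLEN), extracting
// a fixed v2i32 starting at element 2 of a v8i32 uses the slide-down path:
//   vsetivli zero, 2, e32, m1, ta, ma   ; only the elements we keep
//   vslidedown.vi v8, v8, 2
// followed by a cast-like extract of the low elements.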
12072
12073// Widen a vector's operands to i8, then truncate its results back to the
12074// original type, typically i1. All operand and result types must be the same.
12075static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
12076 SelectionDAG &DAG) {
12077 MVT VT = N.getSimpleValueType();
12078 MVT WideVT = VT.changeVectorElementType(MVT::i8);
12079 SmallVector<SDValue, 4> WideOps;
12080 for (SDValue Op : N->ops()) {
12081 assert(Op.getSimpleValueType() == VT &&
12082 "Operands and result must be same type");
12083 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12084 }
12085
12086 unsigned NumVals = N->getNumValues();
12087
12088 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
12089 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
12090 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12091 SmallVector<SDValue, 4> TruncVals;
12092 for (unsigned I = 0; I < NumVals; I++) {
12093 TruncVals.push_back(
12094 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12095 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12096 }
12097
12098 if (TruncVals.size() > 1)
12099 return DAG.getMergeValues(TruncVals, DL);
12100 return TruncVals.front();
12101}
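// e.g. a VECTOR_DEINTERLEAVE of two i1 masks is handled here by zero-extending
// both masks to i8 vectors, deinterleaving in i8, and converting each result
// back to a mask with a "!= 0" setcc (which would select to vmsne.vi vd, vs, 0).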
12102
12103SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12104 SelectionDAG &DAG) const {
12105 SDLoc DL(Op);
12106 MVT VecVT = Op.getSimpleValueType();
12107
12108 const unsigned Factor = Op->getNumValues();
12109 assert(Factor <= 8);
12110
12111 // 1 bit element vectors need to be widened to e8
12112 if (VecVT.getVectorElementType() == MVT::i1)
12113 return widenVectorOpsToi8(Op, DL, DAG);
12114
12115 // Convert to scalable vectors first.
12116 if (VecVT.isFixedLengthVector()) {
12117 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12118 SmallVector<SDValue, 8> Ops(Factor);
12119 for (unsigned i = 0U; i < Factor; ++i)
12120 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12121 Subtarget);
12122
12123 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12124 SDValue NewDeinterleave =
12125 DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs, Ops);
12126
12127 SmallVector<SDValue, 8> Res(Factor);
12128 for (unsigned i = 0U; i < Factor; ++i)
12129 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12130 DAG, Subtarget);
12131 return DAG.getMergeValues(Res, DL);
12132 }
12133
12134 // If concatenating would exceed LMUL=8, we need to split.
12135 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12136 (8 * RISCV::RVVBitsPerBlock)) {
12137 SmallVector<SDValue, 8> Ops(Factor * 2);
12138 for (unsigned i = 0; i != Factor; ++i) {
12139 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12140 Ops[i * 2] = OpLo;
12141 Ops[i * 2 + 1] = OpHi;
12142 }
12143
12144 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12145
12146 SDValue Lo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
12147 ArrayRef(Ops).slice(0, Factor));
12148 SDValue Hi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
12149 ArrayRef(Ops).slice(Factor, Factor));
12150
12151 SmallVector<SDValue, 8> Res(Factor);
12152 for (unsigned i = 0; i != Factor; ++i)
12153 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
12154 Hi.getValue(i));
12155
12156 return DAG.getMergeValues(Res, DL);
12157 }
12158
12159 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
12160 MVT VT = Op->getSimpleValueType(0);
12161 SDValue V1 = Op->getOperand(0);
12162 SDValue V2 = Op->getOperand(1);
12163
12164 // For fractional LMUL, check if we can use a higher LMUL
12165 // instruction to avoid a vslidedown.
12166 if (SDValue Src = foldConcatVector(V1, V2);
12167 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
12168 EVT NewVT = VT.getDoubleNumVectorElementsVT();
12169 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
12170 // Freeze the source so we can increase its use count.
12171 Src = DAG.getFreeze(Src);
12172 SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
12173 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12174 SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
12175 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12176 Even = DAG.getExtractSubvector(DL, VT, Even, 0);
12177 Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
12178 return DAG.getMergeValues({Even, Odd}, DL);
12179 }
12180
12181 // Freeze the sources so we can increase their use count.
12182 V1 = DAG.getFreeze(V1);
12183 V2 = DAG.getFreeze(V2);
12184 SDValue Even =
12185 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12186 SDValue Odd =
12187 lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12188 return DAG.getMergeValues({Even, Odd}, DL);
12189 }
12190
12191 SmallVector<SDValue, 8> Ops(Op->op_values());
12192
12193 // Concatenate the vectors as one vector to deinterleave
12194 MVT ConcatVT =
12197 PowerOf2Ceil(Factor)));
12198 if (Ops.size() < PowerOf2Ceil(Factor))
12199 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
12200 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
12201
12202 if (Factor == 2) {
12203 // We can deinterleave through vnsrl.wi if the element type is smaller than
12204 // ELEN
12205 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12206 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
12207 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
12208 return DAG.getMergeValues({Even, Odd}, DL);
12209 }
12210
12211 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
12212 // possible mask vector, then extract the required subvector. Doing this
12213 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
12214 // creation to be rematerialized during register allocation to reduce
12215 // register pressure if needed.
12216
12217 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
12218
12219 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
12220 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
12221 SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
12222
12223 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
12224 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
12225 SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
12226
12227 // vcompress the even and odd elements into two separate vectors
12228 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12229 EvenMask, DAG.getUNDEF(ConcatVT));
12230 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12231 OddMask, DAG.getUNDEF(ConcatVT));
12232
12233 // Extract the result half of the gather for even and odd
12234 SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
12235 SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
12236
12237 return DAG.getMergeValues({Even, Odd}, DL);
12238 }
12239
12240 // Store with unit-stride store and load it back with segmented load.
12241 MVT XLenVT = Subtarget.getXLenVT();
12242 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12243 SDValue Passthru = DAG.getUNDEF(ConcatVT);
12244
12245 // Allocate a stack slot.
12246 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12247 SDValue StackPtr =
12248 DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
12249 auto &MF = DAG.getMachineFunction();
12250 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12251 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12252
12253 SDValue StoreOps[] = {DAG.getEntryNode(),
12254 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
12255 Concat, StackPtr, VL};
12256
12257 SDValue Chain = DAG.getMemIntrinsicNode(
12258 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
12259 ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12260 MachineMemOperand::MOStore);
12261
12262 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
12263 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
12264 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
12265 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
12266 Intrinsic::riscv_vlseg8_mask};
12267
12268 SDValue LoadOps[] = {
12269 Chain,
12270 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
12271 Passthru,
12272 StackPtr,
12273 Mask,
12274 VL,
12275 DAG.getTargetConstant(
12276 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
12277 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
12278
12279 unsigned Sz =
12280 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12281 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12282
12283 SDValue Load = DAG.getMemIntrinsicNode(
12284 ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
12285 LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12286 MachineMemOperand::MOLoad);
12287
12288 SmallVector<SDValue, 8> Res(Factor);
12289
12290 for (unsigned i = 0U; i < Factor; ++i)
12291 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
12292 DAG.getTargetConstant(i, DL, MVT::i32));
12293
12294 return DAG.getMergeValues(Res, DL);
12295}
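// For the common factor-2 case on an element type below ELEN, the
// shift-and-truncate path above amounts to roughly (illustrative, e16):
//   vnsrl.wi v10, v8, 0     ; even elements: low half of each 32-bit pair
//   vnsrl.wi v11, v8, 16    ; odd elements: high half (shift by SEW)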
12296
12297SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
12298 SelectionDAG &DAG) const {
12299 SDLoc DL(Op);
12300 MVT VecVT = Op.getSimpleValueType();
12301
12302 const unsigned Factor = Op.getNumOperands();
12303 assert(Factor <= 8);
12304
12305 // i1 vectors need to be widened to i8
12306 if (VecVT.getVectorElementType() == MVT::i1)
12307 return widenVectorOpsToi8(Op, DL, DAG);
12308
12309 // Convert to scalable vectors first.
12310 if (VecVT.isFixedLengthVector()) {
12311 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12312 SmallVector<SDValue, 8> Ops(Factor);
12313 for (unsigned i = 0U; i < Factor; ++i)
12314 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12315 Subtarget);
12316
12317 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12318 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
12319
12320 SmallVector<SDValue, 8> Res(Factor);
12321 for (unsigned i = 0U; i < Factor; ++i)
12322 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
12323 Subtarget);
12324 return DAG.getMergeValues(Res, DL);
12325 }
12326
12327 MVT XLenVT = Subtarget.getXLenVT();
12328 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12329
12330 // If the VT is larger than LMUL=8, we need to split and reassemble.
12331 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12332 (8 * RISCV::RVVBitsPerBlock)) {
12333 SmallVector<SDValue, 8> Ops(Factor * 2);
12334 for (unsigned i = 0; i != Factor; ++i) {
12335 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12336 Ops[i] = OpLo;
12337 Ops[i + Factor] = OpHi;
12338 }
12339
12340 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12341
12342 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12343 ArrayRef(Ops).take_front(Factor)),
12344 DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12345 ArrayRef(Ops).drop_front(Factor))};
12346
12347 SmallVector<SDValue, 8> Concats(Factor);
12348 for (unsigned i = 0; i != Factor; ++i) {
12349 unsigned IdxLo = 2 * i;
12350 unsigned IdxHi = 2 * i + 1;
12351 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
12352 Res[IdxLo / Factor].getValue(IdxLo % Factor),
12353 Res[IdxHi / Factor].getValue(IdxHi % Factor));
12354 }
12355
12356 return DAG.getMergeValues(Concats, DL);
12357 }
12358
12359 SDValue Interleaved;
12360
12361 // Spill to the stack using a segment store for simplicity.
12362 if (Factor != 2) {
12363 EVT MemVT =
12364 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(),
12365 VecVT.getVectorElementCount() * Factor);
12366
12367 // Allocate a stack slot.
12368 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12369 SDValue StackPtr =
12370 DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12371 EVT PtrVT = StackPtr.getValueType();
12372 auto &MF = DAG.getMachineFunction();
12373 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12374 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12375
12376 static const Intrinsic::ID IntrIds[] = {
12377 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12378 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12379 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12380 Intrinsic::riscv_vsseg8_mask,
12381 };
12382
12383 unsigned Sz =
12384 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12385 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12386
12387 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12388 for (unsigned i = 0; i < Factor; i++)
12389 StoredVal =
12390 DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12391 Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
12392
12393 SDValue Ops[] = {DAG.getEntryNode(),
12394 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
12395 StoredVal,
12396 StackPtr,
12397 Mask,
12398 VL,
12399 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()),
12400 DL, XLenVT)};
12401
12402 SDValue Chain = DAG.getMemIntrinsicNode(
12403 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12404 VecVT.getVectorElementType(), PtrInfo, Alignment,
12405 MachineMemOperand::MOStore);
12406
12407 SmallVector<SDValue, 8> Loads(Factor);
12408
12409 SDValue Increment =
12410 DAG.getVScale(DL, PtrVT,
12411 APInt(PtrVT.getFixedSizeInBits(),
12412 VecVT.getStoreSize().getKnownMinValue()));
12413 for (unsigned i = 0; i != Factor; ++i) {
12414 if (i != 0)
12415 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
12416
12417 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12418 }
12419
12420 return DAG.getMergeValues(Loads, DL);
12421 }
12422
12423 // Use ri.vzip2{a,b} if available
12424 // TODO: Figure out the best lowering for the spread variants
12425 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
12426 !Op.getOperand(1).isUndef()) {
12427 // Freeze the sources so we can increase their use count.
12428 SDValue V1 = DAG.getFreeze(Op->getOperand(0));
12429 SDValue V2 = DAG.getFreeze(Op->getOperand(1));
12430 SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12431 SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12432 return DAG.getMergeValues({Lo, Hi}, DL);
12433 }
12434
12435 // If the element type is smaller than ELEN, then we can interleave with
12436 // vwaddu.vv and vwmaccu.vx
12437 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12438 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
12439 DAG, Subtarget);
12440 } else {
12441 // Otherwise, fallback to using vrgathere16.vv
12442 MVT ConcatVT =
12445 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
12446 Op.getOperand(0), Op.getOperand(1));
12447
12448 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
12449
12450 // 0 1 2 3 4 5 6 7 ...
12451 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
12452
12453 // 1 1 1 1 1 1 1 1 ...
12454 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
12455
12456 // 1 0 1 0 1 0 1 0 ...
12457 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
12458 OddMask = DAG.getSetCC(
12459 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
12460 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
12461 ISD::CondCode::SETNE);
12462
12463 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
12464
12465 // Build up the index vector for interleaving the concatenated vector
12466 // 0 0 1 1 2 2 3 3 ...
12467 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
12468 // 0 n 1 n+1 2 n+2 3 n+3 ...
12469 Idx =
12470 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
12471
12472 // Then perform the interleave
12473 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
12474 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
12475 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
12476 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
12477 }
12478
12479 // Extract the two halves from the interleaved result
12480 SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
12481 SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
12482 VecVT.getVectorMinNumElements());
12483
12484 return DAG.getMergeValues({Lo, Hi}, DL);
12485}
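// The widening-interleave path used above for small element types corresponds
// to roughly the classic sequence (illustrative registers):
//   vwaddu.vv v12, v8, v9   ; wide[i] = zext(a[i]) + zext(b[i])
//   li a0, -1
//   vwmaccu.vx v12, a0, v9  ; wide[i] += (2^SEW - 1) * zext(b[i])
// so each widened element holds a[i] in its low half and b[i] in its high
// half, i.e. the interleaved pair.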
12486
12487// Lower step_vector to the vid instruction. Any non-identity step value must
12488// be accounted for by manual expansion.
12489SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
12490 SelectionDAG &DAG) const {
12491 SDLoc DL(Op);
12492 MVT VT = Op.getSimpleValueType();
12493 assert(VT.isScalableVector() && "Expected scalable vector");
12494 MVT XLenVT = Subtarget.getXLenVT();
12495 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
12496 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
12497 uint64_t StepValImm = Op.getConstantOperandVal(0);
12498 if (StepValImm != 1) {
12499 if (isPowerOf2_64(StepValImm)) {
12500 SDValue StepVal =
12501 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
12502 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
12503 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
12504 } else {
12505 SDValue StepVal = lowerScalarSplat(
12506 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
12507 VL, VT, DL, DAG, Subtarget);
12508 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
12509 }
12510 }
12511 return StepVec;
12512}
12513
12514// Implement vector_reverse using vrgather.vv with indices determined by
12515// subtracting the id of each element from (VLMAX-1). This will convert
12516// the indices like so:
12517// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
12518// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
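// Illustrative sketch of the common case (SEW=32, LMUL=1; registers
// arbitrary):
//   vsetvli     t0, zero, e32, m1, ta, ma
//   vid.v       v10               ; 0, 1, ..., VLMAX-1
//   vrsub.vx    v10, v10, a0      ; a0 = VLMAX-1  ->  VLMAX-1, ..., 1, 0
//   vrgather.vv v9, v8, v10       ; v9 = reverse of v8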
12519SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
12520 SelectionDAG &DAG) const {
12521 SDLoc DL(Op);
12522 MVT VecVT = Op.getSimpleValueType();
12523 if (VecVT.getVectorElementType() == MVT::i1) {
12524 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
12525 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
12526 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
12527 return DAG.getSetCC(DL, VecVT, Op2,
12528 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
12529 }
12530
12531 MVT ContainerVT = VecVT;
12532 SDValue Vec = Op.getOperand(0);
12533 if (VecVT.isFixedLengthVector()) {
12534 ContainerVT = getContainerForFixedLengthVector(VecVT);
12535 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12536 }
12537
12538 MVT XLenVT = Subtarget.getXLenVT();
12539 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12540
12541 // On some uarchs vrgather.vv will read from every input register for each
12542 // output register, regardless of the indices. However, to reverse a vector,
12543 // each output register only needs to read from one input register. So
12544 // decompose it into LMUL * M1 vrgather.vvs to get O(LMUL) performance
12545 // instead of O(LMUL^2).
12546 //
12547 // vsetvli a1, zero, e64, m4, ta, ma
12548 // vrgatherei16.vv v12, v8, v16
12549 // ->
12550 // vsetvli a1, zero, e64, m1, ta, ma
12551 // vrgather.vv v15, v8, v16
12552 // vrgather.vv v14, v9, v16
12553 // vrgather.vv v13, v10, v16
12554 // vrgather.vv v12, v11, v16
12555 if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
12556 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
12557 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
12558 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
12559 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
12560 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
12561
12562 // Fixed length vectors might not fit exactly into their container, and so
12563 // leave a gap in the front of the vector after being reversed. Slide this
12564 // away.
12565 //
12566 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
12567 // 0 1 2 3 x x x x <- reverse
12568 // x x x x 0 1 2 3 <- vslidedown.vx
12569 if (VecVT.isFixedLengthVector()) {
12570 SDValue Offset = DAG.getNode(
12571 ISD::SUB, DL, XLenVT,
12572 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
12573 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
12574 Concat =
12575 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12576 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
12577 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
12578 }
12579 return Concat;
12580 }
12581
12582 unsigned EltSize = ContainerVT.getScalarSizeInBits();
12583 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
12584 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12585 unsigned MaxVLMAX =
12586 VecVT.isFixedLengthVector()
12587 ? VecVT.getVectorNumElements()
12588 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12589
12590 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12591 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
12592
12593 // If this is SEW=8 and VLMAX is potentially more than 256, we need
12594 // to use vrgatherei16.vv.
12595 if (MaxVLMAX > 256 && EltSize == 8) {
12596 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
12597 // Reverse each half, then reassemble them in reverse order.
12598 // NOTE: It's also possible that after splitting, VLMAX no longer
12599 // requires vrgatherei16.vv.
12600 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12601 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
12602 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
12603 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12604 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12605 // Reassemble the low and high pieces reversed.
12606 // FIXME: This is a CONCAT_VECTORS.
12607 SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0);
12608 return DAG.getInsertSubvector(DL, Res, Lo,
12609 LoVT.getVectorMinNumElements());
12610 }
12611
12612 // Just promote the int type to i16 which will double the LMUL.
12613 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
12614 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12615 }
12616
12617 // At LMUL > 1, do the index computation in 16 bits to reduce register
12618 // pressure.
12619 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
12620 IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
12621 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
12622 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12623 IntVT = IntVT.changeVectorElementType(MVT::i16);
12624 }
12625
12626 // Calculate VLMAX-1 for the desired SEW.
12627 SDValue VLMinus1 = DAG.getNode(
12628 ISD::SUB, DL, XLenVT,
12629 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
12630 DAG.getConstant(1, DL, XLenVT));
12631
12632 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
12633 bool IsRV32E64 =
12634 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
12635 SDValue SplatVL;
12636 if (!IsRV32E64)
12637 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
12638 else
12639 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
12640 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
12641
12642 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
12643 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
12644 DAG.getUNDEF(IntVT), Mask, VL);
12645
12646 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
12647 DAG.getUNDEF(ContainerVT), Mask, VL);
12648 if (VecVT.isFixedLengthVector())
12649 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
12650 return Gather;
12651}
12652
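// VECTOR_SPLICE is lowered to a slidedown of the first operand followed by a
// slideup of the second operand. Roughly, for a non-negative immediate Imm
// (a sketch; registers arbitrary):
//   vslidedown.vx v8, v8, Imm            ; drop the first Imm elements of V1
//   vslideup.vx   v8, v9, (VLMAX - Imm)  ; append V2 behind the remainder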
12653SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
12654 SelectionDAG &DAG) const {
12655 SDLoc DL(Op);
12656 SDValue V1 = Op.getOperand(0);
12657 SDValue V2 = Op.getOperand(1);
12658 MVT XLenVT = Subtarget.getXLenVT();
12659 MVT VecVT = Op.getSimpleValueType();
12660
12661 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
12662
12663 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
12664 SDValue DownOffset, UpOffset;
12665 if (ImmValue >= 0) {
12666 // The operand is a TargetConstant, we need to rebuild it as a regular
12667 // constant.
12668 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12669 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
12670 } else {
12671 // The operand is a TargetConstant, we need to rebuild it as a regular
12672 // constant rather than negating the original operand.
12673 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12674 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
12675 }
12676
12677 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
12678
12679 SDValue SlideDown = getVSlidedown(
12680 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
12681 Subtarget.hasVLDependentLatency() ? UpOffset
12682 : DAG.getRegister(RISCV::X0, XLenVT));
12683 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
12684 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
12685 RISCVVType::TAIL_AGNOSTIC);
12686}
12687
12688SDValue
12689RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
12690 SelectionDAG &DAG) const {
12691 SDLoc DL(Op);
12692 auto *Load = cast<LoadSDNode>(Op);
12693
12694 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12695 Load->getMemoryVT(),
12696 *Load->getMemOperand()) &&
12697 "Expecting a correctly-aligned load");
12698
12699 MVT VT = Op.getSimpleValueType();
12700 MVT XLenVT = Subtarget.getXLenVT();
12701 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12702
12703 // If we know the exact VLEN and our fixed length vector completely fills
12704 // the container, use a whole register load instead.
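// For example (a sketch): if VLEN is known to be exactly 128, a v4i32 load
// completely fills its nxv2i32 container, so a plain load of the container
// type can be used; otherwise the general path below becomes roughly:
//   vsetivli zero, 4, e32, m1, ta, ma
//   vle32.v  v8, (a0)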
12705 const auto [MinVLMAX, MaxVLMAX] =
12706 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12707 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12708 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12709 MachineMemOperand *MMO = Load->getMemOperand();
12710 SDValue NewLoad =
12711 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
12712 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
12713 MMO->getAAInfo(), MMO->getRanges());
12714 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12715 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12716 }
12717
12718 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12719
12720 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12721 SDValue IntID = DAG.getTargetConstant(
12722 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
12723 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
12724 if (!IsMaskOp)
12725 Ops.push_back(DAG.getUNDEF(ContainerVT));
12726 Ops.push_back(Load->getBasePtr());
12727 Ops.push_back(VL);
12728 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12729 SDValue NewLoad =
12730 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12731 Load->getMemoryVT(), Load->getMemOperand());
12732
12733 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12734 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12735}
12736
12737SDValue
12738RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
12739 SelectionDAG &DAG) const {
12740 SDLoc DL(Op);
12741 auto *Store = cast<StoreSDNode>(Op);
12742
12743 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12744 Store->getMemoryVT(),
12745 *Store->getMemOperand()) &&
12746 "Expecting a correctly-aligned store");
12747
12748 SDValue StoreVal = Store->getValue();
12749 MVT VT = StoreVal.getSimpleValueType();
12750 MVT XLenVT = Subtarget.getXLenVT();
12751
12752 // If the size is less than a byte, we need to pad with zeros to make a byte.
12753 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
12754 VT = MVT::v8i1;
12755 StoreVal =
12756 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
12757 }
12758
12759 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12760
12761 SDValue NewValue =
12762 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12763
12764 // If we know the exact VLEN and our fixed length vector completely fills
12765 // the container, use a whole register store instead.
12766 const auto [MinVLMAX, MaxVLMAX] =
12767 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12768 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12769 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12770 MachineMemOperand *MMO = Store->getMemOperand();
12771 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
12772 MMO->getPointerInfo(), MMO->getBaseAlign(),
12773 MMO->getFlags(), MMO->getAAInfo());
12774 }
12775
12776 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12777
12778 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12779 SDValue IntID = DAG.getTargetConstant(
12780 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
12781 return DAG.getMemIntrinsicNode(
12782 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
12783 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
12784 Store->getMemoryVT(), Store->getMemOperand());
12785}
12786
12787SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
12788 SelectionDAG &DAG) const {
12789 SDLoc DL(Op);
12790 MVT VT = Op.getSimpleValueType();
12791
12792 const auto *MemSD = cast<MemSDNode>(Op);
12793 EVT MemVT = MemSD->getMemoryVT();
12794 MachineMemOperand *MMO = MemSD->getMemOperand();
12795 SDValue Chain = MemSD->getChain();
12796 SDValue BasePtr = MemSD->getBasePtr();
12797
12798 SDValue Mask, PassThru, VL;
12799 bool IsExpandingLoad = false;
12800 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
12801 Mask = VPLoad->getMask();
12802 PassThru = DAG.getUNDEF(VT);
12803 VL = VPLoad->getVectorLength();
12804 } else {
12805 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
12806 Mask = MLoad->getMask();
12807 PassThru = MLoad->getPassThru();
12808 IsExpandingLoad = MLoad->isExpandingLoad();
12809 }
12810
12811 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12812
12813 MVT XLenVT = Subtarget.getXLenVT();
12814
12815 MVT ContainerVT = VT;
12816 if (VT.isFixedLengthVector()) {
12817 ContainerVT = getContainerForFixedLengthVector(VT);
12818 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12819 if (!IsUnmasked) {
12820 MVT MaskVT = getMaskTypeFor(ContainerVT);
12821 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12822 }
12823 }
12824
12825 if (!VL)
12826 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12827
12828 SDValue ExpandingVL;
12829 if (!IsUnmasked && IsExpandingLoad) {
12830 ExpandingVL = VL;
12831 VL =
12832 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12833 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12834 }
12835
12836 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
12837 : Intrinsic::riscv_vle_mask;
12838 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12839 if (IntID == Intrinsic::riscv_vle)
12840 Ops.push_back(DAG.getUNDEF(ContainerVT));
12841 else
12842 Ops.push_back(PassThru);
12843 Ops.push_back(BasePtr);
12844 if (IntID == Intrinsic::riscv_vle_mask)
12845 Ops.push_back(Mask);
12846 Ops.push_back(VL);
12847 if (IntID == Intrinsic::riscv_vle_mask)
12848 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
12849
12850 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12851
12852 SDValue Result =
12853 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12854 Chain = Result.getValue(1);
12855 if (ExpandingVL) {
12856 MVT IndexVT = ContainerVT;
12857 if (ContainerVT.isFloatingPoint())
12858 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
12859
12860 MVT IndexEltVT = IndexVT.getVectorElementType();
12861 bool UseVRGATHEREI16 = false;
12862 // If the index vector is an i8 vector and the element count exceeds 256,
12863 // we should change the element type of the index vector to i16 to avoid
12864 // overflow.
12865 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
12866 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
12867 assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
12868 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
12869 UseVRGATHEREI16 = true;
12870 }
12871
12872 SDValue Iota =
12873 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
12874 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
12875 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
12876 Result =
12877 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
12878 : RISCVISD::VRGATHER_VV_VL,
12879 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
12880 }
12881
12882 if (VT.isFixedLengthVector())
12883 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12884
12885 return DAG.getMergeValues({Result, Chain}, DL);
12886}
12887
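// A sketch of what the fault-only-first path typically produces (SEW=32,
// LMUL=1; registers arbitrary):
//   vsetvli   zero, a1, e32, m1, ta, ma   ; a1 = requested EVL
//   vle32ff.v v8, (a0), v0.t              ; vl is trimmed if a fault occurs
//                                         ; past element 0
//   csrr      a2, vl                      ; elements actually loaded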
12888SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
12889 SDLoc DL(Op);
12890 MVT VT = Op->getSimpleValueType(0);
12891
12892 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
12893 EVT MemVT = VPLoadFF->getMemoryVT();
12894 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
12895 SDValue Chain = VPLoadFF->getChain();
12896 SDValue BasePtr = VPLoadFF->getBasePtr();
12897
12898 SDValue Mask = VPLoadFF->getMask();
12899 SDValue VL = VPLoadFF->getVectorLength();
12900
12901 MVT XLenVT = Subtarget.getXLenVT();
12902
12903 MVT ContainerVT = VT;
12904 if (VT.isFixedLengthVector()) {
12905 ContainerVT = getContainerForFixedLengthVector(VT);
12906 MVT MaskVT = getMaskTypeFor(ContainerVT);
12907 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12908 }
12909
12910 unsigned IntID = Intrinsic::riscv_vleff_mask;
12911 SDValue Ops[] = {
12912 Chain,
12913 DAG.getTargetConstant(IntID, DL, XLenVT),
12914 DAG.getUNDEF(ContainerVT),
12915 BasePtr,
12916 Mask,
12917 VL,
12918 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)};
12919
12920 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
12921
12922 SDValue Result =
12923 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12924 SDValue OutVL = Result.getValue(1);
12925 Chain = Result.getValue(2);
12926
12927 if (VT.isFixedLengthVector())
12928 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12929
12930 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
12931}
12932
12933SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
12934 SelectionDAG &DAG) const {
12935 SDLoc DL(Op);
12936
12937 const auto *MemSD = cast<MemSDNode>(Op);
12938 EVT MemVT = MemSD->getMemoryVT();
12939 MachineMemOperand *MMO = MemSD->getMemOperand();
12940 SDValue Chain = MemSD->getChain();
12941 SDValue BasePtr = MemSD->getBasePtr();
12942 SDValue Val, Mask, VL;
12943
12944 bool IsCompressingStore = false;
12945 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
12946 Val = VPStore->getValue();
12947 Mask = VPStore->getMask();
12948 VL = VPStore->getVectorLength();
12949 } else {
12950 const auto *MStore = cast<MaskedStoreSDNode>(Op);
12951 Val = MStore->getValue();
12952 Mask = MStore->getMask();
12953 IsCompressingStore = MStore->isCompressingStore();
12954 }
12955
12956 bool IsUnmasked =
12957 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
12958
12959 MVT VT = Val.getSimpleValueType();
12960 MVT XLenVT = Subtarget.getXLenVT();
12961
12962 MVT ContainerVT = VT;
12963 if (VT.isFixedLengthVector()) {
12964 ContainerVT = getContainerForFixedLengthVector(VT);
12965
12966 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12967 if (!IsUnmasked || IsCompressingStore) {
12968 MVT MaskVT = getMaskTypeFor(ContainerVT);
12969 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12970 }
12971 }
12972
12973 if (!VL)
12974 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12975
12976 if (IsCompressingStore) {
12977 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
12978 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
12979 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
12980 VL =
12981 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12982 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12983 }
12984
12985 unsigned IntID =
12986 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
12987 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12988 Ops.push_back(Val);
12989 Ops.push_back(BasePtr);
12990 if (!IsUnmasked)
12991 Ops.push_back(Mask);
12992 Ops.push_back(VL);
12993
12994 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
12995 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12996}
12997
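// VECTOR_COMPRESS maps onto the riscv_vcompress intrinsic. A sketch of the
// selected code (SEW=32, LMUL=1; registers arbitrary):
//   vsetvli t0, zero, e32, m1, ta, ma
//   vcompress.vm v9, v8, v0   ; pack the elements of v8 selected by the mask
//                             ; in v0 into the low positions of v9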
12998SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
12999 SelectionDAG &DAG) const {
13000 SDLoc DL(Op);
13001 SDValue Val = Op.getOperand(0);
13002 SDValue Mask = Op.getOperand(1);
13003 SDValue Passthru = Op.getOperand(2);
13004
13005 MVT VT = Val.getSimpleValueType();
13006 MVT XLenVT = Subtarget.getXLenVT();
13007 MVT ContainerVT = VT;
13008 if (VT.isFixedLengthVector()) {
13009 ContainerVT = getContainerForFixedLengthVector(VT);
13010 MVT MaskVT = getMaskTypeFor(ContainerVT);
13011 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13012 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13013 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
13014 }
13015
13016 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13017 SDValue Res =
13018 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13019 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13020 Passthru, Val, Mask, VL);
13021
13022 if (VT.isFixedLengthVector())
13023 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
13024
13025 return Res;
13026}
13027
13028SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
13029 SelectionDAG &DAG) const {
13030 unsigned Opc = Op.getOpcode();
13031 SDLoc DL(Op);
13032 SDValue Chain = Op.getOperand(0);
13033 SDValue Op1 = Op.getOperand(1);
13034 SDValue Op2 = Op.getOperand(2);
13035 SDValue CC = Op.getOperand(3);
13036 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
13037 MVT VT = Op.getSimpleValueType();
13038 MVT InVT = Op1.getSimpleValueType();
13039
13040 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with the OEQ/UNE
13041 // condition codes.
13042 if (Opc == ISD::STRICT_FSETCCS) {
13043 // Expand strict_fsetccs(x, oeq) to
13044 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
13045 SDVTList VTList = Op->getVTList();
13046 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
13047 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
13048 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13049 Op2, OLECCVal);
13050 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
13051 Op1, OLECCVal);
13052 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13053 Tmp1.getValue(1), Tmp2.getValue(1));
13054 // Tmp1 and Tmp2 might be the same node.
13055 if (Tmp1 != Tmp2)
13056 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
13057 return DAG.getMergeValues({Tmp1, OutChain}, DL);
13058 }
13059
13060 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
13061 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
13062 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
13063 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13064 Op2, OEQCCVal);
13065 SDValue Res = DAG.getNOT(DL, OEQ, VT);
13066 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
13067 }
13068 }
13069
13070 MVT ContainerInVT = InVT;
13071 if (InVT.isFixedLengthVector()) {
13072 ContainerInVT = getContainerForFixedLengthVector(InVT);
13073 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
13074 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
13075 }
13076 MVT MaskVT = getMaskTypeFor(ContainerInVT);
13077
13078 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
13079
13080 SDValue Res;
13081 if (Opc == ISD::STRICT_FSETCC &&
13082 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
13083 CCVal == ISD::SETOLE)) {
13084 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
13085 // is only active when both input elements are ordered.
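// In instruction terms this is roughly (a sketch; registers arbitrary):
//   vmfeq.vv v10, v8, v8          ; lane set iff Op1[i] is ordered (not NaN)
//   vmfeq.vv v11, v9, v9          ; lane set iff Op2[i] is ordered (not NaN)
//   vmand.mm v0,  v10, v11        ; only lanes where both inputs are ordered
//   vmflt.vv v12, v8, v9, v0.t    ; masked-off lanes keep the passthru bit,
//                                 ; which is 0 for unordered lanes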
13086 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
13087 SDValue OrderMask1 = DAG.getNode(
13088 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13089 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13090 True, VL});
13091 SDValue OrderMask2 = DAG.getNode(
13092 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13093 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13094 True, VL});
13095 Mask =
13096 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
13097 // Use Mask as the passthru operand to let the result be 0 if either of the
13098 // inputs is unordered.
13099 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
13100 DAG.getVTList(MaskVT, MVT::Other),
13101 {Chain, Op1, Op2, CC, Mask, Mask, VL});
13102 } else {
13103 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
13104 : RISCVISD::STRICT_FSETCCS_VL;
13105 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
13106 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
13107 }
13108
13109 if (VT.isFixedLengthVector()) {
13110 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
13111 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
13112 }
13113 return Res;
13114}
13115
13116// Lower vector ABS to smax(X, sub(0, X)).
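// For example (a sketch, SEW=32, LMUL=1; registers arbitrary):
//   vrsub.vi v9, v8, 0      ; v9 = 0 - v8
//   vmax.vv  v8, v8, v9     ; v8 = smax(v8, -v8) = |v8|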
13117SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13118 SDLoc DL(Op);
13119 MVT VT = Op.getSimpleValueType();
13120 SDValue X = Op.getOperand(0);
13121
13122 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13123 "Unexpected type for ISD::ABS");
13124
13125 MVT ContainerVT = VT;
13126 if (VT.isFixedLengthVector()) {
13127 ContainerVT = getContainerForFixedLengthVector(VT);
13128 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13129 }
13130
13131 SDValue Mask, VL;
13132 if (Op->getOpcode() == ISD::VP_ABS) {
13133 Mask = Op->getOperand(1);
13134 if (VT.isFixedLengthVector())
13135 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13136 Subtarget);
13137 VL = Op->getOperand(2);
13138 } else
13139 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13140
13141 SDValue SplatZero = DAG.getNode(
13142 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13143 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13144 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13145 DAG.getUNDEF(ContainerVT), Mask, VL);
13146 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13147 DAG.getUNDEF(ContainerVT), Mask, VL);
13148
13149 if (VT.isFixedLengthVector())
13150 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
13151 return Max;
13152}
13153
13154SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
13155 SelectionDAG &DAG) const {
13156 const auto &TSInfo =
13157 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13158
13159 unsigned NewOpc = getRISCVVLOp(Op);
13160 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
13161 bool HasMask = TSInfo.hasMaskOp(NewOpc);
13162
13163 MVT VT = Op.getSimpleValueType();
13164 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13165
13166 // Create list of operands by converting existing ones to scalable types.
13167 SmallVector<SDValue, 6> Ops;
13168 for (const SDValue &V : Op->op_values()) {
13169 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13170
13171 // Pass through non-vector operands.
13172 if (!V.getValueType().isVector()) {
13173 Ops.push_back(V);
13174 continue;
13175 }
13176
13177 // "cast" fixed length vector to a scalable vector.
13178 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
13179 "Only fixed length vectors are supported!");
13180 MVT VContainerVT = ContainerVT.changeVectorElementType(
13181 V.getSimpleValueType().getVectorElementType());
13182 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
13183 }
13184
13185 SDLoc DL(Op);
13186 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13187 if (HasPassthruOp)
13188 Ops.push_back(DAG.getUNDEF(ContainerVT));
13189 if (HasMask)
13190 Ops.push_back(Mask);
13191 Ops.push_back(VL);
13192
13193 // StrictFP operations have two result values. Their lowered result should
13194 // have the same result count.
13195 if (Op->isStrictFPOpcode()) {
13196 SDValue ScalableRes =
13197 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
13198 Op->getFlags());
13199 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13200 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
13201 }
13202
13203 SDValue ScalableRes =
13204 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
13205 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13206}
13207
13208// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
13209// * Operands of each node are assumed to be in the same order.
13210// * The EVL operand is promoted from i32 to i64 on RV64.
13211// * Fixed-length vectors are converted to their scalable-vector container
13212// types.
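// For example (a sketch, assuming a 128-bit minimum VLEN so that v4i32 uses
// the nxv2i32 container): a fixed-length
//   %r = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b,
//                                          <4 x i1> %m, i32 %evl)
// becomes roughly ADD_VL(%a', %b', undef passthru, %m', %evl) on nxv2i32
// operands, which then selects to a masked vadd.vv under a vsetvli derived
// from %evl.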
13213SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
13214 const auto &TSInfo =
13215 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13216
13217 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13218 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
13219
13220 SDLoc DL(Op);
13221 MVT VT = Op.getSimpleValueType();
13222 SmallVector<SDValue, 16> Ops;
13223
13224 MVT ContainerVT = VT;
13225 if (VT.isFixedLengthVector())
13226 ContainerVT = getContainerForFixedLengthVector(VT);
13227
13228 for (const auto &OpIdx : enumerate(Op->ops())) {
13229 SDValue V = OpIdx.value();
13230 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13231 // Add dummy passthru value before the mask. Or if there isn't a mask,
13232 // before EVL.
13233 if (HasPassthruOp) {
13234 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
13235 if (MaskIdx) {
13236 if (*MaskIdx == OpIdx.index())
13237 Ops.push_back(DAG.getUNDEF(ContainerVT));
13238 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
13239 OpIdx.index()) {
13240 if (Op.getOpcode() == ISD::VP_MERGE) {
13241 // For VP_MERGE, copy the false operand instead of an undef value.
13242 Ops.push_back(Ops.back());
13243 } else {
13244 assert(Op.getOpcode() == ISD::VP_SELECT);
13245 // For VP_SELECT, add an undef value.
13246 Ops.push_back(DAG.getUNDEF(ContainerVT));
13247 }
13248 }
13249 }
13250 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
13251 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
13252 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
13253 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
13254 Subtarget.getXLenVT()));
13255 // Pass through operands which aren't fixed-length vectors.
13256 if (!V.getValueType().isFixedLengthVector()) {
13257 Ops.push_back(V);
13258 continue;
13259 }
13260 // "cast" fixed length vector to a scalable vector.
13261 MVT OpVT = V.getSimpleValueType();
13262 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
13263 assert(useRVVForFixedLengthVectorVT(OpVT) &&
13264 "Only fixed length vectors are supported!");
13265 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
13266 }
13267
13268 if (!VT.isFixedLengthVector())
13269 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
13270
13271 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
13272
13273 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
13274}
13275
13276SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
13277 SelectionDAG &DAG) const {
13278 SDLoc DL(Op);
13279 MVT VT = Op.getSimpleValueType();
13280
13281 SDValue Src = Op.getOperand(0);
13282 // NOTE: Mask is dropped.
13283 SDValue VL = Op.getOperand(2);
13284
13285 MVT ContainerVT = VT;
13286 if (VT.isFixedLengthVector()) {
13287 ContainerVT = getContainerForFixedLengthVector(VT);
13288 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
13289 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13290 }
13291
13292 MVT XLenVT = Subtarget.getXLenVT();
13293 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13294 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13295 DAG.getUNDEF(ContainerVT), Zero, VL);
13296
13297 SDValue SplatValue = DAG.getSignedConstant(
13298 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
13299 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13300 DAG.getUNDEF(ContainerVT), SplatValue, VL);
13301
13302 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
13303 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
13304 if (!VT.isFixedLengthVector())
13305 return Result;
13306 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13307}
13308
13309SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
13310 SelectionDAG &DAG) const {
13311 SDLoc DL(Op);
13312 MVT VT = Op.getSimpleValueType();
13313
13314 SDValue Op1 = Op.getOperand(0);
13315 SDValue Op2 = Op.getOperand(1);
13316 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13317 // NOTE: Mask is dropped.
13318 SDValue VL = Op.getOperand(4);
13319
13320 MVT ContainerVT = VT;
13321 if (VT.isFixedLengthVector()) {
13322 ContainerVT = getContainerForFixedLengthVector(VT);
13323 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13324 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13325 }
13326 SDValue Result;
13327
13328 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13329
13330 switch (Condition) {
13331 default:
13332 break;
13333 // X != Y --> (X^Y)
13334 case ISD::SETNE:
13335 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13336 break;
13337 // X == Y --> ~(X^Y)
13338 case ISD::SETEQ: {
13339 SDValue Temp =
13340 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13341 Result =
13342 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
13343 break;
13344 }
13345 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13346 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13347 case ISD::SETGT:
13348 case ISD::SETULT: {
13349 SDValue Temp =
13350 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13351 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
13352 break;
13353 }
13354 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13355 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13356 case ISD::SETLT:
13357 case ISD::SETUGT: {
13358 SDValue Temp =
13359 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13360 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
13361 break;
13362 }
13363 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13364 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13365 case ISD::SETGE:
13366 case ISD::SETULE: {
13367 SDValue Temp =
13368 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13369 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
13370 break;
13371 }
13372 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13373 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13374 case ISD::SETLE:
13375 case ISD::SETUGE: {
13376 SDValue Temp =
13377 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13378 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
13379 break;
13380 }
13381 }
13382
13383 if (!VT.isFixedLengthVector())
13384 return Result;
13385 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13386}
13387
13388// Lower Floating-Point/Integer Type-Convert VP SDNodes
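// For example (a sketch): vp.fptosi from nxv2f16 to nxv2i64 has a destination
// element more than twice the source width, so the f16 source is first
// extended to f32 (vfwcvt.f.f.v) and then converted with a widening f32->i64
// convert (vfwcvt.rtz.x.f.v). Narrowing cases chain vfncvt/truncate steps in
// a similar way.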
13389SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
13390 SelectionDAG &DAG) const {
13391 SDLoc DL(Op);
13392
13393 SDValue Src = Op.getOperand(0);
13394 SDValue Mask = Op.getOperand(1);
13395 SDValue VL = Op.getOperand(2);
13396 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13397
13398 MVT DstVT = Op.getSimpleValueType();
13399 MVT SrcVT = Src.getSimpleValueType();
13400 if (DstVT.isFixedLengthVector()) {
13401 DstVT = getContainerForFixedLengthVector(DstVT);
13402 SrcVT = getContainerForFixedLengthVector(SrcVT);
13403 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13404 MVT MaskVT = getMaskTypeFor(DstVT);
13405 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13406 }
13407
13408 unsigned DstEltSize = DstVT.getScalarSizeInBits();
13409 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13410
13411 SDValue Result;
13412 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
13413 if (SrcVT.isInteger()) {
13414 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13415
13416 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
13417 ? RISCVISD::VSEXT_VL
13418 : RISCVISD::VZEXT_VL;
13419
13420 // Do we need to do any pre-widening before converting?
13421 if (SrcEltSize == 1) {
13422 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
13423 MVT XLenVT = Subtarget.getXLenVT();
13424 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13425 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13426 DAG.getUNDEF(IntVT), Zero, VL);
13427 SDValue One = DAG.getSignedConstant(
13428 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
13429 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13430 DAG.getUNDEF(IntVT), One, VL);
13431 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
13432 ZeroSplat, DAG.getUNDEF(IntVT), VL);
13433 } else if (DstEltSize > (2 * SrcEltSize)) {
13434 // Widen before converting.
13435 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
13436 DstVT.getVectorElementCount());
13437 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
13438 }
13439
13440 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13441 } else {
13442 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13443 "Wrong input/output vector types");
13444
13445 // Convert f16 to f32 then convert f32 to i64.
13446 if (DstEltSize > (2 * SrcEltSize)) {
13447 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13448 MVT InterimFVT =
13449 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13450 Src =
13451 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
13452 }
13453
13454 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13455 }
13456 } else { // Narrowing + Conversion
13457 if (SrcVT.isInteger()) {
13458 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13459 // First do a narrowing conversion to an FP type half the size, then round
13460 // to a smaller FP type if needed.
13461
13462 MVT InterimFVT = DstVT;
13463 if (SrcEltSize > (2 * DstEltSize)) {
13464 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
13465 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13466 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13467 }
13468
13469 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
13470
13471 if (InterimFVT != DstVT) {
13472 Src = Result;
13473 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
13474 }
13475 } else {
13476 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13477 "Wrong input/output vector types");
13478 // First do a narrowing conversion to an integer half the size, then
13479 // truncate if needed.
13480
13481 if (DstEltSize == 1) {
13482 // First convert to the same size integer, then convert to mask using
13483 // setcc.
13484 assert(SrcEltSize >= 16 && "Unexpected FP type!");
13485 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
13486 DstVT.getVectorElementCount());
13487 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13488
13489 // Compare the integer result to 0. The integer should be 0 or 1/-1,
13490 // otherwise the conversion was undefined.
13491 MVT XLenVT = Subtarget.getXLenVT();
13492 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
13493 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
13494 DAG.getUNDEF(InterimIVT), SplatZero, VL);
13495 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
13496 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
13497 DAG.getUNDEF(DstVT), Mask, VL});
13498 } else {
13499 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13500 DstVT.getVectorElementCount());
13501
13502 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13503
13504 while (InterimIVT != DstVT) {
13505 SrcEltSize /= 2;
13506 Src = Result;
13507 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13508 DstVT.getVectorElementCount());
13509 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
13510 Src, Mask, VL);
13511 }
13512 }
13513 }
13514 }
13515
13516 MVT VT = Op.getSimpleValueType();
13517 if (!VT.isFixedLengthVector())
13518 return Result;
13519 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13520}
13521
13522SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
13523 SelectionDAG &DAG) const {
13524 SDLoc DL(Op);
13525 MVT VT = Op.getSimpleValueType();
13526 MVT XLenVT = Subtarget.getXLenVT();
13527
13528 SDValue Mask = Op.getOperand(0);
13529 SDValue TrueVal = Op.getOperand(1);
13530 SDValue FalseVal = Op.getOperand(2);
13531 SDValue VL = Op.getOperand(3);
13532
13533 // Use default legalization if a vector of EVL type would be legal.
13534 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
13535 VT.getVectorElementCount());
13536 if (isTypeLegal(EVLVecVT))
13537 return SDValue();
13538
13539 MVT ContainerVT = VT;
13540 if (VT.isFixedLengthVector()) {
13541 ContainerVT = getContainerForFixedLengthVector(VT);
13542 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
13543 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
13544 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
13545 }
13546
13547 // Promote to a vector of i8.
13548 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
13549
13550 // Promote TrueVal and FalseVal using VLMax.
13551 // FIXME: Is there a better way to do this?
13552 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
13553 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13554 DAG.getUNDEF(PromotedVT),
13555 DAG.getConstant(1, DL, XLenVT), VLMax);
13556 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13557 DAG.getUNDEF(PromotedVT),
13558 DAG.getConstant(0, DL, XLenVT), VLMax);
13559 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
13560 SplatZero, DAG.getUNDEF(PromotedVT), VL);
13561 // Any element past VL uses FalseVal, so use VLMax
13562 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
13563 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
13564
13565 // VP_MERGE the two promoted values.
13566 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
13567 TrueVal, FalseVal, FalseVal, VL);
13568
13569 // Convert back to mask.
13570 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13571 SDValue Result = DAG.getNode(
13572 RISCVISD::SETCC_VL, DL, ContainerVT,
13573 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
13574 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
13575
13576 if (VT.isFixedLengthVector())
13577 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13578 return Result;
13579}
13580
13581SDValue
13582RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
13583 SelectionDAG &DAG) const {
13584 using namespace SDPatternMatch;
13585
13586 SDLoc DL(Op);
13587
13588 SDValue Op1 = Op.getOperand(0);
13589 SDValue Op2 = Op.getOperand(1);
13590 SDValue Offset = Op.getOperand(2);
13591 SDValue Mask = Op.getOperand(3);
13592 SDValue EVL1 = Op.getOperand(4);
13593 SDValue EVL2 = Op.getOperand(5);
13594
13595 const MVT XLenVT = Subtarget.getXLenVT();
13596 MVT VT = Op.getSimpleValueType();
13597 MVT ContainerVT = VT;
13598 if (VT.isFixedLengthVector()) {
13599 ContainerVT = getContainerForFixedLengthVector(VT);
13600 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13601 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13602 MVT MaskVT = getMaskTypeFor(ContainerVT);
13603 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13604 }
13605
13606 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
13607 if (IsMaskVector) {
13608 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
13609
13610 // Expand input operands
13611 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13612 DAG.getUNDEF(ContainerVT),
13613 DAG.getConstant(1, DL, XLenVT), EVL1);
13614 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13615 DAG.getUNDEF(ContainerVT),
13616 DAG.getConstant(0, DL, XLenVT), EVL1);
13617 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
13618 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
13619
13620 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13621 DAG.getUNDEF(ContainerVT),
13622 DAG.getConstant(1, DL, XLenVT), EVL2);
13623 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13624 DAG.getUNDEF(ContainerVT),
13625 DAG.getConstant(0, DL, XLenVT), EVL2);
13626 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
13627 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
13628 }
13629
13630 auto getVectorFirstEle = [](SDValue Vec) {
13631 SDValue FirstEle;
13632 if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
13633 return FirstEle;
13634
13635 if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
13636 Vec.getOpcode() == ISD::BUILD_VECTOR)
13637 return Vec.getOperand(0);
13638
13639 return SDValue();
13640 };
13641
13642 if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
13643 if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
13644 MVT EltVT = ContainerVT.getVectorElementType();
13645 SDValue Result;
13646 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
13647 EltVT == MVT::bf16) {
13648 EltVT = EltVT.changeTypeToInteger();
13649 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
13650 Op2 = DAG.getBitcast(ContainerVT, Op2);
13651 FirstEle =
13652 DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
13653 }
13654 Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13655 : RISCVISD::VSLIDE1UP_VL,
13656 DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
13657 FirstEle, Mask, EVL2);
13658 Result = DAG.getBitcast(
13659 ContainerVT.changeVectorElementType(VT.getVectorElementType()),
13660 Result);
13661 return VT.isFixedLengthVector()
13662 ? convertFromScalableVector(VT, Result, DAG, Subtarget)
13663 : Result;
13664 }
13665
13666 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
13667 SDValue DownOffset, UpOffset;
13668 if (ImmValue >= 0) {
13669 // The operand is a TargetConstant, we need to rebuild it as a regular
13670 // constant.
13671 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
13672 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
13673 } else {
13674 // The operand is a TargetConstant, we need to rebuild it as a regular
13675 // constant rather than negating the original operand.
13676 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
13677 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
13678 }
13679
13680 if (ImmValue != 0)
13681 Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13682 DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
13683 Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
13684 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
13685 UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
13686
13687 if (IsMaskVector) {
13688 // Truncate Result back to a mask vector (Result has same EVL as Op2)
13689 Result = DAG.getNode(
13690 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
13691 {Result, DAG.getConstant(0, DL, ContainerVT),
13692 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
13693 Mask, EVL2});
13694 }
13695
13696 if (!VT.isFixedLengthVector())
13697 return Result;
13698 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13699}
13700
13701SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
13702 SelectionDAG &DAG) const {
13703 SDLoc DL(Op);
13704 SDValue Val = Op.getOperand(0);
13705 SDValue Mask = Op.getOperand(1);
13706 SDValue VL = Op.getOperand(2);
13707 MVT VT = Op.getSimpleValueType();
13708
13709 MVT ContainerVT = VT;
13710 if (VT.isFixedLengthVector()) {
13711 ContainerVT = getContainerForFixedLengthVector(VT);
13712 MVT MaskVT = getMaskTypeFor(ContainerVT);
13713 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13714 }
13715
13716 SDValue Result;
13717 if (VT.getScalarType() == MVT::i1) {
13718 if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
13719 Result =
13720 DAG.getNode(C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
13721 ContainerVT, VL);
13722 } else {
13723 MVT WidenVT = ContainerVT.changeVectorElementType(MVT::i8);
13724 SDValue LHS =
13725 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, WidenVT, DAG.getUNDEF(WidenVT),
13726 DAG.getZExtOrTrunc(Val, DL, Subtarget.getXLenVT()), VL);
13727 SDValue RHS = DAG.getConstant(0, DL, WidenVT);
13728 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13729 {LHS, RHS, DAG.getCondCode(ISD::SETNE),
13730 DAG.getUNDEF(ContainerVT), Mask, VL});
13731 }
13732 } else {
13733 Result =
13734 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
13735 }
13736
13737 if (!VT.isFixedLengthVector())
13738 return Result;
13739 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13740}
13741
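// vp.reverse is lowered with an index vector of (EVL-1) - vid feeding a
// gather. A sketch of the common case (SEW=32, LMUL=1; mask omitted):
//   vsetvli     zero, a1, e32, m1, ta, ma   ; a1 = EVL
//   vid.v       v10
//   vrsub.vx    v10, v10, a2                ; a2 = EVL - 1
//   vrgather.vv v9, v8, v10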
13742SDValue
13743RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
13744 SelectionDAG &DAG) const {
13745 SDLoc DL(Op);
13746 MVT VT = Op.getSimpleValueType();
13747 MVT XLenVT = Subtarget.getXLenVT();
13748
13749 SDValue Op1 = Op.getOperand(0);
13750 SDValue Mask = Op.getOperand(1);
13751 SDValue EVL = Op.getOperand(2);
13752
13753 MVT ContainerVT = VT;
13754 if (VT.isFixedLengthVector()) {
13755 ContainerVT = getContainerForFixedLengthVector(VT);
13756 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13757 MVT MaskVT = getMaskTypeFor(ContainerVT);
13758 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13759 }
13760
13761 MVT GatherVT = ContainerVT;
13762 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
13763 // Check if we are working with mask vectors
13764 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
13765 if (IsMaskVector) {
13766 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
13767
13768 // Expand input operand
13769 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13770 DAG.getUNDEF(IndicesVT),
13771 DAG.getConstant(1, DL, XLenVT), EVL);
13772 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13773 DAG.getUNDEF(IndicesVT),
13774 DAG.getConstant(0, DL, XLenVT), EVL);
13775 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
13776 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
13777 }
13778
13779 unsigned EltSize = GatherVT.getScalarSizeInBits();
13780 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
13781 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
13782 unsigned MaxVLMAX =
13783 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
13784
13785 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
13786 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
13787 // to use vrgatherei16.vv.
13788 // TODO: It's also possible to use vrgatherei16.vv for other types to
13789 // decrease register width for the index calculation.
13790 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
13791 if (MaxVLMAX > 256 && EltSize == 8) {
13792 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
13793 // Split the vector in half and reverse each half using a full register
13794 // reverse.
13795 // Swap the halves and concatenate them.
13796 // Slide the concatenated result by (VLMax - VL).
13797 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
13798 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
13799 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
13800
13801 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
13802 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
13803
13804 // Reassemble the low and high pieces reversed.
13805 // NOTE: this Result is unmasked (because we do not need masks for
13806 // shuffles). If in the future this has to change, we can use a SELECT_VL
13807 // between Result and UNDEF using the mask originally passed to VP_REVERSE
13808 SDValue Result =
13809 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
13810
13811 // Slide off any elements from past EVL that were reversed into the low
13812 // elements.
13813 unsigned MinElts = GatherVT.getVectorMinNumElements();
13814 SDValue VLMax =
13815 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
13816 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
13817
13818 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
13819 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
13820
13821 if (IsMaskVector) {
13822 // Truncate Result back to a mask vector
13823 Result =
13824 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13825 {Result, DAG.getConstant(0, DL, GatherVT),
13826 DAG.getCondCode(ISD::SETNE),
13827 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13828 }
13829
13830 if (!VT.isFixedLengthVector())
13831 return Result;
13832 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13833 }
13834
13835 // Just promote the int type to i16 which will double the LMUL.
13836 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
13837 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13838 }
13839
13840 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
13841 SDValue VecLen =
13842 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
13843 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13844 DAG.getUNDEF(IndicesVT), VecLen, EVL);
13845 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
13846 DAG.getUNDEF(IndicesVT), Mask, EVL);
13847 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
13848 DAG.getUNDEF(GatherVT), Mask, EVL);
13849
13850 if (IsMaskVector) {
13851 // Truncate Result back to a mask vector
13852 Result = DAG.getNode(
13853 RISCVISD::SETCC_VL, DL, ContainerVT,
13854 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
13855 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13856 }
13857
13858 if (!VT.isFixedLengthVector())
13859 return Result;
13860 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13861}
13862
13863SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
13864 SelectionDAG &DAG) const {
13865 MVT VT = Op.getSimpleValueType();
13866 if (VT.getVectorElementType() != MVT::i1)
13867 return lowerVPOp(Op, DAG);
13868
13869 // It is safe to drop the mask parameter as masked-off elements are undef.
13870 SDValue Op1 = Op->getOperand(0);
13871 SDValue Op2 = Op->getOperand(1);
13872 SDValue VL = Op->getOperand(3);
13873
13874 MVT ContainerVT = VT;
13875 const bool IsFixed = VT.isFixedLengthVector();
13876 if (IsFixed) {
13877 ContainerVT = getContainerForFixedLengthVector(VT);
13878 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13879 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13880 }
13881
13882 SDLoc DL(Op);
13883 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
13884 if (!IsFixed)
13885 return Val;
13886 return convertFromScalableVector(VT, Val, DAG, Subtarget);
13887}
13888
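// VP strided loads map onto the riscv_vlse intrinsics. A sketch of the
// selected code for the unmasked SEW=32, LMUL=1 case:
//   vsetvli  zero, a2, e32, m1, ta, ma   ; a2 = EVL
//   vlse32.v v8, (a0), a1                ; a1 = byte stride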
13889SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
13890 SelectionDAG &DAG) const {
13891 SDLoc DL(Op);
13892 MVT XLenVT = Subtarget.getXLenVT();
13893 MVT VT = Op.getSimpleValueType();
13894 MVT ContainerVT = VT;
13895 if (VT.isFixedLengthVector())
13896 ContainerVT = getContainerForFixedLengthVector(VT);
13897
13898 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13899
13900 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
13901 // Check if the mask is known to be all ones
13902 SDValue Mask = VPNode->getMask();
13903 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13904
13905 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
13906 : Intrinsic::riscv_vlse_mask,
13907 DL, XLenVT);
13908 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
13909 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
13910 VPNode->getStride()};
13911 if (!IsUnmasked) {
13912 if (VT.isFixedLengthVector()) {
13913 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13914 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13915 }
13916 Ops.push_back(Mask);
13917 }
13918 Ops.push_back(VPNode->getVectorLength());
13919 if (!IsUnmasked) {
13920 SDValue Policy =
13921 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
13922 Ops.push_back(Policy);
13923 }
13924
13925 SDValue Result =
13927 VPNode->getMemoryVT(), VPNode->getMemOperand());
13928 SDValue Chain = Result.getValue(1);
13929
13930 if (VT.isFixedLengthVector())
13931 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13932
13933 return DAG.getMergeValues({Result, Chain}, DL);
13934}
13935
13936SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
13937 SelectionDAG &DAG) const {
13938 SDLoc DL(Op);
13939 MVT XLenVT = Subtarget.getXLenVT();
13940
13941 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
13942 SDValue StoreVal = VPNode->getValue();
13943 MVT VT = StoreVal.getSimpleValueType();
13944 MVT ContainerVT = VT;
13945 if (VT.isFixedLengthVector()) {
13946 ContainerVT = getContainerForFixedLengthVector(VT);
13947 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
13948 }
13949
13950 // Check if the mask is known to be all ones
13951 SDValue Mask = VPNode->getMask();
13952 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13953
13954 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
13955 : Intrinsic::riscv_vsse_mask,
13956 DL, XLenVT);
13957 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
13958 VPNode->getBasePtr(), VPNode->getStride()};
13959 if (!IsUnmasked) {
13960 if (VT.isFixedLengthVector()) {
13961 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13962 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13963 }
13964 Ops.push_back(Mask);
13965 }
13966 Ops.push_back(VPNode->getVectorLength());
13967
13968 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
13969 Ops, VPNode->getMemoryVT(),
13970 VPNode->getMemOperand());
13971}
13972
13973// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
13974// matched to a RVV indexed load. The RVV indexed load instructions only
13975// support the "unsigned unscaled" addressing mode; indices are implicitly
13976// zero-extended or truncated to XLEN and are treated as byte offsets. Any
13977// signed or scaled indexing is extended to the XLEN value type and scaled
13978// accordingly.
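// For example, on RV32 a gather whose index vector has i64 elements has its
// indices truncated to i32 below, since the hardware already treats indices
// as unsigned XLEN-bit byte offsets.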
13979SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
13980 SelectionDAG &DAG) const {
13981 SDLoc DL(Op);
13982 MVT VT = Op.getSimpleValueType();
13983
13984 const auto *MemSD = cast<MemSDNode>(Op.getNode());
13985 EVT MemVT = MemSD->getMemoryVT();
13986 MachineMemOperand *MMO = MemSD->getMemOperand();
13987 SDValue Chain = MemSD->getChain();
13988 SDValue BasePtr = MemSD->getBasePtr();
13989
13990 [[maybe_unused]] ISD::LoadExtType LoadExtType;
13991 SDValue Index, Mask, PassThru, VL;
13992
13993 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
13994 Index = VPGN->getIndex();
13995 Mask = VPGN->getMask();
13996 PassThru = DAG.getUNDEF(VT);
13997 VL = VPGN->getVectorLength();
13998 // VP doesn't support extending loads.
13999 LoadExtType = ISD::NON_EXTLOAD;
14000 } else {
14001 // Else it must be a MGATHER.
14002 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
14003 Index = MGN->getIndex();
14004 Mask = MGN->getMask();
14005 PassThru = MGN->getPassThru();
14006 LoadExtType = MGN->getExtensionType();
14007 }
14008
14009 MVT IndexVT = Index.getSimpleValueType();
14010 MVT XLenVT = Subtarget.getXLenVT();
14011
14012 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
14013 "Unexpected VTs!");
14014 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14015 // Targets have to explicitly opt-in for extending vector loads.
14016 assert(LoadExtType == ISD::NON_EXTLOAD &&
14017 "Unexpected extending MGATHER/VP_GATHER");
14018
14019 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14020 // the selection of the masked intrinsics doesn't do this for us.
14021 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14022
14023 MVT ContainerVT = VT;
14024 if (VT.isFixedLengthVector()) {
14025 ContainerVT = getContainerForFixedLengthVector(VT);
14026 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14027 ContainerVT.getVectorElementCount());
14028
14029 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14030
14031 if (!IsUnmasked) {
14032 MVT MaskVT = getMaskTypeFor(ContainerVT);
14033 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14034 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
14035 }
14036 }
14037
14038 if (!VL)
14039 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14040
14041 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14042 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14043 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14044 }
14045
14046 unsigned IntID =
14047 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
14048 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14049 if (IsUnmasked)
14050 Ops.push_back(DAG.getUNDEF(ContainerVT));
14051 else
14052 Ops.push_back(PassThru);
14053 Ops.push_back(BasePtr);
14054 Ops.push_back(Index);
14055 if (!IsUnmasked)
14056 Ops.push_back(Mask);
14057 Ops.push_back(VL);
14058 if (!IsUnmasked)
14059 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
14060
14061 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14062 SDValue Result =
14063 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14064 Chain = Result.getValue(1);
14065
14066 if (VT.isFixedLengthVector())
14067 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14068
14069 return DAG.getMergeValues({Result, Chain}, DL);
14070}
14071
14072// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14073// matched to a RVV indexed store. The RVV indexed store instructions only
14074// support the "unsigned unscaled" addressing mode; indices are implicitly
14075// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14076// signed or scaled indexing is extended to the XLEN value type and scaled
14077// accordingly.
14078SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14079 SelectionDAG &DAG) const {
14080 SDLoc DL(Op);
14081 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14082 EVT MemVT = MemSD->getMemoryVT();
14083 MachineMemOperand *MMO = MemSD->getMemOperand();
14084 SDValue Chain = MemSD->getChain();
14085 SDValue BasePtr = MemSD->getBasePtr();
14086
14087 [[maybe_unused]] bool IsTruncatingStore = false;
14088 SDValue Index, Mask, Val, VL;
14089
14090 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14091 Index = VPSN->getIndex();
14092 Mask = VPSN->getMask();
14093 Val = VPSN->getValue();
14094 VL = VPSN->getVectorLength();
14095 // VP doesn't support truncating stores.
14096 IsTruncatingStore = false;
14097 } else {
14098 // Else it must be a MSCATTER.
14099 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14100 Index = MSN->getIndex();
14101 Mask = MSN->getMask();
14102 Val = MSN->getValue();
14103 IsTruncatingStore = MSN->isTruncatingStore();
14104 }
14105
14106 MVT VT = Val.getSimpleValueType();
14107 MVT IndexVT = Index.getSimpleValueType();
14108 MVT XLenVT = Subtarget.getXLenVT();
14109
14110 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
14111 "Unexpected VTs!");
14112 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14113 // Targets have to explicitly opt-in for extending vector loads and
14114 // truncating vector stores.
14115 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14116
14117 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14118 // the selection of the masked intrinsics doesn't do this for us.
14119 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14120
14121 MVT ContainerVT = VT;
14122 if (VT.isFixedLengthVector()) {
14123 ContainerVT = getContainerForFixedLengthVector(VT);
14124 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14125 ContainerVT.getVectorElementCount());
14126
14127 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14128 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14129
14130 if (!IsUnmasked) {
14131 MVT MaskVT = getMaskTypeFor(ContainerVT);
14132 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14133 }
14134 }
14135
14136 if (!VL)
14137 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14138
14139 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14140 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14141 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14142 }
14143
14144 unsigned IntID =
14145 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14146 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14147 Ops.push_back(Val);
14148 Ops.push_back(BasePtr);
14149 Ops.push_back(Index);
14150 if (!IsUnmasked)
14151 Ops.push_back(Mask);
14152 Ops.push_back(VL);
14153
14154 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
14155 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14156}
14157
14158SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14159 SelectionDAG &DAG) const {
14160 const MVT XLenVT = Subtarget.getXLenVT();
14161 SDLoc DL(Op);
14162 SDValue Chain = Op->getOperand(0);
14163 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14164 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14165 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14166
14167 // Encoding used for rounding mode in RISC-V differs from that used in
14168 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index into
14169 // a table, which consists of a sequence of 4-bit fields, each representing the
14170 // corresponding FLT_ROUNDS mode.
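// For illustration (assuming the usual frm encodings RNE=0, RTZ=1, RDN=2,
// RUP=3, RMM=4): if frm reads RTZ (1), the code below computes
// (Table >> (1 * 4)) & 7, i.e. the second 4-bit field, which holds the
// FLT_ROUNDS value for "toward zero" (0).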
14171 static const int Table =
14172 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
14173 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
14174 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
14175 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
14176 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
14177
14178 SDValue Shift =
14179 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14180 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14181 DAG.getConstant(Table, DL, XLenVT), Shift);
14182 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14183 DAG.getConstant(7, DL, XLenVT));
14184
14185 return DAG.getMergeValues({Masked, Chain}, DL);
14186}
14187
14188SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14189 SelectionDAG &DAG) const {
14190 const MVT XLenVT = Subtarget.getXLenVT();
14191 SDLoc DL(Op);
14192 SDValue Chain = Op->getOperand(0);
14193 SDValue RMValue = Op->getOperand(1);
14194 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14195
14196 // Encoding used for rounding mode in RISC-V differs from that used in
14197 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
14198 // a table, which consists of a sequence of 4-bit fields, each representing
14199 // the corresponding RISC-V mode.
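// For illustration, under the same encoding assumptions as above: an incoming
// FLT_ROUNDS value of 1 ("to nearest") selects the 4-bit field at position
// 1 * 4, which holds RNE (0), the value then written to frm.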
14200 static const unsigned Table =
14201 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
14202 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
14203 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
14204 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
14205 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
14206
14207 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
14208
14209 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
14210 DAG.getConstant(2, DL, XLenVT));
14211 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14212 DAG.getConstant(Table, DL, XLenVT), Shift);
14213 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14214 DAG.getConstant(0x7, DL, XLenVT));
14215 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14216 RMValue);
14217}
14218
14219SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
14220 SelectionDAG &DAG) const {
14221 const MVT XLenVT = Subtarget.getXLenVT();
14222 SDLoc DL(Op);
14223 SDValue Chain = Op->getOperand(0);
14224 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14225 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14226 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14227}
14228
14229SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
14230 SelectionDAG &DAG) const {
14231 const MVT XLenVT = Subtarget.getXLenVT();
14232 SDLoc DL(Op);
14233 SDValue Chain = Op->getOperand(0);
14234 SDValue EnvValue = Op->getOperand(1);
14235 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14236
14237 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14238 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14239 EnvValue);
14240}
14241
14242SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
14243 SelectionDAG &DAG) const {
14244 const MVT XLenVT = Subtarget.getXLenVT();
14245 SDLoc DL(Op);
14246 SDValue Chain = Op->getOperand(0);
14247 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
14248 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14249
14250 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14251 EnvValue);
14252}
14253
14254const uint64_t ModeMask64 = ~RISCVExceptFlags::ALL;
14255const uint32_t ModeMask32 = ~RISCVExceptFlags::ALL;
14256
14257SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
14258 SelectionDAG &DAG) const {
14259 const MVT XLenVT = Subtarget.getXLenVT();
14260 SDLoc DL(Op);
14261 SDValue Chain = Op->getOperand(0);
14262 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14263 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14264 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14265 Chain = Result.getValue(1);
14266 return DAG.getMergeValues({Result, Chain}, DL);
14267}
14268
14269SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
14270 SelectionDAG &DAG) const {
14271 const MVT XLenVT = Subtarget.getXLenVT();
14272 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14273 SDLoc DL(Op);
14274 SDValue Chain = Op->getOperand(0);
14275 SDValue EnvValue = Op->getOperand(1);
14276 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14277 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14278
14279 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14280 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
14281 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14282 ModeMask);
14283 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
14284 EnvValue);
14285}
14286
14287SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
14288 SelectionDAG &DAG) const {
14289 const MVT XLenVT = Subtarget.getXLenVT();
14290 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14291 SDLoc DL(Op);
14292 SDValue Chain = Op->getOperand(0);
14293 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14294 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14295
14296 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14297 ModeMask);
14298}
14299
14300SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
14301 SelectionDAG &DAG) const {
14302 MachineFunction &MF = DAG.getMachineFunction();
14303
14304 bool isRISCV64 = Subtarget.is64Bit();
14305 EVT PtrVT = getPointerTy(DAG.getDataLayout());
14306
14307 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
14308 return DAG.getFrameIndex(FI, PtrVT);
14309}
14310
14311// Returns the opcode of the target-specific SDNode that implements the 32-bit
14312// form of the given Opcode.
14313static unsigned getRISCVWOpcode(unsigned Opcode) {
14314 switch (Opcode) {
14315 default:
14316 llvm_unreachable("Unexpected opcode");
14317 case ISD::SHL:
14318 return RISCVISD::SLLW;
14319 case ISD::SRA:
14320 return RISCVISD::SRAW;
14321 case ISD::SRL:
14322 return RISCVISD::SRLW;
14323 case ISD::SDIV:
14324 return RISCVISD::DIVW;
14325 case ISD::UDIV:
14326 return RISCVISD::DIVUW;
14327 case ISD::UREM:
14328 return RISCVISD::REMUW;
14329 case ISD::ROTL:
14330 return RISCVISD::ROLW;
14331 case ISD::ROTR:
14332 return RISCVISD::RORW;
14333 }
14334}
14335
14336// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
14337 // node. Because i8/i16/i32 aren't legal types for RV64, these operations would
14338 // otherwise be promoted to i64, making it difficult to select the
14339 // SLLW/DIVUW/.../*W instructions later, because the fact that the operation was
14340 // originally of type i8/i16/i32 is lost.
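// For example, (i32 (srl X, Y)) on RV64 becomes
// (trunc i32 (SRLW (any_extend X), (any_extend Y))), keeping the original
// 32-bit intent visible to instruction selection.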
14341 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
14342 unsigned ExtOpc = ISD::ANY_EXTEND) {
14343 SDLoc DL(N);
14344 unsigned WOpcode = getRISCVWOpcode(N->getOpcode());
14345 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
14346 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
14347 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
14348 // ReplaceNodeResults requires we maintain the same type for the return value.
14349 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
14350}
14351
14352 // Converts the given 32-bit operation to an i64 operation with sign-extension
14353 // semantics, to reduce the number of sign-extension instructions.
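// For example, (i32 (add X, Y)) becomes
// (trunc (sext_inreg (add (any_extend X), (any_extend Y)), i32)), which isel
// can match as ADDW without a separate sign-extension instruction.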
14354 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
14355 SDLoc DL(N);
14356 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14357 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14358 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
14359 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14360 DAG.getValueType(MVT::i32));
14361 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
14362}
14363
14364 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
14365 SmallVectorImpl<SDValue> &Results,
14366 SelectionDAG &DAG) const {
14367 SDLoc DL(N);
14368 switch (N->getOpcode()) {
14369 default:
14370 llvm_unreachable("Don't know how to custom type legalize this operation!");
14371 case ISD::STRICT_FP_TO_SINT:
14372 case ISD::STRICT_FP_TO_UINT:
14373 case ISD::FP_TO_SINT:
14374 case ISD::FP_TO_UINT: {
14375 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14376 "Unexpected custom legalisation");
14377 bool IsStrict = N->isStrictFPOpcode();
14378 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
14379 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
14380 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
14381 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14382 TargetLowering::TypeSoftenFloat) {
14383 if (!isTypeLegal(Op0.getValueType()))
14384 return;
14385 if (IsStrict) {
14386 SDValue Chain = N->getOperand(0);
14387 // In absence of Zfh, promote f16 to f32, then convert.
14388 if (Op0.getValueType() == MVT::f16 &&
14389 !Subtarget.hasStdExtZfhOrZhinx()) {
14390 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
14391 {Chain, Op0});
14392 Chain = Op0.getValue(1);
14393 }
14394 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
14395 : RISCVISD::STRICT_FCVT_WU_RV64;
14396 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14397 SDValue Res = DAG.getNode(
14398 Opc, DL, VTs, Chain, Op0,
14399 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14400 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14401 Results.push_back(Res.getValue(1));
14402 return;
14403 }
14404 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
14405 // convert.
14406 if ((Op0.getValueType() == MVT::f16 &&
14407 !Subtarget.hasStdExtZfhOrZhinx()) ||
14408 Op0.getValueType() == MVT::bf16)
14409 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14410
14411 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14412 SDValue Res =
14413 DAG.getNode(Opc, DL, MVT::i64, Op0,
14414 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14415 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14416 return;
14417 }
14418 // If the FP type needs to be softened, emit a library call using the 'si'
14419 // version. If we left it to default legalization we'd end up with 'di'. If
14420 // the FP type doesn't need to be softened just let generic type
14421 // legalization promote the result type.
14422 RTLIB::Libcall LC;
14423 if (IsSigned)
14424 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
14425 else
14426 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
14427 MakeLibCallOptions CallOptions;
14428 EVT OpVT = Op0.getValueType();
14429 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
14430 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
14431 SDValue Result;
14432 std::tie(Result, Chain) =
14433 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
14434 Results.push_back(Result);
14435 if (IsStrict)
14436 Results.push_back(Chain);
14437 break;
14438 }
14439 case ISD::LROUND: {
14440 SDValue Op0 = N->getOperand(0);
14441 EVT Op0VT = Op0.getValueType();
14442 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14443 TargetLowering::TypeSoftenFloat) {
14444 if (!isTypeLegal(Op0VT))
14445 return;
14446
14447 // In absence of Zfh, promote f16 to f32, then convert.
14448 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
14449 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14450
14451 SDValue Res =
14452 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
14453 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
14454 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14455 return;
14456 }
14457 // If the FP type needs to be softened, emit a library call to lround. We'll
14458 // need to truncate the result. We assume any value that doesn't fit in i32
14459 // is allowed to return an unspecified value.
14460 RTLIB::Libcall LC =
14461 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
14462 MakeLibCallOptions CallOptions;
14463 EVT OpVT = Op0.getValueType();
14464 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
14465 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
14466 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
14467 Results.push_back(Result);
14468 break;
14469 }
14470 case ISD::READCYCLECOUNTER:
14471 case ISD::READSTEADYCOUNTER: {
14472 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
14473 "has custom type legalization on riscv32");
14474
14475 SDValue LoCounter, HiCounter;
14476 MVT XLenVT = Subtarget.getXLenVT();
14477 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
14478 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
14479 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
14480 } else {
14481 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
14482 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
14483 }
14484 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
14485 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
14486 N->getOperand(0), LoCounter, HiCounter);
14487
14488 Results.push_back(
14489 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
14490 Results.push_back(RCW.getValue(2));
14491 break;
14492 }
14493 case ISD::LOAD: {
14494 if (!ISD::isNON_EXTLoad(N))
14495 return;
14496
14497 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
14498 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
14499 LoadSDNode *Ld = cast<LoadSDNode>(N);
14500
14501 if (N->getValueType(0) == MVT::i64) {
14502 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
14503 "Unexpected custom legalisation");
14504
14505 if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
14506 return;
14507
14508 SDLoc DL(N);
14509 SDValue Result = DAG.getMemIntrinsicNode(
14510 RISCVISD::LD_RV32, DL,
14511 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
14512 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
14513 SDValue Lo = Result.getValue(0);
14514 SDValue Hi = Result.getValue(1);
14515 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
14516 Results.append({Pair, Result.getValue(2)});
14517 return;
14518 }
14519
14520 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14521 "Unexpected custom legalisation");
14522
14523 SDLoc dl(N);
14524 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
14525 Ld->getBasePtr(), Ld->getMemoryVT(),
14526 Ld->getMemOperand());
14527 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
14528 Results.push_back(Res.getValue(1));
14529 return;
14530 }
14531 case ISD::MUL: {
14532 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
14533 unsigned XLen = Subtarget.getXLen();
14534 // This multiply needs to be expanded; try to use MULHSU+MUL if possible.
14535 if (Size > XLen) {
14536 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
14537 SDValue LHS = N->getOperand(0);
14538 SDValue RHS = N->getOperand(1);
14539 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
14540
14541 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
14542 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
14543 // We need exactly one side to be unsigned.
14544 if (LHSIsU == RHSIsU)
14545 return;
14546
14547 auto MakeMULPair = [&](SDValue S, SDValue U) {
14548 MVT XLenVT = Subtarget.getXLenVT();
14549 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
14550 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
14551 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
14552 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
14553 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
14554 };
14555
14556 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
14557 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
14558
14559 // The other operand should be signed, but still prefer MULH when
14560 // possible.
14561 if (RHSIsU && LHSIsS && !RHSIsS)
14562 Results.push_back(MakeMULPair(LHS, RHS));
14563 else if (LHSIsU && RHSIsS && !LHSIsS)
14564 Results.push_back(MakeMULPair(RHS, LHS));
14565
14566 return;
14567 }
14568 [[fallthrough]];
14569 }
14570 case ISD::ADD:
14571 case ISD::SUB:
14572 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14573 "Unexpected custom legalisation");
14574 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
14575 break;
14576 case ISD::SHL:
14577 case ISD::SRA:
14578 case ISD::SRL:
14579 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14580 "Unexpected custom legalisation");
14581 if (N->getOperand(1).getOpcode() != ISD::Constant) {
14582 // If we can use a BSET instruction, allow default promotion to apply.
14583 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
14584 isOneConstant(N->getOperand(0)))
14585 break;
14586 Results.push_back(customLegalizeToWOp(N, DAG));
14587 break;
14588 }
14589
14590 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
14591 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
14592 // shift amount.
14593 if (N->getOpcode() == ISD::SHL) {
14594 SDLoc DL(N);
14595 SDValue NewOp0 =
14596 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14597 SDValue NewOp1 =
14598 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
14599 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
14600 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14601 DAG.getValueType(MVT::i32));
14602 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14603 }
14604
14605 break;
14606 case ISD::ROTL:
14607 case ISD::ROTR:
14608 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14609 "Unexpected custom legalisation");
14610 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
14611 Subtarget.hasVendorXTHeadBb()) &&
14612 "Unexpected custom legalization");
14613 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
14614 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
14615 return;
14616 Results.push_back(customLegalizeToWOp(N, DAG));
14617 break;
14618 case ISD::CTTZ:
14619 case ISD::CTTZ_ZERO_UNDEF:
14620 case ISD::CTLZ:
14621 case ISD::CTLZ_ZERO_UNDEF: {
14622 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14623 "Unexpected custom legalisation");
14624
14625 SDValue NewOp0 =
14626 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14627 bool IsCTZ =
14628 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
14629 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
14630 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
14631 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14632 return;
14633 }
14634 case ISD::SDIV:
14635 case ISD::UDIV:
14636 case ISD::UREM: {
14637 MVT VT = N->getSimpleValueType(0);
14638 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
14639 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
14640 "Unexpected custom legalisation");
14641 // Don't promote division/remainder by a constant, since we should expand those
14642 // to a multiply by a magic constant.
14643 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
14644 if (N->getOperand(1).getOpcode() == ISD::Constant &&
14645 !isIntDivCheap(N->getValueType(0), Attr))
14646 return;
14647
14648 // If the input is i32, use ANY_EXTEND since the W instructions don't read
14649 // the upper 32 bits. For other types we need to sign or zero extend
14650 // based on the opcode.
14651 unsigned ExtOpc = ISD::ANY_EXTEND;
14652 if (VT != MVT::i32)
14653 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
14654 : ISD::ZERO_EXTEND;
14655
14656 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
14657 break;
14658 }
14659 case ISD::SADDO: {
14660 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14661 "Unexpected custom legalisation");
14662
14663 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
14664 // use the default legalization.
14665 if (!isa<ConstantSDNode>(N->getOperand(1)))
14666 return;
14667
14668 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14669 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
14670 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
14671 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14672 DAG.getValueType(MVT::i32));
14673
14674 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14675
14676 // For an addition, the result should be less than one of the operands (LHS)
14677 // if and only if the other operand (RHS) is negative, otherwise there will
14678 // be overflow.
14679 // For a subtraction, the result should be less than one of the operands
14680 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
14681 // otherwise there will be overflow.
14682 EVT OType = N->getValueType(1);
14683 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
14684 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
14685
14686 SDValue Overflow =
14687 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
14688 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14689 Results.push_back(Overflow);
14690 return;
14691 }
14692 case ISD::UADDO:
14693 case ISD::USUBO: {
14694 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14695 "Unexpected custom legalisation");
14696 bool IsAdd = N->getOpcode() == ISD::UADDO;
14697 // Create an ADDW or SUBW.
14698 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14699 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14700 SDValue Res =
14701 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
14702 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14703 DAG.getValueType(MVT::i32));
14704
14705 SDValue Overflow;
14706 if (IsAdd && isOneConstant(RHS)) {
14707 // Special case uaddo X, 1 overflowed if the addition result is 0.
14708 // The general case (X + C) < C is not necessarily beneficial. Although we
14709 // reduce the live range of X, we may introduce the materialization of
14710 // constant C, especially when the setcc result is used by a branch, since
14711 // there are no compare-with-immediate branch instructions.
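// For illustration: (uaddo i32 X, 1) overflows exactly when X is 0xffffffff,
// i.e. when the 32-bit sum wraps to 0, so checking the ADDW result against
// zero is sufficient and needs no extra constant.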
14712 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
14713 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
14714 } else if (IsAdd && isAllOnesConstant(RHS)) {
14715 // Special case uaddo X, -1 overflowed if X != 0.
14716 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
14717 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
14718 } else {
14719 // Sign extend the LHS and perform an unsigned compare with the ADDW
14720 // result. Since the inputs are sign extended from i32, this is equivalent
14721 // to comparing the lower 32 bits.
14722 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14723 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
14724 IsAdd ? ISD::SETULT : ISD::SETUGT);
14725 }
14726
14727 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14728 Results.push_back(Overflow);
14729 return;
14730 }
14731 case ISD::UADDSAT:
14732 case ISD::USUBSAT: {
14733 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14734 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
14735 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
14736 // promotion for UADDO/USUBO.
14737 Results.push_back(expandAddSubSat(N, DAG));
14738 return;
14739 }
14740 case ISD::SADDSAT:
14741 case ISD::SSUBSAT: {
14742 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14743 "Unexpected custom legalisation");
14744 Results.push_back(expandAddSubSat(N, DAG));
14745 return;
14746 }
14747 case ISD::ABS: {
14748 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14749 "Unexpected custom legalisation");
14750
14751 if (Subtarget.hasStdExtZbb()) {
14752 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
14753 // This allows us to remember that the result is sign extended. Expanding
14754 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
14755 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
14756 N->getOperand(0));
14757 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
14758 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
14759 return;
14760 }
14761
14762 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
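// For example, for X = -5: Y = -1, xor(X, Y) = 4, and 4 - (-1) = 5; for a
// non-negative X, Y = 0 and the value is returned unchanged.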
14763 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14764
14765 // Freeze the source so we can increase its use count.
14766 Src = DAG.getFreeze(Src);
14767
14768 // Copy sign bit to all bits using the sraiw pattern.
14769 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
14770 DAG.getValueType(MVT::i32));
14771 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
14772 DAG.getConstant(31, DL, MVT::i64));
14773
14774 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
14775 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
14776
14777 // NOTE: The result is only required to be anyextended, but sext is
14778 // consistent with type legalization of sub.
14779 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
14780 DAG.getValueType(MVT::i32));
14781 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14782 return;
14783 }
14784 case ISD::BITCAST: {
14785 EVT VT = N->getValueType(0);
14786 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
14787 SDValue Op0 = N->getOperand(0);
14788 EVT Op0VT = Op0.getValueType();
14789 MVT XLenVT = Subtarget.getXLenVT();
14790 if (VT == MVT::i16 &&
14791 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
14792 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
14793 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
14794 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
14795 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
14796 Subtarget.hasStdExtFOrZfinx()) {
14797 SDValue FPConv =
14798 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
14799 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
14800 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
14801 Subtarget.hasStdExtDOrZdinx()) {
14802 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
14803 DAG.getVTList(MVT::i32, MVT::i32), Op0);
14804 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
14805 NewReg.getValue(0), NewReg.getValue(1));
14806 Results.push_back(RetReg);
14807 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
14808 isTypeLegal(Op0VT)) {
14809 // Custom-legalize bitcasts from fixed-length vector types to illegal
14810 // scalar types in order to improve codegen. Bitcast the vector to a
14811 // one-element vector type whose element type is the same as the result
14812 // type, and extract the first element.
14813 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
14814 if (isTypeLegal(BVT)) {
14815 SDValue BVec = DAG.getBitcast(BVT, Op0);
14816 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
14817 }
14818 }
14819 break;
14820 }
14821 case ISD::BITREVERSE: {
14822 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
14823 "Unexpected custom legalisation");
14824 MVT XLenVT = Subtarget.getXLenVT();
14825 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14826 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
14827 // ReplaceNodeResults requires we maintain the same type for the return
14828 // value.
14829 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
14830 break;
14831 }
14832 case RISCVISD::BREV8:
14833 case RISCVISD::ORC_B: {
14834 MVT VT = N->getSimpleValueType(0);
14835 MVT XLenVT = Subtarget.getXLenVT();
14836 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
14837 "Unexpected custom legalisation");
14838 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
14839 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
14840 "Unexpected extension");
14841 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14842 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
14843 // ReplaceNodeResults requires we maintain the same type for the return
14844 // value.
14845 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
14846 break;
14847 }
14848 case ISD::EXTRACT_VECTOR_ELT: {
14849 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
14850 // type is illegal (currently only vXi64 RV32).
14851 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
14852 // transferred to the destination register. We issue two of these from the
14853 // upper and lower halves of the SEW-bit vector element, slid down to the
14854 // first element.
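// For example, extracting element 2 of a v4i64 vector on RV32: slide the
// vector down by 2, vmv.x.s the low 32 bits, shift the element right by 32,
// vmv.x.s again for the high 32 bits, then BUILD_PAIR the halves into an i64.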
14855 SDValue Vec = N->getOperand(0);
14856 SDValue Idx = N->getOperand(1);
14857
14858 // The vector type hasn't been legalized yet so we can't issue target
14859 // specific nodes if it needs legalization.
14860 // FIXME: We would manually legalize if it's important.
14861 if (!isTypeLegal(Vec.getValueType()))
14862 return;
14863
14864 MVT VecVT = Vec.getSimpleValueType();
14865
14866 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
14867 VecVT.getVectorElementType() == MVT::i64 &&
14868 "Unexpected EXTRACT_VECTOR_ELT legalization");
14869
14870 // If this is a fixed vector, we need to convert it to a scalable vector.
14871 MVT ContainerVT = VecVT;
14872 if (VecVT.isFixedLengthVector()) {
14873 ContainerVT = getContainerForFixedLengthVector(VecVT);
14874 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
14875 }
14876
14877 MVT XLenVT = Subtarget.getXLenVT();
14878
14879 // Use a VL of 1 to avoid processing more elements than we need.
14880 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
14881
14882 // Unless the index is known to be 0, we must slide the vector down to get
14883 // the desired element into index 0.
14884 if (!isNullConstant(Idx)) {
14885 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
14886 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
14887 }
14888
14889 // Extract the lower XLEN bits of the correct vector element.
14890 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
14891
14892 // To extract the upper XLEN bits of the vector element, shift the first
14893 // element right by 32 bits and re-extract the lower XLEN bits.
14894 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14895 DAG.getUNDEF(ContainerVT),
14896 DAG.getConstant(32, DL, XLenVT), VL);
14897 SDValue LShr32 =
14898 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
14899 DAG.getUNDEF(ContainerVT), Mask, VL);
14900
14901 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
14902
14903 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
14904 break;
14905 }
14906 case ISD::INTRINSIC_WO_CHAIN: {
14907 unsigned IntNo = N->getConstantOperandVal(0);
14908 switch (IntNo) {
14909 default:
14910 llvm_unreachable(
14911 "Don't know how to custom type legalize this intrinsic!");
14912 case Intrinsic::experimental_get_vector_length: {
14913 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
14914 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14915 return;
14916 }
14917 case Intrinsic::experimental_cttz_elts: {
14918 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
14919 Results.push_back(
14920 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
14921 return;
14922 }
14923 case Intrinsic::riscv_orc_b:
14924 case Intrinsic::riscv_brev8:
14925 case Intrinsic::riscv_sha256sig0:
14926 case Intrinsic::riscv_sha256sig1:
14927 case Intrinsic::riscv_sha256sum0:
14928 case Intrinsic::riscv_sha256sum1:
14929 case Intrinsic::riscv_sm3p0:
14930 case Intrinsic::riscv_sm3p1: {
14931 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14932 return;
14933 unsigned Opc;
14934 switch (IntNo) {
14935 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
14936 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
14937 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
14938 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
14939 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
14940 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
14941 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
14942 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
14943 }
14944
14945 SDValue NewOp =
14946 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14947 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
14948 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14949 return;
14950 }
14951 case Intrinsic::riscv_sm4ks:
14952 case Intrinsic::riscv_sm4ed: {
14953 unsigned Opc =
14954 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
14955 SDValue NewOp0 =
14956 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14957 SDValue NewOp1 =
14958 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
14959 SDValue Res =
14960 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
14961 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14962 return;
14963 }
14964 case Intrinsic::riscv_mopr: {
14965 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14966 return;
14967 SDValue NewOp =
14968 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14969 SDValue Res = DAG.getNode(
14970 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
14971 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
14972 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14973 return;
14974 }
14975 case Intrinsic::riscv_moprr: {
14976 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14977 return;
14978 SDValue NewOp0 =
14979 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14980 SDValue NewOp1 =
14981 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
14982 SDValue Res = DAG.getNode(
14983 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
14984 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
14985 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14986 return;
14987 }
14988 case Intrinsic::riscv_clmul: {
14989 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14990 return;
14991
14992 SDValue NewOp0 =
14993 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14994 SDValue NewOp1 =
14995 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
14996 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
14997 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14998 return;
14999 }
15000 case Intrinsic::riscv_clmulh:
15001 case Intrinsic::riscv_clmulr: {
15002 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15003 return;
15004
15005 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
15006 // to the full 128-bit clmul result of multiplying two xlen values.
15007 // Perform clmulr or clmulh on the shifted values. Finally, extract the
15008 // upper 32 bits.
15009 //
15010 // The alternative is to mask the inputs to 32 bits and use clmul, but
15011 // that requires two shifts to mask each input without zext.w.
15012 // FIXME: If the inputs are known zero extended or could be freely
15013 // zero extended, the mask form would be better.
15014 SDValue NewOp0 =
15015 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15016 SDValue NewOp1 =
15017 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15018 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
15019 DAG.getConstant(32, DL, MVT::i64));
15020 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
15021 DAG.getConstant(32, DL, MVT::i64));
15022 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
15023 : RISCVISD::CLMULR;
15024 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
15025 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
15026 DAG.getConstant(32, DL, MVT::i64));
15027 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15028 return;
15029 }
15030 case Intrinsic::riscv_vmv_x_s: {
15031 EVT VT = N->getValueType(0);
15032 MVT XLenVT = Subtarget.getXLenVT();
15033 if (VT.bitsLT(XLenVT)) {
15034 // Simple case just extract using vmv.x.s and truncate.
15035 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
15036 Subtarget.getXLenVT(), N->getOperand(1));
15037 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
15038 return;
15039 }
15040
15041 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
15042 "Unexpected custom legalization");
15043
15044 // We need to do the move in two steps.
15045 SDValue Vec = N->getOperand(1);
15046 MVT VecVT = Vec.getSimpleValueType();
15047
15048 // First extract the lower XLEN bits of the element.
15049 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15050
15051 // To extract the upper XLEN bits of the vector element, shift the first
15052 // element right by 32 bits and re-extract the lower XLEN bits.
15053 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
15054
15055 SDValue ThirtyTwoV =
15056 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
15057 DAG.getConstant(32, DL, XLenVT), VL);
15058 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
15059 DAG.getUNDEF(VecVT), Mask, VL);
15060 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15061
15062 Results.push_back(
15063 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15064 break;
15065 }
15066 }
15067 break;
15068 }
15069 case ISD::VECREDUCE_ADD:
15070 case ISD::VECREDUCE_AND:
15071 case ISD::VECREDUCE_OR:
15072 case ISD::VECREDUCE_XOR:
15073 case ISD::VECREDUCE_SMAX:
15074 case ISD::VECREDUCE_UMAX:
15075 case ISD::VECREDUCE_SMIN:
15076 case ISD::VECREDUCE_UMIN:
15077 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
15078 Results.push_back(V);
15079 break;
15080 case ISD::VP_REDUCE_ADD:
15081 case ISD::VP_REDUCE_AND:
15082 case ISD::VP_REDUCE_OR:
15083 case ISD::VP_REDUCE_XOR:
15084 case ISD::VP_REDUCE_SMAX:
15085 case ISD::VP_REDUCE_UMAX:
15086 case ISD::VP_REDUCE_SMIN:
15087 case ISD::VP_REDUCE_UMIN:
15088 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
15089 Results.push_back(V);
15090 break;
15091 case ISD::GET_ROUNDING: {
15092 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
15093 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
15094 Results.push_back(Res.getValue(0));
15095 Results.push_back(Res.getValue(1));
15096 break;
15097 }
15098 }
15099}
15100
15101/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
15102/// which corresponds to it.
15103static unsigned getVecReduceOpcode(unsigned Opc) {
15104 switch (Opc) {
15105 default:
15106 llvm_unreachable("Unhandled binary to transform reduction");
15107 case ISD::ADD:
15108 return ISD::VECREDUCE_ADD;
15109 case ISD::UMAX:
15110 return ISD::VECREDUCE_UMAX;
15111 case ISD::SMAX:
15112 return ISD::VECREDUCE_SMAX;
15113 case ISD::UMIN:
15114 return ISD::VECREDUCE_UMIN;
15115 case ISD::SMIN:
15116 return ISD::VECREDUCE_SMIN;
15117 case ISD::AND:
15118 return ISD::VECREDUCE_AND;
15119 case ISD::OR:
15120 return ISD::VECREDUCE_OR;
15121 case ISD::XOR:
15122 return ISD::VECREDUCE_XOR;
15123 case ISD::FADD:
15124 // Note: This is the associative form of the generic reduction opcode.
15125 return ISD::VECREDUCE_FADD;
15126 }
15127}
15128
15129/// Perform two related transforms whose purpose is to incrementally recognize
15130/// an explode_vector followed by scalar reduction as a vector reduction node.
15131/// This exists to recover from a deficiency in SLP which can't handle
15132/// forests with multiple roots sharing common nodes. In some cases, one
15133/// of the trees will be vectorized, and the other will remain (unprofitably)
15134/// scalarized.
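/// For example, (add (extractelt %v, 0), (extractelt %v, 1)) is first rewritten
/// to (vecreduce_add (extract_subvector %v, [0,1])); a later visit of
/// (add (that reduce), (extractelt %v, 2)) then grows the reduction to cover
/// the first three elements, and so on.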
15135static SDValue
15136 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
15137 const RISCVSubtarget &Subtarget) {
15138
15139 // This transform needs to run before all integer types have been legalized
15140 // to i64 (so that the vector element type matches the add type), and while
15141 // it's safe to introduce odd sized vector types.
15142 if (DAG.NewNodesMustHaveLegalTypes)
15143 return SDValue();
15144
15145 // Without V, this transform isn't useful. We could form the (illegal)
15146 // operations and let them be scalarized again, but there's really no point.
15147 if (!Subtarget.hasVInstructions())
15148 return SDValue();
15149
15150 const SDLoc DL(N);
15151 const EVT VT = N->getValueType(0);
15152 const unsigned Opc = N->getOpcode();
15153
15154 // For FADD, we only handle the case with reassociation allowed. We
15155 // could handle strict reduction order, but at the moment, there's no
15156 // known reason to, and the complexity isn't worth it.
15157 // TODO: Handle fminnum and fmaxnum here
15158 if (!VT.isInteger() &&
15159 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
15160 return SDValue();
15161
15162 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
15163 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
15164 "Inconsistent mappings");
15165 SDValue LHS = N->getOperand(0);
15166 SDValue RHS = N->getOperand(1);
15167
15168 if (!LHS.hasOneUse() || !RHS.hasOneUse())
15169 return SDValue();
15170
15171 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15172 std::swap(LHS, RHS);
15173
15174 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15175 !isa<ConstantSDNode>(RHS.getOperand(1)))
15176 return SDValue();
15177
15178 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
15179 SDValue SrcVec = RHS.getOperand(0);
15180 EVT SrcVecVT = SrcVec.getValueType();
15181 assert(SrcVecVT.getVectorElementType() == VT);
15182 if (SrcVecVT.isScalableVector())
15183 return SDValue();
15184
15185 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
15186 return SDValue();
15187
15188 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
15189 // reduce_op (extract_subvector [2 x VT] from V). This will form the
15190 // root of our reduction tree. TODO: We could extend this to any two
15191 // adjacent aligned constant indices if desired.
15192 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15193 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
15194 uint64_t LHSIdx =
15195 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
15196 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
15197 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
15198 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15199 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
15200 }
15201 }
15202
15203 // Match (binop (reduce (extract_subvector V, 0),
15204 // (extract_vector_elt V, sizeof(SubVec))))
15205 // into a reduction of one more element from the original vector V.
15206 if (LHS.getOpcode() != ReduceOpc)
15207 return SDValue();
15208
15209 SDValue ReduceVec = LHS.getOperand(0);
15210 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15211 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
15212 isNullConstant(ReduceVec.getOperand(1)) &&
15213 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
15214 // For illegal types (e.g. 3xi32), most will be combined again into a
15215 // wider (hopefully legal) type. If this is a terminal state, we are
15216 // relying on type legalization here to produce something reasonable
15217 // and this lowering quality could probably be improved. (TODO)
15218 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
15219 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15220 return DAG.getNode(ReduceOpc, DL, VT, Vec,
15221 ReduceVec->getFlags() & N->getFlags());
15222 }
15223
15224 return SDValue();
15225}
15226
15227
15228// Try to fold (<bop> x, (reduction.<bop> vec, start))
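// e.g. (add X, (extractelt (VECREDUCE_ADD_VL ..., splat(0), ...), 0)) can
// instead feed X in as the reduction's start value, provided the existing
// start is the neutral element and the AVL is known non-zero (checked below).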
15229 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
15230 const RISCVSubtarget &Subtarget) {
15231 auto BinOpToRVVReduce = [](unsigned Opc) {
15232 switch (Opc) {
15233 default:
15234 llvm_unreachable("Unhandled binary to transform reduction");
15235 case ISD::ADD:
15236 return RISCVISD::VECREDUCE_ADD_VL;
15237 case ISD::UMAX:
15238 return RISCVISD::VECREDUCE_UMAX_VL;
15239 case ISD::SMAX:
15240 return RISCVISD::VECREDUCE_SMAX_VL;
15241 case ISD::UMIN:
15242 return RISCVISD::VECREDUCE_UMIN_VL;
15243 case ISD::SMIN:
15244 return RISCVISD::VECREDUCE_SMIN_VL;
15245 case ISD::AND:
15246 return RISCVISD::VECREDUCE_AND_VL;
15247 case ISD::OR:
15248 return RISCVISD::VECREDUCE_OR_VL;
15249 case ISD::XOR:
15250 return RISCVISD::VECREDUCE_XOR_VL;
15251 case ISD::FADD:
15252 return RISCVISD::VECREDUCE_FADD_VL;
15253 case ISD::FMAXNUM:
15254 return RISCVISD::VECREDUCE_FMAX_VL;
15255 case ISD::FMINNUM:
15256 return RISCVISD::VECREDUCE_FMIN_VL;
15257 }
15258 };
15259
15260 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
15261 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15262 isNullConstant(V.getOperand(1)) &&
15263 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
15264 };
15265
15266 unsigned Opc = N->getOpcode();
15267 unsigned ReduceIdx;
15268 if (IsReduction(N->getOperand(0), Opc))
15269 ReduceIdx = 0;
15270 else if (IsReduction(N->getOperand(1), Opc))
15271 ReduceIdx = 1;
15272 else
15273 return SDValue();
15274
15275 // Skip FADD if reassociation is disallowed; this combine requires it.
15276 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
15277 return SDValue();
15278
15279 SDValue Extract = N->getOperand(ReduceIdx);
15280 SDValue Reduce = Extract.getOperand(0);
15281 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
15282 return SDValue();
15283
15284 SDValue ScalarV = Reduce.getOperand(2);
15285 EVT ScalarVT = ScalarV.getValueType();
15286 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
15287 ScalarV.getOperand(0)->isUndef() &&
15288 isNullConstant(ScalarV.getOperand(2)))
15289 ScalarV = ScalarV.getOperand(1);
15290
15291 // Make sure that ScalarV is a splat with VL=1.
15292 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
15293 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
15294 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
15295 return SDValue();
15296
15297 if (!isNonZeroAVL(ScalarV.getOperand(2)))
15298 return SDValue();
15299
15300 // Check that the scalar operand of ScalarV is the neutral element.
15301 // TODO: Deal with value other than neutral element.
15302 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
15303 0))
15304 return SDValue();
15305
15306 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
15307 // FIXME: We might be able to improve this if operand 0 is undef.
15308 if (!isNonZeroAVL(Reduce.getOperand(5)))
15309 return SDValue();
15310
15311 SDValue NewStart = N->getOperand(1 - ReduceIdx);
15312
15313 SDLoc DL(N);
15314 SDValue NewScalarV =
15315 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
15316 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
15317
15318 // If we looked through an INSERT_SUBVECTOR we need to restore it.
15319 if (ScalarVT != ScalarV.getValueType())
15320 NewScalarV =
15321 DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);
15322
15323 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
15324 NewScalarV, Reduce.getOperand(3),
15325 Reduce.getOperand(4), Reduce.getOperand(5)};
15326 SDValue NewReduce =
15327 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
15328 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
15329 Extract.getOperand(1));
15330}
15331
15332// Optimize (add (shl x, c0), (shl y, c1)) ->
15333// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
15334// or
15335// (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <=31.
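// For example, (add (shl x, 1), (shl y, 3)) becomes (shl (SHL_ADD y, 2, x), 1),
// i.e. sh2add y, x followed by slli by 1 when Zba is available.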
15336 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
15337 const RISCVSubtarget &Subtarget) {
15338 const bool HasStdExtZba = Subtarget.hasStdExtZba();
15339 const bool HasVendorXAndesPerf = Subtarget.hasVendorXAndesPerf();
15340 const bool HasVendorXqciac = Subtarget.hasVendorXqciac();
15341 // Perform this optimization only when the Zba/XAndesPerf/Xqciac extension is enabled.
15342 if (!HasStdExtZba && !HasVendorXAndesPerf && !HasVendorXqciac)
15343 return SDValue();
15344
15345 // Skip for vector types and larger types.
15346 EVT VT = N->getValueType(0);
15347 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15348 return SDValue();
15349
15350 // The two operand nodes must be SHL and have no other use.
15351 SDValue N0 = N->getOperand(0);
15352 SDValue N1 = N->getOperand(1);
15353 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
15354 !N0->hasOneUse() || !N1->hasOneUse())
15355 return SDValue();
15356
15357 // Check c0 and c1.
15358 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15359 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
15360 if (!N0C || !N1C)
15361 return SDValue();
15362 int64_t C0 = N0C->getSExtValue();
15363 int64_t C1 = N1C->getSExtValue();
15364 if (C0 <= 0 || C1 <= 0)
15365 return SDValue();
15366
15367 int64_t Diff = std::abs(C0 - C1);
15368 bool IsShXaddDiff = Diff == 1 || Diff == 2 || Diff == 3;
15369 bool HasShXadd = HasStdExtZba || HasVendorXAndesPerf;
15370
15371 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
15372 if ((!IsShXaddDiff && HasShXadd && !HasVendorXqciac) ||
15373 (IsShXaddDiff && !HasShXadd && HasVendorXqciac))
15374 return SDValue();
15375
15376 // Skip if QC_SHLADD is not applicable.
15377 if (Diff == 0 || Diff > 31)
15378 return SDValue();
15379
15380 // Build nodes.
15381 SDLoc DL(N);
15382 int64_t Bits = std::min(C0, C1);
15383 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
15384 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
15385 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
15386 DAG.getConstant(Diff, DL, VT), NS);
15387 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
15388}
15389
15390// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
15391// or 3.
15392static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
15393 SelectionDAG &DAG) {
15394 using namespace llvm::SDPatternMatch;
15395
15396 // Looking for a reg-reg add and not an addi.
15397 if (isa<ConstantSDNode>(N->getOperand(1)))
15398 return SDValue();
15399
15400 // Based on testing it seems that performance degrades if the ADDI has
15401 // more than 2 uses.
15402 if (AddI->use_size() > 2)
15403 return SDValue();
15404
15405 APInt AddVal;
15406 SDValue SHLVal;
15407 if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
15408 return SDValue();
15409
15410 APInt VShift;
15411 if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
15412 return SDValue();
15413
15414 if (VShift.slt(1) || VShift.sgt(3))
15415 return SDValue();
15416
15417 SDLoc DL(N);
15418 EVT VT = N->getValueType(0);
15419 // The shift must be positive but the add can be signed.
15420 uint64_t ShlConst = VShift.getZExtValue();
15421 int64_t AddConst = AddVal.getSExtValue();
15422
15423 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
15424 DAG.getConstant(ShlConst, DL, VT), Other);
15425 return DAG.getNode(ISD::ADD, DL, VT, SHADD,
15426 DAG.getSignedConstant(AddConst, DL, VT));
15427}
15428
15429// Optimize (add (add (shl x, c0), c1), y) ->
15430// (ADDI (SH*ADD y, x), c1), if c0 equals 1, 2, or 3.
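// For example, with c0 = 2 and c1 = 100, ((x << 2) + 100) + y is reassociated
// to ((x << 2) + y) + 100, so the shift and register add become a single SH2ADD
// and the constant is applied by an ADDI.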
15431static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
15432 const RISCVSubtarget &Subtarget) {
15433 // Perform this optimization only when the Zba extension is enabled.
15434 if (!ReassocShlAddiAdd || !Subtarget.hasStdExtZba())
15435 return SDValue();
15436
15437 // Skip for vector types and larger types.
15438 EVT VT = N->getValueType(0);
15439 if (VT != Subtarget.getXLenVT())
15440 return SDValue();
15441
15442 SDValue AddI = N->getOperand(0);
15443 SDValue Other = N->getOperand(1);
15444 if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
15445 return V;
15446 if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
15447 return V;
15448 return SDValue();
15449}
15450
15451// Combine a constant select operand into its use:
15452//
15453// (and (select cond, -1, c), x)
15454// -> (select cond, x, (and x, c)) [AllOnes=1]
15455// (or (select cond, 0, c), x)
15456// -> (select cond, x, (or x, c)) [AllOnes=0]
15457// (xor (select cond, 0, c), x)
15458// -> (select cond, x, (xor x, c)) [AllOnes=0]
15459// (add (select cond, 0, c), x)
15460// -> (select cond, x, (add x, c)) [AllOnes=0]
15461// (sub x, (select cond, 0, c))
15462// -> (select cond, x, (sub x, c)) [AllOnes=0]
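// For example, with c = 4:
//   (add (select cond, 0, 4), x) -> (select cond, x, (add x, 4))
// i.e. the binary op is folded into the non-identity arm of the select.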
15463static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
15464 SelectionDAG &DAG, bool AllOnes,
15465 const RISCVSubtarget &Subtarget) {
15466 EVT VT = N->getValueType(0);
15467
15468 // Skip vectors.
15469 if (VT.isVector())
15470 return SDValue();
15471
15472 if (!Subtarget.hasConditionalMoveFusion()) {
15473 // (select cond, x, (and x, c)) has custom lowering with Zicond.
15474 if ((!Subtarget.hasStdExtZicond() &&
15475 !Subtarget.hasVendorXVentanaCondOps()) ||
15476 N->getOpcode() != ISD::AND)
15477 return SDValue();
15478
15479 // Maybe harmful when the condition code has multiple uses.
15480 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
15481 return SDValue();
15482
15483 // Maybe harmful when VT is wider than XLen.
15484 if (VT.getSizeInBits() > Subtarget.getXLen())
15485 return SDValue();
15486 }
15487
15488 if ((Slct.getOpcode() != ISD::SELECT &&
15489 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
15490 !Slct.hasOneUse())
15491 return SDValue();
15492
15493 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
15494 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
15495 };
15496
15497 bool SwapSelectOps;
15498 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
15499 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
15500 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
15501 SDValue NonConstantVal;
15502 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
15503 SwapSelectOps = false;
15504 NonConstantVal = FalseVal;
15505 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
15506 SwapSelectOps = true;
15507 NonConstantVal = TrueVal;
15508 } else
15509 return SDValue();
15510
15511 // Slct is now known to be the desired identity constant when CC is true.
15512 TrueVal = OtherOp;
15513 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
15514 // Unless SwapSelectOps says the condition should be false.
15515 if (SwapSelectOps)
15516 std::swap(TrueVal, FalseVal);
15517
15518 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
15519 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
15520 {Slct.getOperand(0), Slct.getOperand(1),
15521 Slct.getOperand(2), TrueVal, FalseVal});
15522
15523 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
15524 {Slct.getOperand(0), TrueVal, FalseVal});
15525}
15526
15527// Attempt combineSelectAndUse on each operand of a commutative operator N.
15528static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
15529 bool AllOnes,
15530 const RISCVSubtarget &Subtarget) {
15531 SDValue N0 = N->getOperand(0);
15532 SDValue N1 = N->getOperand(1);
15533 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
15534 return Result;
15535 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
15536 return Result;
15537 return SDValue();
15538}
15539
15540// Transform (add (mul x, c0), c1) ->
15541// (add (mul (add x, c1/c0), c0), c1%c0).
15542// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
15543// that should be excluded is when c0*(c1/c0) is simm12, which will lead
15544// to an infinite loop in DAGCombine if transformed.
15545// Or transform (add (mul x, c0), c1) ->
15546// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
15547// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
15548// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
15549// lead to an infinite loop in DAGCombine if transformed.
15550// Or transform (add (mul x, c0), c1) ->
15551// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
15552// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
15553// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
15554// lead to an infinite loop in DAGCombine if transformed.
15555// Or transform (add (mul x, c0), c1) ->
15556// (mul (add x, c1/c0), c0).
15557// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
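// For example, c0 = 100 and c1 = 8990: c1 is not a simm12, but c1/c0 = 89 and
// c1%c0 = 90 are, and c0*(c1/c0) = 8900 is not, so
//   (add (mul x, 100), 8990) -> (add (mul (add x, 89), 100), 90).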
15558static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
15559 const RISCVSubtarget &Subtarget) {
15560 // Skip for vector types and larger types.
15561 EVT VT = N->getValueType(0);
15562 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15563 return SDValue();
15564 // The first operand node must be a MUL and have no other use.
15565 SDValue N0 = N->getOperand(0);
15566 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
15567 return SDValue();
15568 // Check if c0 and c1 match above conditions.
15569 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15570 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15571 if (!N0C || !N1C)
15572 return SDValue();
15573 // If N0C has multiple uses it's possible one of the cases in
15574 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
15575 // in an infinite loop.
15576 if (!N0C->hasOneUse())
15577 return SDValue();
15578 int64_t C0 = N0C->getSExtValue();
15579 int64_t C1 = N1C->getSExtValue();
15580 int64_t CA, CB;
15581 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
15582 return SDValue();
15583 // Search for proper CA (non-zero) and CB that both are simm12.
15584 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
15585 !isInt<12>(C0 * (C1 / C0))) {
15586 CA = C1 / C0;
15587 CB = C1 % C0;
15588 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
15589 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
15590 CA = C1 / C0 + 1;
15591 CB = C1 % C0 - C0;
15592 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
15593 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
15594 CA = C1 / C0 - 1;
15595 CB = C1 % C0 + C0;
15596 } else
15597 return SDValue();
15598 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
15599 SDLoc DL(N);
15600 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
15601 DAG.getSignedConstant(CA, DL, VT));
15602 SDValue New1 =
15603 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
15604 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
15605}
15606
15607// add (zext, zext) -> zext (add (zext, zext))
15608// sub (zext, zext) -> sext (sub (zext, zext))
15609// mul (zext, zext) -> zext (mul (zext, zext))
15610// sdiv (zext, zext) -> zext (sdiv (zext, zext))
15611// udiv (zext, zext) -> zext (udiv (zext, zext))
15612// srem (zext, zext) -> zext (srem (zext, zext))
15613// urem (zext, zext) -> zext (urem (zext, zext))
15614//
15615// where the sum of the extend widths match, and the range of the bin op
15616// fits inside the width of the narrower bin op. (For profitability on rvv, we
15617// use a power of two for both inner and outer extend.)
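// For example:
//   add (zext v4i8 a to v4i32), (zext v4i8 b to v4i32)
//     -> zext (add (zext v4i8 a to v4i16), (zext v4i8 b to v4i16)) to v4i32
// since an i8 + i8 sum always fits in 16 bits and the inner add can use a
// narrower element type.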
15618static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
15619
15620 EVT VT = N->getValueType(0);
15621 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
15622 return SDValue();
15623
15624 SDValue N0 = N->getOperand(0);
15625 SDValue N1 = N->getOperand(1);
15626 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
15627 return SDValue();
15628 if (!N0.hasOneUse() || !N1.hasOneUse())
15629 return SDValue();
15630
15631 SDValue Src0 = N0.getOperand(0);
15632 SDValue Src1 = N1.getOperand(0);
15633 EVT SrcVT = Src0.getValueType();
15634 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
15635 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
15636 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
15637 return SDValue();
15638
15639 LLVMContext &C = *DAG.getContext();
15640 EVT ElemVT = EVT::getIntegerVT(C, VT.getScalarSizeInBits() / 2);
15641 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
15642
15643 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
15644 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
15645
15646 // Src0 and Src1 are zero extended, so they're always positive if signed.
15647 //
15648 // sub can produce a negative from two positive operands, so it needs sign
15649 // extended. Other nodes produce a positive from two positive operands, so
15650 // zero extend instead.
15651 unsigned OuterExtend =
15652 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
15653
15654 return DAG.getNode(
15655 OuterExtend, SDLoc(N), VT,
15656 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
15657}
15658
15659// Try to turn (add (xor bool, 1) -1) into (neg bool).
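// For a boolean b in {0, 1}: (b ^ 1) + (-1) == (1 - b) - 1 == -b.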
15660static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
15661 SDValue N0 = N->getOperand(0);
15662 SDValue N1 = N->getOperand(1);
15663 EVT VT = N->getValueType(0);
15664 SDLoc DL(N);
15665
15666 // RHS should be -1.
15667 if (!isAllOnesConstant(N1))
15668 return SDValue();
15669
15670 // Look for (xor X, 1).
15671 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
15672 return SDValue();
15673
15674 // First xor input should be 0 or 1.
15675 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15676 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
15677 return SDValue();
15678
15679 // Emit a negate of the setcc.
15680 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
15681 N0.getOperand(0));
15682}
15683
15684static SDValue performADDCombine(SDNode *N,
15685 TargetLowering::DAGCombinerInfo &DCI,
15686 const RISCVSubtarget &Subtarget) {
15687 SelectionDAG &DAG = DCI.DAG;
15688 if (SDValue V = combineAddOfBooleanXor(N, DAG))
15689 return V;
15690 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
15691 return V;
15692 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
15693 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
15694 return V;
15695 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
15696 return V;
15697 }
15698 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15699 return V;
15700 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15701 return V;
15702 if (SDValue V = combineBinOpOfZExt(N, DAG))
15703 return V;
15704
15705 // fold (add (select lhs, rhs, cc, 0, y), x) ->
15706 // (select lhs, rhs, cc, x, (add x, y))
15707 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
15708}
15709
15710// Try to turn a sub boolean RHS and constant LHS into an addi.
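// For example, (sub 5, (setcc x, y, eq)) -> (add (setcc x, y, ne), 4), because
// for a boolean b, 5 - b == (1 - b) + 4 and (1 - b) is the inverted setcc.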
15711static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
15712 SDValue N0 = N->getOperand(0);
15713 SDValue N1 = N->getOperand(1);
15714 EVT VT = N->getValueType(0);
15715 SDLoc DL(N);
15716
15717 // Require a constant LHS.
15718 auto *N0C = dyn_cast<ConstantSDNode>(N0);
15719 if (!N0C)
15720 return SDValue();
15721
15722 // All our optimizations involve subtracting 1 from the immediate and forming
15723 // an ADDI. Make sure the new immediate is valid for an ADDI.
15724 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
15725 if (!ImmValMinus1.isSignedIntN(12))
15726 return SDValue();
15727
15728 SDValue NewLHS;
15729 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
15730 // (sub constant, (setcc x, y, eq/neq)) ->
15731 // (add (setcc x, y, neq/eq), constant - 1)
15732 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15733 EVT SetCCOpVT = N1.getOperand(0).getValueType();
15734 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
15735 return SDValue();
15736 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15737 NewLHS =
15738 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
15739 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
15740 N1.getOperand(0).getOpcode() == ISD::SETCC) {
15741 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
15742 // Since setcc returns a bool the xor is equivalent to 1-setcc.
15743 NewLHS = N1.getOperand(0);
15744 } else
15745 return SDValue();
15746
15747 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
15748 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
15749}
15750
15751// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
15752// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
15753// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
15754// valid with Y=3, while 0b0000_1000_0000_0100 is not.
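// For example, with Y = 3 (only bit 3 may be set in each byte):
//   (sub (shl X, 5), (srl X, 3)) -> (orc.b X)
// since each byte of the difference is 0xff when its bit 3 is set and 0
// otherwise, which is exactly what orc.b produces for such an X.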
15755static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
15756 const RISCVSubtarget &Subtarget) {
15757 if (!Subtarget.hasStdExtZbb())
15758 return SDValue();
15759
15760 EVT VT = N->getValueType(0);
15761
15762 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
15763 return SDValue();
15764
15765 SDValue N0 = N->getOperand(0);
15766 SDValue N1 = N->getOperand(1);
15767
15768 if (N0->getOpcode() != ISD::SHL)
15769 return SDValue();
15770
15771 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
15772 if (!ShAmtCLeft)
15773 return SDValue();
15774 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
15775
15776 if (ShiftedAmount >= 8)
15777 return SDValue();
15778
15779 SDValue LeftShiftOperand = N0->getOperand(0);
15780 SDValue RightShiftOperand = N1;
15781
15782 if (ShiftedAmount != 0) { // Right operand must be a right shift.
15783 if (N1->getOpcode() != ISD::SRL)
15784 return SDValue();
15785 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
15786 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
15787 return SDValue();
15788 RightShiftOperand = N1.getOperand(0);
15789 }
15790
15791 // At least one shift should have a single use.
15792 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
15793 return SDValue();
15794
15795 if (LeftShiftOperand != RightShiftOperand)
15796 return SDValue();
15797
15798 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
15799 Mask <<= ShiftedAmount;
15800 // Check that X has indeed the right shape (only the Y-th bit can be set in
15801 // every byte).
15802 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
15803 return SDValue();
15804
15805 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
15806}
15807
15807
15808static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
15809 const RISCVSubtarget &Subtarget) {
15810 if (SDValue V = combineSubOfBoolean(N, DAG))
15811 return V;
15812
15813 EVT VT = N->getValueType(0);
15814 SDValue N0 = N->getOperand(0);
15815 SDValue N1 = N->getOperand(1);
15816 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
15817 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
15818 isNullConstant(N1.getOperand(1))) {
15819 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15820 if (CCVal == ISD::SETLT) {
15821 SDLoc DL(N);
15822 unsigned ShAmt = N0.getValueSizeInBits() - 1;
15823 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
15824 DAG.getConstant(ShAmt, DL, VT));
15825 }
15826 }
15827
15828 if (SDValue V = combineBinOpOfZExt(N, DAG))
15829 return V;
15830 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
15831 return V;
15832
15833 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
15834 // (select lhs, rhs, cc, x, (sub x, y))
15835 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
15836}
15837
15838// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
15839// Legalizing setcc can introduce xors like this. Doing this transform reduces
15840// the number of xors and may allow the xor to fold into a branch condition.
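// For 0/1 values this is DeMorgan's law:
//   (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1)
//   (or (xor X, 1), (xor Y, 1)) -> (xor (and X, Y), 1)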
15841static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
15842 SDValue N0 = N->getOperand(0);
15843 SDValue N1 = N->getOperand(1);
15844 bool IsAnd = N->getOpcode() == ISD::AND;
15845
15846 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
15847 return SDValue();
15848
15849 if (!N0.hasOneUse() || !N1.hasOneUse())
15850 return SDValue();
15851
15852 SDValue N01 = N0.getOperand(1);
15853 SDValue N11 = N1.getOperand(1);
15854
15855 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
15856 // (xor X, -1) based on the upper bits of the other operand being 0. If the
15857 // operation is And, allow one of the Xors to use -1.
15858 if (isOneConstant(N01)) {
15859 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
15860 return SDValue();
15861 } else if (isOneConstant(N11)) {
15862 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
15863 if (!(IsAnd && isAllOnesConstant(N01)))
15864 return SDValue();
15865 } else
15866 return SDValue();
15867
15868 EVT VT = N->getValueType(0);
15869
15870 SDValue N00 = N0.getOperand(0);
15871 SDValue N10 = N1.getOperand(0);
15872
15873 // The LHS of the xors needs to be 0/1.
15874 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15875 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
15876 return SDValue();
15877
15878 // Invert the opcode and insert a new xor.
15879 SDLoc DL(N);
15880 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15881 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
15882 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
15883}
15884
15885// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
15886// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
15887// value to an unsigned value. This will be lowered to vmax and series of
15888// vnclipu instructions later. This can be extended to truncated types
15889// other than i8 by replacing 256 and 255 with the equivalent constants for the
15890// type.
15891static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
15892 EVT VT = N->getValueType(0);
15893 SDValue N0 = N->getOperand(0);
15894 EVT SrcVT = N0.getValueType();
15895
15896 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15897 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
15898 return SDValue();
15899
15900 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
15901 return SDValue();
15902
15903 SDValue Cond = N0.getOperand(0);
15904 SDValue True = N0.getOperand(1);
15905 SDValue False = N0.getOperand(2);
15906
15907 if (Cond.getOpcode() != ISD::SETCC)
15908 return SDValue();
15909
15910 // FIXME: Support the version of this pattern with the select operands
15911 // swapped.
15912 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15913 if (CCVal != ISD::SETULT)
15914 return SDValue();
15915
15916 SDValue CondLHS = Cond.getOperand(0);
15917 SDValue CondRHS = Cond.getOperand(1);
15918
15919 if (CondLHS != True)
15920 return SDValue();
15921
15922 unsigned ScalarBits = VT.getScalarSizeInBits();
15923
15924 // FIXME: Support other constants.
15925 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
15926 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
15927 return SDValue();
15928
15929 if (False.getOpcode() != ISD::SIGN_EXTEND)
15930 return SDValue();
15931
15932 False = False.getOperand(0);
15933
15934 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
15935 return SDValue();
15936
15937 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
15938 if (!FalseRHSC || !FalseRHSC->isZero())
15939 return SDValue();
15940
15941 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
15942 if (CCVal2 != ISD::SETGT)
15943 return SDValue();
15944
15945 // Emit the signed to unsigned saturation pattern.
15946 SDLoc DL(N);
15947 SDValue Max =
15948 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
15949 SDValue Min =
15950 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
15951 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
15952 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
15953}
15954
15955static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
15956 const RISCVSubtarget &Subtarget) {
15957 SDValue N0 = N->getOperand(0);
15958 EVT VT = N->getValueType(0);
15959
15960 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
15961 // extending X. This is safe since we only need the LSB after the shift and
15962 // shift amounts larger than 31 would produce poison. If we wait until
15963 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
15964 // to use a BEXT instruction.
15965 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
15966 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
15967 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
15968 SDLoc DL(N0);
15969 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
15970 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
15971 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
15972 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
15973 }
15974
15975 return combineTruncSelectToSMaxUSat(N, DAG);
15976}
15977
15978// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
15979// truncation. But RVV doesn't have truncation instructions for more than twice
15980// the bitwidth.
15981//
15982// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
15983//
15984// vsetvli a0, zero, e32, m2, ta, ma
15985// vnsrl.wi v12, v8, 0
15986// vsetvli zero, zero, e16, m1, ta, ma
15987// vnsrl.wi v8, v12, 0
15988// vsetvli zero, zero, e8, mf2, ta, ma
15989// vnsrl.wi v8, v8, 0
15990//
15991// So reverse the combine so we generate a vmseq/vmsne again:
15992//
15993// and (lshr (trunc X), ShAmt), 1
15994// -->
15995// zext (icmp ne (and X, (1 << ShAmt)), 0)
15996//
15997// and (lshr (not (trunc X)), ShAmt), 1
15998// -->
15999// zext (icmp eq (and X, (1 << ShAmt)), 0)
16000static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
16001 const RISCVSubtarget &Subtarget) {
16002 using namespace SDPatternMatch;
16003 SDLoc DL(N);
16004
16005 if (!Subtarget.hasVInstructions())
16006 return SDValue();
16007
16008 EVT VT = N->getValueType(0);
16009 if (!VT.isVector())
16010 return SDValue();
16011
16012 APInt ShAmt;
16013 SDValue Inner;
16014 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
16015 m_One())))
16016 return SDValue();
16017
16018 SDValue X;
16019 bool IsNot;
16020 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
16021 IsNot = true;
16022 else if (sd_match(Inner, m_Trunc(m_Value(X))))
16023 IsNot = false;
16024 else
16025 return SDValue();
16026
16027 EVT WideVT = X.getValueType();
16028 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
16029 return SDValue();
16030
16031 SDValue Res =
16032 DAG.getNode(ISD::AND, DL, WideVT, X,
16033 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
16034 Res = DAG.getSetCC(DL,
16035 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16036 WideVT.getVectorElementCount()),
16037 Res, DAG.getConstant(0, DL, WideVT),
16038 IsNot ? ISD::SETEQ : ISD::SETNE);
16039 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
16040}
16041
16042static SDValue reduceANDOfAtomicLoad(SDNode *N,
16043 TargetLowering::DAGCombinerInfo &DCI) {
16044 SelectionDAG &DAG = DCI.DAG;
16045 if (N->getOpcode() != ISD::AND)
16046 return SDValue();
16047
16048 SDValue N0 = N->getOperand(0);
16049 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
16050 return SDValue();
16051 if (!N0.hasOneUse())
16052 return SDValue();
16053
16054 AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
16055 if (ALoad->getOrdering() != AtomicOrdering::Monotonic)
16056 return SDValue();
16057
16058 EVT LoadedVT = ALoad->getMemoryVT();
16059 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
16060 if (!MaskConst)
16061 return SDValue();
16062 uint64_t Mask = MaskConst->getZExtValue();
16063 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
16064 if (Mask != ExpectedMask)
16065 return SDValue();
16066
16067 SDValue ZextLoad = DAG.getAtomicLoad(
16068 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
16069 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
16070 DCI.CombineTo(N, ZextLoad);
16071 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
16072 DCI.recursivelyDeleteUnusedNodes(N0.getNode());
16073 return SDValue(N, 0);
16074}
16075
16076// Combines two comparison operations and a logic operation into one selection
16077// operation (min, max) and a logic operation. Returns the newly constructed
16078// node if the conditions for the optimization are satisfied.
16079static SDValue performANDCombine(SDNode *N,
16080 TargetLowering::DAGCombinerInfo &DCI,
16081 const RISCVSubtarget &Subtarget) {
16082 SelectionDAG &DAG = DCI.DAG;
16083
16084 SDValue N0 = N->getOperand(0);
16085 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
16086 // extending X. This is safe since we only need the LSB after the shift and
16087 // shift amounts larger than 31 would produce poison. If we wait until
16088 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16089 // to use a BEXT instruction.
16090 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16091 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
16092 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
16093 N0.hasOneUse()) {
16094 SDLoc DL(N);
16095 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16096 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16097 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16098 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
16099 DAG.getConstant(1, DL, MVT::i64));
16100 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16101 }
16102
16103 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
16104 return V;
16105
16106 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16107 return V;
16108 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16109 return V;
16110 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
16111 return V;
16112
16113 if (DCI.isAfterLegalizeDAG())
16114 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16115 return V;
16116
16117 // fold (and (select lhs, rhs, cc, -1, y), x) ->
16118 // (select lhs, rhs, cc, x, (and x, y))
16119 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
16120}
16121
16122// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
16123// FIXME: Generalize to other binary operators with same operand.
16124static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
16125 SelectionDAG &DAG) {
16126 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
16127
16128 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
16129 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
16130 !N0.hasOneUse() || !N1.hasOneUse())
16131 return SDValue();
16132
16133 // Should have the same condition.
16134 SDValue Cond = N0.getOperand(1);
16135 if (Cond != N1.getOperand(1))
16136 return SDValue();
16137
16138 SDValue TrueV = N0.getOperand(0);
16139 SDValue FalseV = N1.getOperand(0);
16140
16141 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
16142 TrueV.getOperand(1) != FalseV.getOperand(1) ||
16143 !isOneConstant(TrueV.getOperand(1)) ||
16144 !TrueV.hasOneUse() || !FalseV.hasOneUse())
16145 return SDValue();
16146
16147 EVT VT = N->getValueType(0);
16148 SDLoc DL(N);
16149
16150 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
16151 Cond);
16152 SDValue NewN1 =
16153 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
16154 SDValue NewOr =
16155 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
16156 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
16157}
16158
16159static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16160 const RISCVSubtarget &Subtarget) {
16161 SelectionDAG &DAG = DCI.DAG;
16162
16163 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16164 return V;
16165 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16166 return V;
16167
16168 if (DCI.isAfterLegalizeDAG())
16169 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16170 return V;
16171
16172 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
16173 // We may be able to pull a common operation out of the true and false value.
16174 SDValue N0 = N->getOperand(0);
16175 SDValue N1 = N->getOperand(1);
16176 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
16177 return V;
16178 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
16179 return V;
16180
16181 // fold (or (select cond, 0, y), x) ->
16182 // (select cond, x, (or x, y))
16183 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16184}
16185
16186static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
16187 const RISCVSubtarget &Subtarget) {
16188 SDValue N0 = N->getOperand(0);
16189 SDValue N1 = N->getOperand(1);
16190
16191 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
16192// (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
16193// RISCVISD::SLLW and we can't recover it to use a BSET instruction.
16194 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16195 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
16196 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
16197 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16198 SDLoc DL(N);
16199 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16200 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16201 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
16202 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
16203 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16204 }
16205
16206 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
16207 // NOTE: Assumes ROL being legal means ROLW is legal.
16208 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16209 if (N0.getOpcode() == RISCVISD::SLLW &&
16210 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
16211 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
16212 SDLoc DL(N);
16213 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
16214 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
16215 }
16216
16217 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
16218 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
16219 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
16220 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
16221 if (ConstN00 && CC == ISD::SETLT) {
16222 EVT VT = N0.getValueType();
16223 SDLoc DL(N0);
16224 const APInt &Imm = ConstN00->getAPIntValue();
16225 if ((Imm + 1).isSignedIntN(12))
16226 return DAG.getSetCC(DL, VT, N0.getOperand(1),
16227 DAG.getConstant(Imm + 1, DL, VT), CC);
16228 }
16229 }
16230
16231 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16232 return V;
16233 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16234 return V;
16235
16236 // fold (xor (select cond, 0, y), x) ->
16237 // (select cond, x, (xor x, y))
16238 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16239}
16240
16241// Try to expand a multiply to a sequence of shifts and adds/subs,
16242// for a machine without a native mul instruction.
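// For example, MulAmt = 7 has the non-adjacent form 8 - 1, giving
//   (sub (shl X, 3), X)
// and MulAmt = 25 (NAF 32 - 8 + 1) expands to three shift/add-sub steps.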
16243static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
16244 uint64_t MulAmt) {
16245 SDLoc DL(N);
16246 EVT VT = N->getValueType(0);
16247 const unsigned BitWidth = VT.getFixedSizeInBits();
16248
16249 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
16250 SDValue N0 = N->getOperand(0);
16251
16252 // Find the Non-adjacent form of the multiplier.
16253 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
16254 if (E & 1) {
16255 bool IsAdd = (E & 3) == 1;
16256 E -= IsAdd ? 1 : -1;
16257 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
16258 DAG.getShiftAmountConstant(I, VT, DL));
16259 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
16260 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
16261 }
16262 }
16263
16264 return Result;
16265}
16266
16267// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
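// For example, X * 24 = X * (32 - 8) -> (sub (shl X, 5), (shl X, 3)) and
// X * 40 = X * (32 + 8) -> (add (shl X, 5), (shl X, 3)).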
16268static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
16269 uint64_t MulAmt) {
16270 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
16271 ISD::NodeType Op;
16272 uint64_t ShiftAmt1;
16273 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
16274 Op = ISD::SUB;
16275 ShiftAmt1 = MulAmt + MulAmtLowBit;
16276 } else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
16277 Op = ISD::ADD;
16278 ShiftAmt1 = MulAmt - MulAmtLowBit;
16279 } else {
16280 return SDValue();
16281 }
16282 EVT VT = N->getValueType(0);
16283 SDLoc DL(N);
16284 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16285 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
16286 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16287 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
16288 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
16289}
16290
16291// Try to expand a scalar multiply to a faster sequence.
16292static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
16293 TargetLowering::DAGCombinerInfo &DCI,
16294 const RISCVSubtarget &Subtarget) {
16295
16296 EVT VT = N->getValueType(0);
16297
16298 // LI + MUL is usually smaller than the alternative sequence.
16299 if (DAG.getMachineFunction().getFunction().hasMinSize())
16300 return SDValue();
16301
16302 if (VT != Subtarget.getXLenVT())
16303 return SDValue();
16304
16305 bool ShouldExpandMul =
16306 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
16307 !Subtarget.hasStdExtZmmul();
16308 if (!ShouldExpandMul)
16309 return SDValue();
16310
16311 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
16312 if (!CNode)
16313 return SDValue();
16314 uint64_t MulAmt = CNode->getZExtValue();
16315
16316 // Don't do this if the Xqciac extension is enabled and MulAmt is a simm12.
16317 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
16318 return SDValue();
16319
16320 const bool HasShlAdd = Subtarget.hasStdExtZba() ||
16321 Subtarget.hasVendorXTHeadBa() ||
16322 Subtarget.hasVendorXAndesPerf();
16323
16324 // WARNING: The code below is knowingly incorrect with regard to undef semantics.
16325 // We're adding additional uses of X here, and in principle, we should be freezing
16326 // X before doing so. However, adding freeze here causes real regressions, and no
16327 // other target properly freezes X in these cases either.
16328 SDValue X = N->getOperand(0);
16329
16330 if (HasShlAdd) {
16331 for (uint64_t Divisor : {3, 5, 9}) {
16332 if (MulAmt % Divisor != 0)
16333 continue;
16334 uint64_t MulAmt2 = MulAmt / Divisor;
16335 // 3/5/9 * 2^N -> shl (shXadd X, X), N
16336 if (isPowerOf2_64(MulAmt2)) {
16337 SDLoc DL(N);
16338 SDValue X = N->getOperand(0);
16339 // Put the shift first if we can fold a zext into the
16340 // shift forming a slli.uw.
16341 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
16342 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
16343 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
16344 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16345 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
16346 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
16347 Shl);
16348 }
16349 // Otherwise, put the shl second so that it can fold with the following
16350 // instructions (e.g. sext or add).
16351 SDValue Mul359 =
16352 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16353 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16354 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
16355 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16356 }
16357
16358 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
16359 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
16360 SDLoc DL(N);
16361 SDValue Mul359 =
16362 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16363 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16364 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16365 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
16366 Mul359);
16367 }
16368 }
16369
16370 // If this is a power 2 + 2/4/8, we can use a shift followed by a single
16371 // shXadd. First check if this is a sum of two powers of 2 because that's
16372 // easy. Then count how many zeros are up to the first bit.
16373 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
16374 unsigned ScaleShift = llvm::countr_zero(MulAmt);
16375 if (ScaleShift >= 1 && ScaleShift < 4) {
16376 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
16377 SDLoc DL(N);
16378 SDValue Shift1 =
16379 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16380 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16381 DAG.getConstant(ScaleShift, DL, VT), Shift1);
16382 }
16383 }
16384
16385 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
16386 // This is the two instruction form, there are also three instruction
16387 // variants we could implement. e.g.
16388 // (2^(1,2,3) * 3,5,9 + 1) << C2
16389 // 2^(C1>3) * 3,5,9 +/- 1
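    // For example, MulAmt = 11 = (5 << 1) + 1 matches Divisor = 5, TZ = 1:
    // (SH1ADD (SH2ADD X, X), X) computes ((X * 5) << 1) + X = X * 11.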
16390 for (uint64_t Divisor : {3, 5, 9}) {
16391 uint64_t C = MulAmt - 1;
16392 if (C <= Divisor)
16393 continue;
16394 unsigned TZ = llvm::countr_zero(C);
16395 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
16396 SDLoc DL(N);
16397 SDValue Mul359 =
16398 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16399 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16400 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16401 DAG.getConstant(TZ, DL, VT), X);
16402 }
16403 }
16404
16405 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
16406 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
16407 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
16408 if (ScaleShift >= 1 && ScaleShift < 4) {
16409 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
16410 SDLoc DL(N);
16411 SDValue Shift1 =
16412 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16413 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
16414 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16415 DAG.getConstant(ScaleShift, DL, VT), X));
16416 }
16417 }
16418
16419 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
16420 for (uint64_t Offset : {3, 5, 9}) {
16421 if (isPowerOf2_64(MulAmt + Offset)) {
16422 unsigned ShAmt = Log2_64(MulAmt + Offset);
16423 if (ShAmt >= VT.getSizeInBits())
16424 continue;
16425 SDLoc DL(N);
16426 SDValue Shift1 =
16427 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
16428 SDValue Mul359 =
16429 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16430 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
16431 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
16432 }
16433 }
16434
16435 for (uint64_t Divisor : {3, 5, 9}) {
16436 if (MulAmt % Divisor != 0)
16437 continue;
16438 uint64_t MulAmt2 = MulAmt / Divisor;
16439 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
16440 // of 25 which happen to be quite common.
16441 for (uint64_t Divisor2 : {3, 5, 9}) {
16442 if (MulAmt2 % Divisor2 != 0)
16443 continue;
16444 uint64_t MulAmt3 = MulAmt2 / Divisor2;
16445 if (isPowerOf2_64(MulAmt3)) {
16446 SDLoc DL(N);
16447 SDValue Mul359A =
16448 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16449 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16450 SDValue Mul359B = DAG.getNode(
16451 RISCVISD::SHL_ADD, DL, VT, Mul359A,
16452 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
16453 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
16454 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
16455 }
16456 }
16457 }
16458 }
16459
16460 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
16461 return V;
16462
16463 if (!Subtarget.hasStdExtZmmul())
16464 return expandMulToNAFSequence(N, DAG, MulAmt);
16465
16466 return SDValue();
16467}
16468
16469// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
16470// (bitcast (sra (v2Xi16 (bitcast X)), 15))
16471// Same for other equivalent types with other equivalent constants.
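// Roughly: the AND keeps only bits 15 and 31 of each i32 lane, and multiplying
// by 0xffff smears each of those bits across its own 16-bit half, which is the
// same as an arithmetic shift right by 15 on the two i16 halves.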
16472static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
16473 EVT VT = N->getValueType(0);
16474 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16475
16476 // Do this for legal vectors unless they are i1 or i8 vectors.
16477 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
16478 return SDValue();
16479
16480 if (N->getOperand(0).getOpcode() != ISD::AND ||
16481 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
16482 return SDValue();
16483
16484 SDValue And = N->getOperand(0);
16485 SDValue Srl = And.getOperand(0);
16486
16487 APInt V1, V2, V3;
16488 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
16489 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
16490 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
16491 return SDValue();
16492
16493 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
16494 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
16495 V3 != (HalfSize - 1))
16496 return SDValue();
16497
16498 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
16499 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
16500 VT.getVectorElementCount() * 2);
16501 SDLoc DL(N);
16502 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
16503 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
16504 DAG.getConstant(HalfSize - 1, DL, HalfVT));
16505 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
16506}
16507
16508static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
16509 TargetLowering::DAGCombinerInfo &DCI,
16510 const RISCVSubtarget &Subtarget) {
16511 EVT VT = N->getValueType(0);
16512 if (!VT.isVector())
16513 return expandMul(N, DAG, DCI, Subtarget);
16514
16515 SDLoc DL(N);
16516 SDValue N0 = N->getOperand(0);
16517 SDValue N1 = N->getOperand(1);
16518 SDValue MulOper;
16519 unsigned AddSubOpc;
16520
16521 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
16522 // (mul x, add (y, 1)) -> (add x, (mul x, y))
16523 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
16524 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
16525 auto IsAddSubWith1 = [&](SDValue V) -> bool {
16526 AddSubOpc = V->getOpcode();
16527 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
16528 SDValue Opnd = V->getOperand(1);
16529 MulOper = V->getOperand(0);
16530 if (AddSubOpc == ISD::SUB)
16531 std::swap(Opnd, MulOper);
16532 if (isOneOrOneSplat(Opnd))
16533 return true;
16534 }
16535 return false;
16536 };
16537
16538 if (IsAddSubWith1(N0)) {
16539 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
16540 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
16541 }
16542
16543 if (IsAddSubWith1(N1)) {
16544 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
16545 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
16546 }
16547
16548 if (SDValue V = combineBinOpOfZExt(N, DAG))
16549 return V;
16550
16551 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
16552 return V;
16553
16554 return SDValue();
16555}
16556
16557/// According to the property that indexed load/store instructions zero-extend
16558/// their indices, try to narrow the type of the index operand.
16559static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
16560 if (isIndexTypeSigned(IndexType))
16561 return false;
16562
16563 if (!N->hasOneUse())
16564 return false;
16565
16566 EVT VT = N.getValueType();
16567 SDLoc DL(N);
16568
16569 // In general, what we're doing here is seeing if we can sink a truncate to
16570 // a smaller element type into the expression tree building our index.
16571 // TODO: We can generalize this and handle a bunch more cases if useful.
16572
16573 // Narrow a buildvector to the narrowest element type. This requires less
16574 // work and less register pressure at high LMUL, and creates smaller constants
16575 // which may be cheaper to materialize.
16576 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
16577 KnownBits Known = DAG.computeKnownBits(N);
16578 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
16579 LLVMContext &C = *DAG.getContext();
16580 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
16581 if (ResultVT.bitsLT(VT.getVectorElementType())) {
16582 N = DAG.getNode(ISD::TRUNCATE, DL,
16583 VT.changeVectorElementType(ResultVT), N);
16584 return true;
16585 }
16586 }
16587
16588 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
16589 if (N.getOpcode() != ISD::SHL)
16590 return false;
16591
16592 SDValue N0 = N.getOperand(0);
16593 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
16594 N0.getOpcode() != RISCVISD::VZEXT_VL)
16595 return false;
16596 if (!N0->hasOneUse())
16597 return false;
16598
16599 APInt ShAmt;
16600 SDValue N1 = N.getOperand(1);
16601 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
16602 return false;
16603
16604 SDValue Src = N0.getOperand(0);
16605 EVT SrcVT = Src.getValueType();
16606 unsigned SrcElen = SrcVT.getScalarSizeInBits();
16607 unsigned ShAmtV = ShAmt.getZExtValue();
16608 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
16609 NewElen = std::max(NewElen, 8U);
16610
16611 // Skip if NewElen is not narrower than the original extended type.
16612 if (NewElen >= N0.getValueType().getScalarSizeInBits())
16613 return false;
16614
16615 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
16616 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
16617
16618 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
16619 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
16620 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
16621 return true;
16622}
16623
16624/// Try to map an integer comparison with size > XLEN to vector instructions
16625/// before type legalization splits it up into chunks.
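/// For example, on RV64 with vector support an i128 equality compare can become
/// a v16i8 VP_SETCC (setne) feeding a VP_REDUCE_OR, followed by a scalar setcc
/// of the reduction result against zero.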
16626static SDValue
16627combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
16628 const SDLoc &DL, SelectionDAG &DAG,
16629 const RISCVSubtarget &Subtarget) {
16630 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
16631
16632 if (!Subtarget.hasVInstructions())
16633 return SDValue();
16634
16635 MVT XLenVT = Subtarget.getXLenVT();
16636 EVT OpVT = X.getValueType();
16637 // We're looking for an oversized integer equality comparison.
16638 if (!OpVT.isScalarInteger())
16639 return SDValue();
16640
16641 unsigned OpSize = OpVT.getSizeInBits();
16642 // The size should be larger than XLen and smaller than the maximum vector
16643 // size.
16644 if (OpSize <= Subtarget.getXLen() ||
16645 OpSize > Subtarget.getRealMinVLen() *
16646 Subtarget.getMaxLMULForFixedLengthVectors())
16647 return SDValue();
16648
16649 // Don't perform this combine if constructing the vector will be expensive.
16650 auto IsVectorBitCastCheap = [](SDValue X) {
16651 X = peekThroughBitcasts(X);
16652 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
16653 X.getOpcode() == ISD::LOAD;
16654 };
16655 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
16656 return SDValue();
16657
16658 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
16659 Attribute::NoImplicitFloat))
16660 return SDValue();
16661
16662 // Bail out for non-byte-sized types.
16663 if (!OpVT.isByteSized())
16664 return SDValue();
16665
16666 unsigned VecSize = OpSize / 8;
16667 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
16668 EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
16669
16670 SDValue VecX = DAG.getBitcast(VecVT, X);
16671 SDValue VecY = DAG.getBitcast(VecVT, Y);
16672 SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
16673 SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
16674
16675 SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
16676 DAG.getCondCode(ISD::SETNE), Mask, VL);
16677 return DAG.getSetCC(DL, VT,
16678 DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
16679 DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
16680 VL),
16681 DAG.getConstant(0, DL, XLenVT), CC);
16682}
16683
16684// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
16685// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
16686// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
16687// can become a sext.w instead of a shift pair.
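// For example, with C1 = 0x80000000, C1' = 0xffffffff80000000 (a single LUI):
//   (seteq (and X, 0xffffffff), 0x80000000)
//     -> (seteq (sext_inreg X, i32), 0xffffffff80000000).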
16688static SDValue performSETCCCombine(SDNode *N,
16689 TargetLowering::DAGCombinerInfo &DCI,
16690 const RISCVSubtarget &Subtarget) {
16691 SelectionDAG &DAG = DCI.DAG;
16692 SDLoc dl(N);
16693 SDValue N0 = N->getOperand(0);
16694 SDValue N1 = N->getOperand(1);
16695 EVT VT = N->getValueType(0);
16696 EVT OpVT = N0.getValueType();
16697
16698 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
16699 // Looking for an equality compare.
16700 if (!isIntEqualitySetCC(Cond))
16701 return SDValue();
16702
16703 if (SDValue V =
16704 combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
16705 return V;
16706
16707 // (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI.
16708 if (DCI.isAfterLegalizeDAG() && isNullConstant(N1) &&
16709 N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
16710 isa<ConstantSDNode>(N0.getOperand(1))) {
16711 const APInt &AndRHSC =
16712 cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
16713 if (!isInt<12>(AndRHSC.getSExtValue()) && AndRHSC.isNegatedPowerOf2()) {
16714 unsigned ShiftBits = AndRHSC.countr_zero();
16715 SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0),
16716 DAG.getConstant(ShiftBits, dl, VT));
16717 return DAG.getSetCC(dl, VT, Shift, N1, Cond);
16718 }
16719 }
16720
16721 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
16722 return SDValue();
16723
16724 // RHS needs to be a constant.
16725 auto *N1C = dyn_cast<ConstantSDNode>(N1);
16726 if (!N1C)
16727 return SDValue();
16728
16729 // LHS needs to be (and X, 0xffffffff).
16730 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
16731 !isa<ConstantSDNode>(N0.getOperand(1)) ||
16732 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
16733 return SDValue();
16734
16735 // Don't do this if the sign bit is provably zero, it will be turned back into
16736 // an AND.
16737 APInt SignMask = APInt::getOneBitSet(64, 31);
16738 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
16739 return SDValue();
16740
16741 const APInt &C1 = N1C->getAPIntValue();
16742
16743 // If the constant is larger than 2^32 - 1 it is impossible for both sides
16744 // to be equal.
16745 if (C1.getActiveBits() > 32)
16746 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
16747
16748 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
16749 N0.getOperand(0), DAG.getValueType(MVT::i32));
16750 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
16751 dl, OpVT), Cond);
16752}
16753
16754static SDValue
16755performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16756 const RISCVSubtarget &Subtarget) {
16757 SelectionDAG &DAG = DCI.DAG;
16758 SDValue Src = N->getOperand(0);
16759 EVT VT = N->getValueType(0);
16760 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16761 unsigned Opc = Src.getOpcode();
16762 SDLoc DL(N);
16763
16764 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
16765 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
16766 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
16767 Subtarget.hasStdExtZfhmin())
16768 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
16769
16770 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
16771 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
16772 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
16773 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
16774 return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
16775 Src.getOperand(1));
16776
16777 // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
16778 if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
16779 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
16780
16781 // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
16782 if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
16783 isAllOnesConstant(Src.getOperand(1)) &&
16784 Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
16785 return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
16786 DAG.getAllOnesConstant(DL, VT));
16787
16788 return SDValue();
16789}
16790
16791namespace {
16792// Forward declaration of the structure holding the necessary information to
16793// apply a combine.
16794struct CombineResult;
16795
16796enum ExtKind : uint8_t {
16797 ZExt = 1 << 0,
16798 SExt = 1 << 1,
16799 FPExt = 1 << 2,
16800 BF16Ext = 1 << 3
16801};
16802/// Helper class for folding sign/zero extensions.
16803/// In particular, this class is used for the following combines:
16804/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
16805/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
16806/// mul | mul_vl -> vwmul(u) | vwmul_su
16807/// shl | shl_vl -> vwsll
16808/// fadd -> vfwadd | vfwadd_w
16809/// fsub -> vfwsub | vfwsub_w
16810/// fmul -> vfwmul
16811/// An object of this class represents an operand of the operation we want to
16812/// combine.
16813/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
16814/// NodeExtensionHelper for `a` and one for `b`.
16815///
16816/// This class abstracts away how the extension is materialized and
16817/// how its number of users affect the combines.
16818///
16819/// In particular:
16820/// - VWADD_W is conceptually == add(op0, sext(op1))
16821/// - VWADDU_W == add(op0, zext(op1))
16822/// - VWSUB_W == sub(op0, sext(op1))
16823/// - VWSUBU_W == sub(op0, zext(op1))
16824/// - VFWADD_W == fadd(op0, fpext(op1))
16825/// - VFWSUB_W == fsub(op0, fpext(op1))
16826/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
16827/// zext|sext(smaller_value).
16828struct NodeExtensionHelper {
16829 /// Records if this operand is like being zero extended.
16830 bool SupportsZExt;
16831 /// Records if this operand is like being sign extended.
16832 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
16833 /// instance, a splat constant (e.g., 3), would support being both sign and
16834 /// zero extended.
16835 bool SupportsSExt;
16836 /// Records if this operand is like being floating point extended.
16837 bool SupportsFPExt;
16838 /// Records if this operand is extended from bf16.
16839 bool SupportsBF16Ext;
16840 /// This boolean captures whether we care if this operand would still be
16841 /// around after the folding happens.
16842 bool EnforceOneUse;
16843 /// Original value that this NodeExtensionHelper represents.
16844 SDValue OrigOperand;
16845
16846 /// Get the value feeding the extension or the value itself.
16847 /// E.g., for zext(a), this would return a.
16848 SDValue getSource() const {
16849 switch (OrigOperand.getOpcode()) {
16850 case ISD::ZERO_EXTEND:
16851 case ISD::SIGN_EXTEND:
16852 case RISCVISD::VSEXT_VL:
16853 case RISCVISD::VZEXT_VL:
16854 case RISCVISD::FP_EXTEND_VL:
16855 return OrigOperand.getOperand(0);
16856 default:
16857 return OrigOperand;
16858 }
16859 }
16860
16861 /// Check if this instance represents a splat.
16862 bool isSplat() const {
16863 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
16864 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
16865 }
16866
16867 /// Get the extended opcode.
16868 unsigned getExtOpc(ExtKind SupportsExt) const {
16869 switch (SupportsExt) {
16870 case ExtKind::SExt:
16871 return RISCVISD::VSEXT_VL;
16872 case ExtKind::ZExt:
16873 return RISCVISD::VZEXT_VL;
16874 case ExtKind::FPExt:
16875 case ExtKind::BF16Ext:
16876 return RISCVISD::FP_EXTEND_VL;
16877 }
16878 llvm_unreachable("Unknown ExtKind enum");
16879 }
16880
16881 /// Get or create a value that can feed \p Root with the given extension \p
16882 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
16883 /// operand. \see ::getSource().
16884 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
16885 const RISCVSubtarget &Subtarget,
16886 std::optional<ExtKind> SupportsExt) const {
16887 if (!SupportsExt.has_value())
16888 return OrigOperand;
16889
16890 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
16891
16892 SDValue Source = getSource();
16893 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
16894 if (Source.getValueType() == NarrowVT)
16895 return Source;
16896
16897 unsigned ExtOpc = getExtOpc(*SupportsExt);
16898
16899 // If we need an extension, we should be changing the type.
16900 SDLoc DL(OrigOperand);
16901 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
16902 switch (OrigOperand.getOpcode()) {
16903 case ISD::ZERO_EXTEND:
16904 case ISD::SIGN_EXTEND:
16905 case RISCVISD::VSEXT_VL:
16906 case RISCVISD::VZEXT_VL:
16907 case RISCVISD::FP_EXTEND_VL:
16908 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
16909 case ISD::SPLAT_VECTOR:
16910 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
16911 case RISCVISD::VMV_V_X_VL:
16912 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
16913 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
16914 case RISCVISD::VFMV_V_F_VL:
16915 Source = Source.getOperand(1);
16916 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
16917 Source = Source.getOperand(0);
16918 assert(Source.getValueType() == NarrowVT.getVectorElementType());
16919 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
16920 DAG.getUNDEF(NarrowVT), Source, VL);
16921 default:
16922 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
16923 // and that operand should already have the right NarrowVT so no
16924 // extension should be required at this point.
16925 llvm_unreachable("Unsupported opcode");
16926 }
16927 }
16928
16929 /// Helper function to get the narrow type for \p Root.
16930 /// The narrow type is the type of \p Root where we divided the size of each
16931 /// element by 2. E.g., if Root's type is <2 x i16>, the narrow type is <2 x i8>.
16932 /// \pre Both the narrow type and the original type should be legal.
16933 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
16934 MVT VT = Root->getSimpleValueType(0);
16935
16936 // Determine the narrow size.
16937 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
16938
16939 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
16940 : SupportsExt == ExtKind::FPExt
16941 ? MVT::getFloatingPointVT(NarrowSize)
16942 : MVT::getIntegerVT(NarrowSize);
16943
16944 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
16945 "Trying to extend something we can't represent");
16946 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
16947 return NarrowVT;
16948 }
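// For illustration of getNarrowType (hypothetical types): an integer root of
// type nxv4i32 narrows to nxv4i16 for ExtKind::SExt/ZExt, while a
// floating-point root of type nxv4f32 narrows to nxv4f16 for ExtKind::FPExt
// and to nxv4bf16 for ExtKind::BF16Ext.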
16949
16950 /// Get the opcode to materialize:
16951 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
16952 static unsigned getSExtOpcode(unsigned Opcode) {
16953 switch (Opcode) {
16954 case ISD::ADD:
16955 case RISCVISD::ADD_VL:
16956 case RISCVISD::VWADD_W_VL:
16957 case RISCVISD::VWADDU_W_VL:
16958 case ISD::OR:
16959 case RISCVISD::OR_VL:
16960 return RISCVISD::VWADD_VL;
16961 case ISD::SUB:
16962 case RISCVISD::SUB_VL:
16963 case RISCVISD::VWSUB_W_VL:
16964 case RISCVISD::VWSUBU_W_VL:
16965 return RISCVISD::VWSUB_VL;
16966 case ISD::MUL:
16967 case RISCVISD::MUL_VL:
16968 return RISCVISD::VWMUL_VL;
16969 default:
16970 llvm_unreachable("Unexpected opcode");
16971 }
16972 }
16973
16974 /// Get the opcode to materialize:
16975 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
16976 static unsigned getZExtOpcode(unsigned Opcode) {
16977 switch (Opcode) {
16978 case ISD::ADD:
16979 case RISCVISD::ADD_VL:
16980 case RISCVISD::VWADD_W_VL:
16981 case RISCVISD::VWADDU_W_VL:
16982 case ISD::OR:
16983 case RISCVISD::OR_VL:
16984 return RISCVISD::VWADDU_VL;
16985 case ISD::SUB:
16986 case RISCVISD::SUB_VL:
16987 case RISCVISD::VWSUB_W_VL:
16988 case RISCVISD::VWSUBU_W_VL:
16989 return RISCVISD::VWSUBU_VL;
16990 case ISD::MUL:
16991 case RISCVISD::MUL_VL:
16992 return RISCVISD::VWMULU_VL;
16993 case ISD::SHL:
16994 case RISCVISD::SHL_VL:
16995 return RISCVISD::VWSLL_VL;
16996 default:
16997 llvm_unreachable("Unexpected opcode");
16998 }
16999 }
17000
17001 /// Get the opcode to materialize:
17002 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
17003 static unsigned getFPExtOpcode(unsigned Opcode) {
17004 switch (Opcode) {
17005 case RISCVISD::FADD_VL:
17006 case RISCVISD::VFWADD_W_VL:
17007 return RISCVISD::VFWADD_VL;
17008 case RISCVISD::FSUB_VL:
17009 case RISCVISD::VFWSUB_W_VL:
17010 return RISCVISD::VFWSUB_VL;
17011 case RISCVISD::FMUL_VL:
17012 return RISCVISD::VFWMUL_VL;
17013 case RISCVISD::VFMADD_VL:
17014 return RISCVISD::VFWMADD_VL;
17015 case RISCVISD::VFMSUB_VL:
17016 return RISCVISD::VFWMSUB_VL;
17017 case RISCVISD::VFNMADD_VL:
17018 return RISCVISD::VFWNMADD_VL;
17019 case RISCVISD::VFNMSUB_VL:
17020 return RISCVISD::VFWNMSUB_VL;
17021 default:
17022 llvm_unreachable("Unexpected opcode");
17023 }
17024 }
17025
17026 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
17027 /// newOpcode(a, b).
17028 static unsigned getSUOpcode(unsigned Opcode) {
17029 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
17030 "SU is only supported for MUL");
17031 return RISCVISD::VWMULSU_VL;
17032 }
17033
17034 /// Get the opcode to materialize
17035 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
17036 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
17037 switch (Opcode) {
17038 case ISD::ADD:
17039 case RISCVISD::ADD_VL:
17040 case ISD::OR:
17041 case RISCVISD::OR_VL:
17042 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
17043 : RISCVISD::VWADDU_W_VL;
17044 case ISD::SUB:
17045 case RISCVISD::SUB_VL:
17046 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
17047 : RISCVISD::VWSUBU_W_VL;
17048 case RISCVISD::FADD_VL:
17049 return RISCVISD::VFWADD_W_VL;
17050 case RISCVISD::FSUB_VL:
17051 return RISCVISD::VFWSUB_W_VL;
17052 default:
17053 llvm_unreachable("Unexpected opcode");
17054 }
17055 }
17056
17057 using CombineToTry = std::function<std::optional<CombineResult>(
17058 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
17059 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
17060 const RISCVSubtarget &)>;
17061
17062 /// Check if this node needs to be fully folded or extended for all users.
17063 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
17064
17065 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
17066 const RISCVSubtarget &Subtarget) {
17067 unsigned Opc = OrigOperand.getOpcode();
17068 MVT VT = OrigOperand.getSimpleValueType();
17069
17070 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
17071 "Unexpected Opcode");
17072
17073 // The passthru must be undef for tail agnostic.
17074 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
17075 return;
17076
17077 // Get the scalar value.
17078 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
17079 : OrigOperand.getOperand(1);
17080
17081 // See if we have enough sign bits or zero bits in the scalar to use a
17082 // widening opcode by splatting to smaller element size.
17083 unsigned EltBits = VT.getScalarSizeInBits();
17084 unsigned ScalarBits = Op.getValueSizeInBits();
17085 // If we're not getting all bits from the element, we need special handling.
17086 if (ScalarBits < EltBits) {
17087 // This should only occur on RV32.
17088 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
17089 !Subtarget.is64Bit() && "Unexpected splat");
17090 // vmv.v.x sign extends narrow inputs.
17091 SupportsSExt = true;
17092
17093 // If the input is positive, then sign extend is also zero extend.
17094 if (DAG.SignBitIsZero(Op))
17095 SupportsZExt = true;
17096
17097 EnforceOneUse = false;
17098 return;
17099 }
17100
17101 unsigned NarrowSize = EltBits / 2;
17102 // If the narrow type cannot be expressed with a legal VMV,
17103 // this is not a valid candidate.
17104 if (NarrowSize < 8)
17105 return;
17106
17107 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
17108 SupportsSExt = true;
17109
17110 if (DAG.MaskedValueIsZero(Op,
17111 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
17112 SupportsZExt = true;
17113
17114 EnforceOneUse = false;
17115 }
17116
17117 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17118 return (NarrowEltVT == MVT::f32 ||
17119 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
17120 }
17121
17122 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17123 return NarrowEltVT == MVT::bf16 && Subtarget.hasStdExtZvfbfwma();
17124 }
17125
17126 /// Helper method to set the various fields of this struct based on the
17127 /// type of \p Root.
17128 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
17129 const RISCVSubtarget &Subtarget) {
17130 SupportsZExt = false;
17131 SupportsSExt = false;
17132 SupportsFPExt = false;
17133 SupportsBF16Ext = false;
17134 EnforceOneUse = true;
17135 unsigned Opc = OrigOperand.getOpcode();
17136 // For the nodes we handle below, we end up using their inputs directly: see
17137 // getSource(). However since they either don't have a passthru or we check
17138 // that their passthru is undef, we can safely ignore their mask and VL.
17139 switch (Opc) {
17140 case ISD::ZERO_EXTEND:
17141 case ISD::SIGN_EXTEND: {
17142 MVT VT = OrigOperand.getSimpleValueType();
17143 if (!VT.isVector())
17144 break;
17145
17146 SDValue NarrowElt = OrigOperand.getOperand(0);
17147 MVT NarrowVT = NarrowElt.getSimpleValueType();
17148 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
17149 if (NarrowVT.getVectorElementType() == MVT::i1)
17150 break;
17151
17152 SupportsZExt = Opc == ISD::ZERO_EXTEND;
17153 SupportsSExt = Opc == ISD::SIGN_EXTEND;
17154 break;
17155 }
17156 case RISCVISD::VZEXT_VL:
17157 SupportsZExt = true;
17158 break;
17159 case RISCVISD::VSEXT_VL:
17160 SupportsSExt = true;
17161 break;
17162 case RISCVISD::FP_EXTEND_VL: {
17163 MVT NarrowEltVT =
17164     OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
17165 if (isSupportedFPExtend(NarrowEltVT, Subtarget))
17166 SupportsFPExt = true;
17167 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
17168 SupportsBF16Ext = true;
17169
17170 break;
17171 }
17172 case ISD::SPLAT_VECTOR:
17173 case RISCVISD::VMV_V_X_VL:
17174 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
17175 break;
17176 case RISCVISD::VFMV_V_F_VL: {
17177 MVT VT = OrigOperand.getSimpleValueType();
17178
17179 if (!OrigOperand.getOperand(0).isUndef())
17180 break;
17181
17182 SDValue Op = OrigOperand.getOperand(1);
17183 if (Op.getOpcode() != ISD::FP_EXTEND)
17184 break;
17185
17186 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17187 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
17188 if (NarrowSize != ScalarBits)
17189 break;
17190
17191 if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
17192 SupportsFPExt = true;
17193 if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
17194 Subtarget))
17195 SupportsBF16Ext = true;
17196 break;
17197 }
17198 default:
17199 break;
17200 }
17201 }
17202
17203 /// Check if \p Root supports any extension folding combines.
17204 static bool isSupportedRoot(const SDNode *Root,
17205 const RISCVSubtarget &Subtarget) {
17206 switch (Root->getOpcode()) {
17207 case ISD::ADD:
17208 case ISD::SUB:
17209 case ISD::MUL: {
17210 return Root->getValueType(0).isScalableVector();
17211 }
17212 case ISD::OR: {
17213 return Root->getValueType(0).isScalableVector() &&
17214 Root->getFlags().hasDisjoint();
17215 }
17216 // Vector Widening Integer Add/Sub/Mul Instructions
17217 case RISCVISD::ADD_VL:
17218 case RISCVISD::MUL_VL:
17219 case RISCVISD::VWADD_W_VL:
17220 case RISCVISD::VWADDU_W_VL:
17221 case RISCVISD::SUB_VL:
17222 case RISCVISD::VWSUB_W_VL:
17223 case RISCVISD::VWSUBU_W_VL:
17224 // Vector Widening Floating-Point Add/Sub/Mul Instructions
17225 case RISCVISD::FADD_VL:
17226 case RISCVISD::FSUB_VL:
17227 case RISCVISD::FMUL_VL:
17228 case RISCVISD::VFWADD_W_VL:
17229 case RISCVISD::VFWSUB_W_VL:
17230 return true;
17231 case RISCVISD::OR_VL:
17232 return Root->getFlags().hasDisjoint();
17233 case ISD::SHL:
17234 return Root->getValueType(0).isScalableVector() &&
17235 Subtarget.hasStdExtZvbb();
17236 case RISCVISD::SHL_VL:
17237 return Subtarget.hasStdExtZvbb();
17238 case RISCVISD::VFMADD_VL:
17239 case RISCVISD::VFNMSUB_VL:
17240 case RISCVISD::VFNMADD_VL:
17241 case RISCVISD::VFMSUB_VL:
17242 return true;
17243 default:
17244 return false;
17245 }
17246 }
17247
17248 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
17249 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
17250 const RISCVSubtarget &Subtarget) {
17251 assert(isSupportedRoot(Root, Subtarget) &&
17252 "Trying to build an helper with an "
17253 "unsupported root");
17254 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
17256 OrigOperand = Root->getOperand(OperandIdx);
17257
17258 unsigned Opc = Root->getOpcode();
17259 switch (Opc) {
17260 // We consider
17261 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
17262 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
17263 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
17264 case RISCVISD::VWADD_W_VL:
17265 case RISCVISD::VWADDU_W_VL:
17266 case RISCVISD::VWSUB_W_VL:
17267 case RISCVISD::VWSUBU_W_VL:
17268 case RISCVISD::VFWADD_W_VL:
17269 case RISCVISD::VFWSUB_W_VL:
17270 if (OperandIdx == 1) {
17271 SupportsZExt =
17272 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
17273 SupportsSExt =
17274 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
17275 SupportsFPExt =
17276 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
17277 // There's no existing extension here, so we don't have to worry about
17278 // making sure it gets removed.
17279 EnforceOneUse = false;
17280 break;
17281 }
17282 [[fallthrough]];
17283 default:
17284 fillUpExtensionSupport(Root, DAG, Subtarget);
17285 break;
17286 }
17287 }
17288
17289 /// Helper function to get the Mask and VL from \p Root.
17290 static std::pair<SDValue, SDValue>
17291 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
17292 const RISCVSubtarget &Subtarget) {
17293 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
17294 switch (Root->getOpcode()) {
17295 case ISD::ADD:
17296 case ISD::SUB:
17297 case ISD::MUL:
17298 case ISD::OR:
17299 case ISD::SHL: {
17300 SDLoc DL(Root);
17301 MVT VT = Root->getSimpleValueType(0);
17302 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
17303 }
17304 default:
17305 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
17306 }
17307 }
17308
17309 /// Helper function to check if \p N is commutative with respect to the
17310 /// foldings that are supported by this class.
17311 static bool isCommutative(const SDNode *N) {
17312 switch (N->getOpcode()) {
17313 case ISD::ADD:
17314 case ISD::MUL:
17315 case ISD::OR:
17316 case RISCVISD::ADD_VL:
17317 case RISCVISD::MUL_VL:
17318 case RISCVISD::OR_VL:
17319 case RISCVISD::VWADD_W_VL:
17320 case RISCVISD::VWADDU_W_VL:
17321 case RISCVISD::FADD_VL:
17322 case RISCVISD::FMUL_VL:
17323 case RISCVISD::VFWADD_W_VL:
17324 case RISCVISD::VFMADD_VL:
17325 case RISCVISD::VFNMSUB_VL:
17326 case RISCVISD::VFNMADD_VL:
17327 case RISCVISD::VFMSUB_VL:
17328 return true;
17329 case ISD::SUB:
17330 case RISCVISD::SUB_VL:
17331 case RISCVISD::VWSUB_W_VL:
17332 case RISCVISD::VWSUBU_W_VL:
17333 case RISCVISD::FSUB_VL:
17334 case RISCVISD::VFWSUB_W_VL:
17335 case ISD::SHL:
17336 case RISCVISD::SHL_VL:
17337 return false;
17338 default:
17339 llvm_unreachable("Unexpected opcode");
17340 }
17341 }
17342
17343 /// Get a list of combine to try for folding extensions in \p Root.
17344 /// Note that each returned CombineToTry function doesn't actually modify
17345 /// anything. Instead they produce an optional CombineResult that, if not
17346 /// None, needs to be materialized for the combine to be applied.
17347 /// \see CombineResult::materialize.
17348 /// If the related CombineToTry function returns std::nullopt, that means the
17349 /// combine didn't match.
17350 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
17351};
17352
17353/// Helper structure that holds all the necessary information to materialize a
17354/// combine that does some extension folding.
17355struct CombineResult {
17356 /// Opcode to be generated when materializing the combine.
17357 unsigned TargetOpcode;
17358 // No value means no extension is needed.
17359 std::optional<ExtKind> LHSExt;
17360 std::optional<ExtKind> RHSExt;
17361 /// Root of the combine.
17362 SDNode *Root;
17363 /// LHS of the TargetOpcode.
17364 NodeExtensionHelper LHS;
17365 /// RHS of the TargetOpcode.
17366 NodeExtensionHelper RHS;
17367
17368 CombineResult(unsigned TargetOpcode, SDNode *Root,
17369 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
17370 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
17371 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
17372 LHS(LHS), RHS(RHS) {}
17373
17374 /// Return a value that uses TargetOpcode and that can be used to replace
17375 /// Root.
17376 /// The actual replacement is *not* done in that method.
17377 SDValue materialize(SelectionDAG &DAG,
17378 const RISCVSubtarget &Subtarget) const {
17379 SDValue Mask, VL, Passthru;
17380 std::tie(Mask, VL) =
17381 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
17382 switch (Root->getOpcode()) {
17383 default:
17384 Passthru = Root->getOperand(2);
17385 break;
17386 case ISD::ADD:
17387 case ISD::SUB:
17388 case ISD::MUL:
17389 case ISD::OR:
17390 case ISD::SHL:
17391 Passthru = DAG.getUNDEF(Root->getValueType(0));
17392 break;
17393 }
17394 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
17395 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
17396 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
17397 Passthru, Mask, VL);
17398 }
17399};
17400
17401/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17402/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17403/// are zext) and LHS and RHS can be folded into Root.
17404 /// AllowExtMask defines which form `ext` can take in this pattern.
17405///
17406/// \note If the pattern can match with both zext and sext, the returned
17407/// CombineResult will feature the zext result.
17408///
17409/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17410/// can be used to apply the pattern.
17411static std::optional<CombineResult>
17412canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
17413 const NodeExtensionHelper &RHS,
17414 uint8_t AllowExtMask, SelectionDAG &DAG,
17415 const RISCVSubtarget &Subtarget) {
17416 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
17417 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17418 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17419 /*RHSExt=*/{ExtKind::ZExt});
17420 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
17421 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17422 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17423 /*RHSExt=*/{ExtKind::SExt});
17424 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
17425 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17426 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17427 /*RHSExt=*/{ExtKind::FPExt});
17428 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
17429 RHS.SupportsBF16Ext)
17430 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17431 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
17432 /*RHSExt=*/{ExtKind::BF16Ext});
17433 return std::nullopt;
17434}
17435
17436/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17437/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17438/// are zext) and LHS and RHS can be folded into Root.
17439///
17440/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17441/// can be used to apply the pattern.
17442static std::optional<CombineResult>
17443canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
17444 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17445 const RISCVSubtarget &Subtarget) {
17446 return canFoldToVWWithSameExtensionImpl(
17447 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
17448 Subtarget);
17449}
17450
17451/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
17452///
17453/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17454/// can be used to apply the pattern.
17455static std::optional<CombineResult>
17456canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
17457 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17458 const RISCVSubtarget &Subtarget) {
17459 if (RHS.SupportsFPExt)
17460 return CombineResult(
17461 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
17462 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
17463
17464 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
17465 // sext/zext?
17466 // Control this behavior behind an option (AllowSplatInVW_W) for testing
17467 // purposes.
17468 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
17469 return CombineResult(
17470 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
17471 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
17472 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
17473 return CombineResult(
17474 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
17475 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
17476 return std::nullopt;
17477}
17478
17479/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
17480///
17481/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17482/// can be used to apply the pattern.
17483static std::optional<CombineResult>
17484canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17485 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17486 const RISCVSubtarget &Subtarget) {
17487 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
17488 Subtarget);
17489}
17490
17491/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
17492///
17493/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17494/// can be used to apply the pattern.
17495static std::optional<CombineResult>
17496canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17497 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17498 const RISCVSubtarget &Subtarget) {
17499 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
17500 Subtarget);
17501}
17502
17503/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
17504///
17505/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17506/// can be used to apply the pattern.
17507static std::optional<CombineResult>
17508canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17509 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17510 const RISCVSubtarget &Subtarget) {
17511 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
17512 Subtarget);
17513}
17514
17515/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
17516///
17517/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17518/// can be used to apply the pattern.
17519static std::optional<CombineResult>
17520canFoldToVWWithBF16EXT(SDNode *Root, const NodeExtensionHelper &LHS,
17521 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17522 const RISCVSubtarget &Subtarget) {
17523 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
17524 Subtarget);
17525}
17526
17527/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
17528///
17529/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17530/// can be used to apply the pattern.
17531static std::optional<CombineResult>
17532canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
17533 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17534 const RISCVSubtarget &Subtarget) {
17535
17536 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
17537 return std::nullopt;
17538 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
17539 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17540 /*RHSExt=*/{ExtKind::ZExt});
17541}
17542
17543 SmallVector<NodeExtensionHelper::CombineToTry>
17544 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
17545 SmallVector<CombineToTry> Strategies;
17546 switch (Root->getOpcode()) {
17547 case ISD::ADD:
17548 case ISD::SUB:
17549 case ISD::OR:
17550 case RISCVISD::ADD_VL:
17551 case RISCVISD::SUB_VL:
17552 case RISCVISD::OR_VL:
17553 case RISCVISD::FADD_VL:
17554 case RISCVISD::FSUB_VL:
17555 // add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
17556 Strategies.push_back(canFoldToVWWithSameExtension);
17557 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
17558 Strategies.push_back(canFoldToVW_W);
17559 break;
17560 case RISCVISD::FMUL_VL:
17561 case RISCVISD::VFMADD_VL:
17562 case RISCVISD::VFMSUB_VL:
17563 case RISCVISD::VFNMADD_VL:
17564 case RISCVISD::VFNMSUB_VL:
17565 Strategies.push_back(canFoldToVWWithSameExtension);
17566 if (Root->getOpcode() == RISCVISD::VFMADD_VL)
17567 Strategies.push_back(canFoldToVWWithBF16EXT);
17568 break;
17569 case ISD::MUL:
17570 case RISCVISD::MUL_VL:
17571 // mul -> vwmul(u)
17572 Strategies.push_back(canFoldToVWWithSameExtension);
17573 // mul -> vwmulsu
17574 Strategies.push_back(canFoldToVW_SU);
17575 break;
17576 case ISD::SHL:
17577 case RISCVISD::SHL_VL:
17578 // shl -> vwsll
17579 Strategies.push_back(canFoldToVWWithZEXT);
17580 break;
17581 case RISCVISD::VWADD_W_VL:
17582 case RISCVISD::VWSUB_W_VL:
17583 // vwadd_w|vwsub_w -> vwadd|vwsub
17584 Strategies.push_back(canFoldToVWWithSEXT);
17585 break;
17586 case RISCVISD::VWADDU_W_VL:
17587 case RISCVISD::VWSUBU_W_VL:
17588 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
17589 Strategies.push_back(canFoldToVWWithZEXT);
17590 break;
17591 case RISCVISD::VFWADD_W_VL:
17592 case RISCVISD::VFWSUB_W_VL:
17593 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
17594 Strategies.push_back(canFoldToVWWithFPEXT);
17595 break;
17596 default:
17597 llvm_unreachable("Unexpected opcode");
17598 }
17599 return Strategies;
17600}
17601} // End anonymous namespace.
17602
17604 // TODO: Extend this to other binops using generic identity logic
17605 assert(N->getOpcode() == RISCVISD::ADD_VL);
17606 SDValue A = N->getOperand(0);
17607 SDValue B = N->getOperand(1);
17608 SDValue Passthru = N->getOperand(2);
17609 if (!Passthru.isUndef())
17610 // TODO: This could be a vmerge instead
17611 return SDValue();
17613 if (ISD::isConstantSplatVectorAllZeros(B.getNode()))
17614 return A;
17615 // Peek through fixed to scalable
17616 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
17617 ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
17618 return A;
17619 return SDValue();
17620}
17621
17622/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
17623/// The supported combines are:
17624/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
17625/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17626/// mul | mul_vl -> vwmul(u) | vwmul_su
17627/// shl | shl_vl -> vwsll
17628/// fadd_vl -> vfwadd | vfwadd_w
17629/// fsub_vl -> vfwsub | vfwsub_w
17630/// fmul_vl -> vfwmul
17631/// vwadd_w(u) -> vwadd(u)
17632/// vwsub_w(u) -> vwsub(u)
17633/// vfwadd_w -> vfwadd
17634/// vfwsub_w -> vfwsub
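/// For illustration, one hand-written case: VWADD_W_VL(sext(a), b), i.e. a
/// vwadd.wv whose already-wide operand is itself a VSEXT_VL, is rewritten
/// here to VWADD_VL(a, b) and selects to a single vwadd.vv.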
17635 static SDValue combineOp_VLToVWOp_VL(SDNode *N,
17636                                      TargetLowering::DAGCombinerInfo &DCI,
17637                                      const RISCVSubtarget &Subtarget) {
17638 SelectionDAG &DAG = DCI.DAG;
17639 if (DCI.isBeforeLegalize())
17640 return SDValue();
17641
17642 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
17643 return SDValue();
17644
17645 SmallVector<SDNode *> Worklist;
17646 SmallPtrSet<SDNode *, 8> Inserted;
17647 Worklist.push_back(N);
17648 Inserted.insert(N);
17649 SmallVector<CombineResult> CombinesToApply;
17650
17651 while (!Worklist.empty()) {
17652 SDNode *Root = Worklist.pop_back_val();
17653
17654 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
17655 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
17656 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
17657 &Inserted](const NodeExtensionHelper &Op) {
17658 if (Op.needToPromoteOtherUsers()) {
17659 for (SDUse &Use : Op.OrigOperand->uses()) {
17660 SDNode *TheUser = Use.getUser();
17661 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
17662 return false;
17663 // We only support the first 2 operands of FMA.
17664 if (Use.getOperandNo() >= 2)
17665 return false;
17666 if (Inserted.insert(TheUser).second)
17667 Worklist.push_back(TheUser);
17668 }
17669 }
17670 return true;
17671 };
17672
17673 // Control the compile time by limiting the number of node we look at in
17674 // total.
17675 if (Inserted.size() > ExtensionMaxWebSize)
17676 return SDValue();
17677
17678 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
17679     NodeExtensionHelper::getSupportedFoldings(Root);
17680
17681 assert(!FoldingStrategies.empty() && "Nothing to be folded");
17682 bool Matched = false;
17683 for (int Attempt = 0;
17684 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
17685 ++Attempt) {
17686
17687 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
17688 FoldingStrategies) {
17689 std::optional<CombineResult> Res =
17690 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
17691 if (Res) {
17692 Matched = true;
17693 CombinesToApply.push_back(*Res);
17694 // All the inputs that are extended need to be folded, otherwise
17695 // we would be leaving the old input (since it may still be used),
17696 // and the new one.
17697 if (Res->LHSExt.has_value())
17698 if (!AppendUsersIfNeeded(LHS))
17699 return SDValue();
17700 if (Res->RHSExt.has_value())
17701 if (!AppendUsersIfNeeded(RHS))
17702 return SDValue();
17703 break;
17704 }
17705 }
17706 std::swap(LHS, RHS);
17707 }
17708 // Right now we do an all or nothing approach.
17709 if (!Matched)
17710 return SDValue();
17711 }
17712 // Store the value for the replacement of the input node separately.
17713 SDValue InputRootReplacement;
17714 // We do the RAUW after we materialize all the combines, because some replaced
17715 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
17716 // some of these nodes may appear in the NodeExtensionHelpers of some of the
17717 // yet-to-be-visited CombinesToApply roots.
17718 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
17719 ValuesToReplace.reserve(CombinesToApply.size());
17720 for (CombineResult Res : CombinesToApply) {
17721 SDValue NewValue = Res.materialize(DAG, Subtarget);
17722 if (!InputRootReplacement) {
17723 assert(Res.Root == N &&
17724 "First element is expected to be the current node");
17725 InputRootReplacement = NewValue;
17726 } else {
17727 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
17728 }
17729 }
17730 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
17731 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
17732 DCI.AddToWorklist(OldNewValues.second.getNode());
17733 }
17734 return InputRootReplacement;
17735}
17736
17737// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
17738// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
17739// y will be the Passthru and cond will be the Mask.
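// For illustration (hand-written): with an undef passthru and an all-ones
// mask, the lanes where cond is false add 0 and therefore keep y, which is
// exactly what a masked vwadd.wv with passthru y and mask cond produces, so
// the vmerge can be dropped.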
17740 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
17741 unsigned Opc = N->getOpcode();
17742 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17743 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17744
17745 SDValue Y = N->getOperand(0);
17746 SDValue MergeOp = N->getOperand(1);
17747 unsigned MergeOpc = MergeOp.getOpcode();
17748
17749 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
17750 return SDValue();
17751
17752 SDValue X = MergeOp->getOperand(1);
17753
17754 if (!MergeOp.hasOneUse())
17755 return SDValue();
17756
17757 // Passthru should be undef
17758 SDValue Passthru = N->getOperand(2);
17759 if (!Passthru.isUndef())
17760 return SDValue();
17761
17762 // Mask should be all ones
17763 SDValue Mask = N->getOperand(3);
17764 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
17765 return SDValue();
17766
17767 // False value of MergeOp should be all zeros
17768 SDValue Z = MergeOp->getOperand(2);
17769
17770 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
17771 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
17772 Z = Z.getOperand(1);
17773
17774 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
17775 return SDValue();
17776
17777 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
17778 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
17779 N->getFlags());
17780}
17781
17782 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
17783                                           TargetLowering::DAGCombinerInfo &DCI,
17784                                           const RISCVSubtarget &Subtarget) {
17785 [[maybe_unused]] unsigned Opc = N->getOpcode();
17786 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17787 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17788
17789 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17790 return V;
17791
17792 return combineVWADDSUBWSelect(N, DCI.DAG);
17793}
17794
17795// Helper function for performMemPairCombine.
17796// Try to combine the memory loads/stores LSNode1 and LSNode2
17797// into a single memory pair operation.
17798 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
17799                                  LSBaseSDNode *LSNode2, SDValue BasePtr,
17800 uint64_t Imm) {
17801 SmallPtrSet<const SDNode *, 32> Visited;
17802 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
17803
17804 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
17805 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
17806 return SDValue();
17807
17808 MachineFunction &MF = DAG.getMachineFunction();
17809 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17810
17811 // The new operation has twice the width.
17812 MVT XLenVT = Subtarget.getXLenVT();
17813 EVT MemVT = LSNode1->getMemoryVT();
17814 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
17815 MachineMemOperand *MMO = LSNode1->getMemOperand();
17816 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
17817     MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
17818
17819 if (LSNode1->getOpcode() == ISD::LOAD) {
17820 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
17821 unsigned Opcode;
17822 if (MemVT == MVT::i32)
17823 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
17824 else
17825 Opcode = RISCVISD::TH_LDD;
17826
17827 SDValue Res = DAG.getMemIntrinsicNode(
17828 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
17829 {LSNode1->getChain(), BasePtr,
17830 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
17831 NewMemVT, NewMMO);
17832
17833 SDValue Node1 =
17834 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
17835 SDValue Node2 =
17836 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
17837
17838 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
17839 return Node1;
17840 } else {
17841 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
17842
17843 SDValue Res = DAG.getMemIntrinsicNode(
17844 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
17845 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
17846 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
17847 NewMemVT, NewMMO);
17848
17849 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
17850 return Res;
17851 }
17852}
17853
17854// Try to combine two adjacent loads/stores to a single pair instruction from
17855// the XTHeadMemPair vendor extension.
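// For illustration (hand-written, assuming the XTHeadMemPair extension is
// enabled): two chained loads
//   lw a1, 0(a0)
//   lw a2, 4(a0)
// share a base pointer and adjacent offsets, so they can be merged into a
// single th.lwd load pair; the offset must fit the 2-bit, 8-byte-scaled
// index that is checked below.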
17856 static SDValue performMemPairCombine(SDNode *N,
17857                                      TargetLowering::DAGCombinerInfo &DCI) {
17858 SelectionDAG &DAG = DCI.DAG;
17859 MachineFunction &MF = DAG.getMachineFunction();
17860 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17861
17862 // Target does not support load/store pair.
17863 if (!Subtarget.hasVendorXTHeadMemPair())
17864 return SDValue();
17865
17866 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
17867 EVT MemVT = LSNode1->getMemoryVT();
17868 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
17869
17870 // No volatile, indexed or atomic loads/stores.
17871 if (!LSNode1->isSimple() || LSNode1->isIndexed())
17872 return SDValue();
17873
17874 // Function to get a base + constant representation from a memory value.
17875 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
17876 if (Ptr->getOpcode() == ISD::ADD)
17877 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
17878 return {Ptr->getOperand(0), C1->getZExtValue()};
17879 return {Ptr, 0};
17880 };
17881
17882 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
17883
17884 SDValue Chain = N->getOperand(0);
17885 for (SDUse &Use : Chain->uses()) {
17886 if (Use.getUser() != N && Use.getResNo() == 0 &&
17887 Use.getUser()->getOpcode() == N->getOpcode()) {
17888 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
17889
17890 // No volatile, indexed or atomic loads/stores.
17891 if (!LSNode2->isSimple() || LSNode2->isIndexed())
17892 continue;
17893
17894 // Check if LSNode1 and LSNode2 have the same type and extension.
17895 if (LSNode1->getOpcode() == ISD::LOAD)
17896 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
17897 cast<LoadSDNode>(LSNode1)->getExtensionType())
17898 continue;
17899
17900 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
17901 continue;
17902
17903 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
17904
17905 // Check if the base pointer is the same for both instructions.
17906 if (Base1 != Base2)
17907 continue;
17908
17909 // Check if the offsets match the XTHeadMemPair encoding constraints.
17910 bool Valid = false;
17911 if (MemVT == MVT::i32) {
17912 // Check for adjacent i32 values and a 2-bit index.
17913 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
17914 Valid = true;
17915 } else if (MemVT == MVT::i64) {
17916 // Check for adjacent i64 values and a 2-bit index.
17917 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
17918 Valid = true;
17919 }
17920
17921 if (!Valid)
17922 continue;
17923
17924 // Try to combine.
17925 if (SDValue Res =
17926 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
17927 return Res;
17928 }
17929 }
17930
17931 return SDValue();
17932}
17933
17934// Fold
17935// (fp_to_int (froundeven X)) -> fcvt X, rne
17936// (fp_to_int (ftrunc X)) -> fcvt X, rtz
17937// (fp_to_int (ffloor X)) -> fcvt X, rdn
17938// (fp_to_int (fceil X)) -> fcvt X, rup
17939// (fp_to_int (fround X)) -> fcvt X, rmm
17940// (fp_to_int (frint X)) -> fcvt X
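// For illustration (RV64 with F, hand-written): (fp_to_sint i32 (ffloor f32:x))
// becomes FCVT_W_RV64 x with static rounding mode rdn, i.e. a single
// fcvt.w.s instead of a separate floor followed by a conversion.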
17941 static SDValue performFP_TO_INTCombine(SDNode *N,
17942                                        TargetLowering::DAGCombinerInfo &DCI,
17943                                        const RISCVSubtarget &Subtarget) {
17944 SelectionDAG &DAG = DCI.DAG;
17945 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17946 MVT XLenVT = Subtarget.getXLenVT();
17947
17948 SDValue Src = N->getOperand(0);
17949
17950 // Don't do this for strict-fp Src.
17951 if (Src->isStrictFPOpcode())
17952 return SDValue();
17953
17954 // Ensure the FP type is legal.
17955 if (!TLI.isTypeLegal(Src.getValueType()))
17956 return SDValue();
17957
17958 // Don't do this for f16 with Zfhmin and not Zfh.
17959 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
17960 return SDValue();
17961
17962 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
17963 // If the result is invalid, we didn't find a foldable instruction.
17964 if (FRM == RISCVFPRndMode::Invalid)
17965 return SDValue();
17966
17967 SDLoc DL(N);
17968 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
17969 EVT VT = N->getValueType(0);
17970
17971 if (VT.isVector() && TLI.isTypeLegal(VT)) {
17972 MVT SrcVT = Src.getSimpleValueType();
17973 MVT SrcContainerVT = SrcVT;
17974 MVT ContainerVT = VT.getSimpleVT();
17975 SDValue XVal = Src.getOperand(0);
17976
17977 // For widening and narrowing conversions we just combine it into a
17978 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
17979 // end up getting lowered to their appropriate pseudo instructions based on
17980 // their operand types
17981 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
17982 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
17983 return SDValue();
17984
17985 // Make fixed-length vectors scalable first
17986 if (SrcVT.isFixedLengthVector()) {
17987 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
17988 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
17989 ContainerVT =
17990 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
17991 }
17992
17993 auto [Mask, VL] =
17994 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
17995
17996 SDValue FpToInt;
17997 if (FRM == RISCVFPRndMode::RTZ) {
17998 // Use the dedicated trunc static rounding mode if we're truncating so we
17999 // don't need to generate calls to fsrmi/fsrm
18000 unsigned Opc =
18001 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
18002 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
18003 } else {
18004 unsigned Opc =
18005 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
18006 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
18007 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
18008 }
18009
18010 // If converted from fixed-length to scalable, convert back
18011 if (VT.isFixedLengthVector())
18012 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
18013
18014 return FpToInt;
18015 }
18016
18017 // Only handle XLen or i32 types. Other types narrower than XLen will
18018 // eventually be legalized to XLenVT.
18019 if (VT != MVT::i32 && VT != XLenVT)
18020 return SDValue();
18021
18022 unsigned Opc;
18023 if (VT == XLenVT)
18024 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18025 else
18026 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18027
18028 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
18029 DAG.getTargetConstant(FRM, DL, XLenVT));
18030 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
18031}
18032
18033// Fold
18034// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
18035// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
18036// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
18037// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
18038// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
18039// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
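// For illustration (RV64 with D, hand-written): (fp_to_sint_sat i64 (ftrunc
// f64:x)) becomes FCVT_X x, rtz, i.e. fcvt.l.d, which already saturates to
// the i64 range; the select below only patches the NaN case to return 0.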
18040 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
18041                                            TargetLowering::DAGCombinerInfo &DCI,
18042                                            const RISCVSubtarget &Subtarget) {
18043 SelectionDAG &DAG = DCI.DAG;
18044 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18045 MVT XLenVT = Subtarget.getXLenVT();
18046
18047 // Only handle XLen types. Other types narrower than XLen will eventually be
18048 // legalized to XLenVT.
18049 EVT DstVT = N->getValueType(0);
18050 if (DstVT != XLenVT)
18051 return SDValue();
18052
18053 SDValue Src = N->getOperand(0);
18054
18055 // Don't do this for strict-fp Src.
18056 if (Src->isStrictFPOpcode())
18057 return SDValue();
18058
18059 // Ensure the FP type is also legal.
18060 if (!TLI.isTypeLegal(Src.getValueType()))
18061 return SDValue();
18062
18063 // Don't do this for f16 with Zfhmin and not Zfh.
18064 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18065 return SDValue();
18066
18067 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18068
18069 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18070 if (FRM == RISCVFPRndMode::Invalid)
18071 return SDValue();
18072
18073 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
18074
18075 unsigned Opc;
18076 if (SatVT == DstVT)
18077 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18078 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
18079 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18080 else
18081 return SDValue();
18082 // FIXME: Support other SatVTs by clamping before or after the conversion.
18083
18084 Src = Src.getOperand(0);
18085
18086 SDLoc DL(N);
18087 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
18088 DAG.getTargetConstant(FRM, DL, XLenVT));
18089
18090 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
18091 // extend.
18092 if (Opc == RISCVISD::FCVT_WU_RV64)
18093 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
18094
18095 // RISC-V FP-to-int conversions saturate to the destination register size, but
18096 // don't produce 0 for nan.
18097 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
18098 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
18099}
18100
18101// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
18102// smaller than XLenVT.
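// For illustration (hand-written): on an i16 value, bswap swaps the two
// bytes and bitreverse then reverses all 16 bits, so the composition
// reverses the bits within each byte while keeping the byte order, which is
// exactly what brev8 computes on the low 16 bits.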
18103 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
18104                                         const RISCVSubtarget &Subtarget) {
18105 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
18106
18107 SDValue Src = N->getOperand(0);
18108 if (Src.getOpcode() != ISD::BSWAP)
18109 return SDValue();
18110
18111 EVT VT = N->getValueType(0);
18112 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
18113 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
18114 return SDValue();
18115
18116 SDLoc DL(N);
18117 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
18118}
18119
18120 static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
18121                                         const RISCVSubtarget &Subtarget) {
18122 // Fold:
18123 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(NEW_ADDR, -1, MASK)
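// For illustration (hand-written): reversing a vp.load of i32 elements with
// EVL = 4 from address A becomes a strided load from A + (4 - 1) * 4 with a
// byte stride of -4, so lane 0 reads the element that was loaded last.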
18124
18125 // Check if its first operand is a vp.load.
18126 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
18127 if (!VPLoad)
18128 return SDValue();
18129
18130 EVT LoadVT = VPLoad->getValueType(0);
18131 // We do not have a strided_load version for masks, and the evl of vp.reverse
18132 // and vp.load should always be the same.
18133 if (!LoadVT.getVectorElementType().isByteSized() ||
18134 N->getOperand(2) != VPLoad->getVectorLength() ||
18135 !N->getOperand(0).hasOneUse())
18136 return SDValue();
18137
18138 // Check if the mask of outer vp.reverse are all 1's.
18139 if (!isOneOrOneSplat(N->getOperand(1)))
18140 return SDValue();
18141
18142 SDValue LoadMask = VPLoad->getMask();
18143 // If Mask is all ones, then the load is unmasked and can be reversed.
18144 if (!isOneOrOneSplat(LoadMask)) {
18145 // If the mask is not all ones, we can reverse the load if the mask was also
18146 // reversed by an unmasked vp.reverse with the same EVL.
18147 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18148 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
18149 LoadMask.getOperand(2) != VPLoad->getVectorLength())
18150 return SDValue();
18151 LoadMask = LoadMask.getOperand(0);
18152 }
18153
18154 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
18155 SDLoc DL(N);
18156 MVT XLenVT = Subtarget.getXLenVT();
18157 SDValue NumElem = VPLoad->getVectorLength();
18158 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
18159
18160 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18161 DAG.getConstant(1, DL, XLenVT));
18162 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18163 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18164 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
18165 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18166
18167 MachineFunction &MF = DAG.getMachineFunction();
18168 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
18169 MachineMemOperand *MMO = MF.getMachineMemOperand(
18170     PtrInfo, VPLoad->getMemOperand()->getFlags(),
18171 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
18172
18173 SDValue Ret = DAG.getStridedLoadVP(
18174 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
18175 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
18176
18177 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
18178
18179 return Ret;
18180}
18181
18182 static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG,
18183                                       const RISCVSubtarget &Subtarget) {
18184 // Fold:
18185 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
18186 // -1, MASK)
18187 auto *VPStore = cast<VPStoreSDNode>(N);
18188
18189 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
18190 return SDValue();
18191
18192 SDValue VPReverse = VPStore->getValue();
18193 EVT ReverseVT = VPReverse->getValueType(0);
18194
18195 // We do not have a strided_store version for masks, and the evl of vp.reverse
18196 // and vp.store should always be the same.
18197 if (!ReverseVT.getVectorElementType().isByteSized() ||
18198 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
18199 !VPReverse.hasOneUse())
18200 return SDValue();
18201
18202 SDValue StoreMask = VPStore->getMask();
18203 // If Mask is all ones, then the store is unmasked and can be reversed.
18204 if (!isOneOrOneSplat(StoreMask)) {
18205 // If the mask is not all ones, we can reverse the store if the mask was
18206 // also reversed by an unmasked vp.reverse with the same EVL.
18207 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18208 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
18209 StoreMask.getOperand(2) != VPStore->getVectorLength())
18210 return SDValue();
18211 StoreMask = StoreMask.getOperand(0);
18212 }
18213
18214 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
18215 SDLoc DL(N);
18216 MVT XLenVT = Subtarget.getXLenVT();
18217 SDValue NumElem = VPStore->getVectorLength();
18218 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
18219
18220 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18221 DAG.getConstant(1, DL, XLenVT));
18222 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18223 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18224 SDValue Base =
18225 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
18226 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18227
18228 MachineFunction &MF = DAG.getMachineFunction();
18229 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
18230 MachineMemOperand *MMO = MF.getMachineMemOperand(
18231     PtrInfo, VPStore->getMemOperand()->getFlags(),
18232 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
18233
18234 return DAG.getStridedStoreVP(
18235 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
18236 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
18237 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
18238 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
18239}
18240
18241// Peephole avgceil pattern.
18242// %1 = zext <N x i8> %a to <N x i32>
18243// %2 = zext <N x i8> %b to <N x i32>
18244// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
18245// %4 = add nuw nsw <N x i32> %3, %2
18246// %5 = lshr <N x i32> %4, splat (i32 1)
18247// %6 = trunc <N x i32> %5 to <N x i8>
18249 const RISCVSubtarget &Subtarget) {
18250 EVT VT = N->getValueType(0);
18251
18252 // Ignore fixed vectors.
18253 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18254 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
18255 return SDValue();
18256
18257 SDValue In = N->getOperand(0);
18258 SDValue Mask = N->getOperand(1);
18259 SDValue VL = N->getOperand(2);
18260
18261 // Input should be a vp_srl with same mask and VL.
18262 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
18263 In.getOperand(3) != VL)
18264 return SDValue();
18265
18266 // Shift amount should be 1.
18267 if (!isOneOrOneSplat(In.getOperand(1)))
18268 return SDValue();
18269
18270 // Shifted value should be a vp_add with same mask and VL.
18271 SDValue LHS = In.getOperand(0);
18272 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
18273 LHS.getOperand(3) != VL)
18274 return SDValue();
18275
18276 SDValue Operands[3];
18277
18278 // Matches another VP_ADD with same VL and Mask.
18279 auto FindAdd = [&](SDValue V, SDValue Other) {
18280 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
18281 V.getOperand(3) != VL)
18282 return false;
18283
18284 Operands[0] = Other;
18285 Operands[1] = V.getOperand(1);
18286 Operands[2] = V.getOperand(0);
18287 return true;
18288 };
18289
18290 // We need to find another VP_ADD in one of the operands.
18291 SDValue LHS0 = LHS.getOperand(0);
18292 SDValue LHS1 = LHS.getOperand(1);
18293 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
18294 return SDValue();
18295
18296 // Now we have three operands of two additions. Check that one of them is a
18297 // constant vector with ones.
18298 auto I = llvm::find_if(Operands,
18299 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
18300 if (I == std::end(Operands))
18301 return SDValue();
18302 // We found a vector with ones; move it to the end of the Operands array.
18303 std::swap(*I, Operands[2]);
18304
18305 // Make sure the other 2 operands can be promoted from the result type.
18306 for (SDValue Op : drop_end(Operands)) {
18307 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
18308 Op.getOperand(2) != VL)
18309 return SDValue();
18310 // Input must be the same size or smaller than our result.
18311 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
18312 return SDValue();
18313 }
18314
18315 // Pattern is detected.
18316 // Rebuild the zero extends in case the inputs are smaller than our result.
18317 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
18318 Operands[0].getOperand(0), Mask, VL);
18319 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
18320 Operands[1].getOperand(0), Mask, VL);
18321 // Build an AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
18322 // mode.
18323 SDLoc DL(N);
18324 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
18325 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
18326}
18327
18328// Convert from one FMA opcode to another based on whether we are negating the
18329// multiply result and/or the accumulator.
18330// NOTE: Only supports RVV operations with VL.
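// For illustration: VFMADD_VL computes (a * b) + c. Negating the multiply
// result gives -(a * b) + c, which is VFNMSUB_VL; additionally negating the
// accumulator gives -(a * b) - c, which is VFNMADD_VL.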
18331static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
18332 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
18333 if (NegMul) {
18334 // clang-format off
18335 switch (Opcode) {
18336 default: llvm_unreachable("Unexpected opcode");
18337 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18338 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18339 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18340 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18341 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18342 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18343 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18344 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18345 }
18346 // clang-format on
18347 }
18348
18349 // Negating the accumulator changes ADD<->SUB.
18350 if (NegAcc) {
18351 // clang-format off
18352 switch (Opcode) {
18353 default: llvm_unreachable("Unexpected opcode");
18354 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18355 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18356 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18357 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18358 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18359 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18360 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18361 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18362 }
18363 // clang-format on
18364 }
18365
18366 return Opcode;
18367}
18368
18369 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
18370 // Fold FNEG_VL into FMA opcodes.
18371 // The first operand of strict-fp is chain.
18372 bool IsStrict =
18373 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
18374 unsigned Offset = IsStrict ? 1 : 0;
18375 SDValue A = N->getOperand(0 + Offset);
18376 SDValue B = N->getOperand(1 + Offset);
18377 SDValue C = N->getOperand(2 + Offset);
18378 SDValue Mask = N->getOperand(3 + Offset);
18379 SDValue VL = N->getOperand(4 + Offset);
18380
18381 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
18382 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
18383 V.getOperand(2) == VL) {
18384 // Return the negated input.
18385 V = V.getOperand(0);
18386 return true;
18387 }
18388
18389 return false;
18390 };
18391
18392 bool NegA = invertIfNegative(A);
18393 bool NegB = invertIfNegative(B);
18394 bool NegC = invertIfNegative(C);
18395
18396 // If no operands are negated, we're done.
18397 if (!NegA && !NegB && !NegC)
18398 return SDValue();
18399
18400 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
18401 if (IsStrict)
18402 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
18403 {N->getOperand(0), A, B, C, Mask, VL});
18404 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
18405 VL);
18406}
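// Worked example (illustrative): (vfmadd_vl (fneg_vl a), b, c) with a
// matching mask and VL sets only NegA, so NegMul is true and the node is
// rebuilt as (vfnmsub_vl a, b, c), i.e. -(a * b) + c, with the standalone
// negate removed.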
18407
18408 static SDValue performVFMADD_VLCombine(SDNode *N,
18409 TargetLowering::DAGCombinerInfo &DCI,
18410 const RISCVSubtarget &Subtarget) {
18411 SelectionDAG &DAG = DCI.DAG;
18412
18413 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
18414 return V;
18415
18416 // FIXME: Ignore strict opcodes for now.
18417 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
18418 return SDValue();
18419
18420 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18421}
18422
18423 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
18424 const RISCVSubtarget &Subtarget) {
18425 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
18426
18427 EVT VT = N->getValueType(0);
18428
18429 if (VT != Subtarget.getXLenVT())
18430 return SDValue();
18431
18432 if (!isa<ConstantSDNode>(N->getOperand(1)))
18433 return SDValue();
18434 uint64_t ShAmt = N->getConstantOperandVal(1);
18435
18436 SDValue N0 = N->getOperand(0);
18437
18438 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
18439 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
18440 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
18441 unsigned ExtSize =
18442 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
18443 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
18444 N0.getOperand(0).hasOneUse() &&
18445 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
18446 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
18447 if (LShAmt < ExtSize) {
18448 unsigned Size = VT.getSizeInBits();
18449 SDLoc ShlDL(N0.getOperand(0));
18450 SDValue Shl =
18451 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
18452 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
18453 SDLoc DL(N);
18454 return DAG.getNode(ISD::SRA, DL, VT, Shl,
18455 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
18456 }
18457 }
18458 }
18459
18460 if (ShAmt > 32 || VT != MVT::i64)
18461 return SDValue();
18462
18463 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
18464 // FIXME: Should this be a generic combine? There's a similar combine on X86.
18465 //
18466 // Also try these folds where an add or sub is in the middle.
18467 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
18468 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
18469 SDValue Shl;
18470 ConstantSDNode *AddC = nullptr;
18471
18472 // We might have an ADD or SUB between the SRA and SHL.
18473 bool IsAdd = N0.getOpcode() == ISD::ADD;
18474 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
18475 // Other operand needs to be a constant we can modify.
18476 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
18477 if (!AddC)
18478 return SDValue();
18479
18480 // AddC needs to have at least 32 trailing zeros.
18481 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
18482 return SDValue();
18483
18484 // All users should be shifts by a constant less than or equal to 32. This
18485 // ensures we'll do this optimization for each of them to produce an
18486 // add/sub+sext_inreg they can all share.
18487 for (SDNode *U : N0->users()) {
18488 if (U->getOpcode() != ISD::SRA ||
18489 !isa<ConstantSDNode>(U->getOperand(1)) ||
18490 U->getConstantOperandVal(1) > 32)
18491 return SDValue();
18492 }
18493
18494 Shl = N0.getOperand(IsAdd ? 0 : 1);
18495 } else {
18496 // Not an ADD or SUB.
18497 Shl = N0;
18498 }
18499
18500 // Look for a shift left by 32.
18501 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
18502 Shl.getConstantOperandVal(1) != 32)
18503 return SDValue();
18504
18505 // If we didn't look through an add/sub, then the shl should have one use.
18506 // If we did look through an add/sub, the sext_inreg we create is free so
18507 // we're only creating 2 new instructions. It's enough to only remove the
18508 // original sra+add/sub.
18509 if (!AddC && !Shl.hasOneUse())
18510 return SDValue();
18511
18512 SDLoc DL(N);
18513 SDValue In = Shl.getOperand(0);
18514
18515 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
18516 // constant.
18517 if (AddC) {
18518 SDValue ShiftedAddC =
18519 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
18520 if (IsAdd)
18521 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
18522 else
18523 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
18524 }
18525
18526 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
18527 DAG.getValueType(MVT::i32));
18528 if (ShAmt == 32)
18529 return SExt;
18530
18531 return DAG.getNode(
18532 ISD::SHL, DL, MVT::i64, SExt,
18533 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
18534}
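// Worked example (illustrative): with ShAmt = 29, (sra (shl X, 32), 29) is
// rewritten to (shl (sext_inreg X, i32), 3), i.e. a sign-extend of the low
// 32 bits followed by a left shift of 3. With an add in the middle and C1
// having 32 trailing zeros, (sra (add (shl X, 32), C1), 29) becomes
// (shl (sext_inreg (add X, C1 >> 32), i32), 3), so several such sra users can
// share one add + sext_inreg.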
18535
18536// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
18537// the result is used as the condition of a br_cc or select_cc we can invert,
18538// inverting the setcc is free, and Z is 0/1. Caller will invert the
18539// br_cc/select_cc.
18540 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
18541 bool IsAnd = Cond.getOpcode() == ISD::AND;
18542 if (!IsAnd && Cond.getOpcode() != ISD::OR)
18543 return SDValue();
18544
18545 if (!Cond.hasOneUse())
18546 return SDValue();
18547
18548 SDValue Setcc = Cond.getOperand(0);
18549 SDValue Xor = Cond.getOperand(1);
18550 // Canonicalize setcc to LHS.
18551 if (Setcc.getOpcode() != ISD::SETCC)
18552 std::swap(Setcc, Xor);
18553 // LHS should be a setcc and RHS should be an xor.
18554 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
18555 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
18556 return SDValue();
18557
18558 // If the condition is an And, SimplifyDemandedBits may have changed
18559 // (xor Z, 1) to (not Z).
18560 SDValue Xor1 = Xor.getOperand(1);
18561 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
18562 return SDValue();
18563
18564 EVT VT = Cond.getValueType();
18565 SDValue Xor0 = Xor.getOperand(0);
18566
18567 // The LHS of the xor needs to be 0/1.
18568 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
18569 if (!DAG.MaskedValueIsZero(Xor0, Mask))
18570 return SDValue();
18571
18572 // We can only invert integer setccs.
18573 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
18574 if (!SetCCOpVT.isScalarInteger())
18575 return SDValue();
18576
18577 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
18578 if (ISD::isIntEqualitySetCC(CCVal)) {
18579 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
18580 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
18581 Setcc.getOperand(1), CCVal);
18582 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
18583 // Invert (setlt 0, X) by converting to (setlt X, 1).
18584 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
18585 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
18586 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
18587 // Invert (setlt X, 1) by converting to (setlt 0, X).
18588 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
18589 DAG.getConstant(0, SDLoc(Setcc), VT),
18590 Setcc.getOperand(0), CCVal);
18591 } else
18592 return SDValue();
18593
18594 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
18595 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
18596}
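// Worked example (illustrative): a br_cc whose condition is
// (and (setcc X, Y, eq), (xor Z, 1)) is rewritten here to
// (or (setcc X, Y, ne), Z); the caller then inverts the branch condition, so
// the two inversions cancel and the xor with 1 disappears.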
18597
18598// Perform common combines for BR_CC and SELECT_CC conditions.
18599static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
18600 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
18601 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
18602
18603 // Since an arithmetic right shift always preserves the sign bit, the
18604 // shift can be omitted when comparing against zero.
18605 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
18606 // setge (sra X, N), 0 -> setge X, 0
18607 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
18608 LHS.getOpcode() == ISD::SRA) {
18609 LHS = LHS.getOperand(0);
18610 return true;
18611 }
18612
18613 if (!ISD::isIntEqualitySetCC(CCVal))
18614 return false;
18615
18616 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
18617 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
18618 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
18619 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
18620 // If we're looking for eq 0 instead of ne 0, we need to invert the
18621 // condition.
18622 bool Invert = CCVal == ISD::SETEQ;
18623 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
18624 if (Invert)
18625 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18626
18627 RHS = LHS.getOperand(1);
18628 LHS = LHS.getOperand(0);
18629 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
18630
18631 CC = DAG.getCondCode(CCVal);
18632 return true;
18633 }
18634
18635 // If XOR is reused and has an immediate that will fit in XORI,
18636 // do not fold.
18637 auto isXorImmediate = [](const SDValue &Op) -> bool {
18638 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
18639 return isInt<12>(XorCnst->getSExtValue());
18640 return false;
18641 };
18642 // Fold (X(i1) ^ 1) == 0 -> X != 0
18643 auto singleBitOp = [&DAG](const SDValue &VarOp,
18644 const SDValue &ConstOp) -> bool {
18645 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
18646 const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
18647 return (XorCnst->getSExtValue() == 1) &&
18648 DAG.MaskedValueIsZero(VarOp, Mask);
18649 }
18650 return false;
18651 };
18652 auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
18653 for (const SDNode *UserNode : Op->users()) {
18654 const unsigned Opcode = UserNode->getOpcode();
18655 if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
18656 return false;
18657 }
18658 return true;
18659 };
18660 auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
18661 const SDValue &LHS, const SDValue &RHS) -> bool {
18662 return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
18663 (!isXorImmediate(LHS.getOperand(1)) ||
18664 singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
18665 onlyUsedBySelectOrBR(LHS));
18666 };
18667 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
18668 if (isFoldableXorEq(LHS, RHS)) {
18669 RHS = LHS.getOperand(1);
18670 LHS = LHS.getOperand(0);
18671 return true;
18672 }
18673 // Fold ((sext (xor X, C)), 0, eq/ne) -> ((sext X), C, eq/ne)
18674 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
18675 const SDValue LHS0 = LHS.getOperand(0);
18676 if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
18677 // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y)))
18678 RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18679 LHS0.getOperand(1), LHS.getOperand(1));
18680 LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18681 LHS0.getOperand(0), LHS.getOperand(1));
18682 return true;
18683 }
18684 }
18685
18686 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
18687 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
18688 LHS.getOperand(1).getOpcode() == ISD::Constant) {
18689 SDValue LHS0 = LHS.getOperand(0);
18690 if (LHS0.getOpcode() == ISD::AND &&
18691 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
18692 uint64_t Mask = LHS0.getConstantOperandVal(1);
18693 uint64_t ShAmt = LHS.getConstantOperandVal(1);
18694 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
18695 // XAndesPerf supports branch on test bit.
18696 if (Subtarget.hasVendorXAndesPerf()) {
18697 LHS =
18698 DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
18699 DAG.getConstant(Mask, DL, LHS.getValueType()));
18700 return true;
18701 }
18702
18703 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
18704 CC = DAG.getCondCode(CCVal);
18705
18706 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
18707 LHS = LHS0.getOperand(0);
18708 if (ShAmt != 0)
18709 LHS =
18710 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
18711 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
18712 return true;
18713 }
18714 }
18715 }
18716
18717 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
18718 // This can occur when legalizing some floating point comparisons.
18719 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
18720 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
18721 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18722 CC = DAG.getCondCode(CCVal);
18723 RHS = DAG.getConstant(0, DL, LHS.getValueType());
18724 return true;
18725 }
18726
18727 if (isNullConstant(RHS)) {
18728 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
18729 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18730 CC = DAG.getCondCode(CCVal);
18731 LHS = NewCond;
18732 return true;
18733 }
18734 }
18735
18736 return false;
18737}
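// Worked example for the bit-test fold above (illustrative): on RV64,
// ((srl (and X, 64), 6), 0, ne) becomes ((shl X, 57), 0, lt); shifting bit 6
// of X into the sign bit turns the single-bit test into a plain
// branch-on-sign comparison.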
18738
18739// Fold
18740// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
18741// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
18742// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
18743// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
18744// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
18745// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
18746 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
18747 SDValue TrueVal, SDValue FalseVal,
18748 bool Swapped) {
18749 bool Commutative = true;
18750 unsigned Opc = TrueVal.getOpcode();
18751 switch (Opc) {
18752 default:
18753 return SDValue();
18754 case ISD::SHL:
18755 case ISD::SRA:
18756 case ISD::SRL:
18757 case ISD::SUB:
18758 case ISD::ROTL:
18759 case ISD::ROTR:
18760 Commutative = false;
18761 break;
18762 case ISD::ADD:
18763 case ISD::OR:
18764 case ISD::XOR:
18765 break;
18766 }
18767
18768 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
18769 return SDValue();
18770
18771 unsigned OpToFold;
18772 if (FalseVal == TrueVal.getOperand(0))
18773 OpToFold = 0;
18774 else if (Commutative && FalseVal == TrueVal.getOperand(1))
18775 OpToFold = 1;
18776 else
18777 return SDValue();
18778
18779 EVT VT = N->getValueType(0);
18780 SDLoc DL(N);
18781 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
18782 EVT OtherOpVT = OtherOp.getValueType();
18783 SDValue IdentityOperand =
18784 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
18785 if (!Commutative)
18786 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
18787 assert(IdentityOperand && "No identity operand!");
18788
18789 if (Swapped)
18790 std::swap(OtherOp, IdentityOperand);
18791 SDValue NewSel =
18792 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
18793 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
18794}
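// Worked example (illustrative): (select C, (add Y, X), Y) becomes
// (add Y, (select C, X, 0)); the inner select of X against the identity
// value can then be emitted as a single conditional-zero style operation
// (e.g. czero.eqz on targets with Zicond) rather than selecting between two
// full add results.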
18795
18796// This tries to get rid of `select` and `icmp` that are being used to handle
18797// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
18798 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
18799 SDValue Cond = N->getOperand(0);
18800
18801 // This represents either CTTZ or CTLZ instruction.
18802 SDValue CountZeroes;
18803
18804 SDValue ValOnZero;
18805
18806 if (Cond.getOpcode() != ISD::SETCC)
18807 return SDValue();
18808
18809 if (!isNullConstant(Cond->getOperand(1)))
18810 return SDValue();
18811
18812 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
18813 if (CCVal == ISD::CondCode::SETEQ) {
18814 CountZeroes = N->getOperand(2);
18815 ValOnZero = N->getOperand(1);
18816 } else if (CCVal == ISD::CondCode::SETNE) {
18817 CountZeroes = N->getOperand(1);
18818 ValOnZero = N->getOperand(2);
18819 } else {
18820 return SDValue();
18821 }
18822
18823 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
18824 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
18825 CountZeroes = CountZeroes.getOperand(0);
18826
18827 if (CountZeroes.getOpcode() != ISD::CTTZ &&
18828 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
18829 CountZeroes.getOpcode() != ISD::CTLZ &&
18830 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
18831 return SDValue();
18832
18833 if (!isNullConstant(ValOnZero))
18834 return SDValue();
18835
18836 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
18837 if (Cond->getOperand(0) != CountZeroesArgument)
18838 return SDValue();
18839
18840 unsigned BitWidth = CountZeroes.getValueSizeInBits();
18841 if (!isPowerOf2_32(BitWidth))
18842 return SDValue();
18843
18844 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
18845 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
18846 CountZeroes.getValueType(), CountZeroesArgument);
18847 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
18848 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
18849 CountZeroes.getValueType(), CountZeroesArgument);
18850 }
18851
18852 SDValue BitWidthMinusOne =
18853 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
18854
18855 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
18856 CountZeroes, BitWidthMinusOne);
18857 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
18858}
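// Worked example (illustrative): (select (setcc X, 0, eq), 0, (cttz X)) on an
// i32 value becomes (and (cttz X), 31): CTTZ returns the bit width (32) for a
// zero input, and 32 & 31 == 0 matches the value the select produced on zero,
// so the guarding compare and select are redundant.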
18859
18860 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
18861 const RISCVSubtarget &Subtarget) {
18862 SDValue Cond = N->getOperand(0);
18863 SDValue True = N->getOperand(1);
18864 SDValue False = N->getOperand(2);
18865 SDLoc DL(N);
18866 EVT VT = N->getValueType(0);
18867 EVT CondVT = Cond.getValueType();
18868
18869 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
18870 return SDValue();
18871
18872 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
18873 // BEXTI, where C is power of 2.
18874 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
18875 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
18876 SDValue LHS = Cond.getOperand(0);
18877 SDValue RHS = Cond.getOperand(1);
18878 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18879 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
18880 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
18881 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
18882 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
18883 return DAG.getSelect(DL, VT,
18884 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
18885 False, True);
18886 }
18887 }
18888 return SDValue();
18889}
18890
18891static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
18892 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
18893 return false;
18894
18895 SwapCC = false;
18896 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
18897 std::swap(TrueVal, FalseVal);
18898 SwapCC = true;
18899 }
18900
18901 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
18902 return false;
18903
18904 SDValue A = FalseVal.getOperand(0);
18905 SDValue B = FalseVal.getOperand(1);
18906 // Add is commutative, so check both orders
18907 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
18908 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
18909}
18910
18911/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
18912 /// This allows us to match a vadd.vv fed by a masked vrsub, which reduces
18913/// register pressure over the add followed by masked vsub sequence.
18914 static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
18915 SDLoc DL(N);
18916 EVT VT = N->getValueType(0);
18917 SDValue CC = N->getOperand(0);
18918 SDValue TrueVal = N->getOperand(1);
18919 SDValue FalseVal = N->getOperand(2);
18920
18921 bool SwapCC;
18922 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
18923 return SDValue();
18924
18925 SDValue Sub = SwapCC ? TrueVal : FalseVal;
18926 SDValue A = Sub.getOperand(0);
18927 SDValue B = Sub.getOperand(1);
18928
18929 // Arrange the select such that we can match a masked
18930 // vrsub.vi to perform the conditional negate
18931 SDValue NegB = DAG.getNegative(B, DL, VT);
18932 if (!SwapCC)
18933 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
18934 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
18935 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
18936}
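// Worked example (illustrative): (vselect M, (add A, B), (sub A, B)) becomes
// (add A, (vselect M, B, (neg B))); the conditional negate matches a masked
// vrsub.vi of B against zero, so only one unmasked vadd.vv is needed instead
// of an add plus a fully masked subtract.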
18937
18938 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
18939 const RISCVSubtarget &Subtarget) {
18940 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
18941 return Folded;
18942
18943 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
18944 return V;
18945
18946 if (Subtarget.hasConditionalMoveFusion())
18947 return SDValue();
18948
18949 SDValue TrueVal = N->getOperand(1);
18950 SDValue FalseVal = N->getOperand(2);
18951 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
18952 return V;
18953 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
18954}
18955
18956/// If we have a build_vector where each lane is binop X, C, where C
18957/// is a constant (but not necessarily the same constant on all lanes),
18958/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
18959/// We assume that materializing a constant build vector will be no more
18960 /// expensive than performing O(n) binops.
18961 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
18962 const RISCVSubtarget &Subtarget,
18963 const RISCVTargetLowering &TLI) {
18964 SDLoc DL(N);
18965 EVT VT = N->getValueType(0);
18966
18967 assert(!VT.isScalableVector() && "unexpected build vector");
18968
18969 if (VT.getVectorNumElements() == 1)
18970 return SDValue();
18971
18972 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
18973 if (!TLI.isBinOp(Opcode))
18974 return SDValue();
18975
18976 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
18977 return SDValue();
18978
18979 // This BUILD_VECTOR involves an implicit truncation, and sinking
18980 // truncates through binops is non-trivial.
18981 if (N->op_begin()->getValueType() != VT.getVectorElementType())
18982 return SDValue();
18983
18984 SmallVector<SDValue> LHSOps;
18985 SmallVector<SDValue> RHSOps;
18986 for (SDValue Op : N->ops()) {
18987 if (Op.isUndef()) {
18988 // We can't form a divide or remainder from undef.
18989 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
18990 return SDValue();
18991
18992 LHSOps.push_back(Op);
18993 RHSOps.push_back(Op);
18994 continue;
18995 }
18996
18997 // TODO: We can handle operations which have a neutral rhs value
18998 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
18999 // of profit in a more explicit manner.
19000 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
19001 return SDValue();
19002
19003 LHSOps.push_back(Op.getOperand(0));
19004 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
19005 !isa<ConstantFPSDNode>(Op.getOperand(1)))
19006 return SDValue();
19007 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19008 // have different LHS and RHS types.
19009 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
19010 return SDValue();
19011
19012 RHSOps.push_back(Op.getOperand(1));
19013 }
19014
19015 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
19016 DAG.getBuildVector(VT, DL, RHSOps));
19017}
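// Worked example: build_vector (add x0, 1), (add x1, 2), (add x2, 3),
// (add x3, 4) is rebuilt as (add (build_vector x0, x1, x2, x3),
// (build_vector 1, 2, 3, 4)), trading four scalar adds for one vector add
// plus a materialized constant vector.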
19018
19019 static MVT getQDOTXResultType(MVT OpVT) {
19020 ElementCount OpEC = OpVT.getVectorElementCount();
19021 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
19022 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
19023}
19024
19025/// Given fixed length vectors A and B with equal element types, but possibly
19026 /// different numbers of elements, return A + B where either A or B is zero
19027/// padded to the larger number of elements.
19028 static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B,
19029 SelectionDAG &DAG) {
19030 // NOTE: Manually doing the extract/add/insert scheme produces
19031 // significantly better codegen than the naive pad with zeros
19032 // and add scheme.
19033 EVT AVT = A.getValueType();
19034 EVT BVT = B.getValueType();
19037 std::swap(A, B);
19038 std::swap(AVT, BVT);
19039 }
19040
19041 SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
19042 SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
19043 return DAG.getInsertSubvector(DL, B, Res, 0);
19044}
19045
19046 static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
19047 SelectionDAG &DAG,
19048 const RISCVSubtarget &Subtarget,
19049 const RISCVTargetLowering &TLI) {
19050 // Note: We intentionally do not check the legality of the reduction type.
19051 // We want to handle the m4/m8 *src* types, and thus need to let illegal
19052 // intermediate types flow through here.
19053 if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
19055 return SDValue();
19056
19057 // Recurse through adds (since generic dag canonicalizes to that
19058 // form). TODO: Handle disjoint or here.
19059 if (InVec->getOpcode() == ISD::ADD) {
19060 SDValue A = InVec.getOperand(0);
19061 SDValue B = InVec.getOperand(1);
19062 SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
19063 SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
19064 if (AOpt || BOpt) {
19065 if (AOpt)
19066 A = AOpt;
19067 if (BOpt)
19068 B = BOpt;
19069 // From here, we're doing A + B with mixed types, implicitly zero
19070 // padded to the wider type. Note that we *don't* need the result
19071 // type to be the original VT, and in fact prefer narrower ones
19072 // if possible.
19073 return getZeroPaddedAdd(DL, A, B, DAG);
19074 }
19075 }
19076
19077 // zext a <--> partial_reduce_umla 0, a, 1
19078 // sext a <--> partial_reduce_smla 0, a, 1
19079 if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
19080 InVec.getOpcode() == ISD::SIGN_EXTEND) {
19081 SDValue A = InVec.getOperand(0);
19082 EVT OpVT = A.getValueType();
19083 if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
19084 return SDValue();
19085
19086 MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
19087 SDValue B = DAG.getConstant(0x1, DL, OpVT);
19088 bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
19089 unsigned Opc =
19090 IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
19091 return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
19092 }
19093
19094 // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
19095 // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
19096 // mul (sext a, zext b) -> partial_reduce_sumla 0, a, b
19097 // mul (zext a, sext b) -> partial_reduce_sumla 0, b, a (swapped)
19098 if (InVec.getOpcode() != ISD::MUL)
19099 return SDValue();
19100
19101 SDValue A = InVec.getOperand(0);
19102 SDValue B = InVec.getOperand(1);
19103
19104 if (!ISD::isExtOpcode(A.getOpcode()))
19105 return SDValue();
19106
19107 EVT OpVT = A.getOperand(0).getValueType();
19108 if (OpVT.getVectorElementType() != MVT::i8 ||
19109 OpVT != B.getOperand(0).getValueType() ||
19110 !TLI.isTypeLegal(A.getValueType()))
19111 return SDValue();
19112
19113 unsigned Opc;
19114 if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
19115 Opc = ISD::PARTIAL_REDUCE_SMLA;
19116 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19117 B.getOpcode() == ISD::ZERO_EXTEND)
19118 Opc = ISD::PARTIAL_REDUCE_UMLA;
19119 else if (A.getOpcode() == ISD::SIGN_EXTEND &&
19120 B.getOpcode() == ISD::ZERO_EXTEND)
19121 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19122 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19123 B.getOpcode() == ISD::SIGN_EXTEND) {
19124 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19125 std::swap(A, B);
19126 } else
19127 return SDValue();
19128
19129 MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
19130 return DAG.getNode(
19131 Opc, DL, ResVT,
19132 {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
19133}
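// Worked example (illustrative): for v16i8 values a and b,
// vecreduce.add (mul (sext a), (sext b)) is rewritten so the products
// accumulate into a v4i32 partial sum (a partial_reduce_smla, selectable as
// vqdot.vv with Zvqdotq), and only that v4i32 value is reduced at the end.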
19134
19135 static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG,
19136 const RISCVSubtarget &Subtarget,
19137 const RISCVTargetLowering &TLI) {
19138 if (!Subtarget.hasStdExtZvqdotq())
19139 return SDValue();
19140
19141 SDLoc DL(N);
19142 EVT VT = N->getValueType(0);
19143 SDValue InVec = N->getOperand(0);
19144 if (SDValue V = foldReduceOperandViaVQDOT(InVec, DL, DAG, Subtarget, TLI))
19145 return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
19146 return SDValue();
19147}
19148
19149 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
19150 const RISCVSubtarget &Subtarget,
19151 const RISCVTargetLowering &TLI) {
19152 SDValue InVec = N->getOperand(0);
19153 SDValue InVal = N->getOperand(1);
19154 SDValue EltNo = N->getOperand(2);
19155 SDLoc DL(N);
19156
19157 EVT VT = InVec.getValueType();
19158 if (VT.isScalableVector())
19159 return SDValue();
19160
19161 if (!InVec.hasOneUse())
19162 return SDValue();
19163
19164 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
19165 // move the insert_vector_elts into the arms of the binop. Note that
19166 // the new RHS must be a constant.
19167 const unsigned InVecOpcode = InVec->getOpcode();
19168 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
19169 InVal.hasOneUse()) {
19170 SDValue InVecLHS = InVec->getOperand(0);
19171 SDValue InVecRHS = InVec->getOperand(1);
19172 SDValue InValLHS = InVal->getOperand(0);
19173 SDValue InValRHS = InVal->getOperand(1);
19174
19175 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
19176 return SDValue();
19177 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
19178 return SDValue();
19179 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19180 // have different LHS and RHS types.
19181 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
19182 return SDValue();
19184 InVecLHS, InValLHS, EltNo);
19186 InVecRHS, InValRHS, EltNo);
19187 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
19188 }
19189
19190 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
19191 // move the insert_vector_elt to the source operand of the concat_vector.
19192 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
19193 return SDValue();
19194
19195 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19196 if (!IndexC)
19197 return SDValue();
19198 unsigned Elt = IndexC->getZExtValue();
19199
19200 EVT ConcatVT = InVec.getOperand(0).getValueType();
19201 if (ConcatVT.getVectorElementType() != InVal.getValueType())
19202 return SDValue();
19203 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19204 unsigned NewIdx = Elt % ConcatNumElts;
19205
19206 unsigned ConcatOpIdx = Elt / ConcatNumElts;
19207 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
19208 ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);
19209
19210 SmallVector<SDValue> ConcatOps(InVec->ops());
19211 ConcatOps[ConcatOpIdx] = ConcatOp;
19212 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19213}
19214
19215// If we're concatenating a series of vector loads like
19216// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
19217// Then we can turn this into a strided load by widening the vector elements
19218// vlse32 p, stride=n
19219 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
19220 const RISCVSubtarget &Subtarget,
19221 const RISCVTargetLowering &TLI) {
19222 SDLoc DL(N);
19223 EVT VT = N->getValueType(0);
19224
19225 // Only perform this combine on legal MVTs.
19226 if (!TLI.isTypeLegal(VT))
19227 return SDValue();
19228
19229 // TODO: Potentially extend this to scalable vectors
19230 if (VT.isScalableVector())
19231 return SDValue();
19232
19233 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
19234 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
19235 !SDValue(BaseLd, 0).hasOneUse())
19236 return SDValue();
19237
19238 EVT BaseLdVT = BaseLd->getValueType(0);
19239
19240 // Go through the loads and check that they're strided
19241 SmallVector<LoadSDNode *> Lds;
19242 Lds.push_back(BaseLd);
19243 Align Align = BaseLd->getAlign();
19244 for (SDValue Op : N->ops().drop_front()) {
19245 auto *Ld = dyn_cast<LoadSDNode>(Op);
19246 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
19247 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
19248 Ld->getValueType(0) != BaseLdVT)
19249 return SDValue();
19250
19251 Lds.push_back(Ld);
19252
19253 // The common alignment is the most restrictive (smallest) of all the loads
19254 Align = std::min(Align, Ld->getAlign());
19255 }
19256
19257 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
19258 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
19259 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
19260 // If the load ptrs can be decomposed into a common (Base + Index) with a
19261 // common constant stride, then return the constant stride.
19262 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
19263 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
19264 if (BIO1.equalBaseIndex(BIO2, DAG))
19265 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
19266
19267 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
19268 SDValue P1 = Ld1->getBasePtr();
19269 SDValue P2 = Ld2->getBasePtr();
19270 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
19271 return {{P2.getOperand(1), false}};
19272 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
19273 return {{P1.getOperand(1), true}};
19274
19275 return std::nullopt;
19276 };
19277
19278 // Get the distance between the first and second loads
19279 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
19280 if (!BaseDiff)
19281 return SDValue();
19282
19283 // Check all the loads are the same distance apart
19284 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
19285 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
19286 return SDValue();
19287
19288 // TODO: At this point, we've successfully matched a generalized gather
19289 // load. Maybe we should emit that, and then move the specialized
19290 // matchers above and below into a DAG combine?
19291
19292 // Get the widened scalar type, e.g. v4i8 -> i64
19293 unsigned WideScalarBitWidth =
19294 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
19295 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
19296
19297 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
19298 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
19299 if (!TLI.isTypeLegal(WideVecVT))
19300 return SDValue();
19301
19302 // Check that the operation is legal
19303 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
19304 return SDValue();
19305
19306 auto [StrideVariant, MustNegateStride] = *BaseDiff;
19307 SDValue Stride =
19308 std::holds_alternative<SDValue>(StrideVariant)
19309 ? std::get<SDValue>(StrideVariant)
19310 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
19311 Lds[0]->getOffset().getValueType());
19312 if (MustNegateStride)
19313 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
19314
19315 SDValue AllOneMask =
19316 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
19317 DAG.getConstant(1, DL, MVT::i1));
19318
19319 uint64_t MemSize;
19320 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
19321 ConstStride && ConstStride->getSExtValue() >= 0)
19322 // total size = (elsize * n) + (stride - elsize) * (n-1)
19323 // = elsize + stride * (n-1)
19324 MemSize = WideScalarVT.getSizeInBits() +
19325 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
19326 else
19327 // If Stride isn't constant, then we can't know how much it will load
19328 MemSize = MemoryLocation::UnknownSize;
19329
19330 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
19331 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
19332 Align);
19333
19334 SDValue StridedLoad = DAG.getStridedLoadVP(
19335 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
19336 AllOneMask,
19337 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
19338
19339 for (SDValue Ld : N->ops())
19340 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
19341
19342 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
19343}
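// Worked example (illustrative): concat_vectors (load v4i8, p),
// (load v4i8, p+64), (load v4i8, p+128), (load v4i8, p+192) widens each v4i8
// group to a single i32 lane and becomes one v4i32 strided load from p with
// stride 64.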
19344
19345 static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
19346 const RISCVSubtarget &Subtarget,
19347 const RISCVTargetLowering &TLI) {
19348 SDLoc DL(N);
19349 EVT VT = N->getValueType(0);
19350 const unsigned ElementSize = VT.getScalarSizeInBits();
19351 const unsigned NumElts = VT.getVectorNumElements();
19352 SDValue V1 = N->getOperand(0);
19353 SDValue V2 = N->getOperand(1);
19354 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
19355 MVT XLenVT = Subtarget.getXLenVT();
19356
19357 // Recognize a disguised select of add/sub.
19358 bool SwapCC;
19359 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
19360 matchSelectAddSub(V1, V2, SwapCC)) {
19361 SDValue Sub = SwapCC ? V1 : V2;
19362 SDValue A = Sub.getOperand(0);
19363 SDValue B = Sub.getOperand(1);
19364
19365 SmallVector<SDValue> MaskVals;
19366 for (int MaskIndex : Mask) {
19367 bool SelectMaskVal = (MaskIndex < (int)NumElts);
19368 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
19369 }
19370 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
19371 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
19372 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
19373
19374 // Arrange the select such that we can match a masked
19375 // vrsub.vi to perform the conditional negate
19376 SDValue NegB = DAG.getNegative(B, DL, VT);
19377 if (!SwapCC)
19378 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19379 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19380 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19381 }
19382
19383 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
19384 // during the combine phase before type legalization, and relies on
19385 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
19386 // for the source mask.
19387 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
19388 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
19389 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
19390 return SDValue();
19391
19392 SmallVector<int, 8> NewMask;
19393 narrowShuffleMaskElts(2, Mask, NewMask);
19394
19395 LLVMContext &C = *DAG.getContext();
19396 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
19397 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
19398 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
19399 DAG.getBitcast(NewVT, V2), NewMask);
19400 return DAG.getBitcast(VT, Res);
19401}
19402
19403 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
19404 const RISCVSubtarget &Subtarget) {
19405 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19406
19407 if (N->getValueType(0).isFixedLengthVector())
19408 return SDValue();
19409
19410 SDValue Addend = N->getOperand(0);
19411 SDValue MulOp = N->getOperand(1);
19412
19413 if (N->getOpcode() == RISCVISD::ADD_VL) {
19414 SDValue AddPassthruOp = N->getOperand(2);
19415 if (!AddPassthruOp.isUndef())
19416 return SDValue();
19417 }
19418
19419 auto IsVWMulOpc = [](unsigned Opc) {
19420 switch (Opc) {
19421 case RISCVISD::VWMUL_VL:
19422 case RISCVISD::VWMULU_VL:
19423 case RISCVISD::VWMULSU_VL:
19424 return true;
19425 default:
19426 return false;
19427 }
19428 };
19429
19430 if (!IsVWMulOpc(MulOp.getOpcode()))
19431 std::swap(Addend, MulOp);
19432
19433 if (!IsVWMulOpc(MulOp.getOpcode()))
19434 return SDValue();
19435
19436 SDValue MulPassthruOp = MulOp.getOperand(2);
19437
19438 if (!MulPassthruOp.isUndef())
19439 return SDValue();
19440
19441 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19442 const RISCVSubtarget &Subtarget) {
19443 if (N->getOpcode() == ISD::ADD) {
19444 SDLoc DL(N);
19445 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19446 Subtarget);
19447 }
19448 return std::make_pair(N->getOperand(3), N->getOperand(4));
19449 }(N, DAG, Subtarget);
19450
19451 SDValue MulMask = MulOp.getOperand(3);
19452 SDValue MulVL = MulOp.getOperand(4);
19453
19454 if (AddMask != MulMask || AddVL != MulVL)
19455 return SDValue();
19456
19457 const auto &TSInfo =
19458 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
19459 unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());
19460
19461 SDLoc DL(N);
19462 EVT VT = N->getValueType(0);
19463 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
19464 AddVL};
19465 return DAG.getNode(Opc, DL, VT, Ops);
19466}
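// Worked example (illustrative): (add Acc, (vwmul_vl A, B, undef, M, VL))
// with the same mask and VL on both nodes is folded into a single widening
// multiply-accumulate node taking A, B and Acc, so the multiply and the add
// are selected as one vwmacc-style instruction.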
19467
19468 static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG,
19469 const RISCVSubtarget &Subtarget) {
19470
19471 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19472
19473 if (!N->getValueType(0).isVector())
19474 return SDValue();
19475
19476 SDValue Addend = N->getOperand(0);
19477 SDValue DotOp = N->getOperand(1);
19478
19479 if (N->getOpcode() == RISCVISD::ADD_VL) {
19480 SDValue AddPassthruOp = N->getOperand(2);
19481 if (!AddPassthruOp.isUndef())
19482 return SDValue();
19483 }
19484
19485 auto IsVqdotqOpc = [](unsigned Opc) {
19486 switch (Opc) {
19487 case RISCVISD::VQDOT_VL:
19488 case RISCVISD::VQDOTU_VL:
19489 case RISCVISD::VQDOTSU_VL:
19490 return true;
19491 default:
19492 return false;
19493 }
19494 };
19495
19496 if (!IsVqdotqOpc(DotOp.getOpcode()))
19497 std::swap(Addend, DotOp);
19498
19499 if (!IsVqdotqOpc(DotOp.getOpcode()))
19500 return SDValue();
19501
19502 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19503 const RISCVSubtarget &Subtarget) {
19504 if (N->getOpcode() == ISD::ADD) {
19505 SDLoc DL(N);
19506 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19507 Subtarget);
19508 }
19509 return std::make_pair(N->getOperand(3), N->getOperand(4));
19510 }(N, DAG, Subtarget);
19511
19512 SDValue MulVL = DotOp.getOperand(4);
19513 if (AddVL != MulVL)
19514 return SDValue();
19515
19516 if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
19517 AddMask.getOperand(0) != MulVL)
19518 return SDValue();
19519
19520 SDValue AccumOp = DotOp.getOperand(2);
19521 SDLoc DL(N);
19522 EVT VT = N->getValueType(0);
19523 Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
19524 DAG.getUNDEF(VT), AddMask, AddVL);
19525
19526 SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
19527 DotOp.getOperand(3), DotOp->getOperand(4)};
19528 return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
19529}
19530
19531static bool
19532 legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
19533 ISD::MemIndexType &IndexType,
19534 RISCVTargetLowering::DAGCombinerInfo &DCI) {
19535 if (!DCI.isBeforeLegalize())
19536 return false;
19537
19538 SelectionDAG &DAG = DCI.DAG;
19539 const MVT XLenVT =
19540 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
19541
19542 const EVT IndexVT = Index.getValueType();
19543
19544 // RISC-V indexed loads only support the "unsigned unscaled" addressing
19545 // mode, so anything else must be manually legalized.
19546 if (!isIndexTypeSigned(IndexType))
19547 return false;
19548
19549 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
19550 // Any index legalization should first promote to XLenVT, so we don't lose
19551 // bits when scaling. This may create an illegal index type so we let
19552 // LLVM's legalization take care of the splitting.
19553 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
19554 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
19555 IndexVT.changeVectorElementType(XLenVT), Index);
19556 }
19557 IndexType = ISD::UNSIGNED_SCALED;
19558 return true;
19559}
19560
19561/// Match the index vector of a scatter or gather node as the shuffle mask
19562/// which performs the rearrangement if possible. Will only match if
19563/// all lanes are touched, and thus replacing the scatter or gather with
19564/// a unit strided access and shuffle is legal.
19565static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
19566 SmallVector<int> &ShuffleMask) {
19567 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19568 return false;
19569 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19570 return false;
19571
19572 const unsigned ElementSize = VT.getScalarStoreSize();
19573 const unsigned NumElems = VT.getVectorNumElements();
19574
19575 // Create the shuffle mask and check all bits active
19576 assert(ShuffleMask.empty());
19577 BitVector ActiveLanes(NumElems);
19578 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19579 // TODO: We've found an active bit of UB, and could be
19580 // more aggressive here if desired.
19581 if (Index->getOperand(i)->isUndef())
19582 return false;
19583 uint64_t C = Index->getConstantOperandVal(i);
19584 if (C % ElementSize != 0)
19585 return false;
19586 C = C / ElementSize;
19587 if (C >= NumElems)
19588 return false;
19589 ShuffleMask.push_back(C);
19590 ActiveLanes.set(C);
19591 }
19592 return ActiveLanes.all();
19593}
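// Worked example: an i32 gather (4-byte elements) with constant byte indices
// <4, 0, 12, 8> and an all-ones mask touches every lane exactly once, so it
// matches ShuffleMask = <1, 0, 3, 2> and can be replaced by a unit-strided
// access plus a shuffle.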
19594
19595/// Match the index of a gather or scatter operation as an operation
19596/// with twice the element width and half the number of elements. This is
19597/// generally profitable (if legal) because these operations are linear
19598 // in VL, so even if we cause some extra VTYPE/VL toggles, we still
19599/// come out ahead.
19600static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
19601 Align BaseAlign, const RISCVSubtarget &ST) {
19602 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19603 return false;
19604 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19605 return false;
19606
19607 // Attempt a doubling. If we can use an element type 4x or 8x in
19608 // size, this will happen via multiple iterations of the transform.
19609 const unsigned NumElems = VT.getVectorNumElements();
19610 if (NumElems % 2 != 0)
19611 return false;
19612
19613 const unsigned ElementSize = VT.getScalarStoreSize();
19614 const unsigned WiderElementSize = ElementSize * 2;
19615 if (WiderElementSize > ST.getELen()/8)
19616 return false;
19617
19618 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
19619 return false;
19620
19621 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19622 // TODO: We've found an active bit of UB, and could be
19623 // more aggressive here if desired.
19624 if (Index->getOperand(i)->isUndef())
19625 return false;
19626 // TODO: This offset check is too strict if we support fully
19627 // misaligned memory operations.
19628 uint64_t C = Index->getConstantOperandVal(i);
19629 if (i % 2 == 0) {
19630 if (C % WiderElementSize != 0)
19631 return false;
19632 continue;
19633 }
19634 uint64_t Last = Index->getConstantOperandVal(i-1);
19635 if (C != Last + ElementSize)
19636 return false;
19637 }
19638 return true;
19639}
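// Worked example (illustrative): an i32 gather with constant byte indices
// <0, 4, 16, 20> pairs up adjacent lanes (<0, 4> and <16, 20> are
// contiguous), so it can be treated as an i64 gather with indices <0, 16>,
// halving the number of indexed elements, provided ELEN and the base
// alignment allow it.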
19640
19641 // trunc (sra (sext X), (zext Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
19642 // This benefits the cases where X and Y are both low-precision vectors of
19643 // the same value type. Since the truncate would be lowered into n levels of
19644 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such
19645 // a pattern would otherwise be expanded into a series of "vsetvli" and
19646 // "vnsrl" instructions before reaching this point.
19647 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
19648 SDValue Mask = N->getOperand(1);
19649 SDValue VL = N->getOperand(2);
19650
19651 bool IsVLMAX = isAllOnesConstant(VL) ||
19652 (isa<RegisterSDNode>(VL) &&
19653 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
19654 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
19655 Mask.getOperand(0) != VL)
19656 return SDValue();
19657
19658 auto IsTruncNode = [&](SDValue V) {
19659 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19660 V.getOperand(1) == Mask && V.getOperand(2) == VL;
19661 };
19662
19663 SDValue Op = N->getOperand(0);
19664
19665 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
19666 // to distinguish such pattern.
19667 while (IsTruncNode(Op)) {
19668 if (!Op.hasOneUse())
19669 return SDValue();
19670 Op = Op.getOperand(0);
19671 }
19672
19673 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
19674 return SDValue();
19675
19676 SDValue N0 = Op.getOperand(0);
19677 SDValue N1 = Op.getOperand(1);
19678 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
19679 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
19680 return SDValue();
19681
19682 SDValue N00 = N0.getOperand(0);
19683 SDValue N10 = N1.getOperand(0);
19684 if (!N00.getValueType().isVector() ||
19685 N00.getValueType() != N10.getValueType() ||
19686 N->getValueType(0) != N10.getValueType())
19687 return SDValue();
19688
19689 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
19690 SDValue SMin =
19691 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
19692 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
19693 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
19694}
19695
19696// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
19697// maximum value for the truncated type.
19698// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
19699// is the signed maximum value for the truncated type and C2 is the signed
19700// minimum value.
19701 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
19702 const RISCVSubtarget &Subtarget) {
19703 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
19704
19705 MVT VT = N->getSimpleValueType(0);
19706
19707 SDValue Mask = N->getOperand(1);
19708 SDValue VL = N->getOperand(2);
19709
19710 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
19711 APInt &SplatVal) {
19712 if (V.getOpcode() != Opc &&
19713 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
19714 V.getOperand(3) == Mask && V.getOperand(4) == VL))
19715 return SDValue();
19716
19717 SDValue Op = V.getOperand(1);
19718
19719 // Peek through conversion between fixed and scalable vectors.
19720 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
19721 isNullConstant(Op.getOperand(2)) &&
19722 Op.getOperand(1).getValueType().isFixedLengthVector() &&
19723 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19724 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
19725 isNullConstant(Op.getOperand(1).getOperand(1)))
19726 Op = Op.getOperand(1).getOperand(0);
19727
19728 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
19729 return V.getOperand(0);
19730
19731 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
19732 Op.getOperand(2) == VL) {
19733 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
19734 SplatVal =
19735 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
19736 return V.getOperand(0);
19737 }
19738 }
19739
19740 return SDValue();
19741 };
19742
19743 SDLoc DL(N);
19744
19745 auto DetectUSatPattern = [&](SDValue V) {
19746 APInt LoC, HiC;
19747
19748 // Simple case, V is a UMIN.
19749 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
19750 if (HiC.isMask(VT.getScalarSizeInBits()))
19751 return UMinOp;
19752
19753 // If we have an SMAX that removes negative numbers first, then we can match
19754 // SMIN instead of UMIN.
19755 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19756 if (SDValue SMaxOp =
19757 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19758 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
19759 return SMinOp;
19760
19761 // If we have an SMIN before an SMAX and the SMAX constant is less than or
19762 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
19763 // first.
19764 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19765 if (SDValue SMinOp =
19766 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19767 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
19768 HiC.uge(LoC))
19769 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
19770 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
19771 Mask, VL);
19772
19773 return SDValue();
19774 };
19775
19776 auto DetectSSatPattern = [&](SDValue V) {
19777 unsigned NumDstBits = VT.getScalarSizeInBits();
19778 unsigned NumSrcBits = V.getScalarValueSizeInBits();
19779 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
19780 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
19781
19782 APInt HiC, LoC;
19783 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19784 if (SDValue SMaxOp =
19785 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19786 if (HiC == SignedMax && LoC == SignedMin)
19787 return SMaxOp;
19788
19789 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19790 if (SDValue SMinOp =
19791 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19792 if (HiC == SignedMax && LoC == SignedMin)
19793 return SMinOp;
19794
19795 return SDValue();
19796 };
19797
19798 SDValue Src = N->getOperand(0);
19799
19800 // Look through multiple layers of truncates.
19801 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19802 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
19803 Src.hasOneUse())
19804 Src = Src.getOperand(0);
19805
19806 SDValue Val;
19807 unsigned ClipOpc;
19808 if ((Val = DetectUSatPattern(Src)))
19809 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
19810 else if ((Val = DetectSSatPattern(Src)))
19811 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
19812 else
19813 return SDValue();
19814
19815 MVT ValVT = Val.getSimpleValueType();
19816
19817 do {
19818 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
19819 ValVT = ValVT.changeVectorElementType(ValEltVT);
19820 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
19821 } while (ValVT != VT);
19822
19823 return Val;
19824}
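// Worked example (illustrative): when truncating v4i16 to v4i8,
// (umin X, 255) becomes a single vnclipu, and (smin (smax X, -128), 127)
// becomes a single vnclip, so the saturation and the narrowing are performed
// by one instruction.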
19825
19826// Convert
19827// (iX ctpop (bitcast (vXi1 A)))
19828// ->
19829// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19830// and
19831 // (iN reduce.add (zext (vXi1 A) to vXiN))
19832// ->
19833// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19834// FIXME: It's complicated to match all the variations of this after type
19835// legalization so we only handle the pre-type legalization pattern, but that
19836// requires the fixed vector type to be legal.
19837 static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
19838 const RISCVSubtarget &Subtarget) {
19839 unsigned Opc = N->getOpcode();
19840 assert((Opc == ISD::CTPOP || Opc == ISD::VECREDUCE_ADD) &&
19841 "Unexpected opcode");
19842 EVT VT = N->getValueType(0);
19843 if (!VT.isScalarInteger())
19844 return SDValue();
19845
19846 SDValue Src = N->getOperand(0);
19847
19848 if (Opc == ISD::CTPOP) {
19849 // Peek through zero_extend. It doesn't change the count.
19850 if (Src.getOpcode() == ISD::ZERO_EXTEND)
19851 Src = Src.getOperand(0);
19852
19853 if (Src.getOpcode() != ISD::BITCAST)
19854 return SDValue();
19855 Src = Src.getOperand(0);
19856 } else if (Opc == ISD::VECREDUCE_ADD) {
19857 if (Src.getOpcode() != ISD::ZERO_EXTEND)
19858 return SDValue();
19859 Src = Src.getOperand(0);
19860 }
19861
19862 EVT SrcEVT = Src.getValueType();
19863 if (!SrcEVT.isSimple())
19864 return SDValue();
19865
19866 MVT SrcMVT = SrcEVT.getSimpleVT();
19867 // Make sure the input is an i1 vector.
19868 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
19869 return SDValue();
19870
19871 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19872 if (!TLI.isTypeLegal(SrcMVT))
19873 return SDValue();
19874
19875 // Check that destination type is large enough to hold result without
19876 // overflow.
19877 if (Opc == ISD::VECREDUCE_ADD) {
19878 unsigned EltSize = SrcMVT.getScalarSizeInBits();
19879 unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
19880 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
19881 unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
19882 ? SrcMVT.getVectorNumElements()
19883 : RISCVTargetLowering::computeVLMAX(
19884 VectorBitsMax, EltSize, MinSize);
19885 if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
19886 return SDValue();
19887 }
19888
19889 MVT ContainerVT = SrcMVT;
19890 if (SrcMVT.isFixedLengthVector()) {
19891 ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
19892 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
19893 }
19894
19895 SDLoc DL(N);
19896 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
19897
19898 MVT XLenVT = Subtarget.getXLenVT();
19899 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
19900 return DAG.getZExtOrTrunc(Pop, DL, VT);
19901}
19902
19903 static SDValue performSHLCombine(SDNode *N,
19904 TargetLowering::DAGCombinerInfo &DCI,
19905 const RISCVSubtarget &Subtarget) {
19906 // (shl (zext x), y) -> (vwsll x, y)
19907 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
19908 return V;
19909
19910 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
19911 // (shl (zext x), C) -> (vwmulu x, 1u << C)
19912
19913 if (!DCI.isAfterLegalizeDAG())
19914 return SDValue();
19915
19916 SDValue LHS = N->getOperand(0);
19917 if (!LHS.hasOneUse())
19918 return SDValue();
19919 unsigned Opcode;
19920 switch (LHS.getOpcode()) {
19921 case ISD::SIGN_EXTEND:
19922 case RISCVISD::VSEXT_VL:
19923 Opcode = RISCVISD::VWMULSU_VL;
19924 break;
19925 case ISD::ZERO_EXTEND:
19926 case RISCVISD::VZEXT_VL:
19927 Opcode = RISCVISD::VWMULU_VL;
19928 break;
19929 default:
19930 return SDValue();
19931 }
19932
19933 SDValue RHS = N->getOperand(1);
19934 APInt ShAmt;
19935 uint64_t ShAmtInt;
19936 if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
19937 ShAmtInt = ShAmt.getZExtValue();
19938 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
19939 RHS.getOperand(1).getOpcode() == ISD::Constant)
19940 ShAmtInt = RHS.getConstantOperandVal(1);
19941 else
19942 return SDValue();
19943
19944 // Better foldings:
19945 // (shl (sext x), 1) -> (vwadd x, x)
19946 // (shl (zext x), 1) -> (vwaddu x, x)
19947 if (ShAmtInt <= 1)
19948 return SDValue();
19949
19950 SDValue NarrowOp = LHS.getOperand(0);
19951 MVT NarrowVT = NarrowOp.getSimpleValueType();
19952 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
19953 if (ShAmtInt >= NarrowBits)
19954 return SDValue();
19955 MVT VT = N->getSimpleValueType(0);
19956 if (NarrowBits * 2 != VT.getScalarSizeInBits())
19957 return SDValue();
19958
19959 SelectionDAG &DAG = DCI.DAG;
19960 SDLoc DL(N);
19961 SDValue Passthru, Mask, VL;
19962 switch (N->getOpcode()) {
19963 case ISD::SHL:
19964 Passthru = DAG.getUNDEF(VT);
19965 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
19966 break;
19967 case RISCVISD::SHL_VL:
19968 Passthru = N->getOperand(2);
19969 Mask = N->getOperand(3);
19970 VL = N->getOperand(4);
19971 break;
19972 default:
19973 llvm_unreachable("Expected SHL");
19974 }
19975 return DAG.getNode(Opcode, DL, VT, NarrowOp,
19976 DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
19977 Passthru, Mask, VL);
19978}
19979
19980 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
19981 DAGCombinerInfo &DCI) const {
19982 SelectionDAG &DAG = DCI.DAG;
19983 const MVT XLenVT = Subtarget.getXLenVT();
19984 SDLoc DL(N);
19985
19986 // Helper to call SimplifyDemandedBits on an operand of N where only some low
19987 // bits are demanded. N will be added to the Worklist if it was not deleted.
19988 // Caller should return SDValue(N, 0) if this returns true.
19989 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
19990 SDValue Op = N->getOperand(OpNo);
19991 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
19992 if (!SimplifyDemandedBits(Op, Mask, DCI))
19993 return false;
19994
19995 if (N->getOpcode() != ISD::DELETED_NODE)
19996 DCI.AddToWorklist(N);
19997 return true;
19998 };
19999
20000 switch (N->getOpcode()) {
20001 default:
20002 break;
20003 case RISCVISD::SplitF64: {
20004 SDValue Op0 = N->getOperand(0);
20005 // If the input to SplitF64 is just BuildPairF64 then the operation is
20006 // redundant. Instead, use BuildPairF64's operands directly.
20007 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
20008 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
20009
20010 if (Op0->isUndef()) {
20011 SDValue Lo = DAG.getUNDEF(MVT::i32);
20012 SDValue Hi = DAG.getUNDEF(MVT::i32);
20013 return DCI.CombineTo(N, Lo, Hi);
20014 }
20015
20016 // It's cheaper to materialise two 32-bit integers than to load a double
20017 // from the constant pool and transfer it to integer registers through the
20018 // stack.
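// As an illustration (constant chosen for this sketch): splitting the f64
// constant 1.0, whose bit pattern is 0x3FF0000000000000, yields
// Lo = 0x00000000 and Hi = 0x3FF00000, both of which are cheap scalar
// materialisations (Hi is a single lui).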
20019 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
20020 APInt V = C->getValueAPF().bitcastToAPInt();
20021 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
20022 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
20023 return DCI.CombineTo(N, Lo, Hi);
20024 }
20025
20026 // This is a target-specific version of a DAGCombine performed in
20027 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20028 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20029 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20030 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20031 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
20032 break;
20033 SDValue NewSplitF64 =
20034 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
20035 Op0.getOperand(0));
20036 SDValue Lo = NewSplitF64.getValue(0);
20037 SDValue Hi = NewSplitF64.getValue(1);
20038 APInt SignBit = APInt::getSignMask(32);
20039 if (Op0.getOpcode() == ISD::FNEG) {
20040 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
20041 DAG.getConstant(SignBit, DL, MVT::i32));
20042 return DCI.CombineTo(N, Lo, NewHi);
20043 }
20044 assert(Op0.getOpcode() == ISD::FABS);
20045 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
20046 DAG.getConstant(~SignBit, DL, MVT::i32));
20047 return DCI.CombineTo(N, Lo, NewHi);
20048 }
20049 case RISCVISD::SLLW:
20050 case RISCVISD::SRAW:
20051 case RISCVISD::SRLW:
20052 case RISCVISD::RORW:
20053 case RISCVISD::ROLW: {
20054 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
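// For instance (illustrative): if the LHS is (and x, 0xffffffff) or the RHS
// is (and y, 31), SimplifyDemandedBits can strip those masks here because the
// *W shift ignores the upper bits anyway.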
20055 if (SimplifyDemandedLowBitsHelper(0, 32) ||
20056 SimplifyDemandedLowBitsHelper(1, 5))
20057 return SDValue(N, 0);
20058
20059 break;
20060 }
20061 case RISCVISD::CLZW:
20062 case RISCVISD::CTZW: {
20063 // Only the lower 32 bits of the first operand are read
20064 if (SimplifyDemandedLowBitsHelper(0, 32))
20065 return SDValue(N, 0);
20066 break;
20067 }
20068 case RISCVISD::FMV_W_X_RV64: {
20069 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
20070 // conversion is unnecessary and can be replaced with the
20071 // FMV_X_ANYEXTW_RV64 operand.
20072 SDValue Op0 = N->getOperand(0);
20073 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
20074 return Op0.getOperand(0);
20075 break;
20076 }
20077 case RISCVISD::FMV_X_ANYEXTH:
20078 case RISCVISD::FMV_X_ANYEXTW_RV64: {
20079 SDLoc DL(N);
20080 SDValue Op0 = N->getOperand(0);
20081 MVT VT = N->getSimpleValueType(0);
20082
20083 // Constant fold.
20084 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
20085 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
20086 return DAG.getConstant(Val, DL, VT);
20087 }
20088
20089 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
20090 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
20091 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
20092 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
20093 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
20094 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
20095 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
20096 assert(Op0.getOperand(0).getValueType() == VT &&
20097 "Unexpected value type!");
20098 return Op0.getOperand(0);
20099 }
20100
20101 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
20102 cast<LoadSDNode>(Op0)->isSimple()) {
20103 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
20104 auto *LN0 = cast<LoadSDNode>(Op0);
20105 SDValue Load =
20106 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
20107 LN0->getBasePtr(), IVT, LN0->getMemOperand());
20108 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
20109 return Load;
20110 }
20111
20112 // This is a target-specific version of a DAGCombine performed in
20113 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20114 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20115 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
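// Illustrative instance (numbers computed for this sketch): for
// FMV_X_ANYEXTW_RV64 the result type is i64 and FPBits is 32, so SignBit is
// sext(0x80000000) = 0xFFFFFFFF80000000. An fneg then becomes XOR with that
// mask and an fabs becomes AND with ~SignBit = 0x000000007FFFFFFF, applied
// directly to the moved integer value.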
20116 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20117 !Op0.getNode()->hasOneUse())
20118 break;
20119 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
20120 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
20121 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
20122 if (Op0.getOpcode() == ISD::FNEG)
20123 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
20124 DAG.getConstant(SignBit, DL, VT));
20125
20126 assert(Op0.getOpcode() == ISD::FABS);
20127 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
20128 DAG.getConstant(~SignBit, DL, VT));
20129 }
20130 case ISD::ABS: {
20131 EVT VT = N->getValueType(0);
20132 SDValue N0 = N->getOperand(0);
20133 // abs (sext) -> zext (abs)
20134 // abs (zext) -> zext (handled elsewhere)
20135 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
20136 SDValue Src = N0.getOperand(0);
20137 SDLoc DL(N);
20138 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
20139 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
20140 }
20141 break;
20142 }
20143 case ISD::ADD: {
20144 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20145 return V;
20146 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
20147 return V;
20148 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20149 return V;
20150 return performADDCombine(N, DCI, Subtarget);
20151 }
20152 case ISD::SUB: {
20153 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20154 return V;
20155 return performSUBCombine(N, DAG, Subtarget);
20156 }
20157 case ISD::AND:
20158 return performANDCombine(N, DCI, Subtarget);
20159 case ISD::OR: {
20160 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20161 return V;
20162 return performORCombine(N, DCI, Subtarget);
20163 }
20164 case ISD::XOR:
20165 return performXORCombine(N, DAG, Subtarget);
20166 case ISD::MUL:
20167 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20168 return V;
20169 return performMULCombine(N, DAG, DCI, Subtarget);
20170 case ISD::SDIV:
20171 case ISD::UDIV:
20172 case ISD::SREM:
20173 case ISD::UREM:
20174 if (SDValue V = combineBinOpOfZExt(N, DAG))
20175 return V;
20176 break;
20177 case ISD::FMUL: {
20178 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
20179 SDValue N0 = N->getOperand(0);
20180 SDValue N1 = N->getOperand(1);
20181 if (N0->getOpcode() != ISD::FCOPYSIGN)
20182 std::swap(N0, N1);
20183 if (N0->getOpcode() != ISD::FCOPYSIGN)
20184 return SDValue();
20185 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
20186 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
20187 return SDValue();
20188 EVT VT = N->getValueType(0);
20189 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
20190 return SDValue();
20191 SDValue Sign = N0->getOperand(1);
20192 if (Sign.getValueType() != VT)
20193 return SDValue();
20194 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
20195 }
20196 case ISD::FADD:
20197 case ISD::UMAX:
20198 case ISD::UMIN:
20199 case ISD::SMAX:
20200 case ISD::SMIN:
20201 case ISD::FMAXNUM:
20202 case ISD::FMINNUM: {
20203 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
20204 return V;
20205 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
20206 return V;
20207 return SDValue();
20208 }
20209 case ISD::SETCC:
20210 return performSETCCCombine(N, DCI, Subtarget);
20211 case ISD::SIGN_EXTEND_INREG:
20212 return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
20213 case ISD::ZERO_EXTEND:
20214 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
20215 // type legalization. This is safe because fp_to_uint produces poison if
20216 // it overflows.
20217 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
20218 SDValue Src = N->getOperand(0);
20219 if (Src.getOpcode() == ISD::FP_TO_UINT &&
20220 isTypeLegal(Src.getOperand(0).getValueType()))
20221 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
20222 Src.getOperand(0));
20223 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
20224 isTypeLegal(Src.getOperand(1).getValueType())) {
20225 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
20226 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
20227 Src.getOperand(0), Src.getOperand(1));
20228 DCI.CombineTo(N, Res);
20229 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
20230 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
20231 return SDValue(N, 0); // Return N so it doesn't get rechecked.
20232 }
20233 }
20234 return SDValue();
20235 case RISCVISD::TRUNCATE_VECTOR_VL:
20236 if (SDValue V = combineTruncOfSraSext(N, DAG))
20237 return V;
20238 return combineTruncToVnclip(N, DAG, Subtarget);
20239 case ISD::VP_TRUNCATE:
20240 return performVP_TRUNCATECombine(N, DAG, Subtarget);
20241 case ISD::TRUNCATE:
20242 return performTRUNCATECombine(N, DAG, Subtarget);
20243 case ISD::SELECT:
20244 return performSELECTCombine(N, DAG, Subtarget);
20245 case ISD::VSELECT:
20246 return performVSELECTCombine(N, DAG);
20247 case RISCVISD::CZERO_EQZ:
20248 case RISCVISD::CZERO_NEZ: {
20249 SDValue Val = N->getOperand(0);
20250 SDValue Cond = N->getOperand(1);
20251
20252 unsigned Opc = N->getOpcode();
20253
20254 // czero_eqz x, x -> x
20255 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
20256 return Val;
20257
20258 unsigned InvOpc =
20259 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
20260
20261 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
20262 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
20263 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
20264 SDValue NewCond = Cond.getOperand(0);
20265 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
20266 if (DAG.MaskedValueIsZero(NewCond, Mask))
20267 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
20268 }
20269 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
20270 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
20271 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
20272 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
20273 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
20274 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
20275 if (ISD::isIntEqualitySetCC(CCVal))
20276 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
20277 N->getValueType(0), Val, Cond.getOperand(0));
20278 }
20279 return SDValue();
20280 }
20281 case RISCVISD::SELECT_CC: {
20282 // Transform
20283 SDValue LHS = N->getOperand(0);
20284 SDValue RHS = N->getOperand(1);
20285 SDValue CC = N->getOperand(2);
20286 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
20287 SDValue TrueV = N->getOperand(3);
20288 SDValue FalseV = N->getOperand(4);
20289 SDLoc DL(N);
20290 EVT VT = N->getValueType(0);
20291
20292 // If the True and False values are the same, we don't need a select_cc.
20293 if (TrueV == FalseV)
20294 return TrueV;
20295
20296 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
20297 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
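// A quick sanity check of the identity (illustrative reasoning): when x < 0,
// x >> (XLEN - 1) is all ones, so the AND keeps (y - z) and adding z yields y;
// when x >= 0 the shift yields 0, the AND yields 0, and the result is z.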
20298 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
20299 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
20300 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
20301 if (CCVal == ISD::CondCode::SETGE)
20302 std::swap(TrueV, FalseV);
20303
20304 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
20305 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
20306 // Only handle simm12; if a value is not in this range, it can be
20307 // treated as a register operand.
20308 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
20309 isInt<12>(TrueSImm - FalseSImm)) {
20310 SDValue SRA =
20311 DAG.getNode(ISD::SRA, DL, VT, LHS,
20312 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
20313 SDValue AND =
20314 DAG.getNode(ISD::AND, DL, VT, SRA,
20315 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
20316 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
20317 }
20318
20319 if (CCVal == ISD::CondCode::SETGE)
20320 std::swap(TrueV, FalseV);
20321 }
20322
20323 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20324 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
20325 {LHS, RHS, CC, TrueV, FalseV});
20326
20327 if (!Subtarget.hasConditionalMoveFusion()) {
20328 // (select c, -1, y) -> -c | y
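// (Illustrative reasoning for this family of folds: the setcc value c is 0 or
// 1, so -c is 0 or all ones. OR-ing -c with y gives y when the condition is
// false and -1 when it is true, matching the select; the AND-based folds
// below use the same trick with 0 in place of -1.)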
20329 if (isAllOnesConstant(TrueV)) {
20330 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20331 SDValue Neg = DAG.getNegative(C, DL, VT);
20332 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
20333 }
20334 // (select c, y, -1) -> -!c | y
20335 if (isAllOnesConstant(FalseV)) {
20336 SDValue C =
20337 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20338 SDValue Neg = DAG.getNegative(C, DL, VT);
20339 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
20340 }
20341
20342 // (select c, 0, y) -> -!c & y
20343 if (isNullConstant(TrueV)) {
20344 SDValue C =
20345 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20346 SDValue Neg = DAG.getNegative(C, DL, VT);
20347 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
20348 }
20349 // (select c, y, 0) -> -c & y
20350 if (isNullConstant(FalseV)) {
20351 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20352 SDValue Neg = DAG.getNegative(C, DL, VT);
20353 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
20354 }
20355 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
20356 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
20357 if (((isOneConstant(FalseV) && LHS == TrueV &&
20358 CCVal == ISD::CondCode::SETNE) ||
20359 (isOneConstant(TrueV) && LHS == FalseV &&
20360 CCVal == ISD::CondCode::SETEQ)) &&
20361 isNullConstant(RHS)) {
20362 // freeze it to be safe.
20363 LHS = DAG.getFreeze(LHS);
20364 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
20365 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
20366 }
20367 }
20368
20369 // If both true/false are an xor with 1, pull through the select.
20370 // This can occur after op legalization if both operands are setccs that
20371 // require an xor to invert.
20372 // FIXME: Generalize to other binary ops with identical operand?
20373 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
20374 TrueV.getOperand(1) == FalseV.getOperand(1) &&
20375 isOneConstant(TrueV.getOperand(1)) &&
20376 TrueV.hasOneUse() && FalseV.hasOneUse()) {
20377 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
20378 TrueV.getOperand(0), FalseV.getOperand(0));
20379 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
20380 }
20381
20382 return SDValue();
20383 }
20384 case RISCVISD::BR_CC: {
20385 SDValue LHS = N->getOperand(1);
20386 SDValue RHS = N->getOperand(2);
20387 SDValue CC = N->getOperand(3);
20388 SDLoc DL(N);
20389
20390 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20391 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
20392 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
20393
20394 return SDValue();
20395 }
20396 case ISD::BITREVERSE:
20397 return performBITREVERSECombine(N, DAG, Subtarget);
20398 case ISD::FP_TO_SINT:
20399 case ISD::FP_TO_UINT:
20400 return performFP_TO_INTCombine(N, DCI, Subtarget);
20401 case ISD::FP_TO_SINT_SAT:
20402 case ISD::FP_TO_UINT_SAT:
20403 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
20404 case ISD::FCOPYSIGN: {
20405 EVT VT = N->getValueType(0);
20406 if (!VT.isVector())
20407 break;
20408 // There is a form of VFSGNJ which injects the negated sign of its second
20409 // operand. Try and bubble any FNEG up after the extend/round to produce
20410 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
20411 // TRUNC=1.
20412 SDValue In2 = N->getOperand(1);
20413 // Avoid cases where the extend/round has multiple uses, as duplicating
20414 // those is typically more expensive than removing a fneg.
20415 if (!In2.hasOneUse())
20416 break;
20417 if (In2.getOpcode() != ISD::FP_EXTEND &&
20418 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
20419 break;
20420 In2 = In2.getOperand(0);
20421 if (In2.getOpcode() != ISD::FNEG)
20422 break;
20423 SDLoc DL(N);
20424 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
20425 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
20426 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
20427 }
20428 case ISD::MGATHER: {
20429 const auto *MGN = cast<MaskedGatherSDNode>(N);
20430 const EVT VT = N->getValueType(0);
20431 SDValue Index = MGN->getIndex();
20432 SDValue ScaleOp = MGN->getScale();
20433 ISD::MemIndexType IndexType = MGN->getIndexType();
20434 assert(!MGN->isIndexScaled() &&
20435 "Scaled gather/scatter should not be formed");
20436
20437 SDLoc DL(N);
20438 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20439 return DAG.getMaskedGather(
20440 N->getVTList(), MGN->getMemoryVT(), DL,
20441 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20442 MGN->getBasePtr(), Index, ScaleOp},
20443 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20444
20445 if (narrowIndex(Index, IndexType, DAG))
20446 return DAG.getMaskedGather(
20447 N->getVTList(), MGN->getMemoryVT(), DL,
20448 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20449 MGN->getBasePtr(), Index, ScaleOp},
20450 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20451
20452 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
20453 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
20454 // The sequence will be XLenVT, not the type of Index. Tell
20455 // isSimpleVIDSequence this so we avoid overflow.
20456 if (std::optional<VIDSequence> SimpleVID =
20457 isSimpleVIDSequence(Index, Subtarget.getXLen());
20458 SimpleVID && SimpleVID->StepDenominator == 1) {
20459 const int64_t StepNumerator = SimpleVID->StepNumerator;
20460 const int64_t Addend = SimpleVID->Addend;
20461
20462 // Note: We don't need to check alignment here since (by assumption
20463 // from the existence of the gather), our offsets must be sufficiently
20464 // aligned.
20465
20466 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
20467 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
20468 assert(IndexType == ISD::UNSIGNED_SCALED);
20469 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
20470 DAG.getSignedConstant(Addend, DL, PtrVT));
20471
20472 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
20473 VT.getVectorElementCount());
20474 SDValue StridedLoad = DAG.getStridedLoadVP(
20475 VT, DL, MGN->getChain(), BasePtr,
20476 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
20477 EVL, MGN->getMemOperand());
20478 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
20479 StridedLoad, MGN->getPassThru(), EVL);
20480 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
20481 DL);
20482 }
20483 }
20484
20485 SmallVector<int> ShuffleMask;
20486 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20487 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
20488 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
20489 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
20490 MGN->getMask(), DAG.getUNDEF(VT),
20491 MGN->getMemoryVT(), MGN->getMemOperand(),
20492 ISD::UNINDEXED, ISD::NON_EXTLOAD);
20493 SDValue Shuffle =
20494 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
20495 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
20496 }
20497
20498 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20499 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
20500 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
20501 SmallVector<SDValue> NewIndices;
20502 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
20503 NewIndices.push_back(Index.getOperand(i));
20504 EVT IndexVT = Index.getValueType()
20505 .getHalfNumVectorElementsVT(*DAG.getContext());
20506 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
20507
20508 unsigned ElementSize = VT.getScalarStoreSize();
20509 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
20510 auto EltCnt = VT.getVectorElementCount();
20511 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
20512 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
20513 EltCnt.divideCoefficientBy(2));
20514 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
20515 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
20516 EltCnt.divideCoefficientBy(2));
20517 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
20518
20519 SDValue Gather =
20520 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
20521 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
20522 Index, ScaleOp},
20523 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
20524 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
20525 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
20526 }
20527 break;
20528 }
20529 case ISD::MSCATTER: {
20530 const auto *MSN = cast<MaskedScatterSDNode>(N);
20531 SDValue Index = MSN->getIndex();
20532 SDValue ScaleOp = MSN->getScale();
20533 ISD::MemIndexType IndexType = MSN->getIndexType();
20534 assert(!MSN->isIndexScaled() &&
20535 "Scaled gather/scatter should not be formed");
20536
20537 SDLoc DL(N);
20538 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20539 return DAG.getMaskedScatter(
20540 N->getVTList(), MSN->getMemoryVT(), DL,
20541 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20542 Index, ScaleOp},
20543 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20544
20545 if (narrowIndex(Index, IndexType, DAG))
20546 return DAG.getMaskedScatter(
20547 N->getVTList(), MSN->getMemoryVT(), DL,
20548 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20549 Index, ScaleOp},
20550 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20551
20552 EVT VT = MSN->getValue()->getValueType(0);
20553 SmallVector<int> ShuffleMask;
20554 if (!MSN->isTruncatingStore() &&
20555 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
20556 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
20557 DAG.getUNDEF(VT), ShuffleMask);
20558 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
20559 DAG.getUNDEF(XLenVT), MSN->getMask(),
20560 MSN->getMemoryVT(), MSN->getMemOperand(),
20561 ISD::UNINDEXED, false);
20562 }
20563 break;
20564 }
20565 case ISD::VP_GATHER: {
20566 const auto *VPGN = cast<VPGatherSDNode>(N);
20567 SDValue Index = VPGN->getIndex();
20568 SDValue ScaleOp = VPGN->getScale();
20569 ISD::MemIndexType IndexType = VPGN->getIndexType();
20570 assert(!VPGN->isIndexScaled() &&
20571 "Scaled gather/scatter should not be formed");
20572
20573 SDLoc DL(N);
20574 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20575 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20576 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20577 ScaleOp, VPGN->getMask(),
20578 VPGN->getVectorLength()},
20579 VPGN->getMemOperand(), IndexType);
20580
20581 if (narrowIndex(Index, IndexType, DAG))
20582 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20583 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20584 ScaleOp, VPGN->getMask(),
20585 VPGN->getVectorLength()},
20586 VPGN->getMemOperand(), IndexType);
20587
20588 break;
20589 }
20590 case ISD::VP_SCATTER: {
20591 const auto *VPSN = cast<VPScatterSDNode>(N);
20592 SDValue Index = VPSN->getIndex();
20593 SDValue ScaleOp = VPSN->getScale();
20594 ISD::MemIndexType IndexType = VPSN->getIndexType();
20595 assert(!VPSN->isIndexScaled() &&
20596 "Scaled gather/scatter should not be formed");
20597
20598 SDLoc DL(N);
20599 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20600 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20601 {VPSN->getChain(), VPSN->getValue(),
20602 VPSN->getBasePtr(), Index, ScaleOp,
20603 VPSN->getMask(), VPSN->getVectorLength()},
20604 VPSN->getMemOperand(), IndexType);
20605
20606 if (narrowIndex(Index, IndexType, DAG))
20607 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20608 {VPSN->getChain(), VPSN->getValue(),
20609 VPSN->getBasePtr(), Index, ScaleOp,
20610 VPSN->getMask(), VPSN->getVectorLength()},
20611 VPSN->getMemOperand(), IndexType);
20612 break;
20613 }
20614 case RISCVISD::SHL_VL:
20615 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20616 return V;
20617 [[fallthrough]];
20618 case RISCVISD::SRA_VL:
20619 case RISCVISD::SRL_VL: {
20620 SDValue ShAmt = N->getOperand(1);
20621 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20622 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20623 SDLoc DL(N);
20624 SDValue VL = N->getOperand(4);
20625 EVT VT = N->getValueType(0);
20626 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20627 ShAmt.getOperand(1), VL);
20628 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
20629 N->getOperand(2), N->getOperand(3), N->getOperand(4));
20630 }
20631 break;
20632 }
20633 case ISD::SRA:
20634 if (SDValue V = performSRACombine(N, DAG, Subtarget))
20635 return V;
20636 [[fallthrough]];
20637 case ISD::SRL:
20638 case ISD::SHL: {
20639 if (N->getOpcode() == ISD::SHL) {
20640 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20641 return V;
20642 }
20643 SDValue ShAmt = N->getOperand(1);
20644 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20645 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20646 SDLoc DL(N);
20647 EVT VT = N->getValueType(0);
20648 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20649 ShAmt.getOperand(1),
20650 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
20651 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
20652 }
20653 break;
20654 }
20655 case RISCVISD::ADD_VL:
20656 if (SDValue V = simplifyOp_VL(N))
20657 return V;
20658 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20659 return V;
20660 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20661 return V;
20662 return combineToVWMACC(N, DAG, Subtarget);
20663 case RISCVISD::VWADD_W_VL:
20664 case RISCVISD::VWADDU_W_VL:
20665 case RISCVISD::VWSUB_W_VL:
20666 case RISCVISD::VWSUBU_W_VL:
20667 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
20668 case RISCVISD::OR_VL:
20669 case RISCVISD::SUB_VL:
20670 case RISCVISD::MUL_VL:
20671 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20672 case RISCVISD::VFMADD_VL:
20673 case RISCVISD::VFNMADD_VL:
20674 case RISCVISD::VFMSUB_VL:
20675 case RISCVISD::VFNMSUB_VL:
20676 case RISCVISD::STRICT_VFMADD_VL:
20677 case RISCVISD::STRICT_VFNMADD_VL:
20678 case RISCVISD::STRICT_VFMSUB_VL:
20679 case RISCVISD::STRICT_VFNMSUB_VL:
20680 return performVFMADD_VLCombine(N, DCI, Subtarget);
20681 case RISCVISD::FADD_VL:
20682 case RISCVISD::FSUB_VL:
20683 case RISCVISD::FMUL_VL:
20684 case RISCVISD::VFWADD_W_VL:
20685 case RISCVISD::VFWSUB_W_VL:
20686 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20687 case ISD::LOAD:
20688 case ISD::STORE: {
20689 if (DCI.isAfterLegalizeDAG())
20690 if (SDValue V = performMemPairCombine(N, DCI))
20691 return V;
20692
20693 if (N->getOpcode() != ISD::STORE)
20694 break;
20695
20696 auto *Store = cast<StoreSDNode>(N);
20697 SDValue Chain = Store->getChain();
20698 EVT MemVT = Store->getMemoryVT();
20699 SDValue Val = Store->getValue();
20700 SDLoc DL(N);
20701
20702 bool IsScalarizable =
20703 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
20704 Store->isSimple() &&
20705 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
20706 isPowerOf2_64(MemVT.getSizeInBits()) &&
20707 MemVT.getSizeInBits() <= Subtarget.getXLen();
20708
20709 // If sufficiently aligned we can scalarize stores of constant vectors of
20710 // any power-of-two size up to XLen bits, provided that they aren't too
20711 // expensive to materialize.
20712 // vsetivli zero, 2, e8, m1, ta, ma
20713 // vmv.v.i v8, 4
20714 // vse64.v v8, (a0)
20715 // ->
20716 // li a1, 1028
20717 // sh a1, 0(a0)
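// (Connecting the two snippets above, as an illustration: the v2i8 splat of 4
// packs into the 16-bit integer 0x0404 = 1028, which is the value the scalar
// li + sh sequence stores.)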
20718 if (DCI.isBeforeLegalize() && IsScalarizable &&
20719 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
20720 // Get the constant vector bits
20721 APInt NewC(Val.getValueSizeInBits(), 0);
20722 uint64_t EltSize = Val.getScalarValueSizeInBits();
20723 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
20724 if (Val.getOperand(i).isUndef())
20725 continue;
20726 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
20727 i * EltSize);
20728 }
20729 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20730
20731 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
20732 true) <= 2 &&
20733 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20734 NewVT, *Store->getMemOperand())) {
20735 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
20736 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
20737 Store->getPointerInfo(), Store->getBaseAlign(),
20738 Store->getMemOperand()->getFlags());
20739 }
20740 }
20741
20742 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
20743 // vsetivli zero, 2, e16, m1, ta, ma
20744 // vle16.v v8, (a0)
20745 // vse16.v v8, (a1)
20746 if (auto *L = dyn_cast<LoadSDNode>(Val);
20747 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
20748 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
20749 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
20750 L->getMemoryVT() == MemVT) {
20751 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20752 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20753 NewVT, *Store->getMemOperand()) &&
20754 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20755 NewVT, *L->getMemOperand())) {
20756 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
20757 L->getPointerInfo(), L->getBaseAlign(),
20758 L->getMemOperand()->getFlags());
20759 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
20760 Store->getPointerInfo(), Store->getBaseAlign(),
20761 Store->getMemOperand()->getFlags());
20762 }
20763 }
20764
20765 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
20766 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
20767 // any illegal types.
20768 if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
20769 (DCI.isAfterLegalizeDAG() &&
20770 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20771 isNullConstant(Val.getOperand(1)))) &&
20772 Val.hasOneUse()) {
20773 SDValue Src = Val.getOperand(0);
20774 MVT VecVT = Src.getSimpleValueType();
20775 // VecVT should be scalable and memory VT should match the element type.
20776 if (!Store->isIndexed() && VecVT.isScalableVector() &&
20777 MemVT == VecVT.getVectorElementType()) {
20778 SDLoc DL(N);
20779 MVT MaskVT = getMaskTypeFor(VecVT);
20780 return DAG.getStoreVP(
20781 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
20782 DAG.getConstant(1, DL, MaskVT),
20783 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
20784 Store->getMemOperand(), Store->getAddressingMode(),
20785 Store->isTruncatingStore(), /*IsCompress*/ false);
20786 }
20787 }
20788
20789 break;
20790 }
20791 case ISD::SPLAT_VECTOR: {
20792 EVT VT = N->getValueType(0);
20793 // Only perform this combine on legal MVT types.
20794 if (!isTypeLegal(VT))
20795 break;
20796 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
20797 DAG, Subtarget))
20798 return Gather;
20799 break;
20800 }
20801 case ISD::BUILD_VECTOR:
20802 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
20803 return V;
20804 break;
20805 case ISD::CONCAT_VECTORS:
20806 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
20807 return V;
20808 break;
20809 case ISD::VECTOR_SHUFFLE:
20810 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
20811 return V;
20812 break;
20813 case ISD::INSERT_VECTOR_ELT:
20814 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
20815 return V;
20816 break;
20817 case RISCVISD::VFMV_V_F_VL: {
20818 const MVT VT = N->getSimpleValueType(0);
20819 SDValue Passthru = N->getOperand(0);
20820 SDValue Scalar = N->getOperand(1);
20821 SDValue VL = N->getOperand(2);
20822
20823 // If VL is 1, we can use vfmv.s.f.
20824 if (isOneConstant(VL))
20825 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
20826 break;
20827 }
20828 case RISCVISD::VMV_V_X_VL: {
20829 const MVT VT = N->getSimpleValueType(0);
20830 SDValue Passthru = N->getOperand(0);
20831 SDValue Scalar = N->getOperand(1);
20832 SDValue VL = N->getOperand(2);
20833
20834 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
20835 // scalar input.
20836 unsigned ScalarSize = Scalar.getValueSizeInBits();
20837 unsigned EltWidth = VT.getScalarSizeInBits();
20838 if (ScalarSize > EltWidth && Passthru.isUndef())
20839 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
20840 return SDValue(N, 0);
20841
20842 // If VL is 1 and the scalar value won't benefit from immediate, we can
20843 // use vmv.s.x.
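// Illustrative cases: a splat of 3 with VL=1 is left alone, since 3 fits the
// 5-bit immediate and can stay a vmv.v.i, while a splat of 100 with VL=1 is
// rewritten to vmv.s.x because 100 does not fit in simm5.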
20844 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
20845 if (isOneConstant(VL) &&
20846 (!Const || Const->isZero() ||
20847 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
20848 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
20849
20850 break;
20851 }
20852 case RISCVISD::VFMV_S_F_VL: {
20853 SDValue Src = N->getOperand(1);
20854 // Try to remove vector->scalar->vector if the scalar->vector is inserting
20855 // into an undef vector.
20856 // TODO: Could use a vslide or vmv.v.v for non-undef.
20857 if (N->getOperand(0).isUndef() &&
20858 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20859 isNullConstant(Src.getOperand(1)) &&
20860 Src.getOperand(0).getValueType().isScalableVector()) {
20861 EVT VT = N->getValueType(0);
20862 SDValue EVSrc = Src.getOperand(0);
20863 EVT EVSrcVT = EVSrc.getValueType();
20865 // Widths match, just return the original vector.
20866 if (EVSrcVT == VT)
20867 return EVSrc;
20868 SDLoc DL(N);
20869 // Width is narrower, so use insert_subvector.
20870 if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
20871 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
20872 EVSrc,
20873 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
20874 }
20875 // Width is wider, so use extract_subvector.
20876 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
20877 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
20878 }
20879 [[fallthrough]];
20880 }
20881 case RISCVISD::VMV_S_X_VL: {
20882 const MVT VT = N->getSimpleValueType(0);
20883 SDValue Passthru = N->getOperand(0);
20884 SDValue Scalar = N->getOperand(1);
20885 SDValue VL = N->getOperand(2);
20886
20887 // The vmv.s.x instruction copies the scalar integer register to element 0
20888 // of the destination vector register. If SEW < XLEN, the least-significant
20889 // bits are copied and the upper XLEN-SEW bits are ignored.
20890 unsigned ScalarSize = Scalar.getValueSizeInBits();
20891 unsigned EltWidth = VT.getScalarSizeInBits();
20892 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
20893 return SDValue(N, 0);
20894
20895 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
20896 Scalar.getOperand(0).getValueType() == N->getValueType(0))
20897 return Scalar.getOperand(0);
20898
20899 // Use M1 or smaller to avoid over constraining register allocation
20900 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
20901 if (M1VT.bitsLT(VT)) {
20902 SDValue M1Passthru = DAG.getExtractSubvector(DL, M1VT, Passthru, 0);
20903 SDValue Result =
20904 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
20905 Result = DAG.getInsertSubvector(DL, Passthru, Result, 0);
20906 return Result;
20907 }
20908
20909 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
20910 // higher would involve overly constraining the register allocator for
20911 // no purpose.
20912 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
20913 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
20914 VT.bitsLE(RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
20915 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
20916
20917 break;
20918 }
20919 case RISCVISD::VMV_X_S: {
20920 SDValue Vec = N->getOperand(0);
20921 MVT VecVT = N->getOperand(0).getSimpleValueType();
20922 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
20923 if (M1VT.bitsLT(VecVT)) {
20924 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, 0);
20925 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
20926 }
20927 break;
20928 }
20929 case ISD::INTRINSIC_VOID:
20930 case ISD::INTRINSIC_W_CHAIN:
20931 case ISD::INTRINSIC_WO_CHAIN: {
20932 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
20933 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
20934 switch (IntNo) {
20935 // By default we do not combine any intrinsic.
20936 default:
20937 return SDValue();
20938 case Intrinsic::riscv_vcpop:
20939 case Intrinsic::riscv_vcpop_mask:
20940 case Intrinsic::riscv_vfirst:
20941 case Intrinsic::riscv_vfirst_mask: {
20942 SDValue VL = N->getOperand(2);
20943 if (IntNo == Intrinsic::riscv_vcpop_mask ||
20944 IntNo == Intrinsic::riscv_vfirst_mask)
20945 VL = N->getOperand(3);
20946 if (!isNullConstant(VL))
20947 return SDValue();
20948 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
20949 SDLoc DL(N);
20950 EVT VT = N->getValueType(0);
20951 if (IntNo == Intrinsic::riscv_vfirst ||
20952 IntNo == Intrinsic::riscv_vfirst_mask)
20953 return DAG.getAllOnesConstant(DL, VT);
20954 return DAG.getConstant(0, DL, VT);
20955 }
20956 case Intrinsic::riscv_vsseg2_mask:
20957 case Intrinsic::riscv_vsseg3_mask:
20958 case Intrinsic::riscv_vsseg4_mask:
20959 case Intrinsic::riscv_vsseg5_mask:
20960 case Intrinsic::riscv_vsseg6_mask:
20961 case Intrinsic::riscv_vsseg7_mask:
20962 case Intrinsic::riscv_vsseg8_mask: {
20963 SDValue Tuple = N->getOperand(2);
20964 unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
20965
20966 if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
20967 Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
20968 !Tuple.getOperand(0).isUndef())
20969 return SDValue();
20970
20971 SDValue Val = Tuple.getOperand(1);
20972 unsigned Idx = Tuple.getConstantOperandVal(2);
20973
20974 unsigned SEW = Val.getValueType().getScalarSizeInBits();
20975 assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
20976 "Type mismatch without bitcast?");
20977 unsigned Stride = SEW / 8 * NF;
20978 unsigned Offset = SEW / 8 * Idx;
20979
20980 SDValue Ops[] = {
20981 /*Chain=*/N->getOperand(0),
20982 /*IntID=*/
20983 DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
20984 /*StoredVal=*/Val,
20985 /*Ptr=*/
20986 DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
20987 DAG.getConstant(Offset, DL, XLenVT)),
20988 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
20989 /*Mask=*/N->getOperand(4),
20990 /*VL=*/N->getOperand(5)};
20991
20992 auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
20993 // Match getTgtMemIntrinsic for non-unit stride case
20994 EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
20995 MachineMemOperand *MMO =
20996 DAG.getMachineFunction().getMachineMemOperand(
20997 OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
20998
20999 SDVTList VTs = DAG.getVTList(MVT::Other);
21000 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
21001 MMO);
21002 }
21003 }
21004 }
21005 case ISD::EXPERIMENTAL_VP_REVERSE:
21006 return performVP_REVERSECombine(N, DAG, Subtarget);
21007 case ISD::VP_STORE:
21008 return performVP_STORECombine(N, DAG, Subtarget);
21009 case ISD::BITCAST: {
21011 SDValue N0 = N->getOperand(0);
21012 EVT VT = N->getValueType(0);
21013 EVT SrcVT = N0.getValueType();
21014 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
21015 unsigned NF = VT.getRISCVVectorTupleNumFields();
21016 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
21017 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
21018 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
21019
21020 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
21021
21022 SDValue Result = DAG.getUNDEF(VT);
21023 for (unsigned i = 0; i < NF; ++i)
21024 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
21025 DAG.getTargetConstant(i, DL, MVT::i32));
21026 return Result;
21027 }
21028 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
21029 // type, widen both sides to avoid a trip through memory.
21030 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
21031 VT.isScalarInteger()) {
21032 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
21033 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
21034 Ops[0] = N0;
21035 SDLoc DL(N);
21036 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
21037 N0 = DAG.getBitcast(MVT::i8, N0);
21038 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
21039 }
21040
21041 return SDValue();
21042 }
21043 case ISD::VECREDUCE_ADD:
21044 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, *this))
21045 return V;
21046 [[fallthrough]];
21047 case ISD::CTPOP:
21048 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
21049 return V;
21050 break;
21051 case RISCVISD::VRGATHER_VX_VL: {
21052 // Note this assumes that out of bounds indices produce poison
21053 // and can thus be replaced without having to prove them inbounds.
21054 EVT VT = N->getValueType(0);
21055 SDValue Src = N->getOperand(0);
21056 SDValue Idx = N->getOperand(1);
21057 SDValue Passthru = N->getOperand(2);
21058 SDValue VL = N->getOperand(4);
21059
21060 // Warning: Unlike most cases we strip an insert_subvector, this one
21061 // does not require the first operand to be undef.
21062 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
21063 isNullConstant(Src.getOperand(2)))
21064 Src = Src.getOperand(1);
21065
21066 switch (Src.getOpcode()) {
21067 default:
21068 break;
21069 case RISCVISD::VMV_V_X_VL:
21070 case RISCVISD::VFMV_V_F_VL:
21071 // Drop a redundant vrgather_vx.
21072 // TODO: Remove the type restriction if we find a motivating
21073 // test case?
21074 if (Passthru.isUndef() && VL == Src.getOperand(2) &&
21075 Src.getValueType() == VT)
21076 return Src;
21077 break;
21078 case RISCVISD::VMV_S_X_VL:
21079 case RISCVISD::VFMV_S_F_VL:
21080 // If this use only demands lane zero from the source vmv.s.x, and
21081 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
21082 // a vmv.v.x. Note that there can be other uses of the original
21083 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
21084 if (isNullConstant(Idx) && Passthru.isUndef() &&
21085 VL == Src.getOperand(2)) {
21086 unsigned Opc =
21087 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
21088 return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
21089 VL);
21090 }
21091 break;
21092 }
21093 break;
21094 }
21095 case RISCVISD::TUPLE_EXTRACT: {
21096 EVT VT = N->getValueType(0);
21097 SDValue Tuple = N->getOperand(0);
21098 unsigned Idx = N->getConstantOperandVal(1);
21099 if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
21100 break;
21101
21102 unsigned NF = 0;
21103 switch (Tuple.getConstantOperandVal(1)) {
21104 default:
21105 break;
21106 case Intrinsic::riscv_vlseg2_mask:
21107 case Intrinsic::riscv_vlseg3_mask:
21108 case Intrinsic::riscv_vlseg4_mask:
21109 case Intrinsic::riscv_vlseg5_mask:
21110 case Intrinsic::riscv_vlseg6_mask:
21111 case Intrinsic::riscv_vlseg7_mask:
21112 case Intrinsic::riscv_vlseg8_mask:
21113 NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21114 break;
21115 }
21116
21117 if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
21118 break;
21119
21120 unsigned SEW = VT.getScalarSizeInBits();
21121 assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
21122 "Type mismatch without bitcast?");
21123 unsigned Stride = SEW / 8 * NF;
21124 unsigned Offset = SEW / 8 * Idx;
21125
21126 SDValue Ops[] = {
21127 /*Chain=*/Tuple.getOperand(0),
21128 /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
21129 /*Passthru=*/Tuple.getOperand(2),
21130 /*Ptr=*/
21131 DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
21132 DAG.getConstant(Offset, DL, XLenVT)),
21133 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21134 /*Mask=*/Tuple.getOperand(4),
21135 /*VL=*/Tuple.getOperand(5),
21136 /*Policy=*/Tuple.getOperand(6)};
21137
21138 auto *TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
21139 // Match getTgtMemIntrinsic for non-unit stride case
21140 EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
21141 MachineMemOperand *MMO =
21142 DAG.getMachineFunction().getMachineMemOperand(
21143 TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21144
21145 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
21146 SDValue Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
21147 Ops, MemVT, MMO);
21148 DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
21149 return Result.getValue(0);
21150 }
21151 case RISCVISD::TUPLE_INSERT: {
21152 // tuple_insert tuple, undef, idx -> tuple
21153 if (N->getOperand(1).isUndef())
21154 return N->getOperand(0);
21155 break;
21156 }
21157 }
21158
21159 return SDValue();
21160}
21161
21162 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
21163 EVT XVT, unsigned KeptBits) const {
21164 // For vectors, we don't have a preference.
21165 if (XVT.isVector())
21166 return false;
21167
21168 if (XVT != MVT::i32 && XVT != MVT::i64)
21169 return false;
21170
21171 // We can use sext.w for RV64 or an srai 31 on RV32.
21172 if (KeptBits == 32 || KeptBits == 64)
21173 return true;
21174
21175 // With Zbb we can use sext.h/sext.b.
21176 return Subtarget.hasStdExtZbb() &&
21177 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
21178 KeptBits == 16);
21179}
21180
21181 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
21182 const SDNode *N, CombineLevel Level) const {
21183 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
21184 N->getOpcode() == ISD::SRL) &&
21185 "Expected shift op");
21186
21187 // The following folds are only desirable if `(OP _, c1 << c2)` can be
21188 // materialised in fewer instructions than `(OP _, c1)`:
21189 //
21190 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
21191 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
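// Rough illustration (constants invented for this sketch): with c1 = 16 and
// c2 = 4, c1 << c2 = 256 is still a legal addi immediate, so the fold is
// allowed; with c1 = 2047 and c2 = 4, c1 << c2 = 32752 no longer fits in a
// simm12 while 2047 does, so the combine is rejected below.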
21192 SDValue N0 = N->getOperand(0);
21193 EVT Ty = N0.getValueType();
21194
21195 // LD/ST will optimize constant Offset extraction, so when AddNode is used by
21196 // LD/ST, it can still complete the folding optimization operation performed
21197 // above.
21198 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
21199 for (SDNode *Use : X->users()) {
21200 // This use is the one we're on right now. Skip it.
21201 if (Use == User || Use->getOpcode() == ISD::SELECT)
21202 continue;
21203 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
21204 return false;
21205 }
21206 return true;
21207 };
21208
21209 if (Ty.isScalarInteger() &&
21210 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
21211 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
21212 return isUsedByLdSt(N0.getNode(), N);
21213
21214 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
21215 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
21216
21217 bool IsShXAdd =
21218 (Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
21219 C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3;
21220 bool IsQCShlAdd = Subtarget.hasVendorXqciac() && C2 &&
21221 C2->getZExtValue() >= 4 && C2->getZExtValue() <= 31;
21222
21223 // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
21224 if ((IsShXAdd || IsQCShlAdd) && N->hasOneUse() &&
21225 N->user_begin()->getOpcode() == ISD::ADD &&
21226 !isUsedByLdSt(*N->user_begin(), nullptr) &&
21227 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
21228 return false;
21229
21230 if (C1 && C2) {
21231 const APInt &C1Int = C1->getAPIntValue();
21232 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
21233
21234 // We can materialise `c1 << c2` into an add immediate, so it's "free",
21235 // and the combine should happen, to potentially allow further combines
21236 // later.
21237 if (ShiftedC1Int.getSignificantBits() <= 64 &&
21238 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
21239 return true;
21240
21241 // We can materialise `c1` in an add immediate, so it's "free", and the
21242 // combine should be prevented.
21243 if (C1Int.getSignificantBits() <= 64 &&
21244 isLegalAddImmediate(C1Int.getSExtValue()))
21245 return false;
21246
21247 // Neither constant will fit into an immediate, so find materialisation
21248 // costs.
21249 int C1Cost =
21250 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
21251 /*CompressionCost*/ true);
21252 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
21253 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
21254 /*CompressionCost*/ true);
21255
21256 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
21257 // combine should be prevented.
21258 if (C1Cost < ShiftedC1Cost)
21259 return false;
21260 }
21261 }
21262
21263 if (!N0->hasOneUse())
21264 return false;
21265
21266 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
21267 N0->getOperand(0)->getOpcode() == ISD::ADD &&
21268 !N0->getOperand(0)->hasOneUse())
21269 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
21270
21271 return true;
21272}
21273
21274 bool RISCVTargetLowering::targetShrinkDemandedConstant(
21275 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
21276 TargetLoweringOpt &TLO) const {
21277 // Delay this optimization as late as possible.
21278 if (!TLO.LegalOps)
21279 return false;
21280
21281 EVT VT = Op.getValueType();
21282 if (VT.isVector())
21283 return false;
21284
21285 unsigned Opcode = Op.getOpcode();
21286 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
21287 return false;
21288
21289 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
21290 if (!C)
21291 return false;
21292
21293 const APInt &Mask = C->getAPIntValue();
21294
21295 // Clear all non-demanded bits initially.
21296 APInt ShrunkMask = Mask & DemandedBits;
21297
21298 // Try to make a smaller immediate by setting undemanded bits.
21299
21300 APInt ExpandedMask = Mask | ~DemandedBits;
21301
21302 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
21303 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
21304 };
21305 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
21306 if (NewMask == Mask)
21307 return true;
21308 SDLoc DL(Op);
21309 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
21310 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
21311 Op.getOperand(0), NewC);
21312 return TLO.CombineTo(Op, NewOp);
21313 };
21314
21315 // If the shrunk mask fits in sign extended 12 bits, let the target
21316 // independent code apply it.
21317 if (ShrunkMask.isSignedIntN(12))
21318 return false;
21319
21320 // And has a few special cases for zext.
21321 if (Opcode == ISD::AND) {
21322 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
21323 // otherwise use SLLI + SRLI.
21324 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
21325 if (IsLegalMask(NewMask))
21326 return UseMask(NewMask);
21327
21328 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
21329 if (VT == MVT::i64) {
21330 APInt NewMask = APInt(64, 0xffffffff);
21331 if (IsLegalMask(NewMask))
21332 return UseMask(NewMask);
21333 }
21334 }
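// Illustrative example (constants invented for this sketch): for
// (and X, 0x8FF0) where only bits 8..15 are demanded, ShrunkMask is 0x8F00
// (not a simm12) while 0xffff lies between ShrunkMask and ExpandedMask, so
// the constant is widened to 0xffff and the AND can later be selected as
// zext.h (or SLLI+SRLI).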
21335
21336 // For the remaining optimizations, we need to be able to make a negative
21337 // number through a combination of mask and undemanded bits.
21338 if (!ExpandedMask.isNegative())
21339 return false;
21340
21341 // What is the fewest number of bits we need to represent the negative number.
21342 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
21343
21344 // Try to make a 12 bit negative immediate. If that fails try to make a 32
21345 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
21346 // If we can't create a simm12, we shouldn't change opaque constants.
21347 APInt NewMask = ShrunkMask;
21348 if (MinSignedBits <= 12)
21349 NewMask.setBitsFrom(11);
21350 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
21351 NewMask.setBitsFrom(31);
21352 else
21353 return false;
21354
21355 // Check that our new mask is a subset of the demanded mask.
21356 assert(IsLegalMask(NewMask));
21357 return UseMask(NewMask);
21358}
21359
21360static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
21361 static const uint64_t GREVMasks[] = {
21362 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
21363 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
21364
21365 for (unsigned Stage = 0; Stage != 6; ++Stage) {
21366 unsigned Shift = 1 << Stage;
21367 if (ShAmt & Shift) {
21368 uint64_t Mask = GREVMasks[Stage];
21369 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
21370 if (IsGORC)
21371 Res |= x;
21372 x = Res;
21373 }
21374 }
21375
21376 return x;
21377}
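// A minimal worked example of the helper above (values chosen for this
// sketch): with ShAmt = 7 only the three byte-local stages run, so
//   computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80  (brev8 reverses the
//   bits within each byte), and
//   computeGREVOrGORC(0x01, 7, /*IsGORC=*/true)  == 0xFF  (orc.b turns any
//   non-zero byte into 0xFF),
// which is why a control value of 7 models both instructions below.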
21378
21379 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
21380 KnownBits &Known,
21381 const APInt &DemandedElts,
21382 const SelectionDAG &DAG,
21383 unsigned Depth) const {
21384 unsigned BitWidth = Known.getBitWidth();
21385 unsigned Opc = Op.getOpcode();
21390 "Should use MaskedValueIsZero if you don't know whether Op"
21391 " is a target node!");
21392
21393 Known.resetAll();
21394 switch (Opc) {
21395 default: break;
21396 case RISCVISD::SELECT_CC: {
21397 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
21398 // If we don't know any bits, early out.
21399 if (Known.isUnknown())
21400 break;
21401 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
21402
21403 // Only known if known in both the LHS and RHS.
21404 Known = Known.intersectWith(Known2);
21405 break;
21406 }
21407 case RISCVISD::VCPOP_VL: {
21408 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
21409 Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
21410 break;
21411 }
21412 case RISCVISD::CZERO_EQZ:
21413 case RISCVISD::CZERO_NEZ:
21414 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21415 // Result is either all zero or operand 0. We can propagate zeros, but not
21416 // ones.
21417 Known.One.clearAllBits();
21418 break;
21419 case RISCVISD::REMUW: {
21420 KnownBits Known2;
21421 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21422 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21423 // We only care about the lower 32 bits.
21424 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
21425 // Restore the original width by sign extending.
21426 Known = Known.sext(BitWidth);
21427 break;
21428 }
21429 case RISCVISD::DIVUW: {
21430 KnownBits Known2;
21431 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21432 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21433 // We only care about the lower 32 bits.
21434 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
21435 // Restore the original width by sign extending.
21436 Known = Known.sext(BitWidth);
21437 break;
21438 }
21439 case RISCVISD::SLLW: {
21440 KnownBits Known2;
21441 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21442 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21443 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
21444 // Restore the original width by sign extending.
21445 Known = Known.sext(BitWidth);
21446 break;
21447 }
21448 case RISCVISD::SRAW: {
21449 KnownBits Known2;
21450 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21451 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21452 Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
21453 // Restore the original width by sign extending.
21454 Known = Known.sext(BitWidth);
21455 break;
21456 }
21457 case RISCVISD::CTZW: {
21458 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21459 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
21460 unsigned LowBits = llvm::bit_width(PossibleTZ);
21461 Known.Zero.setBitsFrom(LowBits);
21462 break;
21463 }
21464 case RISCVISD::CLZW: {
21465 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21466 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
21467 unsigned LowBits = llvm::bit_width(PossibleLZ);
21468 Known.Zero.setBitsFrom(LowBits);
21469 break;
21470 }
21471 case RISCVISD::BREV8:
21472 case RISCVISD::ORC_B: {
21473 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
21474 // control value of 7 is equivalent to brev8 and orc.b.
21475 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21476 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21477 // To compute zeros for ORC_B, we need to invert the value and invert it
21478 // back after. This inverting is harmless for BREV8.
21479 Known.Zero =
21480 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
21481 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
21482 break;
21483 }
21484 case RISCVISD::READ_VLENB: {
21485 // We can use the minimum and maximum VLEN values to bound VLENB. We
21486 // know VLEN must be a power of two.
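    // For example, if the build guarantees Zvl128b..Zvl512b, VLENB is a power
    // of two between 16 and 64, so the low 4 bits and bits 7 and above are
    // known zero; if the minimum and maximum agree the value is known exactly.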
21487 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
21488 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
21489 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
21490 Known.Zero.setLowBits(Log2_32(MinVLenB));
21491 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
21492 if (MaxVLenB == MinVLenB)
21493 Known.One.setBit(Log2_32(MinVLenB));
21494 break;
21495 }
21496 case RISCVISD::FCLASS: {
21497 // fclass will only set one of the low 10 bits.
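    // (The ten fclass categories are: -inf, -normal, -subnormal, -0, +0,
    // +subnormal, +normal, +inf, signaling NaN and quiet NaN, in bits 0..9.)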
21498 Known.Zero.setBitsFrom(10);
21499 break;
21500 }
21501  case ISD::INTRINSIC_W_CHAIN:
21502  case ISD::INTRINSIC_WO_CHAIN: {
21503    unsigned IntNo =
21504 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
21505 switch (IntNo) {
21506 default:
21507 // We can't do anything for most intrinsics.
21508 break;
21509 case Intrinsic::riscv_vsetvli:
21510 case Intrinsic::riscv_vsetvlimax: {
21511 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
21512 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
21513 RISCVVType::VLMUL VLMUL =
21514 static_cast<RISCVVType::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
21515 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
21516 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
21517 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
21518 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
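      // Worked example (illustrative): with a 512-bit maximum VLEN, SEW=64 and
      // LMUL=2, MaxVL is 512/64*2 = 16, so bits 5 and above of the result are
      // known to be zero below.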
21519
21520      // The result of vsetvli must not be larger than AVL.
21521 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
21522 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
21523
21524 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
21525 if (BitWidth > KnownZeroFirstBit)
21526 Known.Zero.setBitsFrom(KnownZeroFirstBit);
21527 break;
21528 }
21529 }
21530 break;
21531 }
21532 }
21533}
21534
21535unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
21536    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21537 unsigned Depth) const {
21538 switch (Op.getOpcode()) {
21539 default:
21540 break;
21541 case RISCVISD::SELECT_CC: {
21542 unsigned Tmp =
21543 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
21544 if (Tmp == 1) return 1; // Early out.
21545 unsigned Tmp2 =
21546 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
21547 return std::min(Tmp, Tmp2);
21548 }
21549 case RISCVISD::CZERO_EQZ:
21550 case RISCVISD::CZERO_NEZ:
21551 // Output is either all zero or operand 0. We can propagate sign bit count
21552 // from operand 0.
21553 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21554 case RISCVISD::ABSW: {
21555 // We expand this at isel to negw+max. The result will have 33 sign bits
21556 // if the input has at least 33 sign bits.
21557 unsigned Tmp =
21558 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21559 if (Tmp < 33) return 1;
21560 return 33;
21561 }
21562 case RISCVISD::SRAW: {
21563 unsigned Tmp =
21564 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21565 // sraw produces at least 33 sign bits. If the input already has more than
21566    // 33 sign bits, sraw will preserve them.
21567 // TODO: A more precise answer could be calculated depending on known bits
21568 // in the shift amount.
21569 return std::max(Tmp, 33U);
21570 }
21571 case RISCVISD::SLLW:
21572 case RISCVISD::SRLW:
21573 case RISCVISD::DIVW:
21574 case RISCVISD::DIVUW:
21575 case RISCVISD::REMUW:
21576 case RISCVISD::ROLW:
21577 case RISCVISD::RORW:
21578 case RISCVISD::FCVT_W_RV64:
21579 case RISCVISD::FCVT_WU_RV64:
21580 case RISCVISD::STRICT_FCVT_W_RV64:
21581 case RISCVISD::STRICT_FCVT_WU_RV64:
21582 // TODO: As the result is sign-extended, this is conservatively correct.
21583 return 33;
21584 case RISCVISD::VMV_X_S: {
21585 // The number of sign bits of the scalar result is computed by obtaining the
21586 // element type of the input vector operand, subtracting its width from the
21587 // XLEN, and then adding one (sign bit within the element type). If the
21588 // element type is wider than XLen, the least-significant XLEN bits are
21589 // taken.
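    // E.g. on RV64, extracting from a vector of i16 elements gives
    // 64 - 16 + 1 = 49 known sign bits.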
21590 unsigned XLen = Subtarget.getXLen();
21591 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
21592 if (EltBits <= XLen)
21593 return XLen - EltBits + 1;
21594 break;
21595 }
21596  case ISD::INTRINSIC_W_CHAIN: {
21597    unsigned IntNo = Op.getConstantOperandVal(1);
21598 switch (IntNo) {
21599 default:
21600 break;
21601 case Intrinsic::riscv_masked_atomicrmw_xchg:
21602 case Intrinsic::riscv_masked_atomicrmw_add:
21603 case Intrinsic::riscv_masked_atomicrmw_sub:
21604 case Intrinsic::riscv_masked_atomicrmw_nand:
21605 case Intrinsic::riscv_masked_atomicrmw_max:
21606 case Intrinsic::riscv_masked_atomicrmw_min:
21607 case Intrinsic::riscv_masked_atomicrmw_umax:
21608 case Intrinsic::riscv_masked_atomicrmw_umin:
21609 case Intrinsic::riscv_masked_cmpxchg:
21610 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
21611 // narrow atomic operation. These are implemented using atomic
21612 // operations at the minimum supported atomicrmw/cmpxchg width whose
21613 // result is then sign extended to XLEN. With +A, the minimum width is
21614      // 32 for both RV64 and RV32.
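      // E.g. on RV64 this returns 64 - 31 = 33 sign bits (a 32-bit result
      // sign-extended to 64 bits).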
21616 assert(Subtarget.hasStdExtA());
21617 return Op.getValueSizeInBits() - 31;
21618 }
21619 break;
21620 }
21621 }
21622
21623 return 1;
21624}
21625
21626bool RISCVTargetLowering::SimplifyDemandedBitsForTargetNode(
21627    SDValue Op, const APInt &OriginalDemandedBits,
21628 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
21629 unsigned Depth) const {
21630 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
21631
21632 switch (Op.getOpcode()) {
21633 case RISCVISD::BREV8:
21634 case RISCVISD::ORC_B: {
21635 KnownBits Known2;
21636 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21637 // For BREV8, we need to do BREV8 on the demanded bits.
21638    // For ORC_B, any bit in the output demands all bits from the same byte.
21639 // So we need to do ORC_B on the demanded bits.
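    // E.g. if only bit 0 of an orc.b result is demanded, all eight bits of the
    // low byte of the input are demanded; for brev8 only bit 7 of that byte is.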
21640    APInt DemandedBits =
21641        APInt(BitWidth, computeGREVOrGORC(OriginalDemandedBits.getZExtValue(),
21642 7, IsGORC));
21643 if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits,
21644 OriginalDemandedElts, Known2, TLO, Depth + 1))
21645 return true;
21646
21647 // To compute zeros for ORC_B, we need to invert the value and invert it
21648 // back after. This inverting is harmless for BREV8.
21649 Known.Zero = ~computeGREVOrGORC(~Known2.Zero.getZExtValue(), 7, IsGORC);
21650 Known.One = computeGREVOrGORC(Known2.One.getZExtValue(), 7, IsGORC);
21651 return false;
21652 }
21653 }
21654
21655  return TargetLowering::SimplifyDemandedBitsForTargetNode(
21656      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
21657}
21658
21659bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
21660    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21661 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
21662
21663 // TODO: Add more target nodes.
21664 switch (Op.getOpcode()) {
21665 case RISCVISD::SLLW:
21666 case RISCVISD::SRAW:
21667 case RISCVISD::SRLW:
21668 case RISCVISD::RORW:
21669 case RISCVISD::ROLW:
21670 // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
21671    // amount is in bounds.
21672 return false;
21673 case RISCVISD::SELECT_CC:
21674 // Integer comparisons cannot create poison.
21675 assert(Op.getOperand(0).getValueType().isInteger() &&
21676 "RISCVISD::SELECT_CC only compares integers");
21677 return false;
21678 }
21679  return TargetLowering::canCreateUndefOrPoisonForTargetNode(
21680      Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
21681}
21682
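// Return the Constant behind a load whose address is a constant-pool entry,
// recognising both the LLA form and the HI + ADD_LO (%hi/%lo) pair; returns
// nullptr otherwise.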
21683const Constant *
21684RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
21685  assert(Ld && "Unexpected null LoadSDNode");
21686 if (!ISD::isNormalLoad(Ld))
21687 return nullptr;
21688
21689 SDValue Ptr = Ld->getBasePtr();
21690
21691 // Only constant pools with no offset are supported.
21692 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
21693 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
21694 if (!CNode || CNode->isMachineConstantPoolEntry() ||
21695 CNode->getOffset() != 0)
21696 return nullptr;
21697
21698 return CNode;
21699 };
21700
21701 // Simple case, LLA.
21702 if (Ptr.getOpcode() == RISCVISD::LLA) {
21703 auto *CNode = GetSupportedConstantPool(Ptr.getOperand(0));
21704 if (!CNode || CNode->getTargetFlags() != 0)
21705 return nullptr;
21706
21707 return CNode->getConstVal();
21708 }
21709
21710 // Look for a HI and ADD_LO pair.
21711 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
21712 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
21713 return nullptr;
21714
21715 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
21716 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
21717
21718 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
21719 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
21720 return nullptr;
21721
21722 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
21723 return nullptr;
21724
21725 return CNodeLo->getConstVal();
21726}
21727
21728static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
21729                                                    MachineBasicBlock *BB) {
21730 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
21731
21732 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
21733 // Should the count have wrapped while it was being read, we need to try
21734 // again.
21735 // For example:
21736 // ```
21737 // read:
21738 // csrrs x3, counterh # load high word of counter
21739 // csrrs x2, counter # load low word of counter
21740 // csrrs x4, counterh # load high word of counter
21741 // bne x3, x4, read # check if high word reads match, otherwise try again
21742 // ```
21743
21744 MachineFunction &MF = *BB->getParent();
21745 const BasicBlock *LLVMBB = BB->getBasicBlock();
21746  MachineFunction::iterator It = ++BB->getIterator();
21747
21748 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
21749 MF.insert(It, LoopMBB);
21750
21751 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
21752 MF.insert(It, DoneMBB);
21753
21754 // Transfer the remainder of BB and its successor edges to DoneMBB.
21755 DoneMBB->splice(DoneMBB->begin(), BB,
21756 std::next(MachineBasicBlock::iterator(MI)), BB->end());
21757  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
21758
21759 BB->addSuccessor(LoopMBB);
21760
21761  MachineRegisterInfo &RegInfo = MF.getRegInfo();
21762 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
21763 Register LoReg = MI.getOperand(0).getReg();
21764 Register HiReg = MI.getOperand(1).getReg();
21765 int64_t LoCounter = MI.getOperand(2).getImm();
21766 int64_t HiCounter = MI.getOperand(3).getImm();
21767 DebugLoc DL = MI.getDebugLoc();
21768
21769  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
21770  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
21771 .addImm(HiCounter)
21772 .addReg(RISCV::X0);
21773 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
21774 .addImm(LoCounter)
21775 .addReg(RISCV::X0);
21776 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
21777 .addImm(HiCounter)
21778 .addReg(RISCV::X0);
21779
21780 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
21781 .addReg(HiReg)
21782 .addReg(ReadAgainReg)
21783 .addMBB(LoopMBB);
21784
21785 LoopMBB->addSuccessor(LoopMBB);
21786 LoopMBB->addSuccessor(DoneMBB);
21787
21788 MI.eraseFromParent();
21789
21790 return DoneMBB;
21791}
21792
21793static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
21794                                             MachineBasicBlock *BB,
21795                                             const RISCVSubtarget &Subtarget) {
21796 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
21797
21798 MachineFunction &MF = *BB->getParent();
21799 DebugLoc DL = MI.getDebugLoc();
21800  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
21801  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
21802  Register LoReg = MI.getOperand(0).getReg();
21803 Register HiReg = MI.getOperand(1).getReg();
21804 Register SrcReg = MI.getOperand(2).getReg();
21805
21806 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
21807 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
21808
21809 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
21810 RI, Register());
21811  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
21812  MachineMemOperand *MMOLo =
21813      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
21814  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
21815      MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
21816  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
21817 .addFrameIndex(FI)
21818 .addImm(0)
21819 .addMemOperand(MMOLo);
21820 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
21821 .addFrameIndex(FI)
21822 .addImm(4)
21823 .addMemOperand(MMOHi);
21824 MI.eraseFromParent(); // The pseudo instruction is gone now.
21825 return BB;
21826}
21827
21828static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
21829                                                 MachineBasicBlock *BB,
21830                                                 const RISCVSubtarget &Subtarget) {
21831 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
21832 "Unexpected instruction");
21833
21834 MachineFunction &MF = *BB->getParent();
21835 DebugLoc DL = MI.getDebugLoc();
21836  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
21837  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
21838  Register DstReg = MI.getOperand(0).getReg();
21839 Register LoReg = MI.getOperand(1).getReg();
21840 Register HiReg = MI.getOperand(2).getReg();
21841
21842 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
21843 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
21844
21845  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
21846  MachineMemOperand *MMOLo =
21847      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
21848  MachineMemOperand *MMOHi = MF.getMachineMemOperand(
21849      MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
21850  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
21851 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
21852 .addFrameIndex(FI)
21853 .addImm(0)
21854 .addMemOperand(MMOLo);
21855 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
21856 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
21857 .addFrameIndex(FI)
21858 .addImm(4)
21859 .addMemOperand(MMOHi);
21860 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
21861 MI.eraseFromParent(); // The pseudo instruction is gone now.
21862 return BB;
21863}
21864
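// Note: FLT/FLE are signaling comparisons in RISC-V (they raise the invalid
// flag for any NaN operand). A quiet compare is therefore emitted as: save
// FFLAGS, do the signaling compare, restore FFLAGS to drop any spurious
// invalid flag, then issue FEQ (which signals only on signaling NaNs) purely
// for its exception behaviour.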
21865static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
21866                                        unsigned RelOpcode, unsigned EqOpcode,
21867 const RISCVSubtarget &Subtarget) {
21868 DebugLoc DL = MI.getDebugLoc();
21869 Register DstReg = MI.getOperand(0).getReg();
21870 Register Src1Reg = MI.getOperand(1).getReg();
21871 Register Src2Reg = MI.getOperand(2).getReg();
21872  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
21873  Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
21874  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
21875
21876 // Save the current FFLAGS.
21877 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
21878
21879 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
21880 .addReg(Src1Reg)
21881 .addReg(Src2Reg);
21882  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
21883    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
21884
21885 // Restore the FFLAGS.
21886 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
21887 .addReg(SavedFFlags, RegState::Kill);
21888
21889 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
21890 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
21891 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
21892 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
21893  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
21894    MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
21895
21896 // Erase the pseudoinstruction.
21897 MI.eraseFromParent();
21898 return BB;
21899}
21900
21901static MachineBasicBlock *
21902EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
21903                          MachineBasicBlock *ThisMBB,
21904 const RISCVSubtarget &Subtarget) {
21905  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
21906 // Without this, custom-inserter would have generated:
21907 //
21908 // A
21909 // | \
21910 // | B
21911 // | /
21912 // C
21913 // | \
21914 // | D
21915 // | /
21916 // E
21917 //
21918 // A: X = ...; Y = ...
21919 // B: empty
21920 // C: Z = PHI [X, A], [Y, B]
21921 // D: empty
21922 // E: PHI [X, C], [Z, D]
21923 //
21924 // If we lower both Select_FPRX_ in a single step, we can instead generate:
21925 //
21926 // A
21927 // | \
21928 // | C
21929 // | /|
21930 // |/ |
21931 // | |
21932 // | D
21933 // | /
21934 // E
21935 //
21936 // A: X = ...; Y = ...
21937 // D: empty
21938 // E: PHI [X, A], [X, C], [Y, D]
21939
21940 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
21941 const DebugLoc &DL = First.getDebugLoc();
21942 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
21943 MachineFunction *F = ThisMBB->getParent();
21944 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
21945 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
21946 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
21947 MachineFunction::iterator It = ++ThisMBB->getIterator();
21948 F->insert(It, FirstMBB);
21949 F->insert(It, SecondMBB);
21950 F->insert(It, SinkMBB);
21951
21952 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
21953 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
21954                  std::next(MachineBasicBlock::iterator(First)),
21955                  ThisMBB->end());
21956 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
21957
21958 // Fallthrough block for ThisMBB.
21959 ThisMBB->addSuccessor(FirstMBB);
21960 // Fallthrough block for FirstMBB.
21961 FirstMBB->addSuccessor(SecondMBB);
21962 ThisMBB->addSuccessor(SinkMBB);
21963 FirstMBB->addSuccessor(SinkMBB);
21964 // This is fallthrough.
21965 SecondMBB->addSuccessor(SinkMBB);
21966
21967 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
21968 Register FLHS = First.getOperand(1).getReg();
21969 Register FRHS = First.getOperand(2).getReg();
21970 // Insert appropriate branch.
21971 BuildMI(FirstMBB, DL, TII.get(RISCVCC::getBrCond(FirstCC, First.getOpcode())))
21972 .addReg(FLHS)
21973 .addReg(FRHS)
21974 .addMBB(SinkMBB);
21975
21976 Register SLHS = Second.getOperand(1).getReg();
21977 Register SRHS = Second.getOperand(2).getReg();
21978 Register Op1Reg4 = First.getOperand(4).getReg();
21979 Register Op1Reg5 = First.getOperand(5).getReg();
21980
21981 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
21982 // Insert appropriate branch.
21983 BuildMI(ThisMBB, DL,
21984 TII.get(RISCVCC::getBrCond(SecondCC, Second.getOpcode())))
21985 .addReg(SLHS)
21986 .addReg(SRHS)
21987 .addMBB(SinkMBB);
21988
21989 Register DestReg = Second.getOperand(0).getReg();
21990 Register Op2Reg4 = Second.getOperand(4).getReg();
21991 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
21992 .addReg(Op2Reg4)
21993 .addMBB(ThisMBB)
21994 .addReg(Op1Reg4)
21995 .addMBB(FirstMBB)
21996 .addReg(Op1Reg5)
21997 .addMBB(SecondMBB);
21998
21999 // Now remove the Select_FPRX_s.
22000 First.eraseFromParent();
22001 Second.eraseFromParent();
22002 return SinkMBB;
22003}
22004
22005static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
22006                                           MachineBasicBlock *BB,
22007                                           const RISCVSubtarget &Subtarget) {
22008 // To "insert" Select_* instructions, we actually have to insert the triangle
22009 // control-flow pattern. The incoming instructions know the destination vreg
22010 // to set, the condition code register to branch on, the true/false values to
22011 // select between, and the condcode to use to select the appropriate branch.
22012 //
22013 // We produce the following control flow:
22014 // HeadMBB
22015 // | \
22016 // | IfFalseMBB
22017 // | /
22018 // TailMBB
22019 //
22020 // When we find a sequence of selects we attempt to optimize their emission
22021 // by sharing the control flow. Currently we only handle cases where we have
22022 // multiple selects with the exact same condition (same LHS, RHS and CC).
22023 // The selects may be interleaved with other instructions if the other
22024 // instructions meet some requirements we deem safe:
22025 // - They are not pseudo instructions.
22026 // - They are debug instructions. Otherwise,
22027 // - They do not have side-effects, do not access memory and their inputs do
22028 // not depend on the results of the select pseudo-instructions.
22029 // The TrueV/FalseV operands of the selects cannot depend on the result of
22030 // previous selects in the sequence.
22031 // These conditions could be further relaxed. See the X86 target for a
22032 // related approach and more information.
22033 //
22034 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22035 // is checked here and handled by a separate function -
22036 // EmitLoweredCascadedSelect.
22037
22038 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
22039 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
22040 MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
22041 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
22042 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
22043 Next->getOperand(5).isKill())
22044 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
22045
22046 Register LHS = MI.getOperand(1).getReg();
22047 Register RHS;
22048 if (MI.getOperand(2).isReg())
22049 RHS = MI.getOperand(2).getReg();
22050 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
22051
22052 SmallVector<MachineInstr *, 4> SelectDebugValues;
22053 SmallSet<Register, 4> SelectDests;
22054 SelectDests.insert(MI.getOperand(0).getReg());
22055
22056 MachineInstr *LastSelectPseudo = &MI;
22057 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
22058 SequenceMBBI != E; ++SequenceMBBI) {
22059 if (SequenceMBBI->isDebugInstr())
22060 continue;
22061 if (RISCVInstrInfo::isSelectPseudo(*SequenceMBBI)) {
22062 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
22063 !SequenceMBBI->getOperand(2).isReg() ||
22064 SequenceMBBI->getOperand(2).getReg() != RHS ||
22065 SequenceMBBI->getOperand(3).getImm() != CC ||
22066 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
22067 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
22068 break;
22069 LastSelectPseudo = &*SequenceMBBI;
22070 SequenceMBBI->collectDebugValues(SelectDebugValues);
22071 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
22072 continue;
22073 }
22074 if (SequenceMBBI->hasUnmodeledSideEffects() ||
22075 SequenceMBBI->mayLoadOrStore() ||
22076 SequenceMBBI->usesCustomInsertionHook())
22077 break;
22078 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
22079 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
22080 }))
22081 break;
22082 }
22083
22084 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22085 const BasicBlock *LLVM_BB = BB->getBasicBlock();
22086 DebugLoc DL = MI.getDebugLoc();
22087  MachineFunction::iterator I = ++BB->getIterator();
22088
22089 MachineBasicBlock *HeadMBB = BB;
22090 MachineFunction *F = BB->getParent();
22091 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
22092 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
22093
22094 F->insert(I, IfFalseMBB);
22095 F->insert(I, TailMBB);
22096
22097 // Set the call frame size on entry to the new basic blocks.
22098 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
22099 IfFalseMBB->setCallFrameSize(CallFrameSize);
22100 TailMBB->setCallFrameSize(CallFrameSize);
22101
22102 // Transfer debug instructions associated with the selects to TailMBB.
22103 for (MachineInstr *DebugInstr : SelectDebugValues) {
22104 TailMBB->push_back(DebugInstr->removeFromParent());
22105 }
22106
22107 // Move all instructions after the sequence to TailMBB.
22108 TailMBB->splice(TailMBB->end(), HeadMBB,
22109 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
22110 // Update machine-CFG edges by transferring all successors of the current
22111 // block to the new block which will contain the Phi nodes for the selects.
22112 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
22113 // Set the successors for HeadMBB.
22114 HeadMBB->addSuccessor(IfFalseMBB);
22115 HeadMBB->addSuccessor(TailMBB);
22116
22117 // Insert appropriate branch.
22118 if (MI.getOperand(2).isImm())
22119 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22120 .addReg(LHS)
22121 .addImm(MI.getOperand(2).getImm())
22122 .addMBB(TailMBB);
22123 else
22124 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22125 .addReg(LHS)
22126 .addReg(RHS)
22127 .addMBB(TailMBB);
22128
22129 // IfFalseMBB just falls through to TailMBB.
22130 IfFalseMBB->addSuccessor(TailMBB);
22131
22132 // Create PHIs for all of the select pseudo-instructions.
22133 auto SelectMBBI = MI.getIterator();
22134 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
22135 auto InsertionPoint = TailMBB->begin();
22136 while (SelectMBBI != SelectEnd) {
22137 auto Next = std::next(SelectMBBI);
22138 if (RISCVInstrInfo::isSelectPseudo(*SelectMBBI)) {
22139 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
22140 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
22141 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
22142 .addReg(SelectMBBI->getOperand(4).getReg())
22143 .addMBB(HeadMBB)
22144 .addReg(SelectMBBI->getOperand(5).getReg())
22145 .addMBB(IfFalseMBB);
22146 SelectMBBI->eraseFromParent();
22147 }
22148 SelectMBBI = Next;
22149 }
22150
22151 F->getProperties().resetNoPHIs();
22152 return TailMBB;
22153}
22154
22155// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
22156static const RISCV::RISCVMaskedPseudoInfo *
22157lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW) {
22158  const RISCVVInversePseudosTable::PseudoInfo *Inverse =
22159      RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
22160  assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
22161  const RISCV::RISCVMaskedPseudoInfo *Masked =
22162      RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
22163 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
22164 return Masked;
22165}
22166
22167static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
22168                                                    MachineBasicBlock *BB,
22169                                                    unsigned CVTXOpc) {
22170 DebugLoc DL = MI.getDebugLoc();
22171
22172  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
22173
22174  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
22175 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22176
22177 // Save the old value of FFLAGS.
22178 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
22179
22180 assert(MI.getNumOperands() == 7);
22181
22182 // Emit a VFCVT_X_F
22183 const TargetRegisterInfo *TRI =
22184      BB->getParent()->getSubtarget().getRegisterInfo();
22185  const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
22186 Register Tmp = MRI.createVirtualRegister(RC);
22187 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
22188 .add(MI.getOperand(1))
22189 .add(MI.getOperand(2))
22190 .add(MI.getOperand(3))
22191 .add(MachineOperand::CreateImm(7)) // frm = DYN
22192 .add(MI.getOperand(4))
22193 .add(MI.getOperand(5))
22194 .add(MI.getOperand(6))
22195 .add(MachineOperand::CreateReg(RISCV::FRM,
22196 /*IsDef*/ false,
22197 /*IsImp*/ true));
22198
22199 // Emit a VFCVT_F_X
22200 RISCVVType::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
22201 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
22202 // There is no E8 variant for VFCVT_F_X.
22203 assert(Log2SEW >= 4);
22204 unsigned CVTFOpc =
22205 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
22206 ->MaskedPseudo;
22207
22208 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
22209 .add(MI.getOperand(0))
22210 .add(MI.getOperand(1))
22211 .addReg(Tmp)
22212 .add(MI.getOperand(3))
22213 .add(MachineOperand::CreateImm(7)) // frm = DYN
22214 .add(MI.getOperand(4))
22215 .add(MI.getOperand(5))
22216 .add(MI.getOperand(6))
22217 .add(MachineOperand::CreateReg(RISCV::FRM,
22218 /*IsDef*/ false,
22219 /*IsImp*/ true));
22220
22221 // Restore FFLAGS.
22222 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22223 .addReg(SavedFFLAGS, RegState::Kill);
22224
22225 // Erase the pseudoinstruction.
22226 MI.eraseFromParent();
22227 return BB;
22228}
22229
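// Note: the FROUND pseudos below are expanded to a small CFG: take |src| with
// fsgnjx, compare it against the threshold in operand 2, and only if it is
// smaller convert to integer and back with the requested rounding mode and
// re-attach the original sign; NaNs and values at or above the threshold pass
// through unchanged via the PHI in DoneMBB.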
22230static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
22231                                     const RISCVSubtarget &Subtarget) {
22232 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
22233 const TargetRegisterClass *RC;
22234 switch (MI.getOpcode()) {
22235 default:
22236 llvm_unreachable("Unexpected opcode");
22237 case RISCV::PseudoFROUND_H:
22238 CmpOpc = RISCV::FLT_H;
22239 F2IOpc = RISCV::FCVT_W_H;
22240 I2FOpc = RISCV::FCVT_H_W;
22241 FSGNJOpc = RISCV::FSGNJ_H;
22242 FSGNJXOpc = RISCV::FSGNJX_H;
22243 RC = &RISCV::FPR16RegClass;
22244 break;
22245 case RISCV::PseudoFROUND_H_INX:
22246 CmpOpc = RISCV::FLT_H_INX;
22247 F2IOpc = RISCV::FCVT_W_H_INX;
22248 I2FOpc = RISCV::FCVT_H_W_INX;
22249 FSGNJOpc = RISCV::FSGNJ_H_INX;
22250 FSGNJXOpc = RISCV::FSGNJX_H_INX;
22251 RC = &RISCV::GPRF16RegClass;
22252 break;
22253 case RISCV::PseudoFROUND_S:
22254 CmpOpc = RISCV::FLT_S;
22255 F2IOpc = RISCV::FCVT_W_S;
22256 I2FOpc = RISCV::FCVT_S_W;
22257 FSGNJOpc = RISCV::FSGNJ_S;
22258 FSGNJXOpc = RISCV::FSGNJX_S;
22259 RC = &RISCV::FPR32RegClass;
22260 break;
22261 case RISCV::PseudoFROUND_S_INX:
22262 CmpOpc = RISCV::FLT_S_INX;
22263 F2IOpc = RISCV::FCVT_W_S_INX;
22264 I2FOpc = RISCV::FCVT_S_W_INX;
22265 FSGNJOpc = RISCV::FSGNJ_S_INX;
22266 FSGNJXOpc = RISCV::FSGNJX_S_INX;
22267 RC = &RISCV::GPRF32RegClass;
22268 break;
22269 case RISCV::PseudoFROUND_D:
22270 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22271 CmpOpc = RISCV::FLT_D;
22272 F2IOpc = RISCV::FCVT_L_D;
22273 I2FOpc = RISCV::FCVT_D_L;
22274 FSGNJOpc = RISCV::FSGNJ_D;
22275 FSGNJXOpc = RISCV::FSGNJX_D;
22276 RC = &RISCV::FPR64RegClass;
22277 break;
22278 case RISCV::PseudoFROUND_D_INX:
22279 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22280 CmpOpc = RISCV::FLT_D_INX;
22281 F2IOpc = RISCV::FCVT_L_D_INX;
22282 I2FOpc = RISCV::FCVT_D_L_INX;
22283 FSGNJOpc = RISCV::FSGNJ_D_INX;
22284 FSGNJXOpc = RISCV::FSGNJX_D_INX;
22285 RC = &RISCV::GPRRegClass;
22286 break;
22287 }
22288
22289 const BasicBlock *BB = MBB->getBasicBlock();
22290 DebugLoc DL = MI.getDebugLoc();
22291  MachineFunction::iterator I = ++MBB->getIterator();
22292
22293  MachineFunction *F = MBB->getParent();
22294  MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
22295 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
22296
22297 F->insert(I, CvtMBB);
22298 F->insert(I, DoneMBB);
22299 // Move all instructions after the sequence to DoneMBB.
22300 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
22301 MBB->end());
22302 // Update machine-CFG edges by transferring all successors of the current
22303 // block to the new block which will contain the Phi nodes for the selects.
22304  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
22305  // Set the successors for MBB.
22306 MBB->addSuccessor(CvtMBB);
22307 MBB->addSuccessor(DoneMBB);
22308
22309 Register DstReg = MI.getOperand(0).getReg();
22310 Register SrcReg = MI.getOperand(1).getReg();
22311 Register MaxReg = MI.getOperand(2).getReg();
22312 int64_t FRM = MI.getOperand(3).getImm();
22313
22314 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22315  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
22316
22317 Register FabsReg = MRI.createVirtualRegister(RC);
22318 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
22319
22320 // Compare the FP value to the max value.
22321 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22322 auto MIB =
22323 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
22324  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22325    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
22326
22327 // Insert branch.
22328 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
22329 .addReg(CmpReg)
22330 .addReg(RISCV::X0)
22331 .addMBB(DoneMBB);
22332
22333 CvtMBB->addSuccessor(DoneMBB);
22334
22335 // Convert to integer.
22336 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22337 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
22338  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22339    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
22340
22341 // Convert back to FP.
22342 Register I2FReg = MRI.createVirtualRegister(RC);
22343 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
22344  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22345    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
22346
22347 // Restore the sign bit.
22348 Register CvtReg = MRI.createVirtualRegister(RC);
22349 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
22350
22351 // Merge the results.
22352 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
22353 .addReg(SrcReg)
22354 .addMBB(MBB)
22355 .addReg(CvtReg)
22356 .addMBB(CvtMBB);
22357
22358 MI.eraseFromParent();
22359 return DoneMBB;
22360}
22361
22362MachineBasicBlock *
22363RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
22364                                                 MachineBasicBlock *BB) const {
22365 switch (MI.getOpcode()) {
22366 default:
22367 llvm_unreachable("Unexpected instr type to insert");
22368 case RISCV::ReadCounterWide:
22369 assert(!Subtarget.is64Bit() &&
22370 "ReadCounterWide is only to be used on riscv32");
22371 return emitReadCounterWidePseudo(MI, BB);
22372 case RISCV::Select_GPR_Using_CC_GPR:
22373 case RISCV::Select_GPR_Using_CC_SImm5_CV:
22374 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
22375 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
22376 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
22377 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
22378 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
22379 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
22380 case RISCV::Select_FPR16_Using_CC_GPR:
22381 case RISCV::Select_FPR16INX_Using_CC_GPR:
22382 case RISCV::Select_FPR32_Using_CC_GPR:
22383 case RISCV::Select_FPR32INX_Using_CC_GPR:
22384 case RISCV::Select_FPR64_Using_CC_GPR:
22385 case RISCV::Select_FPR64INX_Using_CC_GPR:
22386 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
22387 return emitSelectPseudo(MI, BB, Subtarget);
22388 case RISCV::BuildPairF64Pseudo:
22389 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
22390 case RISCV::SplitF64Pseudo:
22391 return emitSplitF64Pseudo(MI, BB, Subtarget);
22392 case RISCV::PseudoQuietFLE_H:
22393 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
22394 case RISCV::PseudoQuietFLE_H_INX:
22395 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
22396 case RISCV::PseudoQuietFLT_H:
22397 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
22398 case RISCV::PseudoQuietFLT_H_INX:
22399 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
22400 case RISCV::PseudoQuietFLE_S:
22401 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
22402 case RISCV::PseudoQuietFLE_S_INX:
22403 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
22404 case RISCV::PseudoQuietFLT_S:
22405 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
22406 case RISCV::PseudoQuietFLT_S_INX:
22407 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
22408 case RISCV::PseudoQuietFLE_D:
22409 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
22410 case RISCV::PseudoQuietFLE_D_INX:
22411 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
22412 case RISCV::PseudoQuietFLE_D_IN32X:
22413 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
22414 Subtarget);
22415 case RISCV::PseudoQuietFLT_D:
22416 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
22417 case RISCV::PseudoQuietFLT_D_INX:
22418 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
22419 case RISCV::PseudoQuietFLT_D_IN32X:
22420 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
22421 Subtarget);
22422
22423 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
22424 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
22425 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
22426 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
22427 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
22428 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
22429 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
22430 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
22431 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
22432 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
22433 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
22434 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
22435 case RISCV::PseudoFROUND_H:
22436 case RISCV::PseudoFROUND_H_INX:
22437 case RISCV::PseudoFROUND_S:
22438 case RISCV::PseudoFROUND_S_INX:
22439 case RISCV::PseudoFROUND_D:
22440 case RISCV::PseudoFROUND_D_INX:
22441 case RISCV::PseudoFROUND_D_IN32X:
22442 return emitFROUND(MI, BB, Subtarget);
22443 case RISCV::PROBED_STACKALLOC_DYN:
22444 return emitDynamicProbedAlloc(MI, BB);
22445 case TargetOpcode::STATEPOINT:
22446 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
22447 // while jal call instruction (where statepoint will be lowered at the end)
22448 // has implicit def. This def is early-clobber as it will be set at
22449 // the moment of the call and earlier than any use is read.
22450 // Add this implicit dead def here as a workaround.
22451 MI.addOperand(*MI.getMF(),
22452                MachineOperand::CreateReg(
22453                    RISCV::X1, /*isDef*/ true,
22454 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
22455 /*isUndef*/ false, /*isEarlyClobber*/ true));
22456 [[fallthrough]];
22457 case TargetOpcode::STACKMAP:
22458 case TargetOpcode::PATCHPOINT:
22459 if (!Subtarget.is64Bit())
22460 reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only "
22461 "supported on 64-bit targets");
22462 return emitPatchPoint(MI, BB);
22463 }
22464}
22465
22466void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
22467                                                        SDNode *Node) const {
22468 // If instruction defines FRM operand, conservatively set it as non-dead to
22469 // express data dependency with FRM users and prevent incorrect instruction
22470 // reordering.
22471 if (auto *FRMDef = MI.findRegisterDefOperand(RISCV::FRM, /*TRI=*/nullptr)) {
22472 FRMDef->setIsDead(false);
22473 return;
22474 }
22475 // Add FRM dependency to any instructions with dynamic rounding mode.
22476 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
22477 if (Idx < 0) {
22478 // Vector pseudos have FRM index indicated by TSFlags.
22479 Idx = RISCVII::getFRMOpNum(MI.getDesc());
22480 if (Idx < 0)
22481 return;
22482 }
22483 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
22484 return;
22485 // If the instruction already reads FRM, don't add another read.
22486 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
22487 return;
22488 MI.addOperand(
22489 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
22490}
22491
22492void RISCVTargetLowering::analyzeInputArgs(
22493 MachineFunction &MF, CCState &CCInfo,
22494 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
22495 RISCVCCAssignFn Fn) const {
22496 for (const auto &[Idx, In] : enumerate(Ins)) {
22497 MVT ArgVT = In.VT;
22498 ISD::ArgFlagsTy ArgFlags = In.Flags;
22499
22500 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22501 In.OrigTy)) {
22502 LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
22503 << ArgVT << '\n');
22504 llvm_unreachable(nullptr);
22505 }
22506 }
22507}
22508
22509void RISCVTargetLowering::analyzeOutputArgs(
22510 MachineFunction &MF, CCState &CCInfo,
22511 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
22512 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
22513 for (const auto &[Idx, Out] : enumerate(Outs)) {
22514 MVT ArgVT = Out.VT;
22515 ISD::ArgFlagsTy ArgFlags = Out.Flags;
22516
22517 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22518 Out.OrigTy)) {
22519 LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
22520 << ArgVT << "\n");
22521 llvm_unreachable(nullptr);
22522 }
22523 }
22524}
22525
22526// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
22527// values.
22528static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
22529                                   const CCValAssign &VA, const SDLoc &DL,
22530 const RISCVSubtarget &Subtarget) {
22531 if (VA.needsCustom()) {
22532 if (VA.getLocVT().isInteger() &&
22533 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22534 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
22535 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
22536 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
22537    if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
22538      return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
22539 llvm_unreachable("Unexpected Custom handling.");
22540 }
22541
22542 switch (VA.getLocInfo()) {
22543 default:
22544 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22545 case CCValAssign::Full:
22546 break;
22547 case CCValAssign::BCvt:
22548 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
22549 break;
22550 }
22551 return Val;
22552}
22553
22554// The caller is responsible for loading the full value if the argument is
22555// passed with CCValAssign::Indirect.
22556static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
22557                                const CCValAssign &VA, const SDLoc &DL,
22558 const ISD::InputArg &In,
22559 const RISCVTargetLowering &TLI) {
22560  MachineFunction &MF = DAG.getMachineFunction();
22561  MachineRegisterInfo &RegInfo = MF.getRegInfo();
22562  EVT LocVT = VA.getLocVT();
22563 SDValue Val;
22564 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
22565 Register VReg = RegInfo.createVirtualRegister(RC);
22566 RegInfo.addLiveIn(VA.getLocReg(), VReg);
22567 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
22568
22569 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
22570 if (In.isOrigArg()) {
22571 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
22572 if (OrigArg->getType()->isIntegerTy()) {
22573 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
22574 // An input zero extended from i31 can also be considered sign extended.
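      // (e.g. an argument zero-extended from i8/i16/i31 has bit 31 clear, so
      // the 32->64 sign extension is a no-op for it as well.)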
22575 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
22576 (BitWidth < 32 && In.Flags.isZExt())) {
22577        RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
22578        RVFI->addSExt32Register(VReg);
22579 }
22580 }
22581 }
22582
22583  if (VA.getLocInfo() == CCValAssign::Indirect)
22584    return Val;
22585
22586 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
22587}
22588
22589static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
22590                                   const CCValAssign &VA, const SDLoc &DL,
22591 const RISCVSubtarget &Subtarget) {
22592 EVT LocVT = VA.getLocVT();
22593
22594 if (VA.needsCustom()) {
22595 if (LocVT.isInteger() &&
22596 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22597 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
22598 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
22599 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
22600 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
22601 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
22602 llvm_unreachable("Unexpected Custom handling.");
22603 }
22604
22605 switch (VA.getLocInfo()) {
22606 default:
22607 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22608 case CCValAssign::Full:
22609 break;
22610 case CCValAssign::BCvt:
22611 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
22612 break;
22613 }
22614 return Val;
22615}
22616
22617// The caller is responsible for loading the full value if the argument is
22618// passed with CCValAssign::Indirect.
22619static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
22620                                const CCValAssign &VA, const SDLoc &DL) {
22621  MachineFunction &MF = DAG.getMachineFunction();
22622  MachineFrameInfo &MFI = MF.getFrameInfo();
22623 EVT LocVT = VA.getLocVT();
22624 EVT ValVT = VA.getValVT();
22625  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
22626  if (VA.getLocInfo() == CCValAssign::Indirect) {
22627 // When the value is a scalable vector, we save the pointer which points to
22628 // the scalable vector value in the stack. The ValVT will be the pointer
22629 // type, instead of the scalable vector type.
22630 ValVT = LocVT;
22631 }
22632 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
22633 /*IsImmutable=*/true);
22634 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22635 SDValue Val;
22636
22638 switch (VA.getLocInfo()) {
22639 default:
22640 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22641 case CCValAssign::Full:
22642  case CCValAssign::Indirect:
22643  case CCValAssign::BCvt:
22644 break;
22645 }
22646 Val = DAG.getExtLoad(
22647 ExtType, DL, LocVT, Chain, FIN,
22648      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
22649  return Val;
22650}
22651
22652static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
22653                                       const CCValAssign &VA,
22654 const CCValAssign &HiVA,
22655 const SDLoc &DL) {
22656 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
22657 "Unexpected VA");
22658  MachineFunction &MF = DAG.getMachineFunction();
22659  MachineFrameInfo &MFI = MF.getFrameInfo();
22660  MachineRegisterInfo &RegInfo = MF.getRegInfo();
22661
22662 assert(VA.isRegLoc() && "Expected register VA assignment");
22663
22664 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22665 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
22666 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
22667 SDValue Hi;
22668 if (HiVA.isMemLoc()) {
22669 // Second half of f64 is passed on the stack.
22670 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
22671 /*IsImmutable=*/true);
22672 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
22673 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
22674                     MachinePointerInfo::getFixedStack(MF, FI));
22675  } else {
22676 // Second half of f64 is passed in another GPR.
22677 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22678 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
22679 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
22680 }
22681 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
22682}
22683
22684// Transform physical registers into virtual registers.
22685SDValue RISCVTargetLowering::LowerFormalArguments(
22686    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
22687 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
22688 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
22689
22690  MachineFunction &MF = DAG.getMachineFunction();
22691
22692 switch (CallConv) {
22693 default:
22694 reportFatalUsageError("Unsupported calling convention");
22695 case CallingConv::C:
22696 case CallingConv::Fast:
22699 case CallingConv::GRAAL:
22701#define CC_VLS_CASE(ABI_VLEN) case CallingConv::RISCV_VLSCall_##ABI_VLEN:
22702 CC_VLS_CASE(32)
22703 CC_VLS_CASE(64)
22704 CC_VLS_CASE(128)
22705 CC_VLS_CASE(256)
22706 CC_VLS_CASE(512)
22707 CC_VLS_CASE(1024)
22708 CC_VLS_CASE(2048)
22709 CC_VLS_CASE(4096)
22710 CC_VLS_CASE(8192)
22711 CC_VLS_CASE(16384)
22712 CC_VLS_CASE(32768)
22713 CC_VLS_CASE(65536)
22714#undef CC_VLS_CASE
22715 break;
22716 case CallingConv::GHC:
22717 if (Subtarget.hasStdExtE())
22718 reportFatalUsageError("GHC calling convention is not supported on RVE!");
22719 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
22720 reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and "
22721 "(Zdinx/D) instruction set extensions");
22722 }
22723
22724 const Function &Func = MF.getFunction();
22725 if (Func.hasFnAttribute("interrupt")) {
22726 if (!Func.arg_empty())
22727      reportFatalUsageError(
22728          "Functions with the interrupt attribute cannot have arguments!");
22729
22730 StringRef Kind =
22731 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
22732
22733 constexpr StringLiteral SupportedInterruptKinds[] = {
22734 "machine",
22735 "supervisor",
22736 "rnmi",
22737 "qci-nest",
22738 "qci-nonest",
22739 "SiFive-CLIC-preemptible",
22740 "SiFive-CLIC-stack-swap",
22741 "SiFive-CLIC-preemptible-stack-swap",
22742 };
22743 if (!llvm::is_contained(SupportedInterruptKinds, Kind))
22744      reportFatalUsageError(
22745          "Function interrupt attribute argument not supported!");
22746
22747 if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint())
22748      reportFatalUsageError(
22749          "'qci-*' interrupt kinds require Xqciint extension");
22750
22751 if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
22752      reportFatalUsageError(
22753          "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");
22754
22755 if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
22756 reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension");
22757 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
22758 if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
22759 reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
22760 "have a frame pointer");
22761 }
22762
22763 EVT PtrVT = getPointerTy(DAG.getDataLayout());
22764 MVT XLenVT = Subtarget.getXLenVT();
22765 unsigned XLenInBytes = Subtarget.getXLen() / 8;
22766 // Used with vargs to accumulate store chains.
22767 std::vector<SDValue> OutChains;
22768
22769 // Assign locations to all of the incoming arguments.
22771 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
22772
22773 if (CallConv == CallingConv::GHC)
22774    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
22775  else
22776 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
22777                     CallConv == CallingConv::Fast ? CC_RISCV_FastCC
22778                                                   : CC_RISCV);
22779
22780 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
22781 CCValAssign &VA = ArgLocs[i];
22782 SDValue ArgValue;
22783 // Passing f64 on RV32D with a soft float ABI must be handled as a special
22784 // case.
22785 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
22786 assert(VA.needsCustom());
22787 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
22788 } else if (VA.isRegLoc())
22789 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
22790 else
22791 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
22792
22793 if (VA.getLocInfo() == CCValAssign::Indirect) {
22794 // If the original argument was split and passed by reference (e.g. i128
22795 // on RV32), we need to load all parts of it here (using the same
22796 // address). Vectors may be partly split to registers and partly to the
22797 // stack, in which case the base address is partly offset and subsequent
22798 // stores are relative to that.
22799 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
22800                                   MachinePointerInfo()));
22801      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
22802 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
22803 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
22804 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
22805 CCValAssign &PartVA = ArgLocs[i + 1];
22806 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
22807 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
22808 if (PartVA.getValVT().isScalableVector())
22809 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
22810 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
22811 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
22812                                     MachinePointerInfo()));
22813        ++i;
22814 ++InsIdx;
22815 }
22816 continue;
22817 }
22818 InVals.push_back(ArgValue);
22819 }
22820
22821 if (any_of(ArgLocs,
22822 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
22823 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
22824
22825 if (IsVarArg) {
22826 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
22827 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
22828 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
22829 MachineFrameInfo &MFI = MF.getFrameInfo();
22830 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22831    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
22832
22833 // Size of the vararg save area. For now, the varargs save area is either
22834 // zero or large enough to hold a0-a7.
22835 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
22836 int FI;
22837
22838 // If all registers are allocated, then all varargs must be passed on the
22839 // stack and we don't need to save any argregs.
22840 if (VarArgsSaveSize == 0) {
22841 int VaArgOffset = CCInfo.getStackSize();
22842 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
22843 } else {
22844 int VaArgOffset = -VarArgsSaveSize;
22845 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
22846
22847 // If saving an odd number of registers then create an extra stack slot to
22848 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
22849      // offsets to even-numbered registers remain 2*XLEN-aligned.
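      // For example, on RV64 with three named GPR arguments (Idx == 3), a3-a7
      // are saved (5 * 8 bytes) and one extra 8-byte slot keeps the save area
      // 16-byte aligned.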
22850 if (Idx % 2) {
22851        MFI.CreateFixedObject(
22852            XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
22853 VarArgsSaveSize += XLenInBytes;
22854 }
22855
22856 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22857
22858 // Copy the integer registers that may have been used for passing varargs
22859 // to the vararg save area.
22860 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
22861 const Register Reg = RegInfo.createVirtualRegister(RC);
22862 RegInfo.addLiveIn(ArgRegs[I], Reg);
22863 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
22864 SDValue Store = DAG.getStore(
22865 Chain, DL, ArgValue, FIN,
22866 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
22867 OutChains.push_back(Store);
22868 FIN =
22869 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
22870 }
22871 }
22872
22873 // Record the frame index of the first variable argument
22874 // which is a value necessary to VASTART.
22875 RVFI->setVarArgsFrameIndex(FI);
22876 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
22877 }
22878
22879 // All stores are grouped in one node to allow the matching between
22880 // the size of Ins and InVals. This only happens for vararg functions.
22881 if (!OutChains.empty()) {
22882 OutChains.push_back(Chain);
22883 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
22884 }
22885
22886 return Chain;
22887}
22888
22889/// isEligibleForTailCallOptimization - Check whether the call is eligible
22890/// for tail call optimization.
22891/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
22892bool RISCVTargetLowering::isEligibleForTailCallOptimization(
22893 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
22894 const SmallVector<CCValAssign, 16> &ArgLocs) const {
22895
22896 auto CalleeCC = CLI.CallConv;
22897 auto &Outs = CLI.Outs;
22898 auto &Caller = MF.getFunction();
22899 auto CallerCC = Caller.getCallingConv();
22900
22901 // Exception-handling functions need a special set of instructions to
22902 // indicate a return to the hardware. Tail-calling another function would
22903 // probably break this.
22904 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
22905 // should be expanded as new function attributes are introduced.
22906 if (Caller.hasFnAttribute("interrupt"))
22907 return false;
22908
22909 // Do not tail call opt if the stack is used to pass parameters.
22910 if (CCInfo.getStackSize() != 0)
22911 return false;
22912
22913 // Do not tail call opt if any parameters need to be passed indirectly.
22914 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
22915 // passed indirectly. So the address of the value will be passed in a
22916 // register, or if not available, then the address is put on the stack. In
22917 // order to pass indirectly, space on the stack often needs to be allocated
22918  // in order to store the value. In this case the CCInfo.getStackSize()
22919 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
22920 // are passed CCValAssign::Indirect.
22921 for (auto &VA : ArgLocs)
22922 if (VA.getLocInfo() == CCValAssign::Indirect)
22923 return false;
22924
22925 // Do not tail call opt if either caller or callee uses struct return
22926 // semantics.
22927 auto IsCallerStructRet = Caller.hasStructRetAttr();
22928 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
22929 if (IsCallerStructRet || IsCalleeStructRet)
22930 return false;
22931
22932 // The callee has to preserve all registers the caller needs to preserve.
22933 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
22934 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
22935 if (CalleeCC != CallerCC) {
22936 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
22937 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
22938 return false;
22939 }
22940
22941 // Byval parameters hand the function a pointer directly into the stack area
22942 // we want to reuse during a tail call. Working around this *is* possible
22943 // but less efficient and uglier in LowerCall.
22944 for (auto &Arg : Outs)
22945 if (Arg.Flags.isByVal())
22946 return false;
22947
22948 return true;
22949}
22950
22951static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
22952  return DAG.getDataLayout().getPrefTypeAlign(
22953 VT.getTypeForEVT(*DAG.getContext()));
22954}
22955
22956// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
22957// and output parameter nodes.
22958SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
22959                                       SmallVectorImpl<SDValue> &InVals) const {
22960 SelectionDAG &DAG = CLI.DAG;
22961 SDLoc &DL = CLI.DL;
22962  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
22963  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
22964  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
22965  SDValue Chain = CLI.Chain;
22966 SDValue Callee = CLI.Callee;
22967 bool &IsTailCall = CLI.IsTailCall;
22968 CallingConv::ID CallConv = CLI.CallConv;
22969 bool IsVarArg = CLI.IsVarArg;
22970 EVT PtrVT = getPointerTy(DAG.getDataLayout());
22971 MVT XLenVT = Subtarget.getXLenVT();
22972 const CallBase *CB = CLI.CB;
22973
22974  MachineFunction &MF = DAG.getMachineFunction();
22975  MachineFunction::CallSiteInfo CSInfo;
22976
22977 // Set type id for call site info.
22978 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
22979 CSInfo = MachineFunction::CallSiteInfo(*CB);
22980
22981 // Analyze the operands of the call, assigning locations to each operand.
22982  SmallVector<CCValAssign, 16> ArgLocs;
22983  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
22984
22985 if (CallConv == CallingConv::GHC) {
22986 if (Subtarget.hasStdExtE())
22987 reportFatalUsageError("GHC calling convention is not supported on RVE!");
22988 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
22989 } else
22990 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
22991                      CallConv == CallingConv::Fast ? CC_RISCV_FastCC
22992                                                    : CC_RISCV);
22993
22994 // Check if it's really possible to do a tail call.
22995 if (IsTailCall)
22996 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
22997
22998 if (IsTailCall)
22999 ++NumTailCalls;
23000 else if (CLI.CB && CLI.CB->isMustTailCall())
23001 reportFatalInternalError("failed to perform tail call elimination on a "
23002 "call site marked musttail");
23003
23004 // Get a count of how many bytes are to be pushed on the stack.
23005 unsigned NumBytes = ArgCCInfo.getStackSize();
23006
23007 // Create local copies for byval args
23008 SmallVector<SDValue, 8> ByValArgs;
23009 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23010 ISD::ArgFlagsTy Flags = Outs[i].Flags;
23011 if (!Flags.isByVal())
23012 continue;
23013
23014 SDValue Arg = OutVals[i];
23015 unsigned Size = Flags.getByValSize();
23016 Align Alignment = Flags.getNonZeroByValAlign();
23017
23018 int FI =
23019 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
23020 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
23021 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
23022
23023 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
23024 /*IsVolatile=*/false,
23025 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
23027 ByValArgs.push_back(FIPtr);
23028 }
23029
23030 if (!IsTailCall)
23031 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
23032
23033 // Copy argument values to their designated locations.
23035 SmallVector<SDValue, 8> MemOpChains;
23036 SDValue StackPtr;
23037 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
23038 ++i, ++OutIdx) {
23039 CCValAssign &VA = ArgLocs[i];
23040 SDValue ArgValue = OutVals[OutIdx];
23041 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
23042
23043 // Handle passing f64 on RV32D with a soft float ABI as a special case.
23044 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23045 assert(VA.isRegLoc() && "Expected register VA assignment");
23046 assert(VA.needsCustom());
23047 SDValue SplitF64 = DAG.getNode(
23048 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
23049 SDValue Lo = SplitF64.getValue(0);
23050 SDValue Hi = SplitF64.getValue(1);
23051
23052 Register RegLo = VA.getLocReg();
23053 RegsToPass.push_back(std::make_pair(RegLo, Lo));
23054
23055 // Get the CCValAssign for the Hi part.
23056 CCValAssign &HiVA = ArgLocs[++i];
23057
23058 if (HiVA.isMemLoc()) {
23059 // Second half of f64 is passed on the stack.
23060 if (!StackPtr.getNode())
23061             StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23062           SDValue Address =
23063               DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23064 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
23065 // Emit the store.
23066 MemOpChains.push_back(DAG.getStore(
23067 Chain, DL, Hi, Address,
23069 } else {
23070 // Second half of f64 is passed in another GPR.
23071 Register RegHigh = HiVA.getLocReg();
23072 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
23073 }
23074 continue;
23075 }
23076
23077 // Promote the value if needed.
23078 // For now, only handle fully promoted and indirect arguments.
23079 if (VA.getLocInfo() == CCValAssign::Indirect) {
23080 // Store the argument in a stack slot and pass its address.
23081 Align StackAlign =
23082 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
23083 getPrefTypeAlign(ArgValue.getValueType(), DAG));
23084 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
23085 // If the original argument was split (e.g. i128), we need
23086 // to store the required parts of it here (and pass just one address).
23087 // Vectors may be partly split to registers and partly to the stack, in
23088 // which case the base address is partly offset and subsequent stores are
23089 // relative to that.
23090 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
23091 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
23092 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23093 // Calculate the total size to store. We don't have access to what we're
23094 // actually storing other than performing the loop and collecting the
23095 // info.
23097 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
23098 SDValue PartValue = OutVals[OutIdx + 1];
23099 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
23100 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23101 EVT PartVT = PartValue.getValueType();
23102 if (PartVT.isScalableVector())
23103 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23104 StoredSize += PartVT.getStoreSize();
23105 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
23106 Parts.push_back(std::make_pair(PartValue, Offset));
23107 ++i;
23108 ++OutIdx;
23109 }
23110 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
23111 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
23112 MemOpChains.push_back(
23113 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
23115 for (const auto &Part : Parts) {
23116 SDValue PartValue = Part.first;
23117 SDValue PartOffset = Part.second;
23119 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
23120 MemOpChains.push_back(
23121 DAG.getStore(Chain, DL, PartValue, Address,
23123 }
23124 ArgValue = SpillSlot;
23125 } else {
23126 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
23127 }
23128
23129 // Use local copy if it is a byval arg.
23130 if (Flags.isByVal())
23131 ArgValue = ByValArgs[j++];
23132
23133 if (VA.isRegLoc()) {
23134 // Queue up the argument copies and emit them at the end.
23135 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
23136 } else {
23137 assert(VA.isMemLoc() && "Argument not register or memory");
23138 assert(!IsTailCall && "Tail call not allowed if stack is used "
23139 "for passing parameters");
23140
23141 // Work out the address of the stack slot.
23142 if (!StackPtr.getNode())
23143         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23144       SDValue Address =
23145           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23146                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
23147
23148 // Emit the store.
23149 MemOpChains.push_back(
23150 DAG.getStore(Chain, DL, ArgValue, Address,
23152 }
23153 }
23154
23155 // Join the stores, which are independent of one another.
23156 if (!MemOpChains.empty())
23157 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
23158
23159 SDValue Glue;
23160
23161 // Build a sequence of copy-to-reg nodes, chained and glued together.
23162 for (auto &Reg : RegsToPass) {
23163 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
23164 Glue = Chain.getValue(1);
23165 }
23166
23167   // Validate that none of the argument registers have been marked as
23168   // reserved; if any have, report an error. Do the same for the return
23169   // address if this is not a tail call.
23170 validateCCReservedRegs(RegsToPass, MF);
23171 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
23173 MF.getFunction(),
23174 "Return address register required, but has been reserved."});
23175
23176   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
23177   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
23178   // split it, and the direct call can then be matched by PseudoCALL.
23179 bool CalleeIsLargeExternalSymbol = false;
23181 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
23182 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
23183 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23184 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
23185 CalleeIsLargeExternalSymbol = true;
23186 }
23187 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
23188 const GlobalValue *GV = S->getGlobal();
23189 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
23190 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23191 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
23192 }
23193
23194 // The first call operand is the chain and the second is the target address.
23196 Ops.push_back(Chain);
23197 Ops.push_back(Callee);
23198
23199 // Add argument registers to the end of the list so that they are
23200 // known live into the call.
23201 for (auto &Reg : RegsToPass)
23202 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
23203
23204 // Add a register mask operand representing the call-preserved registers.
23205 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
23206 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
23207 assert(Mask && "Missing call preserved mask for calling convention");
23208 Ops.push_back(DAG.getRegisterMask(Mask));
23209
23210 // Glue the call to the argument copies, if any.
23211 if (Glue.getNode())
23212 Ops.push_back(Glue);
23213
23214 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
23215 "Unexpected CFI type for a direct call");
23216
23217 // Emit the call.
23218 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
23219
23220   // Use a software-guarded branch for large code model non-indirect calls.
23221   // A tail call to an external symbol will have a null CLI.CB, so we need
23222   // another way to determine the call site type.
23223 bool NeedSWGuarded = false;
23225 Subtarget.hasStdExtZicfilp() &&
23226 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
23227 NeedSWGuarded = true;
23228
23229 if (IsTailCall) {
23231 unsigned CallOpc =
23232 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
23233 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23234 if (CLI.CFIType)
23235 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23236 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
23237 if (MF.getTarget().Options.EmitCallGraphSection && CB &&
23238 CB->isIndirectCall())
23239 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
23240 return Ret;
23241 }
23242
23243 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
23244 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23245 if (CLI.CFIType)
23246 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23247
23248 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
23249 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
23250
23251 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
23252 Glue = Chain.getValue(1);
23253
23254 // Mark the end of the call, which is glued to the call itself.
23255 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
23256 Glue = Chain.getValue(1);
23257
23258 // Assign locations to each value returned by this call.
23260 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
23261 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
23262
23263 // Copy all of the result registers out of their specified physreg.
23264 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
23265 auto &VA = RVLocs[i];
23266 // Copy the value out
23267 SDValue RetValue =
23268 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
23269 // Glue the RetValue to the end of the call sequence
23270 Chain = RetValue.getValue(1);
23271 Glue = RetValue.getValue(2);
23272
23273 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23274 assert(VA.needsCustom());
23275 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
23276 MVT::i32, Glue);
23277 Chain = RetValue2.getValue(1);
23278 Glue = RetValue2.getValue(2);
23279 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
23280 RetValue2);
23281 } else
23282 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
23283
23284 InVals.push_back(RetValue);
23285 }
23286
23287 return Chain;
23288}
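// The RV32 soft-float paths above move an f64 argument or return value as two
// i32 halves via RISCVISD::SplitF64 and RISCVISD::BuildPairF64. The following
// is a minimal host-side sketch of that bit-level contract only, using
// standard C++ and illustrative helper names, not the DAG nodes themselves:
#include <cstdint>
#include <cstring>
#include <utility>

static std::pair<uint32_t, uint32_t> splitF64Sketch(double D) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  // Lo half travels in the first (even) register, Hi half in the second.
  return {static_cast<uint32_t>(Bits), static_cast<uint32_t>(Bits >> 32)};
}

static double buildPairF64Sketch(uint32_t Lo, uint32_t Hi) {
  uint64_t Bits = (static_cast<uint64_t>(Hi) << 32) | Lo;
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D;
}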
23289
23291 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
23292 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
23293 const Type *RetTy) const {
23295 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
23296
23297 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23298 MVT VT = Outs[i].VT;
23299 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
23300 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
23301 /*IsRet=*/true, Outs[i].OrigTy))
23302 return false;
23303 }
23304 return true;
23305}
23306
23307SDValue
23309 bool IsVarArg,
23311 const SmallVectorImpl<SDValue> &OutVals,
23312 const SDLoc &DL, SelectionDAG &DAG) const {
23314 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
23315
23316 // Stores the assignment of the return value to a location.
23318
23319 // Info about the registers and stack slot.
23320 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
23321 *DAG.getContext());
23322
23323 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
23324 nullptr, CC_RISCV);
23325
23326 if (CallConv == CallingConv::GHC && !RVLocs.empty())
23327 reportFatalUsageError("GHC functions return void only");
23328
23329 SDValue Glue;
23330 SmallVector<SDValue, 4> RetOps(1, Chain);
23331
23332 // Copy the result values into the output registers.
23333 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
23334 SDValue Val = OutVals[OutIdx];
23335 CCValAssign &VA = RVLocs[i];
23336 assert(VA.isRegLoc() && "Can only return in registers!");
23337
23338 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23339 // Handle returning f64 on RV32D with a soft float ABI.
23340 assert(VA.isRegLoc() && "Expected return via registers");
23341 assert(VA.needsCustom());
23342 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
23343 DAG.getVTList(MVT::i32, MVT::i32), Val);
23344 SDValue Lo = SplitF64.getValue(0);
23345 SDValue Hi = SplitF64.getValue(1);
23346 Register RegLo = VA.getLocReg();
23347 Register RegHi = RVLocs[++i].getLocReg();
23348
23349 if (STI.isRegisterReservedByUser(RegLo) ||
23350 STI.isRegisterReservedByUser(RegHi))
23352 MF.getFunction(),
23353 "Return value register required, but has been reserved."});
23354
23355 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
23356 Glue = Chain.getValue(1);
23357 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
23358 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
23359 Glue = Chain.getValue(1);
23360 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
23361 } else {
23362 // Handle a 'normal' return.
23363 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
23364 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
23365
23366 if (STI.isRegisterReservedByUser(VA.getLocReg()))
23368 MF.getFunction(),
23369 "Return value register required, but has been reserved."});
23370
23371 // Guarantee that all emitted copies are stuck together.
23372 Glue = Chain.getValue(1);
23373 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
23374 }
23375 }
23376
23377 RetOps[0] = Chain; // Update chain.
23378
23379 // Add the glue node if we have it.
23380 if (Glue.getNode()) {
23381 RetOps.push_back(Glue);
23382 }
23383
23384 if (any_of(RVLocs,
23385 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
23386 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
23387
23388 unsigned RetOpc = RISCVISD::RET_GLUE;
23389 // Interrupt service routines use different return instructions.
23390 const Function &Func = DAG.getMachineFunction().getFunction();
23391 if (Func.hasFnAttribute("interrupt")) {
23392 if (!Func.getReturnType()->isVoidTy())
23394 "Functions with the interrupt attribute must have void return type!");
23395
23397 StringRef Kind =
23398 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
23399
23400 if (Kind == "supervisor")
23401 RetOpc = RISCVISD::SRET_GLUE;
23402 else if (Kind == "rnmi") {
23403 assert(STI.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
23404 "Need Smrnmi extension for rnmi");
23405 RetOpc = RISCVISD::MNRET_GLUE;
23406 } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
23407 assert(STI.hasFeature(RISCV::FeatureVendorXqciint) &&
23408 "Need Xqciint for qci-(no)nest");
23409 RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
23410 } else
23411 RetOpc = RISCVISD::MRET_GLUE;
23412 }
23413
23414 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
23415}
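// Source-level sketch of the "interrupt" attribute consumed above: the kind
// string selects the return instruction (mret for "machine", sret for
// "supervisor", mnret for "rnmi", and the Xqciint return for "qci-nest" /
// "qci-nonest"). Clang spelling shown, RISC-V target assumed; handlers must
// return void:
__attribute__((interrupt("machine"))) void machineModeHandlerSketch(void) {
  // Handler body; the function returns with mret instead of ret.
}

__attribute__((interrupt("supervisor"))) void supervisorHandlerSketch(void) {
  // Handler body; the function returns with sret.
}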
23416
23417void RISCVTargetLowering::validateCCReservedRegs(
23418 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
23419 MachineFunction &MF) const {
23420 const Function &F = MF.getFunction();
23421 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
23422
23423 if (llvm::any_of(Regs, [&STI](auto Reg) {
23424 return STI.isRegisterReservedByUser(Reg.first);
23425 }))
23426 F.getContext().diagnose(DiagnosticInfoUnsupported{
23427 F, "Argument register required, but has been reserved."});
23428}
23429
23430// Check if the result of the node is only used as a return value, as
23431// otherwise we can't perform a tail-call.
23433 if (N->getNumValues() != 1)
23434 return false;
23435 if (!N->hasNUsesOfValue(1, 0))
23436 return false;
23437
23438 SDNode *Copy = *N->user_begin();
23439
23440 if (Copy->getOpcode() == ISD::BITCAST) {
23441 return isUsedByReturnOnly(Copy, Chain);
23442 }
23443
23444 // TODO: Handle additional opcodes in order to support tail-calling libcalls
23445 // with soft float ABIs.
23446 if (Copy->getOpcode() != ISD::CopyToReg) {
23447 return false;
23448 }
23449
23450 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
23451 // isn't safe to perform a tail call.
23452 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
23453 return false;
23454
23455 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
23456 bool HasRet = false;
23457 for (SDNode *Node : Copy->users()) {
23458 if (Node->getOpcode() != RISCVISD::RET_GLUE)
23459 return false;
23460 HasRet = true;
23461 }
23462 if (!HasRet)
23463 return false;
23464
23465 Chain = Copy->getOperand(0);
23466 return true;
23467}
23468
23470 return CI->isTailCall();
23471}
23472
23473/// getConstraintType - Given a constraint letter, return the type of
23474/// constraint it is for this target.
23477 if (Constraint.size() == 1) {
23478 switch (Constraint[0]) {
23479 default:
23480 break;
23481 case 'f':
23482 case 'R':
23483 return C_RegisterClass;
23484 case 'I':
23485 case 'J':
23486 case 'K':
23487 return C_Immediate;
23488 case 'A':
23489 return C_Memory;
23490 case 's':
23491 case 'S': // A symbolic address
23492 return C_Other;
23493 }
23494 } else {
23495 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
23496 return C_RegisterClass;
23497 if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
23498 return C_RegisterClass;
23499 }
23500 return TargetLowering::getConstraintType(Constraint);
23501}
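// Usage sketch of the single-letter constraints classified above, written as
// GCC/Clang-style inline assembly for RISC-V. The helper names are
// illustrative; only the constraint letters ('r', 'A', 'I') come from the
// classification above. The first example assumes the A extension:
#include <cstdint>

static inline int32_t amoSwapSketch(int32_t *Ptr, int32_t Val) {
  int32_t Old;
  // 'A' is a memory operand whose address lives in a GPR; 'r' is any GPR.
  asm volatile("amoswap.w %0, %2, %1" : "=r"(Old), "+A"(*Ptr) : "r"(Val));
  return Old;
}

static inline int32_t addImmSketch(int32_t X) {
  int32_t R;
  // 'I' accepts a 12-bit signed immediate, so 42 is encoded directly in addi.
  asm("addi %0, %1, %2" : "=r"(R) : "r"(X), "I"(42));
  return R;
}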
23502
23503std::pair<unsigned, const TargetRegisterClass *>
23505 StringRef Constraint,
23506 MVT VT) const {
23507 // First, see if this is a constraint that directly corresponds to a RISC-V
23508 // register class.
23509 if (Constraint.size() == 1) {
23510 switch (Constraint[0]) {
23511 case 'r':
23512 // TODO: Support fixed vectors up to XLen for P extension?
23513 if (VT.isVector())
23514 break;
23515 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23516 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23517 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23518 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23519 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23520 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23521 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23522 case 'f':
23523 if (VT == MVT::f16) {
23524 if (Subtarget.hasStdExtZfhmin())
23525 return std::make_pair(0U, &RISCV::FPR16RegClass);
23526 if (Subtarget.hasStdExtZhinxmin())
23527 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23528 } else if (VT == MVT::f32) {
23529 if (Subtarget.hasStdExtF())
23530 return std::make_pair(0U, &RISCV::FPR32RegClass);
23531 if (Subtarget.hasStdExtZfinx())
23532 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23533 } else if (VT == MVT::f64) {
23534 if (Subtarget.hasStdExtD())
23535 return std::make_pair(0U, &RISCV::FPR64RegClass);
23536 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23537 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23538 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23539 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23540 }
23541 break;
23542 case 'R':
23543 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23544 (VT == MVT::i128 && Subtarget.is64Bit()))
23545 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23546 break;
23547 default:
23548 break;
23549 }
23550 } else if (Constraint == "vr") {
23551 for (const auto *RC :
23552 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
23553 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
23554 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
23555 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
23556 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
23557 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
23558 &RISCV::VRN2M4RegClass}) {
23559 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23560 return std::make_pair(0U, RC);
23561
23562 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23563 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23564 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23565 return std::make_pair(0U, RC);
23566 }
23567 }
23568 } else if (Constraint == "vd") {
23569 for (const auto *RC :
23570 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
23571 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
23572 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
23573 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
23574 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
23575 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
23576 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
23577 &RISCV::VRN2M4NoV0RegClass}) {
23578 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23579 return std::make_pair(0U, RC);
23580
23581 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23582 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23583 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23584 return std::make_pair(0U, RC);
23585 }
23586 }
23587 } else if (Constraint == "vm") {
23588 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
23589 return std::make_pair(0U, &RISCV::VMV0RegClass);
23590
23591 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23592 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23593       // VT here might have been coerced to a vector with i8 elements, so we
23594       // need to check whether this is an M1 register instead of checking
            // VMV0RegClass.
23595 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT))
23596 return std::make_pair(0U, &RISCV::VMV0RegClass);
23597 }
23598 } else if (Constraint == "cr") {
23599 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23600 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23601 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23602 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23603 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23604 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23605 if (!VT.isVector())
23606 return std::make_pair(0U, &RISCV::GPRCRegClass);
23607 } else if (Constraint == "cR") {
23608 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23609 (VT == MVT::i128 && Subtarget.is64Bit()))
23610 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23611 } else if (Constraint == "cf") {
23612 if (VT == MVT::f16) {
23613 if (Subtarget.hasStdExtZfhmin())
23614 return std::make_pair(0U, &RISCV::FPR16CRegClass);
23615 if (Subtarget.hasStdExtZhinxmin())
23616 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23617 } else if (VT == MVT::f32) {
23618 if (Subtarget.hasStdExtF())
23619 return std::make_pair(0U, &RISCV::FPR32CRegClass);
23620 if (Subtarget.hasStdExtZfinx())
23621 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23622 } else if (VT == MVT::f64) {
23623 if (Subtarget.hasStdExtD())
23624 return std::make_pair(0U, &RISCV::FPR64CRegClass);
23625 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23626 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23627 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23628 return std::make_pair(0U, &RISCV::GPRCRegClass);
23629 }
23630 }
23631
23632 // Clang will correctly decode the usage of register name aliases into their
23633 // official names. However, other frontends like `rustc` do not. This allows
23634 // users of these frontends to use the ABI names for registers in LLVM-style
23635 // register constraints.
23636 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
23637 .Case("{zero}", RISCV::X0)
23638 .Case("{ra}", RISCV::X1)
23639 .Case("{sp}", RISCV::X2)
23640 .Case("{gp}", RISCV::X3)
23641 .Case("{tp}", RISCV::X4)
23642 .Case("{t0}", RISCV::X5)
23643 .Case("{t1}", RISCV::X6)
23644 .Case("{t2}", RISCV::X7)
23645 .Cases("{s0}", "{fp}", RISCV::X8)
23646 .Case("{s1}", RISCV::X9)
23647 .Case("{a0}", RISCV::X10)
23648 .Case("{a1}", RISCV::X11)
23649 .Case("{a2}", RISCV::X12)
23650 .Case("{a3}", RISCV::X13)
23651 .Case("{a4}", RISCV::X14)
23652 .Case("{a5}", RISCV::X15)
23653 .Case("{a6}", RISCV::X16)
23654 .Case("{a7}", RISCV::X17)
23655 .Case("{s2}", RISCV::X18)
23656 .Case("{s3}", RISCV::X19)
23657 .Case("{s4}", RISCV::X20)
23658 .Case("{s5}", RISCV::X21)
23659 .Case("{s6}", RISCV::X22)
23660 .Case("{s7}", RISCV::X23)
23661 .Case("{s8}", RISCV::X24)
23662 .Case("{s9}", RISCV::X25)
23663 .Case("{s10}", RISCV::X26)
23664 .Case("{s11}", RISCV::X27)
23665 .Case("{t3}", RISCV::X28)
23666 .Case("{t4}", RISCV::X29)
23667 .Case("{t5}", RISCV::X30)
23668 .Case("{t6}", RISCV::X31)
23669 .Default(RISCV::NoRegister);
23670 if (XRegFromAlias != RISCV::NoRegister)
23671 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
23672
23673 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
23674 // TableGen record rather than the AsmName to choose registers for InlineAsm
23675 // constraints, and since we want to match those names to the widest floating
23676 // point register type available, we manually select floating point registers here.
23677 //
23678 // The second case is the ABI name of the register, so that frontends can also
23679 // use the ABI names in register constraint lists.
23680 if (Subtarget.hasStdExtF()) {
23681 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
23682 .Cases("{f0}", "{ft0}", RISCV::F0_F)
23683 .Cases("{f1}", "{ft1}", RISCV::F1_F)
23684 .Cases("{f2}", "{ft2}", RISCV::F2_F)
23685 .Cases("{f3}", "{ft3}", RISCV::F3_F)
23686 .Cases("{f4}", "{ft4}", RISCV::F4_F)
23687 .Cases("{f5}", "{ft5}", RISCV::F5_F)
23688 .Cases("{f6}", "{ft6}", RISCV::F6_F)
23689 .Cases("{f7}", "{ft7}", RISCV::F7_F)
23690 .Cases("{f8}", "{fs0}", RISCV::F8_F)
23691 .Cases("{f9}", "{fs1}", RISCV::F9_F)
23692 .Cases("{f10}", "{fa0}", RISCV::F10_F)
23693 .Cases("{f11}", "{fa1}", RISCV::F11_F)
23694 .Cases("{f12}", "{fa2}", RISCV::F12_F)
23695 .Cases("{f13}", "{fa3}", RISCV::F13_F)
23696 .Cases("{f14}", "{fa4}", RISCV::F14_F)
23697 .Cases("{f15}", "{fa5}", RISCV::F15_F)
23698 .Cases("{f16}", "{fa6}", RISCV::F16_F)
23699 .Cases("{f17}", "{fa7}", RISCV::F17_F)
23700 .Cases("{f18}", "{fs2}", RISCV::F18_F)
23701 .Cases("{f19}", "{fs3}", RISCV::F19_F)
23702 .Cases("{f20}", "{fs4}", RISCV::F20_F)
23703 .Cases("{f21}", "{fs5}", RISCV::F21_F)
23704 .Cases("{f22}", "{fs6}", RISCV::F22_F)
23705 .Cases("{f23}", "{fs7}", RISCV::F23_F)
23706 .Cases("{f24}", "{fs8}", RISCV::F24_F)
23707 .Cases("{f25}", "{fs9}", RISCV::F25_F)
23708 .Cases("{f26}", "{fs10}", RISCV::F26_F)
23709 .Cases("{f27}", "{fs11}", RISCV::F27_F)
23710 .Cases("{f28}", "{ft8}", RISCV::F28_F)
23711 .Cases("{f29}", "{ft9}", RISCV::F29_F)
23712 .Cases("{f30}", "{ft10}", RISCV::F30_F)
23713 .Cases("{f31}", "{ft11}", RISCV::F31_F)
23714 .Default(RISCV::NoRegister);
23715 if (FReg != RISCV::NoRegister) {
23716 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
23717 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
23718 unsigned RegNo = FReg - RISCV::F0_F;
23719 unsigned DReg = RISCV::F0_D + RegNo;
23720 return std::make_pair(DReg, &RISCV::FPR64RegClass);
23721 }
23722 if (VT == MVT::f32 || VT == MVT::Other)
23723 return std::make_pair(FReg, &RISCV::FPR32RegClass);
23724 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
23725 unsigned RegNo = FReg - RISCV::F0_F;
23726 unsigned HReg = RISCV::F0_H + RegNo;
23727 return std::make_pair(HReg, &RISCV::FPR16RegClass);
23728 }
23729 }
23730 }
23731
23732 if (Subtarget.hasVInstructions()) {
23733 Register VReg = StringSwitch<Register>(Constraint.lower())
23734 .Case("{v0}", RISCV::V0)
23735 .Case("{v1}", RISCV::V1)
23736 .Case("{v2}", RISCV::V2)
23737 .Case("{v3}", RISCV::V3)
23738 .Case("{v4}", RISCV::V4)
23739 .Case("{v5}", RISCV::V5)
23740 .Case("{v6}", RISCV::V6)
23741 .Case("{v7}", RISCV::V7)
23742 .Case("{v8}", RISCV::V8)
23743 .Case("{v9}", RISCV::V9)
23744 .Case("{v10}", RISCV::V10)
23745 .Case("{v11}", RISCV::V11)
23746 .Case("{v12}", RISCV::V12)
23747 .Case("{v13}", RISCV::V13)
23748 .Case("{v14}", RISCV::V14)
23749 .Case("{v15}", RISCV::V15)
23750 .Case("{v16}", RISCV::V16)
23751 .Case("{v17}", RISCV::V17)
23752 .Case("{v18}", RISCV::V18)
23753 .Case("{v19}", RISCV::V19)
23754 .Case("{v20}", RISCV::V20)
23755 .Case("{v21}", RISCV::V21)
23756 .Case("{v22}", RISCV::V22)
23757 .Case("{v23}", RISCV::V23)
23758 .Case("{v24}", RISCV::V24)
23759 .Case("{v25}", RISCV::V25)
23760 .Case("{v26}", RISCV::V26)
23761 .Case("{v27}", RISCV::V27)
23762 .Case("{v28}", RISCV::V28)
23763 .Case("{v29}", RISCV::V29)
23764 .Case("{v30}", RISCV::V30)
23765 .Case("{v31}", RISCV::V31)
23766 .Default(RISCV::NoRegister);
23767 if (VReg != RISCV::NoRegister) {
23768 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
23769 return std::make_pair(VReg, &RISCV::VMRegClass);
23770 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
23771 return std::make_pair(VReg, &RISCV::VRRegClass);
23772 for (const auto *RC :
23773 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
23774 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
23775 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
23776 return std::make_pair(VReg, RC);
23777 }
23778 }
23779 }
23780 }
23781
23782 std::pair<Register, const TargetRegisterClass *> Res =
23784
23785 // If we picked one of the Zfinx register classes, remap it to the GPR class.
23786 // FIXME: When Zfinx is supported in CodeGen this will need to take the
23787 // Subtarget into account.
23788 if (Res.second == &RISCV::GPRF16RegClass ||
23789 Res.second == &RISCV::GPRF32RegClass ||
23790 Res.second == &RISCV::GPRPairRegClass)
23791 return std::make_pair(Res.first, &RISCV::GPRRegClass);
23792
23793 return Res;
23794}
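// The explicit-register forms accepted above ("{a0}", "{fa0}", "{v8}", ...)
// are usually reached from C/C++ through register-asm variables rather than
// literal constraint strings. Illustrative sketch; the syscall number is an
// assumption (__NR_getpid on riscv64-linux), verify against your headers:
static inline long getpidViaEcallSketch(void) {
  register long A7 asm("a7") = 172; // assumed __NR_getpid; maps to "{a7}"
  register long A0 asm("a0");       // result register; maps to "{a0}"
  asm volatile("ecall" : "=r"(A0) : "r"(A7) : "memory");
  return A0;
}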
23795
23798 // Currently only support length 1 constraints.
23799 if (ConstraintCode.size() == 1) {
23800 switch (ConstraintCode[0]) {
23801 case 'A':
23803 default:
23804 break;
23805 }
23806 }
23807
23808 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
23809}
23810
23812 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
23813 SelectionDAG &DAG) const {
23814 // Currently only support length 1 constraints.
23815 if (Constraint.size() == 1) {
23816 switch (Constraint[0]) {
23817 case 'I':
23818 // Validate & create a 12-bit signed immediate operand.
23819 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
23820 uint64_t CVal = C->getSExtValue();
23821 if (isInt<12>(CVal))
23822 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
23823 Subtarget.getXLenVT()));
23824 }
23825 return;
23826 case 'J':
23827 // Validate & create an integer zero operand.
23828 if (isNullConstant(Op))
23829 Ops.push_back(
23830 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
23831 return;
23832 case 'K':
23833 // Validate & create a 5-bit unsigned immediate operand.
23834 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
23835 uint64_t CVal = C->getZExtValue();
23836 if (isUInt<5>(CVal))
23837 Ops.push_back(
23838 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
23839 }
23840 return;
23841 case 'S':
23843 return;
23844 default:
23845 break;
23846 }
23847 }
23848 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
23849}
23850
23852 Instruction *Inst,
23853 AtomicOrdering Ord) const {
23854 if (Subtarget.hasStdExtZtso()) {
23855 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
23856 return Builder.CreateFence(Ord);
23857 return nullptr;
23858 }
23859
23860 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
23861 return Builder.CreateFence(Ord);
23862 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
23863 return Builder.CreateFence(AtomicOrdering::Release);
23864 return nullptr;
23865}
23866
23868 Instruction *Inst,
23869 AtomicOrdering Ord) const {
23870 if (Subtarget.hasStdExtZtso()) {
23871 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
23872 return Builder.CreateFence(Ord);
23873 return nullptr;
23874 }
23875
23876 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
23877 return Builder.CreateFence(AtomicOrdering::Acquire);
23878 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
23881 return nullptr;
23882}
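// Worked example of the fence placement chosen by the two hooks above under
// RVWMO (no Ztso): the standard seq_cst mapping brackets the plain load and
// store with fences. The expected sequences are shown as comments; exact
// output depends on the rest of the pipeline.
#include <atomic>

static int seqCstLoadSketch(const std::atomic<int> &A) {
  return A.load(std::memory_order_seq_cst);  // fence rw,rw ; lw ; fence r,rw
}

static void seqCstStoreSketch(std::atomic<int> &A, int V) {
  A.store(V, std::memory_order_seq_cst);     // fence rw,w ; sw
                                             // (+ fence rw,rw only when
                                             //  trailing seq_cst fences are
                                             //  enabled)
}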
23883
23886 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
23887 // point operations can't be used in an lr/sc sequence without breaking the
23888 // forward-progress guarantee.
23889 if (AI->isFloatingPointOperation() ||
23895
23896 // Don't expand forced atomics, we want to have __sync libcalls instead.
23897 if (Subtarget.hasForcedAtomics())
23899
23900 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
23901 if (AI->getOperation() == AtomicRMWInst::Nand) {
23902 if (Subtarget.hasStdExtZacas() &&
23903 (Size >= 32 || Subtarget.hasStdExtZabha()))
23905 if (Size < 32)
23907 }
23908
23909 if (Size < 32 && !Subtarget.hasStdExtZabha())
23911
23913}
23914
23915static Intrinsic::ID
23917 switch (BinOp) {
23918 default:
23919 llvm_unreachable("Unexpected AtomicRMW BinOp");
23921 return Intrinsic::riscv_masked_atomicrmw_xchg;
23922 case AtomicRMWInst::Add:
23923 return Intrinsic::riscv_masked_atomicrmw_add;
23924 case AtomicRMWInst::Sub:
23925 return Intrinsic::riscv_masked_atomicrmw_sub;
23927 return Intrinsic::riscv_masked_atomicrmw_nand;
23928 case AtomicRMWInst::Max:
23929 return Intrinsic::riscv_masked_atomicrmw_max;
23930 case AtomicRMWInst::Min:
23931 return Intrinsic::riscv_masked_atomicrmw_min;
23933 return Intrinsic::riscv_masked_atomicrmw_umax;
23935 return Intrinsic::riscv_masked_atomicrmw_umin;
23936 }
23937}
23938
23940 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
23941 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
23942 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
23943 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
23944 // mask, as this produces better code than the LR/SC loop emitted by
23945 // int_riscv_masked_atomicrmw_xchg.
23946 if (AI->getOperation() == AtomicRMWInst::Xchg &&
23947 isa<ConstantInt>(AI->getValOperand())) {
23948 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
23949 if (CVal->isZero())
23950 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
23951 Builder.CreateNot(Mask, "Inv_Mask"),
23952 AI->getAlign(), Ord);
23953 if (CVal->isMinusOne())
23954 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
23955 AI->getAlign(), Ord);
23956 }
23957
23958 unsigned XLen = Subtarget.getXLen();
23959 Value *Ordering =
23960 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
23961 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
23963 AI->getModule(),
23965
23966 if (XLen == 64) {
23967 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
23968 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
23969 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
23970 }
23971
23972 Value *Result;
23973
23974 // Must pass the shift amount needed to sign extend the loaded value prior
23975 // to performing a signed comparison for min/max. ShiftAmt is the number of
23976 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
23977 // is the number of bits to left+right shift the value in order to
23978 // sign-extend.
23979 if (AI->getOperation() == AtomicRMWInst::Min ||
23981 const DataLayout &DL = AI->getDataLayout();
23982 unsigned ValWidth =
23983 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
23984 Value *SextShamt =
23985 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
23986 Result = Builder.CreateCall(LrwOpScwLoop,
23987 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
23988 } else {
23989 Result =
23990 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
23991 }
23992
23993 if (XLen == 64)
23994 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
23995 return Result;
23996}
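// Scalar model of the xchg special cases handled at the top of the function
// above: for a subword element selected by Mask within an aligned 32-bit
// word, exchanging in zero is an AND with the inverted mask and exchanging in
// all-ones is an OR with the mask, so a single AMO suffices instead of an
// LR/SC loop. Sketch only:
#include <cstdint>

static uint32_t xchgZeroAsAndSketch(uint32_t Word, uint32_t Mask) {
  return Word & ~Mask; // clears the element, like amoand.w with ~Mask
}

static uint32_t xchgAllOnesAsOrSketch(uint32_t Word, uint32_t Mask) {
  return Word | Mask;  // sets the element, like amoor.w with Mask
}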
23997
24000 AtomicCmpXchgInst *CI) const {
24001 // Don't expand forced atomics, we want to have __sync libcalls instead.
24002 if (Subtarget.hasForcedAtomics())
24004
24006 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
24007 (Size == 8 || Size == 16))
24010}
24011
24013 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
24014 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
24015 unsigned XLen = Subtarget.getXLen();
24016 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
24017 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg;
24018 if (XLen == 64) {
24019 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
24020 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
24021 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24022 }
24023 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24024 Value *Result = Builder.CreateIntrinsic(
24025 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
24026 if (XLen == 64)
24027 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24028 return Result;
24029}
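// Scalar model of what the masked cmpxchg intrinsic above computes on the
// aligned word: only the bits selected by Mask participate in the comparison
// and in the replacement. The real lowering is an LR/SC loop; this sketch
// shows just the data flow:
#include <cstdint>

static bool maskedCmpXchgSketch(uint32_t &Word, uint32_t Cmp, uint32_t New,
                                uint32_t Mask) {
  if ((Word & Mask) != (Cmp & Mask))
    return false;                        // compare failed, word unchanged
  Word = (Word & ~Mask) | (New & Mask);  // splice the new element in
  return true;
}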
24030
24032 EVT DataVT) const {
24033 // We have indexed loads for all supported EEW types. Indices are always
24034 // zero extended.
24035 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
24036 isTypeLegal(Extend.getValueType()) &&
24037 isTypeLegal(Extend.getOperand(0).getValueType()) &&
24038 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
24039}
24040
24042 EVT VT) const {
24043 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
24044 return false;
24045
24046 switch (FPVT.getSimpleVT().SimpleTy) {
24047 case MVT::f16:
24048 return Subtarget.hasStdExtZfhmin();
24049 case MVT::f32:
24050 return Subtarget.hasStdExtF();
24051 case MVT::f64:
24052 return Subtarget.hasStdExtD();
24053 default:
24054 return false;
24055 }
24056}
24057
24059   // If we are using the small code model, we can reduce the size of jump
24060   // table entries to 4 bytes.
24061 if (Subtarget.is64Bit() && !isPositionIndependent() &&
24064 }
24066}
24067
24069 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
24070 unsigned uid, MCContext &Ctx) const {
24071 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
24073 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
24074}
24075
24077 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
24078 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
24079 // a power of two as well.
24080 // FIXME: This doesn't work for zve32, but that's already broken
24081 // elsewhere for the same reason.
24082 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
24083 static_assert(RISCV::RVVBitsPerBlock == 64,
24084 "RVVBitsPerBlock changed, audit needed");
24085 return true;
24086}
24087
24089 SDValue &Offset,
24091 SelectionDAG &DAG) const {
24092 // Target does not support indexed loads.
24093 if (!Subtarget.hasVendorXTHeadMemIdx())
24094 return false;
24095
24096 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
24097 return false;
24098
24099 Base = Op->getOperand(0);
24100 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
24101 int64_t RHSC = RHS->getSExtValue();
24102 if (Op->getOpcode() == ISD::SUB)
24103 RHSC = -(uint64_t)RHSC;
24104
24105 // The constants that can be encoded in the THeadMemIdx instructions
24106 // are of the form (sign_extend(imm5) << imm2).
24107 bool isLegalIndexedOffset = false;
24108 for (unsigned i = 0; i < 4; i++)
24109 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
24110 isLegalIndexedOffset = true;
24111 break;
24112 }
24113
24114 if (!isLegalIndexedOffset)
24115 return false;
24116
24117 Offset = Op->getOperand(1);
24118 return true;
24119 }
24120
24121 return false;
24122}
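// Standalone restatement of the offset form accepted above for XTHeadMemIdx
// pre/post-indexed accesses: sign_extend(imm5) << imm2, i.e. a 5-bit signed
// value shifted left by 0..3. Uses only llvm::isInt from MathExtras.h; the
// helper name is illustrative:
static bool isTHeadMemIdxOffsetSketch(int64_t Offset) {
  for (unsigned Shift = 0; Shift < 4; ++Shift)
    if ((Offset % (1LL << Shift)) == 0 && isInt<5>(Offset >> Shift))
      return true;
  return false;
}
// e.g. 16 (= 2 << 3) and -60 (= -15 << 2) are accepted; 17 is not.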
24123
24125 SDValue &Offset,
24127 SelectionDAG &DAG) const {
24128 EVT VT;
24129 SDValue Ptr;
24130 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24131 VT = LD->getMemoryVT();
24132 Ptr = LD->getBasePtr();
24133 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24134 VT = ST->getMemoryVT();
24135 Ptr = ST->getBasePtr();
24136 } else
24137 return false;
24138
24139 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
24140 return false;
24141
24142 AM = ISD::PRE_INC;
24143 return true;
24144}
24145
24147 SDValue &Base,
24148 SDValue &Offset,
24150 SelectionDAG &DAG) const {
24151 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
24152 if (Op->getOpcode() != ISD::ADD)
24153 return false;
24154
24155 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
24156 Base = LS->getBasePtr();
24157 else
24158 return false;
24159
24160 if (Base == Op->getOperand(0))
24161 Offset = Op->getOperand(1);
24162 else if (Base == Op->getOperand(1))
24163 Offset = Op->getOperand(0);
24164 else
24165 return false;
24166
24167 AM = ISD::POST_INC;
24168 return true;
24169 }
24170
24171 EVT VT;
24172 SDValue Ptr;
24173 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24174 VT = LD->getMemoryVT();
24175 Ptr = LD->getBasePtr();
24176 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24177 VT = ST->getMemoryVT();
24178 Ptr = ST->getBasePtr();
24179 } else
24180 return false;
24181
24182 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
24183 return false;
24184 // Post-indexing updates the base, so it's not a valid transform
24185 // if that's not the same as the load's pointer.
24186 if (Ptr != Base)
24187 return false;
24188
24189 AM = ISD::POST_INC;
24190 return true;
24191}
24192
24194 EVT VT) const {
24195 EVT SVT = VT.getScalarType();
24196
24197 if (!SVT.isSimple())
24198 return false;
24199
24200 switch (SVT.getSimpleVT().SimpleTy) {
24201 case MVT::f16:
24202 return VT.isVector() ? Subtarget.hasVInstructionsF16()
24203 : Subtarget.hasStdExtZfhOrZhinx();
24204 case MVT::f32:
24205 return Subtarget.hasStdExtFOrZfinx();
24206 case MVT::f64:
24207 return Subtarget.hasStdExtDOrZdinx();
24208 default:
24209 break;
24210 }
24211
24212 return false;
24213}
24214
24216 // Zacas will use amocas.w which does not require extension.
24217 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
24218}
24219
24221 const Constant *PersonalityFn) const {
24222 return RISCV::X10;
24223}
24224
24226 const Constant *PersonalityFn) const {
24227 return RISCV::X11;
24228}
24229
24231   // Return false to suppress unnecessary extensions if a LibCall argument or
24232   // return value is a float narrower than XLEN on a soft FP ABI.
24233 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
24234 Type.getSizeInBits() < Subtarget.getXLen()))
24235 return false;
24236
24237 return true;
24238}
24239
24241 bool IsSigned) const {
24242 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
24243 return true;
24244
24245 return IsSigned;
24246}
24247
24249 SDValue C) const {
24250 // Check integral scalar types.
24251 if (!VT.isScalarInteger())
24252 return false;
24253
24254   // Omit the optimization if the subtarget has the Zmmul extension and the
24255   // data size exceeds XLen.
24256 const bool HasZmmul = Subtarget.hasStdExtZmmul();
24257 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
24258 return false;
24259
24260 auto *ConstNode = cast<ConstantSDNode>(C);
24261 const APInt &Imm = ConstNode->getAPIntValue();
24262
24263   // Don't do this if the Xqciac extension is enabled and Imm is a simm12.
24264 if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
24265 return false;
24266
24267 // Break the MUL to a SLLI and an ADD/SUB.
24268 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
24269 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
24270 return true;
24271
24272 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
24273 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
24274 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
24275 (Imm - 8).isPowerOf2()))
24276 return true;
24277
24278 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
24279 // a pair of LUI/ADDI.
24280 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
24281 ConstNode->hasOneUse()) {
24282 APInt ImmS = Imm.ashr(Imm.countr_zero());
24283 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
24284 (1 - ImmS).isPowerOf2())
24285 return true;
24286 }
24287
24288 return false;
24289}
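// Worked examples of the decompositions this hook approves (sketches of the
// intended selection; the actual sequence is chosen later in the pipeline):
//   x * 17            -> slli t, x, 4  ; add x, t, x      (Imm - 1 = 16)
//   x * 15            -> slli t, x, 4  ; sub x, t, x      (Imm + 1 = 16)
//   x * 4100 with Zba -> slli t, x, 12 ; sh2add x, x, t   (Imm - 4 = 4096 and
//                                                          Imm is not simm12)
//   x * 6144          -> slli t, x, 1 ; add t, t, x ; slli x, t, 11
//                        (Imm = 3 << 11; the trailing-zeros case with ImmS = 3)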
24290
24292 SDValue ConstNode) const {
24293 // Let the DAGCombiner decide for vectors.
24294 EVT VT = AddNode.getValueType();
24295 if (VT.isVector())
24296 return true;
24297
24298 // Let the DAGCombiner decide for larger types.
24299 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
24300 return true;
24301
24302 // It is worse if c1 is simm12 while c1*c2 is not.
24303 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
24304 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
24305 const APInt &C1 = C1Node->getAPIntValue();
24306 const APInt &C2 = C2Node->getAPIntValue();
24307 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
24308 return false;
24309
24310 // Default to true and let the DAGCombiner decide.
24311 return true;
24312}
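// Worked example for the check above: with C1 = 2000 and C2 = 4, C1 fits in a
// simm12 but C1 * C2 = 8000 does not, so distributing (x + 2000) * 4 into
// x * 4 + 8000 would trade an addi for an extra constant materialization; the
// hook therefore returns false and keeps the add-then-multiply form.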
24313
24315 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
24316 unsigned *Fast) const {
24317 if (!VT.isVector()) {
24318 if (Fast)
24319 *Fast = Subtarget.enableUnalignedScalarMem();
24320 return Subtarget.enableUnalignedScalarMem();
24321 }
24322
24323 // All vector implementations must support element alignment
24324 EVT ElemVT = VT.getVectorElementType();
24325 if (Alignment >= ElemVT.getStoreSize()) {
24326 if (Fast)
24327 *Fast = 1;
24328 return true;
24329 }
24330
24331 // Note: We lower an unmasked unaligned vector access to an equally sized
24332 // e8 element type access. Given this, we effectively support all unmasked
24333 // misaligned accesses. TODO: Work through the codegen implications of
24334 // allowing such accesses to be formed, and considered fast.
24335 if (Fast)
24336 *Fast = Subtarget.enableUnalignedVectorMem();
24337 return Subtarget.enableUnalignedVectorMem();
24338}
24339
24341 LLVMContext &Context, const MemOp &Op,
24342 const AttributeList &FuncAttributes) const {
24343 if (!Subtarget.hasVInstructions())
24344 return MVT::Other;
24345
24346 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
24347 return MVT::Other;
24348
24349 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
24350 // has an expansion threshold, and we want the number of hardware memory
24351 // operations to correspond roughly to that threshold. LMUL>1 operations
24352 // are typically expanded linearly internally, and thus correspond to more
24353 // than one actual memory operation. Note that store merging and load
24354 // combining will typically form larger LMUL operations from the LMUL1
24355 // operations emitted here, and that's okay because combining isn't
24356 // introducing new memory operations; it's just merging existing ones.
24357 // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
24358 const unsigned MinVLenInBytes =
24359 std::min(Subtarget.getRealMinVLen() / 8, 1024U);
24360
24361 if (Op.size() < MinVLenInBytes)
24362 // TODO: Figure out short memops. For the moment, do the default thing
24363 // which ends up using scalar sequences.
24364 return MVT::Other;
24365
24366 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
24367 // fixed vectors.
24368 if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
24369 return MVT::Other;
24370
24371 // Prefer i8 for non-zero memset as it allows us to avoid materializing
24372 // a large scalar constant and instead use vmv.v.x/i to do the
24373 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
24374 // maximize the chance we can encode the size in the vsetvli.
24375 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
24376 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
24377
24378 // Do we have sufficient alignment for our preferred VT? If not, revert
24379 // to largest size allowed by our alignment criteria.
24380 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
24381 Align RequiredAlign(PreferredVT.getStoreSize());
24382 if (Op.isFixedDstAlign())
24383 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
24384 if (Op.isMemcpy())
24385 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
24386 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
24387 }
24388   return MVT::getVectorVT(PreferredVT, MinVLenInBytes / PreferredVT.getStoreSize());
24389}
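// Worked example (sketch): with a minimum VLEN of 256 (MinVLenInBytes = 32),
// ELEN = 64, and a suitably aligned memcpy of at least 32 bytes, this returns
// MVT::v4i64; a non-zero memset of the same size instead gets MVT::v32i8 so
// the fill byte can be splatted with vmv.v.x/vmv.v.i.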
24390
24392 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
24393 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
24394 bool IsABIRegCopy = CC.has_value();
24395 EVT ValueVT = Val.getValueType();
24396
24397 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24398 if ((ValueVT == PairVT ||
24399 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24400 ValueVT == MVT::f64)) &&
24401 NumParts == 1 && PartVT == MVT::Untyped) {
24402 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24403 MVT XLenVT = Subtarget.getXLenVT();
24404 if (ValueVT == MVT::f64)
24405 Val = DAG.getBitcast(MVT::i64, Val);
24406 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
24407 // Always creating an MVT::Untyped part, so always use
24408 // RISCVISD::BuildGPRPair.
24409 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
24410 return true;
24411 }
24412
24413 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24414 PartVT == MVT::f32) {
24415 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
24416 // nan, and cast to f32.
24417 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
24418 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
24419 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
24420 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
24421 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24422 Parts[0] = Val;
24423 return true;
24424 }
24425
24426 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
24427#ifndef NDEBUG
24428 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
24429 [[maybe_unused]] unsigned ValLMUL =
24431 ValNF * RISCV::RVVBitsPerBlock);
24432 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
24433 [[maybe_unused]] unsigned PartLMUL =
24435 PartNF * RISCV::RVVBitsPerBlock);
24436 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
24437 "RISC-V vector tuple type only accepts same register class type "
24438 "TUPLE_INSERT");
24439#endif
24440
24441 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
24442 Val, DAG.getTargetConstant(0, DL, MVT::i32));
24443 Parts[0] = Val;
24444 return true;
24445 }
24446
24447 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24448 PartVT.isScalableVector()) {
24449 if (ValueVT.isFixedLengthVector()) {
24450 ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
24451 Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
24452 }
24453 LLVMContext &Context = *DAG.getContext();
24454 EVT ValueEltVT = ValueVT.getVectorElementType();
24455 EVT PartEltVT = PartVT.getVectorElementType();
24456 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24457 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24458 if (PartVTBitSize % ValueVTBitSize == 0) {
24459 assert(PartVTBitSize >= ValueVTBitSize);
24460 // If the element types are different, bitcast to the same element type of
24461 // PartVT first.
24462       // For example, to copy a <vscale x 1 x i8> value into
24463       // <vscale x 4 x i16>, we first widen <vscale x 1 x i8> to
24464       // <vscale x 8 x i8> with an insert_subvector, and then bitcast the
24465       // result to <vscale x 4 x i16>.
24466 if (ValueEltVT != PartEltVT) {
24467 if (PartVTBitSize > ValueVTBitSize) {
24468 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24469 assert(Count != 0 && "The number of element should not be zero.");
24470 EVT SameEltTypeVT =
24471 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24472 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(SameEltTypeVT), Val, 0);
24473 }
24474 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24475 } else {
24476 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(PartVT), Val, 0);
24477 }
24478 Parts[0] = Val;
24479 return true;
24480 }
24481 }
24482
24483 return false;
24484}
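// Worked example of the [b]f16-in-f32 ABI copy above: the half value 1.0
// (bits 0x3C00) is any-extended to i32 and ORed with 0xFFFF0000, producing the
// f32 bit pattern 0xFFFF3C00 (a NaN-boxed half); the inverse path in
// joinRegisterPartsIntoValue simply truncates it back to 0x3C00.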
24485
24487 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
24488 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
24489 bool IsABIRegCopy = CC.has_value();
24490
24491 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24492 if ((ValueVT == PairVT ||
24493 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24494 ValueVT == MVT::f64)) &&
24495 NumParts == 1 && PartVT == MVT::Untyped) {
24496 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24497 MVT XLenVT = Subtarget.getXLenVT();
24498
24499 SDValue Val = Parts[0];
24500 // Always starting with an MVT::Untyped part, so always use
24501 // RISCVISD::SplitGPRPair
24502 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
24503 Val);
24504 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
24505 Val.getValue(1));
24506 if (ValueVT == MVT::f64)
24507 Val = DAG.getBitcast(ValueVT, Val);
24508 return Val;
24509 }
24510
24511 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24512 PartVT == MVT::f32) {
24513 SDValue Val = Parts[0];
24514
24515 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
24516 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
24517 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
24518 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
24519 return Val;
24520 }
24521
24522 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24523 PartVT.isScalableVector()) {
24524 LLVMContext &Context = *DAG.getContext();
24525 SDValue Val = Parts[0];
24526 EVT ValueEltVT = ValueVT.getVectorElementType();
24527 EVT PartEltVT = PartVT.getVectorElementType();
24528 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24529 if (ValueVT.isFixedLengthVector())
24530 ValueVTBitSize = getContainerForFixedLengthVector(ValueVT.getSimpleVT())
24531 .getSizeInBits()
24533 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24534 if (PartVTBitSize % ValueVTBitSize == 0) {
24535 assert(PartVTBitSize >= ValueVTBitSize);
24536 EVT SameEltTypeVT = ValueVT;
24537 // If the element types are different, convert it to the same element type
24538 // of PartVT.
24539       // For example, to copy a <vscale x 1 x i8> value out of
24540       // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
24541       // <vscale x 8 x i8>, and then extract the <vscale x 1 x i8>
24542       // subvector.
24543 if (ValueEltVT != PartEltVT) {
24544 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24545 assert(Count != 0 && "The number of element should not be zero.");
24546 SameEltTypeVT =
24547 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24548 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
24549 }
24550 if (ValueVT.isFixedLengthVector())
24551 Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget);
24552 else
24553 Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
24554 return Val;
24555 }
24556 }
24557 return SDValue();
24558}
24559
24561 // When aggressively optimizing for code size, we prefer to use a div
24562 // instruction, as it is usually smaller than the alternative sequence.
24563 // TODO: Add vector division?
24564 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
24565 return OptSize && !VT.isVector();
24566}
24567
24569   // Scalarizing a zero_ext or sign_ext splat might prevent it from matching a
24570   // widening instruction in some situations.
24571 unsigned Opc = N->getOpcode();
24573 return false;
24574 return true;
24575}
24576
24577static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
24578 Module *M = IRB.GetInsertBlock()->getModule();
24579 Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration(
24580 M, Intrinsic::thread_pointer, IRB.getPtrTy());
24581 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
24582 IRB.CreateCall(ThreadPointerFunc), Offset);
24583}
24584
24585 Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
24586 // Fuchsia provides a fixed TLS slot for the stack cookie.
24587 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
24588 if (Subtarget.isTargetFuchsia())
24589 return useTpOffset(IRB, -0x10);
24590
24591 // Android provides a fixed TLS slot for the stack cookie. See the definition
24592 // of TLS_SLOT_STACK_GUARD in
24593 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
24594 if (Subtarget.isTargetAndroid())
24595 return useTpOffset(IRB, -0x18);
24596
24597 Module *M = IRB.GetInsertBlock()->getModule();
24598
24599 if (M->getStackProtectorGuard() == "tls") {
24600 // Users must specify the offset explicitly
24601 int Offset = M->getStackProtectorGuardOffset();
24602 return useTpOffset(IRB, Offset);
24603 }
24604
24605 return TargetLowering::getIRStackGuard(IRB);
24606 }
24607
24608 bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
24609 Align Alignment) const {
24610 if (!Subtarget.hasVInstructions())
24611 return false;
24612
24613 // Only support fixed vectors if we know the minimum vector size.
24614 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
24615 return false;
24616
24617 EVT ScalarType = DataType.getScalarType();
24618 if (!isLegalElementTypeForRVV(ScalarType))
24619 return false;
24620
24621 if (!Subtarget.enableUnalignedVectorMem() &&
24622 Alignment < ScalarType.getStoreSize())
24623 return false;
24624
24625 return true;
24626}
24627
24628 MachineInstr *
24629 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
24630 MachineBasicBlock::iterator &MBBI,
24631 const TargetInstrInfo *TII) const {
24632 assert(MBBI->isCall() && MBBI->getCFIType() &&
24633 "Invalid call instruction for a KCFI check");
24634 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
24635 MBBI->getOpcode()));
24636
24637 MachineOperand &Target = MBBI->getOperand(0);
24638 Target.setIsRenamable(false);
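// KCFI_CHECK is a pseudo that is expanded later into the actual check: it
// loads the type hash placed in front of the callee and traps if it does not
// match the CFI type attached to this call.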
24639
24640 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
24641 .addReg(Target.getReg())
24642 .addImm(MBBI->getCFIType())
24643 .getInstr();
24644}
24645
24646#define GET_REGISTER_MATCHER
24647#include "RISCVGenAsmMatcher.inc"
24648
24649 Register
24650 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
24651 const MachineFunction &MF) const {
24652 Register Reg = MatchRegisterAltName(RegName);
24653 if (!Reg)
24654 Reg = MatchRegisterName(RegName);
24655 if (!Reg)
24656 return Reg;
24657
24658 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
24659 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
24660 reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" +
24661 StringRef(RegName) + "\"."));
24662 return Reg;
24663}
24664
24665 MachineMemOperand::Flags
24666 RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
24667 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
24668
24669 if (NontemporalInfo == nullptr)
24670 return MachineMemOperand::MONone;
24671 
24672 // 1 -> __RISCV_NTLH_ALL (the default value)
24673 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
24674 // 3 -> __RISCV_NTLH_ALL_PRIVATE
24675 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
24676 // 5 -> __RISCV_NTLH_ALL
24677 int NontemporalLevel = 5;
24678 const MDNode *RISCVNontemporalInfo =
24679 I.getMetadata("riscv-nontemporal-domain");
24680 if (RISCVNontemporalInfo != nullptr)
24681 NontemporalLevel =
24682 cast<ConstantInt>(
24683 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
24684 ->getValue())
24685 ->getZExtValue();
24686
24687 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
24688 "RISC-V target doesn't support this non-temporal domain.");
24689
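// Encode (NontemporalLevel - 2), a value in [0, 3], into the two nontemporal
// MMO bits: 2 -> 00, 3 -> 01 (Bit0), 4 -> 10 (Bit1), 5 -> 11 (Bit0|Bit1).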
24690 NontemporalLevel -= 2;
24691 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
24692 if (NontemporalLevel & 0b1)
24693 Flags |= MONontemporalBit0;
24694 if (NontemporalLevel & 0b10)
24695 Flags |= MONontemporalBit1;
24696
24697 return Flags;
24698}
24699
24700 MachineMemOperand::Flags
24701 RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
24702 
24703 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
24704 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
24705 TargetFlags |= (NodeFlags & MONontemporalBit0);
24706 TargetFlags |= (NodeFlags & MONontemporalBit1);
24707 return TargetFlags;
24708}
24709
24710 bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
24711 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
24712 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
24713}
24714
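// CTPOP is considered fast when it can be lowered to a dedicated instruction:
// cpop/cpopw from Zbb for scalars, or vcpop.v from Zvbb for vectors.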
24715 bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
24716 if (VT.isScalableVector())
24717 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
24718 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
24719 return true;
24720 return Subtarget.hasStdExtZbb() &&
24721 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
24722}
24723
24724 unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
24725 ISD::CondCode Cond) const {
24726 return isCtpopFast(VT) ? 0 : 1;
24727}
24728
24729 bool RISCVTargetLowering::shouldInsertFencesForAtomic(
24730 const Instruction *I) const {
24731 if (Subtarget.hasStdExtZalasr()) {
24732 if (Subtarget.hasStdExtZtso()) {
24733 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
24734 // should be lowered to plain load/store. The easiest way to do this is
24735 // to say we should insert fences for them, and the fence insertion code
24736 // will just not insert any fences
24737 auto *LI = dyn_cast<LoadInst>(I);
24738 auto *SI = dyn_cast<StoreInst>(I);
24739 if ((LI &&
24740 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
24741 (SI &&
24742 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
24743 // This is a seq_cst load or store, which needs a .aq or .rl, so we
24744 // shouldn't try to insert fences.
24745 return false;
24746 }
24747 // Here, we are a TSO inst that isn't a seq_cst load/store
24748 return isa<LoadInst>(I) || isa<StoreInst>(I);
24749 }
24750 return false;
24751 }
24752 // Note that one specific case requires fence insertion for an
24753 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
24754 // than this hook due to limitations in the interface here.
24755 return isa<LoadInst>(I) || isa<StoreInst>(I);
24756}
24757
24758 bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
24759 
24760 // GISel support is in progress or complete for these opcodes.
24761 unsigned Op = Inst.getOpcode();
24762 if (Op == Instruction::Add || Op == Instruction::Sub ||
24763 Op == Instruction::And || Op == Instruction::Or ||
24764 Op == Instruction::Xor || Op == Instruction::InsertElement ||
24765 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
24766 Op == Instruction::Freeze || Op == Instruction::Store)
24767 return false;
24768
24769 if (Inst.getType()->isScalableTy())
24770 return true;
24771
24772 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
24773 if (Inst.getOperand(i)->getType()->isScalableTy() &&
24774 !isa<ReturnInst>(&Inst))
24775 return true;
24776
24777 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
24778 if (AI->getAllocatedType()->isScalableTy())
24779 return true;
24780 }
24781
24782 return false;
24783}
24784
24785SDValue
24786RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
24787 SelectionDAG &DAG,
24788 SmallVectorImpl<SDNode *> &Created) const {
24789 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
24790 if (isIntDivCheap(N->getValueType(0), Attr))
24791 return SDValue(N, 0); // Lower SDIV as SDIV
24792
24793 // Only perform this transform if short forward branch opt is supported.
24794 if (!Subtarget.hasShortForwardBranchOpt())
24795 return SDValue();
24796 EVT VT = N->getValueType(0);
24797 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
24798 return SDValue();
24799
24800 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
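// The expansion conditionally adds (|divisor| - 1) to negative dividends
// before the arithmetic shift right, so that constant must fit in addi's
// 12-bit signed immediate range (hence the 2048 bound).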
24801 if (Divisor.sgt(2048) || Divisor.slt(-2048))
24802 return SDValue();
24803 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
24804}
24805
24806bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
24807 EVT VT, const APInt &AndMask) const {
24808 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
24809 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
24811}
24812
24813unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
24814 return Subtarget.getMinimumJumpTableEntries();
24815}
24816
24817 SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
24818 SDValue Value, SDValue Addr,
24819 int JTI,
24820 SelectionDAG &DAG) const {
24821 if (Subtarget.hasStdExtZicfilp()) {
24822 // When Zicfilp is enabled, we need to use a software-guarded branch for the
24823 // jump table branch.
24824 SDValue Chain = Value;
24825 // Jump table debug info is only needed if CodeView is enabled.
24826 if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF())
24827 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
24828 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
24829 }
24830 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
24831}
24832
24833// If an output pattern produces multiple instructions tablegen may pick an
24834 // arbitrary type from an instruction's destination register class to use for the
24835// VT of that MachineSDNode. This VT may be used to look up the representative
24836// register class. If the type isn't legal, the default implementation will
24837// not find a register class.
24838//
24839// Some integer types smaller than XLen are listed in the GPR register class to
24840// support isel patterns for GISel, but are not legal in SelectionDAG. The
24841// arbitrary type tablegen picks may be one of these smaller types.
24842//
24843// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
24844// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
24845std::pair<const TargetRegisterClass *, uint8_t>
24846RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
24847 MVT VT) const {
24848 switch (VT.SimpleTy) {
24849 default:
24850 break;
24851 case MVT::i8:
24852 case MVT::i16:
24853 case MVT::i32:
24855 case MVT::bf16:
24856 case MVT::f16:
24858 }
24859
24860 return TargetLowering::findRepresentativeClass(TRI, VT);
24861 }
24862
24863 namespace llvm::RISCVVIntrinsicsTable {
24864 
24865#define GET_RISCVVIntrinsicsTable_IMPL
24866#include "RISCVGenSearchableTables.inc"
24867
24868} // namespace llvm::RISCVVIntrinsicsTable
24869
24870 bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
24871 
24872 // If the function specifically requests inline stack probes, emit them.
24873 if (MF.getFunction().hasFnAttribute("probe-stack"))
24874 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
24875 "inline-asm";
24876
24877 return false;
24878}
24879
24880 unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
24881 Align StackAlign) const {
24882 // The default stack probe size is 4096 if the function has no
24883 // stack-probe-size attribute.
24884 const Function &Fn = MF.getFunction();
24885 unsigned StackProbeSize =
24886 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
24887 // Round down to the stack alignment.
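// For example, a requested stack-probe-size of 5000 with a 16-byte stack
// alignment is rounded down to 4992.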
24888 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
24889 return StackProbeSize ? StackProbeSize : StackAlign.value();
24890}
24891
24892SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
24893 SelectionDAG &DAG) const {
24894 MachineFunction &MF = DAG.getMachineFunction();
24895 if (!hasInlineStackProbe(MF))
24896 return SDValue();
24897
24898 MVT XLenVT = Subtarget.getXLenVT();
24899 // Get the inputs.
24900 SDValue Chain = Op.getOperand(0);
24901 SDValue Size = Op.getOperand(1);
24902
24903 MaybeAlign Align =
24904 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
24905 SDLoc dl(Op);
24906 EVT VT = Op.getValueType();
24907
24908 // Construct the new SP value in a GPR.
24909 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
24910 Chain = SP.getValue(1);
24911 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
24912 if (Align)
24913 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
24914 DAG.getSignedConstant(-Align->value(), dl, VT));
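// Masking with -Align clears the low bits and rounds the new SP down to the
// requested alignment (Align is a power of two).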
24915
24916 // Set the real SP to the new value with a probing loop.
24917 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
24918 return DAG.getMergeValues({SP, Chain}, dl);
24919}
24920
24921 MachineBasicBlock *
24922 RISCVTargetLowering::emitDynamicProbedAlloc(MachineInstr &MI,
24923 MachineBasicBlock *MBB) const {
24924 MachineFunction &MF = *MBB->getParent();
24925 MachineBasicBlock::iterator MBBI = MI.getIterator();
24926 DebugLoc DL = MBB->findDebugLoc(MBBI);
24927 Register TargetReg = MI.getOperand(0).getReg();
24928
24929 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
24930 bool IsRV64 = Subtarget.is64Bit();
24931 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
24932 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
24933 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
24934
24935 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
24936 MachineBasicBlock *LoopTestMBB =
24937 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
24938 MF.insert(MBBInsertPoint, LoopTestMBB);
24939 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
24940 MF.insert(MBBInsertPoint, ExitMBB);
24941 Register SPReg = RISCV::X2;
24942 Register ScratchReg =
24943 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
24944
24945 // ScratchReg = ProbeSize
24946 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
24947
24948 // LoopTest:
24949 // SUB SP, SP, ProbeSize
24950 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
24951 .addReg(SPReg)
24952 .addReg(ScratchReg);
24953
24954 // s[d|w] zero, 0(sp)
24955 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
24956 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
24957 .addReg(RISCV::X0)
24958 .addReg(SPReg)
24959 .addImm(0);
24960
24961 // BLT TargetReg, SP, LoopTest
24962 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
24963 .addReg(TargetReg)
24964 .addReg(SPReg)
24965 .addMBB(LoopTestMBB);
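// Keep probing while TargetReg < SP, i.e. until the stack pointer has been
// walked down, one ProbeSize step at a time, to the requested allocation.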
24966
24967 // Adjust with: MV SP, TargetReg.
24968 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
24969 .addReg(TargetReg)
24970 .addImm(0);
24971
24972 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
24974
24975 LoopTestMBB->addSuccessor(ExitMBB);
24976 LoopTestMBB->addSuccessor(LoopTestMBB);
24977 MBB->addSuccessor(LoopTestMBB);
24978
24979 MI.eraseFromParent();
24980 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
24981 return ExitMBB->begin()->getParent();
24982}
24983
24984 ArrayRef<MCPhysReg> RISCVTargetLowering::getRoundingControlRegisters() const {
24985 if (Subtarget.hasStdExtFOrZfinx()) {
24986 static const MCPhysReg RCRegs[] = {RISCV::FRM, RISCV::FFLAGS};
24987 return RCRegs;
24988 }
24989 return {};
24990}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Definition: CostModel.cpp:74
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define CC_VLS_CASE(ABIVlen)
#define RegName(no)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
mir Rename Register Operands
Register const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr MCPhysReg SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static bool isLowSourceShuffle(ArrayRef< int > Mask, int Span)
Is this mask only using elements from the first span of the input?
static bool isZipOdd(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipodd idiom.
static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2)
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static bool isLegalBitRotate(ArrayRef< int > Mask, EVT VT, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static SDValue simplifyOp_VL(SDNode *N)
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isAlternating(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned Factor, bool RequiredPolarity)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< bool > ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, cl::desc("Swap add and addi in cases where the add may " "be combined with a shift"), cl::init(true))
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue reduceANDOfAtomicLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool isLocalRepeatingShuffle(ArrayRef< int > Mask, int Span)
Is this mask local (i.e.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static bool isSpanSplatShuffle(ArrayRef< int > Mask, int Span)
Return true for a mask which performs an arbitrary shuffle within the first span, and then repeats th...
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
const uint64_t ModeMask64
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, SelectionDAG &DAG)
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static bool isValidVisniInsertExtractIndex(SDValue Idx)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B, SelectionDAG &DAG)
Given fixed length vectors A and B with equal element types, but possibly different number of element...
const uint32_t ModeMask32
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue lowerVectorXRINT_XROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static bool isElementRotate(const std::array< std::pair< int, int >, 2 > &SrcInfo, unsigned NumElts)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match a single source shuffle which is an identity except that some particular element is repeated.
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getQDOTXResultType(MVT OpVT)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue foldConcatVector(SDValue V1, SDValue V2)
If concat_vector(V1,V2) could be folded away to some existing vector source, return it.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isZipEven(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipeven idiom.
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
#define ROTR(x, n)
Definition: SHA256.cpp:32
static bool isCommutative(Instruction *I, Value *ValWithUses)
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1347
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1332
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1109
Class for arbitrary precision integers.
Definition: APInt.h:78
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1385
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1644
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1396
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1531
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:397
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1736
bool isMask(unsigned numBits) const
Definition: APInt.h:488
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:334
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:985
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1388
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:482
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1562
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:24
an instruction to allocate memory on the stack
Definition: Instructions.h:64
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:191
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:506
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:709
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:843
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:721
@ Add
*p = old + v
Definition: Instructions.h:725
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:777
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:739
@ Or
*p = old | v
Definition: Instructions.h:733
@ Sub
*p = old - v
Definition: Instructions.h:727
@ And
*p = old & v
Definition: Instructions.h:729
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:781
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:769
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:737
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:743
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:741
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:773
@ Nand
*p = ~(old & v)
Definition: Instructions.h:731
bool isFloatingPointOperation() const
Definition: Instructions.h:898
BinOp getOperation() const
Definition: Instructions.h:819
Value * getValOperand()
Definition: Instructions.h:890
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:863
This is an SDNode representing atomic operations.
const SDValue & getBasePtr() const
LLVM_ABI bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:400
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:248
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:214
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:163
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition: DataLayout.h:390
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:846
A debug info location.
Definition: DebugLoc.h:124
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:229
unsigned size() const
Definition: DenseMap.h:108
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:205
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:315
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:312
Tagged union holding either a T or a Error.
Definition: Error.h:485
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:359
Argument * getArg(unsigned i) const
Definition: Function.h:884
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:727
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:39
bool isDSOLocal() const
Definition: GlobalValue.h:307
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:531
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1936
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:575
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1891
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2094
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:562
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:201
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:567
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:834
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1805
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1420
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:533
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2508
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1911
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2068
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:605
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:552
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:78
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:312
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:86
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:188
MCContext & getContext() const
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition: MCExpr.h:214
Metadata node.
Definition: Metadata.h:1077
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1445
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:247
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:317
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
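
The MVT constructors and predicates above drive most of the legality logic in this file. A small standalone sketch of how they compose (illustrative only, not code from this file):

// Sketch: build nxv4i32, query it, and derive related types.
MVT EltVT = MVT::getIntegerVT(32);                     // i32
MVT VecVT = MVT::getScalableVectorVT(EltVT, 4);        // nxv4i32
assert(VecVT.isScalableVector() && VecVT.isInteger());
MVT HalfVT = VecVT.getHalfNumVectorElementsVT();       // nxv2i32
MVT FP32VT = VecVT.changeVectorElementType(MVT::f32);  // nxv4f32
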
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
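
The addReg/addImm/addMBB builders above are normally reached through BuildMI in EmitInstrWithCustomInserter-style code. A hedged sketch of the pattern, assuming MBB (MachineBasicBlock *), MI, DL, TII, MRI and SrcReg are in scope as they usually are in a custom inserter:

// Sketch: emit "DstReg = ADDI SrcReg, 42" immediately before MI.
Register DstReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(*MBB, MI, DL, TII->get(RISCV::ADDI), DstReg)
    .addReg(SrcReg)
    .addImm(42);
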
Representation of each machine instruction.
Definition: MachineInstr.h:72
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:587
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:416
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
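
These flags and accessors are combined through getMachineMemOperand (listed above under MachineFunction) when a lowering needs an explicit memory operand. A hedged fragment, with MF an in-scope MachineFunction and FI a hypothetical fixed-stack frame index:

// Sketch: describe a plain 8-byte load from fixed stack slot FI.
MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo::getFixedStack(MF, FI),
    MachineMemOperand::MOLoad, LLT::scalar(64), Align(8));
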
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
Definition: Module.cpp:352
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
bool hasStdExtDOrZdinx() const
unsigned getMaxLoadsPerMemcmp(bool OptSize) const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
unsigned getMaxStoresPerMemset(bool OptSize) const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
RISCVVRGatherCostModelEnum getVRGatherCostModel() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
unsigned getMaxStoresPerMemmove(bool OptSize) const
bool hasVInstructionsF16Minimal() const
unsigned getMaxGluedStoresPerMemcpy() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
const RISCVFrameLowering * getFrameLowering() const override
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached to strict FP calls.
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal add immediate, that is, the target has an add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has an icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
static unsigned getRegClassIDForLMUL(RISCVVType::VLMUL LMul)
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Match a mask which "spreads" the leading elements of a vector evenly across the result.
static RISCVVType::VLMUL getLMUL(MVT VT)
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
std::optional< APInt > bitcastToAPInt() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
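
A hedged fragment showing the inspection style these SDValue/SDNode accessors support in DAG combines; N is an assumed SDNode * under examination:

// Sketch: match "(add x, C)" where the result has a single user.
SDValue Val(N, 0);
if (Val.getOpcode() == ISD::ADD && Val.hasOneUse() &&
    isa<ConstantSDNode>(Val.getOperand(1))) {
  uint64_t C = Val.getConstantOperandVal(1);  // zero-extended constant
  MVT VT = Val.getSimpleValueType();          // result type of the add
  (void)C; (void)VT;
}
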
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:229
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
Definition: SelectionDAG.h:941
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:813
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
Definition: SelectionDAG.h:963
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
Definition: SelectionDAG.h:956
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:504
LLVM_ABI SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:398
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:868
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:839
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:498
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
Definition: SelectionDAG.h:506
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:719
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:902
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
Definition: SelectionDAG.h:949
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:499
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:707
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:808
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:493
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:885
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:511
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:777
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:581
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:918
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
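
The SelectionDAG factory methods above are how LowerOperation and PerformDAGCombine produce replacement nodes. A hedged fragment, with DAG, DL, VT and X assumed to come from the surrounding lowering code:

// Sketch: build (add (xor X, -1), 1), i.e. negate X, using the listed helpers.
SDValue NotX = DAG.getNOT(DL, X, VT);                    // XOR X, all-ones
SDValue One  = DAG.getConstant(1, DL, VT);
SDValue Neg  = DAG.getNode(ISD::ADD, DL, VT, NotX, One);
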
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static LLVM_ABI bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static LLVM_ABI bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
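
The static mask predicates above classify shuffles before a lowering strategy is chosen. A standalone sketch of the two-argument forms, shown on ShuffleVectorInst (the class these particular signatures appear to belong to; adjust if they live elsewhere in your tree):

// Sketch: classify a 4-element reversal mask.
int Mask[] = {3, 2, 1, 0};
bool IsReverse   = ShuffleVectorInst::isReverseMask(Mask, /*NumSrcElts=*/4);
bool IsSingleSrc = ShuffleVectorInst::isSingleSourceMask(Mask, /*NumSrcElts=*/4);
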
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void reserve(size_type N)
Definition: SmallVector.h:664
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:684
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:806
void push_back(const T &Elt)
Definition: SmallVector.h:414
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:287
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
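
SmallSet and SmallVector are the scratch containers used throughout the lowering code. A trivial standalone sketch of the listed operations:

// Sketch: collect unique values in insertion order.
SmallSet<int, 8> Seen;
SmallVector<int, 8> Unique;
for (int V : {1, 2, 2, 3})
  if (Seen.insert(V).second)   // insert() reports whether V was newly added
    Unique.push_back(V);
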
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:862
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:154
LLVM_ABI std::string lower() const
Definition: StringRef.cpp:112
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:68
R Default(T Value)
Definition: StringSwitch.h:177
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:87
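
StringSwitch backs string-to-value mapping such as register-name and constraint parsing. A hedged standalone sketch, with Name an assumed StringRef:

// Sketch: map an ABI register name to an index, with ~0U meaning "unknown".
unsigned Idx = StringSwitch<unsigned>(Name)
                   .Case("zero", 0)
                   .Cases("ra", "x1", 1)
                   .Default(~0U);
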
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
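
setOperationAction and the related setters below are the constructor-time vocabulary for describing legality. A hedged, generic fragment of the pattern as it would appear inside a TargetLowering subclass constructor (the opcodes and actions here are illustrative, not this backend's actual settings):

// Sketch: typical legality setup in a TargetLowering subclass constructor.
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); // lower via generic expansion
setOperationAction(ISD::BSWAP, MVT::i64, Custom);     // route to LowerOperation
setTruncStoreAction(MVT::f64, MVT::f32, Expand);      // no truncating f64->f32 store
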
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
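addRegisterClass and computeRegisterProperties (listed above) usually appear together: each legal value type is first mapped to a register class, and the derived properties are computed once at the end. A minimal sketch under the same hypothetical-constructor assumption; the register-class names and the Subtarget accessor are made up for illustration:
  // Hypothetical names: MyTarget::GPRRegClass, FPR32RegClass, Subtarget.
  addRegisterClass(MVT::i32, &MyTarget::GPRRegClass);
  addRegisterClass(MVT::f32, &MyTarget::FPR32RegClass);
  // After all classes are registered, derive legal types, register costs, etc.
  computeRegisterProperties(Subtarget.getRegisterInfo());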
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
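The two atomic-width knobs above (setMaxAtomicSizeInBitsSupported and setMinCmpXchgSizeInBits) bracket what the backend handles natively. A minimal sketch in the same hypothetical constructor, with widths chosen purely for illustration:
  // Illustrative widths, not this file's configuration.
  // Atomics wider than 64 bits are lowered to __atomic_* library calls.
  setMaxAtomicSizeInBitsSupported(64);
  // cmpxchg/ll-sc narrower than 32 bits is widened before selection.
  setMinCmpXchgSizeInBits(32);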
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
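setCondCodeAction takes a whole list of condition codes at once. A minimal sketch, assuming the same hypothetical constructor and an illustrative set of FP comparisons the target cannot encode directly:
  // Illustrative condition codes and type; rewrite them via supported CCs.
  static const ISD::CondCode UnsupportedFPCCs[] = {ISD::SETO, ISD::SETUO,
                                                   ISD::SETUEQ, ISD::SETONE};
  setCondCodeAction(UnsupportedFPCCs, MVT::f32, Expand);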
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
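Registering nodes with setTargetDAGCombine is what routes them into the target's PerformDAGCombine hook. A one-line sketch with an assumed, illustrative node list, in the same hypothetical constructor:
  // Ask the DAG combiner to call back into this target for these node kinds.
  setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::ZERO_EXTEND});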
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
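setLoadExtAction and setTruncStoreAction (the latter listed a few entries above) describe the memory-type conversions the target lacks. A minimal sketch with illustrative type pairs, again assuming the hypothetical constructor:
  // Illustrative types only.
  // No native extending loads from i1: promote the memory type instead.
  for (auto Ext : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
    setLoadExtAction(Ext, MVT::i64, MVT::i1, Promote);
  // No direct f64 -> f16 truncating store; expand it via a conversion.
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);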
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
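makeLibCall is the usual escape hatch when a custom lowering falls back to a runtime routine. A hedged sketch of the call shape, written as a member of a hypothetical TargetLowering subclass (scaffolding and the surrounding SelectionDAG includes omitted); the choice of libcall and the single-operand handling are illustrative assumptions:
  // Hypothetical member function; lowers an FP_ROUND-like node to a libcall.
  SDValue lowerViaLibcall(SDValue Op, SelectionDAG &DAG) const {
    SDLoc DL(Op);
    RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(),
                                          Op.getValueType());
    MakeLibCallOptions CallOptions;
    // makeLibCall returns {result, chain}; an unchained FP op only needs .first.
    return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
                       CallOptions, DL).first;
  }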
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetOptions Options
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:772
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:346
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:349
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:261
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition: Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
LLVM_ABI Type * getStructElementType(unsigned N) const
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:352
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:35
Value * getOperand(unsigned i) const
Definition: User.h:232
unsigned getNumOperands() const
Definition: User.h:254
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:184
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:203
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:233
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:259
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169
constexpr bool isZero() const
Definition: TypeSize.h:157
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:255
self_iterator getIterator()
Definition: ilist_node.h:134
#define INT64_MIN
Definition: DataTypes.h:74
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:801
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1236
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1232
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:45
@ SET_FPENV
Sets the current floating-point environment.
Definition: ISDOpcodes.h:1108
@ PARTIAL_REDUCE_SMLA
Definition: ISDOpcodes.h:1510
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1458
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1401
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1491
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:270
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1379
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:765
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1265
@ ConstantFP
Definition: ISDOpcodes.h:87
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1381
@ STRICT_FCEIL
Definition: ISDOpcodes.h:454
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1382
@ RESET_FPENV
Set floating-point environment to default state.
Definition: ISDOpcodes.h:1112
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1141
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1131
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:215
@ GlobalAddress
Definition: ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:571
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1476
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:410
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1480
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:738
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1338
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1343
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
Definition: ISDOpcodes.h:1135
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:275
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1490
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:505
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:985
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1377
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:975
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:249
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1378
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1309
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:1018
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1574
@ GlobalTLSAddress
Definition: ISDOpcodes.h:89
@ FrameIndex
Definition: ISDOpcodes.h:90
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:957
@ PARTIAL_REDUCE_UMLA
Definition: ISDOpcodes.h:1511
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:706
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:656
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1298
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1473
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:773
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1331
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1477
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1090
@ STRICT_LROUND
Definition: ISDOpcodes.h:459
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:1002
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1187
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1380
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1166
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition: ISDOpcodes.h:622
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:682
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:778
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1347
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:228
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1492
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:663
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1261
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:343
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:458
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1485
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:952
@ STRICT_FP_TO_FP16
Definition: ISDOpcodes.h:988
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:695
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1126
@ STRICT_FP16_TO_FP
Definition: ISDOpcodes.h:987
@ GET_FPENV
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1103
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:636
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1375
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:832
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1321
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:928
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:793
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1448
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1358
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1383
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
Definition: ISDOpcodes.h:1059
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:379
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1151
@ STRICT_LRINT
Definition: ISDOpcodes.h:461
@ ConstantPool
Definition: ISDOpcodes.h:92
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:718
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:627
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:960
@ STRICT_FROUND
Definition: ISDOpcodes.h:456
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:787
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:477
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1413
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1493
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:455
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:457
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:145
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:994
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:110
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1373
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:470
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1081
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1374
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:908
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1292
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:730
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1318
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:701
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:420
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:672
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:53
@ STRICT_LLRINT
Definition: ISDOpcodes.h:462
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:648
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1372
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1025
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:941
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:690
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:434
@ STRICT_LLROUND
Definition: ISDOpcodes.h:460
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:451
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:927
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1481
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:838
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1256
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1180
@ BlockAddress
Definition: ISDOpcodes.h:94
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:521
@ PARTIAL_REDUCE_SUMLA
Definition: ISDOpcodes.h:1512
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:360
@ STRICT_FRINT
Definition: ISDOpcodes.h:450
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition: ISDOpcodes.h:611
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1086
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:853
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:713
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1315
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:857
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:543
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1762
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
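The two CondCode helpers above are pure value transforms, so they can be exercised without building a DAG. A small standalone sketch (the function name is just for the demo):
  #include "llvm/CodeGen/SelectionDAGNodes.h" // brings in the ISD::CondCode helpers
  using namespace llvm;

  void condCodeHelpers() {
    ISD::CondCode CC = ISD::SETULT;                         // X u< Y
    ISD::CondCode Inv = ISD::getSetCCInverse(CC, MVT::i32); // !(X u< Y), i.e. X u>= Y
    ISD::CondCode Swap = ISD::getSetCCSwappedOperands(CC);  // (Y op X) form: Y u> X
    (void)Inv; (void)Swap;
  }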
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1653
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1653
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1640
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1691
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1671
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1736
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:751
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
unsigned getBrCond(CondCode CC, unsigned SelectOpc=0)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
static constexpr unsigned RVVBytesPerBlock
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:58
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
uint32_t read32le(const void *P)
Definition: Endian.h:429
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:477
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491
bool isStrongerThanMonotonic(AtomicOrdering AO)
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:270
static const MachineMemOperand::Flags MONontemporalBit0
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:551
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1587
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition: Error.cpp:177
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:390
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:157
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1987
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
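The bit-math helpers listed above are standalone, so a couple of sanity checks show their contracts directly (function name is just for the demo):
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  #include <cstdint>
  using namespace llvm;

  void mathExtrasDemo() {
    assert(isPowerOf2_32(64u));          // powers of two > 0 only
    assert(!isPowerOf2_32(0u));          // zero is explicitly excluded
    assert(Log2_32(64u) == 6);           // floor log base 2
    assert(Log2_64(uint64_t(1) << 40) == 40);
    assert(PowerOf2Ceil(100u) == 128);   // next power of two >= the input
  }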
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:270
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:345
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:399
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
@ Xor
Bitwise or logical XOR of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1980
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1777
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition: Error.cpp:180
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
#define N
#define NC
Definition: regutils.h:42
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:304
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:324
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:397
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:425
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
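The EVT factory functions and accessors listed here compose naturally; a small standalone sketch using only calls that appear in this index (the demo function and its local names are illustrative):
  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include <cassert>
  using namespace llvm;

  void evtDemo() {
    LLVMContext Ctx;
    EVT I17 = EVT::getIntegerVT(Ctx, 17);           // no simple MVT for i17, so extended
    assert(!I17.isSimple() && I17.getFixedSizeInBits() == 17);
    EVT V4F32 = EVT::getVectorVT(Ctx, MVT::f32, 4); // <4 x float>
    assert(V4F32.isVector() && V4F32.getVectorNumElements() == 4);
    assert(V4F32.getScalarType() == MVT::f32);
  }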
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition: ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:414
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:287
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:216
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
Definition: KnownBits.cpp:427
static LLVM_ABI KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1056
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:66
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:267
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:154
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:165
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:74
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:289
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:304
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:173
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1016
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:273
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
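The KnownBits entries above cover both queries and transfer functions; a short standalone sketch ties a few of them together (the 8-bit starting value and the shift amount are arbitrary illustrations):
  #include "llvm/ADT/APInt.h"
  #include "llvm/Support/KnownBits.h"
  #include <cassert>
  using namespace llvm;

  void knownBitsDemo() {
    KnownBits Byte(8);
    Byte.Zero.setBit(7);                    // assume the top bit of the byte is known zero
    KnownBits Wide = Byte.zext(32);         // zext: the new high bits are known zero
    assert(Wide.countMaxActiveBits() <= 7); // at most 7 significant bits remain

    KnownBits Amt = KnownBits::makeConstant(APInt(32, 4));
    KnownBits Shifted = KnownBits::shl(Wide, Amt);
    assert(Shifted.countMinTrailingZeros() >= 4); // the shift brings in zeros from the right
  }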
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)