// NOTE(review): the following banner text was carried over from the doxygen
// page this file was extracted from (LLVM 21.0.0git, RISCVISelLowering.cpp,
// "Go to the documentation of this file."). Preserved here as a comment so it
// does not break compilation.
//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
37#include "llvm/IR/IRBuilder.h"
39#include "llvm/IR/IntrinsicsRISCV.h"
44#include "llvm/Support/Debug.h"
50#include <optional>
51
52using namespace llvm;
53
54#define DEBUG_TYPE "riscv-lower"
55
56STATISTIC(NumTailCalls, "Number of tail calls");
57
59 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
60 cl::desc("Give the maximum size (in number of nodes) of the web of "
61 "instructions that we will consider for VW expansion"),
62 cl::init(18));
63
64static cl::opt<bool>
65 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
66 cl::desc("Allow the formation of VW_W operations (e.g., "
67 "VWADD_W) with splat constants"),
68 cl::init(false));
69
71 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
72 cl::desc("Set the minimum number of repetitions of a divisor to allow "
73 "transformation to multiplications by the reciprocal"),
74 cl::init(2));
75
76static cl::opt<int>
78 cl::desc("Give the maximum number of instructions that we will "
79 "use for creating a floating-point immediate value"),
80 cl::init(2));
81
83 const RISCVSubtarget &STI)
84 : TargetLowering(TM), Subtarget(STI) {
85
86 RISCVABI::ABI ABI = Subtarget.getTargetABI();
87 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
88
89 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
90 !Subtarget.hasStdExtF()) {
91 errs() << "Hard-float 'f' ABI can't be used for a target that "
92 "doesn't support the F instruction set extension (ignoring "
93 "target-abi)\n";
95 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
96 !Subtarget.hasStdExtD()) {
97 errs() << "Hard-float 'd' ABI can't be used for a target that "
98 "doesn't support the D instruction set extension (ignoring "
99 "target-abi)\n";
100 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
101 }
102
103 switch (ABI) {
104 default:
105 report_fatal_error("Don't know how to lower this ABI");
114 break;
115 }
116
117 MVT XLenVT = Subtarget.getXLenVT();
118
119 // Set up the register classes.
120 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
121
122 if (Subtarget.hasStdExtZfhmin())
123 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
124 if (Subtarget.hasStdExtZfbfmin())
125 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
126 if (Subtarget.hasStdExtF())
127 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
128 if (Subtarget.hasStdExtD())
129 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
130 if (Subtarget.hasStdExtZhinxmin())
131 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
132 if (Subtarget.hasStdExtZfinx())
133 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
134 if (Subtarget.hasStdExtZdinx()) {
135 if (Subtarget.is64Bit())
136 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
137 else
138 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
139 }
140
141 static const MVT::SimpleValueType BoolVecVTs[] = {
142 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
143 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
144 static const MVT::SimpleValueType IntVecVTs[] = {
145 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
146 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
147 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
148 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
149 MVT::nxv4i64, MVT::nxv8i64};
150 static const MVT::SimpleValueType F16VecVTs[] = {
151 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
152 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
153 static const MVT::SimpleValueType BF16VecVTs[] = {
154 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
155 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
156 static const MVT::SimpleValueType F32VecVTs[] = {
157 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
158 static const MVT::SimpleValueType F64VecVTs[] = {
159 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
160 static const MVT::SimpleValueType VecTupleVTs[] = {
161 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
162 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
163 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
164 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
165 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
166 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
167 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
168 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
169 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
170 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
171 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
172
173 if (Subtarget.hasVInstructions()) {
174 auto addRegClassForRVV = [this](MVT VT) {
175 // Disable the smallest fractional LMUL types if ELEN is less than
176 // RVVBitsPerBlock.
177 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
178 if (VT.getVectorMinNumElements() < MinElts)
179 return;
180
181 unsigned Size = VT.getSizeInBits().getKnownMinValue();
182 const TargetRegisterClass *RC;
184 RC = &RISCV::VRRegClass;
185 else if (Size == 2 * RISCV::RVVBitsPerBlock)
186 RC = &RISCV::VRM2RegClass;
187 else if (Size == 4 * RISCV::RVVBitsPerBlock)
188 RC = &RISCV::VRM4RegClass;
189 else if (Size == 8 * RISCV::RVVBitsPerBlock)
190 RC = &RISCV::VRM8RegClass;
191 else
192 llvm_unreachable("Unexpected size");
193
194 addRegisterClass(VT, RC);
195 };
196
197 for (MVT VT : BoolVecVTs)
198 addRegClassForRVV(VT);
199 for (MVT VT : IntVecVTs) {
200 if (VT.getVectorElementType() == MVT::i64 &&
201 !Subtarget.hasVInstructionsI64())
202 continue;
203 addRegClassForRVV(VT);
204 }
205
206 if (Subtarget.hasVInstructionsF16Minimal())
207 for (MVT VT : F16VecVTs)
208 addRegClassForRVV(VT);
209
210 if (Subtarget.hasVInstructionsBF16Minimal())
211 for (MVT VT : BF16VecVTs)
212 addRegClassForRVV(VT);
213
214 if (Subtarget.hasVInstructionsF32())
215 for (MVT VT : F32VecVTs)
216 addRegClassForRVV(VT);
217
218 if (Subtarget.hasVInstructionsF64())
219 for (MVT VT : F64VecVTs)
220 addRegClassForRVV(VT);
221
222 if (Subtarget.useRVVForFixedLengthVectors()) {
223 auto addRegClassForFixedVectors = [this](MVT VT) {
224 MVT ContainerVT = getContainerForFixedLengthVector(VT);
225 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
226 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
227 addRegisterClass(VT, TRI.getRegClass(RCID));
228 };
230 if (useRVVForFixedLengthVectorVT(VT))
231 addRegClassForFixedVectors(VT);
232
234 if (useRVVForFixedLengthVectorVT(VT))
235 addRegClassForFixedVectors(VT);
236 }
237
238 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
239 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
240 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
241 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
242 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
243 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
244 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
245 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
246 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
247 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
248 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
249 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
250 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
251 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
252 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
253 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
254 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
255 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
256 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
257 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
258 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
259 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
260 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
261 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
262 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
263 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
264 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
265 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
266 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
267 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
268 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
269 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
270 }
271
272 // Compute derived properties from the register classes.
274
276
278 MVT::i1, Promote);
279 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
281 MVT::i1, Promote);
282
283 // TODO: add all necessary setOperationAction calls.
285
290
295 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
298 }
299
301
304
305 if (!Subtarget.hasVendorXTHeadBb())
307
309
310 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
311 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
312 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
313
314 if (Subtarget.is64Bit()) {
316
319 MVT::i32, Custom);
321 if (!Subtarget.hasStdExtZbb())
324 Custom);
326 }
327 if (!Subtarget.hasStdExtZmmul()) {
329 } else if (Subtarget.is64Bit()) {
332 } else {
334 }
335
336 if (!Subtarget.hasStdExtM()) {
338 Expand);
339 } else if (Subtarget.is64Bit()) {
341 {MVT::i8, MVT::i16, MVT::i32}, Custom);
342 }
343
346 Expand);
347
349 Custom);
350
351 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
352 if (Subtarget.is64Bit())
354 } else if (Subtarget.hasVendorXTHeadBb()) {
355 if (Subtarget.is64Bit())
358 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
360 } else {
362 }
363
364 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
365 // pattern match it directly in isel.
367 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
368 Subtarget.hasVendorXTHeadBb())
369 ? Legal
370 : Expand);
371
372 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
374 } else {
375 // Zbkb can use rev8+brev8 to implement bitreverse.
377 Subtarget.hasStdExtZbkb() ? Custom : Expand);
378 }
379
380 if (Subtarget.hasStdExtZbb() ||
381 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
383 Legal);
384 }
385
386 if (Subtarget.hasStdExtZbb() ||
387 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
388 if (Subtarget.is64Bit())
390 } else {
392 }
393
394 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
395 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
396 // We need the custom lowering to make sure that the resulting sequence
397 // for the 32bit case is efficient on 64bit targets.
398 if (Subtarget.is64Bit())
400 } else {
402 }
403
404 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
406 } else if (Subtarget.hasShortForwardBranchOpt()) {
407 // We can use PseudoCCSUB to implement ABS.
409 } else if (Subtarget.is64Bit()) {
411 }
412
413 if (Subtarget.useCCMovInsn())
415 else if (!Subtarget.hasVendorXTHeadCondMov())
417
418 static const unsigned FPLegalNodeTypes[] = {
426
427 static const ISD::CondCode FPCCToExpand[] = {
431
432 static const unsigned FPOpToExpand[] = {
434 ISD::FREM};
435
436 static const unsigned FPRndMode[] = {
439
440 static const unsigned ZfhminZfbfminPromoteOps[] = {
450
451 if (Subtarget.hasStdExtZfbfmin()) {
457 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
464 }
465
466 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
467 if (Subtarget.hasStdExtZfhOrZhinx()) {
468 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
469 setOperationAction(FPRndMode, MVT::f16,
470 Subtarget.hasStdExtZfa() ? Legal : Custom);
473 Subtarget.hasStdExtZfa() ? Legal : Custom);
474 if (Subtarget.hasStdExtZfa())
476 } else {
477 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
482 setOperationAction(Op, MVT::f16, Custom);
488 }
489
491
494 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
498
500 ISD::FNEARBYINT, MVT::f16,
501 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
506 MVT::f16, Promote);
507
508 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
509 // complete support for all operations in LegalizeDAG.
514 MVT::f16, Promote);
515
516 // We need to custom promote this.
517 if (Subtarget.is64Bit())
519 }
520
521 if (Subtarget.hasStdExtFOrZfinx()) {
522 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
523 setOperationAction(FPRndMode, MVT::f32,
524 Subtarget.hasStdExtZfa() ? Legal : Custom);
525 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
529 setOperationAction(FPOpToExpand, MVT::f32, Expand);
530 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
531 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
532 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
533 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
537 Subtarget.isSoftFPABI() ? LibCall : Custom);
542
543 if (Subtarget.hasStdExtZfa()) {
547 } else {
549 }
550 }
551
552 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
554
555 if (Subtarget.hasStdExtDOrZdinx()) {
556 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
557
558 if (!Subtarget.is64Bit())
560
561 if (Subtarget.hasStdExtZfa()) {
563 setOperationAction(FPRndMode, MVT::f64, Legal);
566 } else {
567 if (Subtarget.is64Bit())
568 setOperationAction(FPRndMode, MVT::f64, Custom);
569
571 }
572
575 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
579 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
580 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
581 setOperationAction(FPOpToExpand, MVT::f64, Expand);
582 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
583 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
584 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
585 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
589 Subtarget.isSoftFPABI() ? LibCall : Custom);
594 }
595
596 if (Subtarget.is64Bit()) {
599 MVT::i32, Custom);
601 }
602
603 if (Subtarget.hasStdExtFOrZfinx()) {
605 Custom);
606
607 // f16/bf16 require custom handling.
609 Custom);
611 Custom);
612
615 }
616
619 XLenVT, Custom);
620
622
623 if (Subtarget.is64Bit())
625
626 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
627 // Unfortunately this can't be determined just from the ISA naming string.
629 Subtarget.is64Bit() ? Legal : Custom);
631 Subtarget.is64Bit() ? Legal : Custom);
632
633 if (Subtarget.is64Bit()) {
636 }
637
640 if (Subtarget.is64Bit())
642
643 if (Subtarget.hasStdExtZicbop()) {
645 }
646
647 if (Subtarget.hasStdExtA()) {
649 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
651 else
653 } else if (Subtarget.hasForcedAtomics()) {
655 } else {
657 }
658
660
662
663 if (getTargetMachine().getTargetTriple().isOSLinux()) {
664 // Custom lowering of llvm.clear_cache.
666 }
667
668 if (Subtarget.hasVInstructions()) {
670
672
673 // RVV intrinsics may have illegal operands.
674 // We also need to custom legalize vmv.x.s.
677 {MVT::i8, MVT::i16}, Custom);
678 if (Subtarget.is64Bit())
680 MVT::i32, Custom);
681 else
683 MVT::i64, Custom);
684
686 MVT::Other, Custom);
687
688 static const unsigned IntegerVPOps[] = {
689 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
690 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
691 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
692 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
693 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
694 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
695 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
696 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
697 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
698 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
699 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
700 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
701 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
702 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
703 ISD::EXPERIMENTAL_VP_SPLAT};
704
705 static const unsigned FloatingPointVPOps[] = {
706 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
707 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
708 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
709 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
710 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
711 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
712 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
713 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
714 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
715 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
716 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
717 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
718 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
719 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
720
721 static const unsigned IntegerVecReduceOps[] = {
725
726 static const unsigned FloatingPointVecReduceOps[] = {
729
730 static const unsigned FloatingPointLibCallOps[] = {
733
734 if (!Subtarget.is64Bit()) {
735 // We must custom-lower certain vXi64 operations on RV32 due to the vector
736 // element type being illegal.
738 MVT::i64, Custom);
739
740 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
741
742 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
743 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
744 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
745 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
746 MVT::i64, Custom);
747 }
748
749 for (MVT VT : BoolVecVTs) {
750 if (!isTypeLegal(VT))
751 continue;
752
754
755 // Mask VTs are custom-expanded into a series of standard nodes
759 VT, Custom);
760
762 Custom);
763
765 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
766 Expand);
767 setOperationAction(ISD::VP_MERGE, VT, Custom);
768
769 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
770 Custom);
771
772 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
773
776 Custom);
777
779 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
780 Custom);
781
782 // RVV has native int->float & float->int conversions where the
783 // element type sizes are within one power-of-two of each other. Any
784 // wider distances between type sizes have to be lowered as sequences
785 // which progressively narrow the gap in stages.
790 VT, Custom);
792 Custom);
793
794 // Expand all extending loads to types larger than this, and truncating
795 // stores from types larger than this.
797 setTruncStoreAction(VT, OtherVT, Expand);
799 OtherVT, Expand);
800 }
801
802 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
803 ISD::VP_TRUNCATE, ISD::VP_SETCC},
804 VT, Custom);
805
808
810
811 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
812 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
813
816 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
817 }
818
819 for (MVT VT : IntVecVTs) {
820 if (!isTypeLegal(VT))
821 continue;
822
825
826 // Vectors implement MULHS/MULHU.
828
829 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
830 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
832
834 Legal);
835
837
838 // Custom-lower extensions and truncations from/to mask types.
840 VT, Custom);
841
842 // RVV has native int->float & float->int conversions where the
843 // element type sizes are within one power-of-two of each other. Any
844 // wider distances between type sizes have to be lowered as sequences
845 // which progressively narrow the gap in stages.
850 VT, Custom);
852 Custom);
856 VT, Legal);
857
858 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
859 // nodes which truncate by one power of two at a time.
862 Custom);
863
864 // Custom-lower insert/extract operations to simplify patterns.
866 Custom);
867
868 // Custom-lower reduction operations to set up the corresponding custom
869 // nodes' operands.
870 setOperationAction(IntegerVecReduceOps, VT, Custom);
871
872 setOperationAction(IntegerVPOps, VT, Custom);
873
875
877 VT, Custom);
878
880 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
881 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
882 VT, Custom);
883
886 VT, Custom);
887
890
892
894 setTruncStoreAction(VT, OtherVT, Expand);
896 OtherVT, Expand);
897 }
898
901
902 // Splice
904
905 if (Subtarget.hasStdExtZvkb()) {
907 setOperationAction(ISD::VP_BSWAP, VT, Custom);
908 } else {
909 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
911 }
912
913 if (Subtarget.hasStdExtZvbb()) {
915 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
916 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
917 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
918 VT, Custom);
919 } else {
920 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
922 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
923 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
924 VT, Expand);
925
926 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
927 // range of f32.
928 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
929 if (isTypeLegal(FloatVT)) {
931 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
932 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
933 VT, Custom);
934 }
935 }
936
938 }
939
940 for (MVT VT : VecTupleVTs) {
941 if (!isTypeLegal(VT))
942 continue;
943
945 }
946
947 // Expand various CCs to best match the RVV ISA, which natively supports UNE
948 // but no other unordered comparisons, and supports all ordered comparisons
949 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
950 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
951 // and we pattern-match those back to the "original", swapping operands once
952 // more. This way we catch both operations and both "vf" and "fv" forms with
953 // fewer patterns.
954 static const ISD::CondCode VFPCCToExpand[] = {
958 };
959
960 // TODO: support more ops.
961 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
969
970 // TODO: support more vp ops.
971 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
972 ISD::VP_FADD,
973 ISD::VP_FSUB,
974 ISD::VP_FMUL,
975 ISD::VP_FDIV,
976 ISD::VP_FMA,
977 ISD::VP_REDUCE_FMIN,
978 ISD::VP_REDUCE_FMAX,
979 ISD::VP_SQRT,
980 ISD::VP_FMINNUM,
981 ISD::VP_FMAXNUM,
982 ISD::VP_FCEIL,
983 ISD::VP_FFLOOR,
984 ISD::VP_FROUND,
985 ISD::VP_FROUNDEVEN,
986 ISD::VP_FROUNDTOZERO,
987 ISD::VP_FRINT,
988 ISD::VP_FNEARBYINT,
989 ISD::VP_SETCC,
990 ISD::VP_FMINIMUM,
991 ISD::VP_FMAXIMUM,
992 ISD::VP_REDUCE_FMINIMUM,
993 ISD::VP_REDUCE_FMAXIMUM};
994
995 // Sets common operation actions on RVV floating-point vector types.
996 const auto SetCommonVFPActions = [&](MVT VT) {
998 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
999 // sizes are within one power-of-two of each other. Therefore conversions
1000 // between vXf16 and vXf64 must be lowered as sequences which convert via
1001 // vXf32.
1004 // Custom-lower insert/extract operations to simplify patterns.
1006 Custom);
1007 // Expand various condition codes (explained above).
1008 setCondCodeAction(VFPCCToExpand, VT, Expand);
1009
1012
1016 VT, Custom);
1017
1018 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1019
1020 // Expand FP operations that need libcalls.
1021 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1022
1024
1026
1028 VT, Custom);
1029
1031 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1032 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1033 VT, Custom);
1034
1037
1040 VT, Custom);
1041
1044
1046
1047 setOperationAction(FloatingPointVPOps, VT, Custom);
1048
1050 Custom);
1053 VT, Legal);
1058 VT, Custom);
1059
1061 };
1062
1063 // Sets common extload/truncstore actions on RVV floating-point vector
1064 // types.
1065 const auto SetCommonVFPExtLoadTruncStoreActions =
1066 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1067 for (auto SmallVT : SmallerVTs) {
1068 setTruncStoreAction(VT, SmallVT, Expand);
1069 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1070 }
1071 };
1072
1073 // Sets common actions for f16 and bf16 for when there's only
1074 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1075 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1078 Custom);
1079 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1080 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1081 Custom);
1083 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1089 VT, Custom);
1090 MVT EltVT = VT.getVectorElementType();
1091 if (isTypeLegal(EltVT))
1092 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1094 VT, Custom);
1095 else
1096 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1097 EltVT, Custom);
1099 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1100 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1101 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1102 ISD::VP_SCATTER},
1103 VT, Custom);
1104
1108
1109 // Expand FP operations that need libcalls.
1110 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1111
1112 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1113 if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
1114 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1115 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1116 } else {
1117 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1118 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1119 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1120 }
1121 };
1122
1123 if (Subtarget.hasVInstructionsF16()) {
1124 for (MVT VT : F16VecVTs) {
1125 if (!isTypeLegal(VT))
1126 continue;
1127 SetCommonVFPActions(VT);
1128 }
1129 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1130 for (MVT VT : F16VecVTs) {
1131 if (!isTypeLegal(VT))
1132 continue;
1133 SetCommonPromoteToF32Actions(VT);
1134 }
1135 }
1136
1137 if (Subtarget.hasVInstructionsBF16Minimal()) {
1138 for (MVT VT : BF16VecVTs) {
1139 if (!isTypeLegal(VT))
1140 continue;
1141 SetCommonPromoteToF32Actions(VT);
1142 }
1143 }
1144
1145 if (Subtarget.hasVInstructionsF32()) {
1146 for (MVT VT : F32VecVTs) {
1147 if (!isTypeLegal(VT))
1148 continue;
1149 SetCommonVFPActions(VT);
1150 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1151 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1152 }
1153 }
1154
1155 if (Subtarget.hasVInstructionsF64()) {
1156 for (MVT VT : F64VecVTs) {
1157 if (!isTypeLegal(VT))
1158 continue;
1159 SetCommonVFPActions(VT);
1160 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1161 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1162 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1163 }
1164 }
1165
1166 if (Subtarget.useRVVForFixedLengthVectors()) {
1168 if (!useRVVForFixedLengthVectorVT(VT))
1169 continue;
1170
1171 // By default everything must be expanded.
1172 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1175 setTruncStoreAction(VT, OtherVT, Expand);
1177 OtherVT, Expand);
1178 }
1179
1180 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1181 // expansion to a build_vector of 0s.
1183
1184 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1186 Custom);
1187
1190 Custom);
1191
1193 VT, Custom);
1194
1196 VT, Custom);
1197
1199
1201
1203
1205
1208 Custom);
1209
1211
1214 Custom);
1215
1217 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1218 Custom);
1219
1221 {
1230 },
1231 VT, Custom);
1233 Custom);
1234
1236
1237 // Operations below are different for between masks and other vectors.
1238 if (VT.getVectorElementType() == MVT::i1) {
1239 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1240 ISD::OR, ISD::XOR},
1241 VT, Custom);
1242
1243 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1244 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1245 VT, Custom);
1246
1247 setOperationAction(ISD::VP_MERGE, VT, Custom);
1248
1249 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1250 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1251 continue;
1252 }
1253
1254 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1255 // it before type legalization for i64 vectors on RV32. It will then be
1256 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1257 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1258 // improvements first.
1259 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1262 }
1263
1266
1267 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1268 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1269 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1270 ISD::VP_SCATTER},
1271 VT, Custom);
1272
1276 VT, Custom);
1277
1280
1282
1283 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1284 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1286
1290 VT, Custom);
1291
1293
1296
1297 // Custom-lower reduction operations to set up the corresponding custom
1298 // nodes' operands.
1302 VT, Custom);
1303
1304 setOperationAction(IntegerVPOps, VT, Custom);
1305
1306 if (Subtarget.hasStdExtZvkb())
1308
1309 if (Subtarget.hasStdExtZvbb()) {
1312 VT, Custom);
1313 } else {
1314 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1315 // range of f32.
1316 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1317 if (isTypeLegal(FloatVT))
1320 Custom);
1321 }
1322
1324 }
1325
1327 // There are no extending loads or truncating stores.
1328 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1329 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1330 setTruncStoreAction(VT, InnerVT, Expand);
1331 }
1332
1333 if (!useRVVForFixedLengthVectorVT(VT))
1334 continue;
1335
1336 // By default everything must be expanded.
1337 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1339
1340 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1341 // expansion to a build_vector of 0s.
1343
1348 VT, Custom);
1349
1351 VT, Custom);
1352
1355 VT, Custom);
1356 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1357 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1358 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1359 VT, Custom);
1360
1363 Custom);
1364
1365 if (VT.getVectorElementType() == MVT::f16 &&
1366 !Subtarget.hasVInstructionsF16()) {
1368 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1370 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1371 Custom);
1372 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1373 Custom);
1374 if (Subtarget.hasStdExtZfhmin()) {
1376 } else {
1377 // We need to custom legalize f16 build vectors if Zfhmin isn't
1378 // available.
1380 }
1384 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1385 // Don't promote f16 vector operations to f32 if f32 vector type is
1386 // not legal.
1387 // TODO: could split the f16 vector into two vectors and do promotion.
1388 if (!isTypeLegal(F32VecVT))
1389 continue;
1390 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1391 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1392 continue;
1393 }
1394
1395 if (VT.getVectorElementType() == MVT::bf16) {
1397 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1398 if (Subtarget.hasStdExtZfbfmin()) {
1400 } else {
1401 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1402 // available.
1404 }
1406 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1407 Custom);
1408 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1409 // Don't promote f16 vector operations to f32 if f32 vector type is
1410 // not legal.
1411 // TODO: could split the f16 vector into two vectors and do promotion.
1412 if (!isTypeLegal(F32VecVT))
1413 continue;
1414 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1415 // TODO: Promote VP ops to fp32.
1416 continue;
1417 }
1418
1420 Custom);
1421
1426 VT, Custom);
1427
1430 VT, Custom);
1431
1432 setCondCodeAction(VFPCCToExpand, VT, Expand);
1433
1436
1438
1439 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1440
1441 setOperationAction(FloatingPointVPOps, VT, Custom);
1442
1449 VT, Custom);
1450 }
1451
1452 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1453 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1454 if (Subtarget.is64Bit())
1456 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1458 if (Subtarget.hasStdExtZfbfmin())
1460 if (Subtarget.hasStdExtFOrZfinx())
1462 if (Subtarget.hasStdExtDOrZdinx())
1464 }
1465 }
1466
1467 if (Subtarget.hasStdExtA())
1469
1470 if (Subtarget.hasForcedAtomics()) {
1471 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1477 XLenVT, LibCall);
1478 }
1479
1480 if (Subtarget.hasVendorXTHeadMemIdx()) {
1481 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1482 setIndexedLoadAction(im, MVT::i8, Legal);
1483 setIndexedStoreAction(im, MVT::i8, Legal);
1484 setIndexedLoadAction(im, MVT::i16, Legal);
1485 setIndexedStoreAction(im, MVT::i16, Legal);
1486 setIndexedLoadAction(im, MVT::i32, Legal);
1487 setIndexedStoreAction(im, MVT::i32, Legal);
1488
1489 if (Subtarget.is64Bit()) {
1490 setIndexedLoadAction(im, MVT::i64, Legal);
1491 setIndexedStoreAction(im, MVT::i64, Legal);
1492 }
1493 }
1494 }
1495
1496 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1500
1504 }
1505
1506 // Function alignments.
1507 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1508 setMinFunctionAlignment(FunctionAlignment);
1509 // Set preferred alignments.
1512
1518
1519 if (Subtarget.hasStdExtFOrZfinx())
1521
1522 if (Subtarget.hasStdExtZbb())
1524
1525 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1526 Subtarget.hasVInstructions())
1528
1529 if (Subtarget.hasStdExtZbkb())
1531
1532 if (Subtarget.hasStdExtFOrZfinx())
1535 if (Subtarget.hasVInstructions())
1538 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1541 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1545 ISD::VSELECT});
1546
1547 if (Subtarget.hasVendorXTHeadMemPair())
1549 if (Subtarget.useRVVForFixedLengthVectors())
1551
1552 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1553 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1554
1555 // Disable strict node mutation.
1556 IsStrictFPEnabled = true;
1557 EnableExtLdPromotion = true;
1558
1559 // Let the subtarget decide if a predictable select is more expensive than the
1560 // corresponding branch. This information is used in CGP/SelectOpt to decide
1561 // when to convert selects into branches.
1562 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1563
1564 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1565 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1566
1568 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1569 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1570
1572 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1573 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1574
1575 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1576 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1577}
1578
1580 LLVMContext &Context,
1581 EVT VT) const {
1582 if (!VT.isVector())
1583 return getPointerTy(DL);
1584 if (Subtarget.hasVInstructions() &&
1585 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1586 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1588}
1589
1590MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1591 return Subtarget.getXLenVT();
1592}
1593
1594// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1595bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1596 unsigned VF,
1597 bool IsScalable) const {
1598 if (!Subtarget.hasVInstructions())
1599 return true;
1600
1601 if (!IsScalable)
1602 return true;
1603
1604 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1605 return true;
1606
1607 // Don't allow VF=1 if those types are't legal.
1608 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1609 return true;
1610
1611 // VLEN=32 support is incomplete.
1612 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1613 return true;
1614
1615 // The maximum VF is for the smallest element width with LMUL=8.
1616 // VF must be a power of 2.
1617 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1618 return VF > MaxVF || !isPowerOf2_32(VF);
1619}
1620
1622 return !Subtarget.hasVInstructions() ||
1623 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1624}
1625
1627 const CallInst &I,
1628 MachineFunction &MF,
1629 unsigned Intrinsic) const {
1630 auto &DL = I.getDataLayout();
1631
1632 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1633 bool IsUnitStrided, bool UsePtrVal = false) {
1635 // We can't use ptrVal if the intrinsic can access memory before the
1636 // pointer. This means we can't use it for strided or indexed intrinsics.
1637 if (UsePtrVal)
1638 Info.ptrVal = I.getArgOperand(PtrOp);
1639 else
1640 Info.fallbackAddressSpace =
1641 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1642 Type *MemTy;
1643 if (IsStore) {
1644 // Store value is the first operand.
1645 MemTy = I.getArgOperand(0)->getType();
1646 } else {
1647 // Use return type. If it's segment load, return type is a struct.
1648 MemTy = I.getType();
1649 if (MemTy->isStructTy())
1650 MemTy = MemTy->getStructElementType(0);
1651 }
1652 if (!IsUnitStrided)
1653 MemTy = MemTy->getScalarType();
1654
1655 Info.memVT = getValueType(DL, MemTy);
1656 if (MemTy->isTargetExtTy()) {
1657 // RISC-V vector tuple type's alignment type should be its element type.
1658 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1659 MemTy = Type::getIntNTy(
1660 MemTy->getContext(),
1661 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1662 ->getZExtValue());
1663 Info.align = DL.getABITypeAlign(MemTy);
1664 } else {
1665 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1666 }
1668 Info.flags |=
1670 return true;
1671 };
1672
1673 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1675
1677 switch (Intrinsic) {
1678 default:
1679 return false;
1680 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1681 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1682 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1683 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1684 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1685 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1686 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1687 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1688 case Intrinsic::riscv_masked_cmpxchg_i32:
1690 Info.memVT = MVT::i32;
1691 Info.ptrVal = I.getArgOperand(0);
1692 Info.offset = 0;
1693 Info.align = Align(4);
1696 return true;
1697 case Intrinsic::riscv_seg2_load:
1698 case Intrinsic::riscv_seg3_load:
1699 case Intrinsic::riscv_seg4_load:
1700 case Intrinsic::riscv_seg5_load:
1701 case Intrinsic::riscv_seg6_load:
1702 case Intrinsic::riscv_seg7_load:
1703 case Intrinsic::riscv_seg8_load:
1704 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1705 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1706 case Intrinsic::riscv_seg2_store:
1707 case Intrinsic::riscv_seg3_store:
1708 case Intrinsic::riscv_seg4_store:
1709 case Intrinsic::riscv_seg5_store:
1710 case Intrinsic::riscv_seg6_store:
1711 case Intrinsic::riscv_seg7_store:
1712 case Intrinsic::riscv_seg8_store:
1713 // Operands are (vec, ..., vec, ptr, vl)
1714 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1715 /*IsStore*/ true,
1716 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1717 case Intrinsic::riscv_vle:
1718 case Intrinsic::riscv_vle_mask:
1719 case Intrinsic::riscv_vleff:
1720 case Intrinsic::riscv_vleff_mask:
1721 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1722 /*IsStore*/ false,
1723 /*IsUnitStrided*/ true,
1724 /*UsePtrVal*/ true);
1725 case Intrinsic::riscv_vse:
1726 case Intrinsic::riscv_vse_mask:
1727 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1728 /*IsStore*/ true,
1729 /*IsUnitStrided*/ true,
1730 /*UsePtrVal*/ true);
1731 case Intrinsic::riscv_vlse:
1732 case Intrinsic::riscv_vlse_mask:
1733 case Intrinsic::riscv_vloxei:
1734 case Intrinsic::riscv_vloxei_mask:
1735 case Intrinsic::riscv_vluxei:
1736 case Intrinsic::riscv_vluxei_mask:
1737 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1738 /*IsStore*/ false,
1739 /*IsUnitStrided*/ false);
1740 case Intrinsic::riscv_vsse:
1741 case Intrinsic::riscv_vsse_mask:
1742 case Intrinsic::riscv_vsoxei:
1743 case Intrinsic::riscv_vsoxei_mask:
1744 case Intrinsic::riscv_vsuxei:
1745 case Intrinsic::riscv_vsuxei_mask:
1746 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1747 /*IsStore*/ true,
1748 /*IsUnitStrided*/ false);
1749 case Intrinsic::riscv_vlseg2:
1750 case Intrinsic::riscv_vlseg3:
1751 case Intrinsic::riscv_vlseg4:
1752 case Intrinsic::riscv_vlseg5:
1753 case Intrinsic::riscv_vlseg6:
1754 case Intrinsic::riscv_vlseg7:
1755 case Intrinsic::riscv_vlseg8:
1756 case Intrinsic::riscv_vlseg2ff:
1757 case Intrinsic::riscv_vlseg3ff:
1758 case Intrinsic::riscv_vlseg4ff:
1759 case Intrinsic::riscv_vlseg5ff:
1760 case Intrinsic::riscv_vlseg6ff:
1761 case Intrinsic::riscv_vlseg7ff:
1762 case Intrinsic::riscv_vlseg8ff:
1763 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1764 /*IsStore*/ false,
1765 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1766 case Intrinsic::riscv_vlseg2_mask:
1767 case Intrinsic::riscv_vlseg3_mask:
1768 case Intrinsic::riscv_vlseg4_mask:
1769 case Intrinsic::riscv_vlseg5_mask:
1770 case Intrinsic::riscv_vlseg6_mask:
1771 case Intrinsic::riscv_vlseg7_mask:
1772 case Intrinsic::riscv_vlseg8_mask:
1773 case Intrinsic::riscv_vlseg2ff_mask:
1774 case Intrinsic::riscv_vlseg3ff_mask:
1775 case Intrinsic::riscv_vlseg4ff_mask:
1776 case Intrinsic::riscv_vlseg5ff_mask:
1777 case Intrinsic::riscv_vlseg6ff_mask:
1778 case Intrinsic::riscv_vlseg7ff_mask:
1779 case Intrinsic::riscv_vlseg8ff_mask:
1780 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1781 /*IsStore*/ false,
1782 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1783 case Intrinsic::riscv_vlsseg2:
1784 case Intrinsic::riscv_vlsseg3:
1785 case Intrinsic::riscv_vlsseg4:
1786 case Intrinsic::riscv_vlsseg5:
1787 case Intrinsic::riscv_vlsseg6:
1788 case Intrinsic::riscv_vlsseg7:
1789 case Intrinsic::riscv_vlsseg8:
1790 case Intrinsic::riscv_vloxseg2:
1791 case Intrinsic::riscv_vloxseg3:
1792 case Intrinsic::riscv_vloxseg4:
1793 case Intrinsic::riscv_vloxseg5:
1794 case Intrinsic::riscv_vloxseg6:
1795 case Intrinsic::riscv_vloxseg7:
1796 case Intrinsic::riscv_vloxseg8:
1797 case Intrinsic::riscv_vluxseg2:
1798 case Intrinsic::riscv_vluxseg3:
1799 case Intrinsic::riscv_vluxseg4:
1800 case Intrinsic::riscv_vluxseg5:
1801 case Intrinsic::riscv_vluxseg6:
1802 case Intrinsic::riscv_vluxseg7:
1803 case Intrinsic::riscv_vluxseg8:
1804 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1805 /*IsStore*/ false,
1806 /*IsUnitStrided*/ false);
1807 case Intrinsic::riscv_vlsseg2_mask:
1808 case Intrinsic::riscv_vlsseg3_mask:
1809 case Intrinsic::riscv_vlsseg4_mask:
1810 case Intrinsic::riscv_vlsseg5_mask:
1811 case Intrinsic::riscv_vlsseg6_mask:
1812 case Intrinsic::riscv_vlsseg7_mask:
1813 case Intrinsic::riscv_vlsseg8_mask:
1814 case Intrinsic::riscv_vloxseg2_mask:
1815 case Intrinsic::riscv_vloxseg3_mask:
1816 case Intrinsic::riscv_vloxseg4_mask:
1817 case Intrinsic::riscv_vloxseg5_mask:
1818 case Intrinsic::riscv_vloxseg6_mask:
1819 case Intrinsic::riscv_vloxseg7_mask:
1820 case Intrinsic::riscv_vloxseg8_mask:
1821 case Intrinsic::riscv_vluxseg2_mask:
1822 case Intrinsic::riscv_vluxseg3_mask:
1823 case Intrinsic::riscv_vluxseg4_mask:
1824 case Intrinsic::riscv_vluxseg5_mask:
1825 case Intrinsic::riscv_vluxseg6_mask:
1826 case Intrinsic::riscv_vluxseg7_mask:
1827 case Intrinsic::riscv_vluxseg8_mask:
1828 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1829 /*IsStore*/ false,
1830 /*IsUnitStrided*/ false);
1831 case Intrinsic::riscv_vsseg2:
1832 case Intrinsic::riscv_vsseg3:
1833 case Intrinsic::riscv_vsseg4:
1834 case Intrinsic::riscv_vsseg5:
1835 case Intrinsic::riscv_vsseg6:
1836 case Intrinsic::riscv_vsseg7:
1837 case Intrinsic::riscv_vsseg8:
1838 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1839 /*IsStore*/ true,
1840 /*IsUnitStrided*/ false);
1841 case Intrinsic::riscv_vsseg2_mask:
1842 case Intrinsic::riscv_vsseg3_mask:
1843 case Intrinsic::riscv_vsseg4_mask:
1844 case Intrinsic::riscv_vsseg5_mask:
1845 case Intrinsic::riscv_vsseg6_mask:
1846 case Intrinsic::riscv_vsseg7_mask:
1847 case Intrinsic::riscv_vsseg8_mask:
1848 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1849 /*IsStore*/ true,
1850 /*IsUnitStrided*/ false);
1851 case Intrinsic::riscv_vssseg2:
1852 case Intrinsic::riscv_vssseg3:
1853 case Intrinsic::riscv_vssseg4:
1854 case Intrinsic::riscv_vssseg5:
1855 case Intrinsic::riscv_vssseg6:
1856 case Intrinsic::riscv_vssseg7:
1857 case Intrinsic::riscv_vssseg8:
1858 case Intrinsic::riscv_vsoxseg2:
1859 case Intrinsic::riscv_vsoxseg3:
1860 case Intrinsic::riscv_vsoxseg4:
1861 case Intrinsic::riscv_vsoxseg5:
1862 case Intrinsic::riscv_vsoxseg6:
1863 case Intrinsic::riscv_vsoxseg7:
1864 case Intrinsic::riscv_vsoxseg8:
1865 case Intrinsic::riscv_vsuxseg2:
1866 case Intrinsic::riscv_vsuxseg3:
1867 case Intrinsic::riscv_vsuxseg4:
1868 case Intrinsic::riscv_vsuxseg5:
1869 case Intrinsic::riscv_vsuxseg6:
1870 case Intrinsic::riscv_vsuxseg7:
1871 case Intrinsic::riscv_vsuxseg8:
1872 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1873 /*IsStore*/ true,
1874 /*IsUnitStrided*/ false);
1875 case Intrinsic::riscv_vssseg2_mask:
1876 case Intrinsic::riscv_vssseg3_mask:
1877 case Intrinsic::riscv_vssseg4_mask:
1878 case Intrinsic::riscv_vssseg5_mask:
1879 case Intrinsic::riscv_vssseg6_mask:
1880 case Intrinsic::riscv_vssseg7_mask:
1881 case Intrinsic::riscv_vssseg8_mask:
1882 case Intrinsic::riscv_vsoxseg2_mask:
1883 case Intrinsic::riscv_vsoxseg3_mask:
1884 case Intrinsic::riscv_vsoxseg4_mask:
1885 case Intrinsic::riscv_vsoxseg5_mask:
1886 case Intrinsic::riscv_vsoxseg6_mask:
1887 case Intrinsic::riscv_vsoxseg7_mask:
1888 case Intrinsic::riscv_vsoxseg8_mask:
1889 case Intrinsic::riscv_vsuxseg2_mask:
1890 case Intrinsic::riscv_vsuxseg3_mask:
1891 case Intrinsic::riscv_vsuxseg4_mask:
1892 case Intrinsic::riscv_vsuxseg5_mask:
1893 case Intrinsic::riscv_vsuxseg6_mask:
1894 case Intrinsic::riscv_vsuxseg7_mask:
1895 case Intrinsic::riscv_vsuxseg8_mask:
1896 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1897 /*IsStore*/ true,
1898 /*IsUnitStrided*/ false);
1899 }
1900}
1901
1903 const AddrMode &AM, Type *Ty,
1904 unsigned AS,
1905 Instruction *I) const {
1906 // No global is ever allowed as a base.
1907 if (AM.BaseGV)
1908 return false;
1909
1910 // None of our addressing modes allows a scalable offset
1911 if (AM.ScalableOffset)
1912 return false;
1913
1914 // RVV instructions only support register addressing.
1915 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1916 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1917
1918 // Require a 12-bit signed offset.
1919 if (!isInt<12>(AM.BaseOffs))
1920 return false;
1921
1922 switch (AM.Scale) {
1923 case 0: // "r+i" or just "i", depending on HasBaseReg.
1924 break;
1925 case 1:
1926 if (!AM.HasBaseReg) // allow "r+i".
1927 break;
1928 return false; // disallow "r+r" or "r+r+i".
1929 default:
1930 return false;
1931 }
1932
1933 return true;
1934}
1935
1937 return isInt<12>(Imm);
1938}
1939
1941 return isInt<12>(Imm);
1942}
1943
1944// On RV32, 64-bit integers are split into their high and low parts and held
1945// in two different registers, so the trunc is free since the low register can
1946// just be used.
1947// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1948// isTruncateFree?
1950 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1951 return false;
1952 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1953 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1954 return (SrcBits == 64 && DestBits == 32);
1955}
1956
1958 // We consider i64->i32 free on RV64 since we have good selection of W
1959 // instructions that make promoting operations back to i64 free in many cases.
1960 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1961 !DstVT.isInteger())
1962 return false;
1963 unsigned SrcBits = SrcVT.getSizeInBits();
1964 unsigned DestBits = DstVT.getSizeInBits();
1965 return (SrcBits == 64 && DestBits == 32);
1966}
1967
1969 EVT SrcVT = Val.getValueType();
1970 // free truncate from vnsrl and vnsra
1971 if (Subtarget.hasVInstructions() &&
1972 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1973 SrcVT.isVector() && VT2.isVector()) {
1974 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1975 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1976 if (SrcBits == DestBits * 2) {
1977 return true;
1978 }
1979 }
1980 return TargetLowering::isTruncateFree(Val, VT2);
1981}
1982
1984 // Zexts are free if they can be combined with a load.
1985 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1986 // poorly with type legalization of compares preferring sext.
1987 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1988 EVT MemVT = LD->getMemoryVT();
1989 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1990 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1991 LD->getExtensionType() == ISD::ZEXTLOAD))
1992 return true;
1993 }
1994
1995 return TargetLowering::isZExtFree(Val, VT2);
1996}
1997
1999 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2000}
2001
2003 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2004}
2005
2007 return Subtarget.hasStdExtZbb() ||
2008 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2009}
2010
2012 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2013 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2014}
2015
2017 const Instruction &AndI) const {
2018 // We expect to be able to match a bit extraction instruction if the Zbs
2019 // extension is supported and the mask is a power of two. However, we
2020 // conservatively return false if the mask would fit in an ANDI instruction,
2021 // on the basis that it's possible the sinking+duplication of the AND in
2022 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2023 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2024 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2025 return false;
2026 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2027 if (!Mask)
2028 return false;
2029 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2030}
2031
2033 EVT VT = Y.getValueType();
2034
2035 // FIXME: Support vectors once we have tests.
2036 if (VT.isVector())
2037 return false;
2038
2039 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2040 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2041}
2042
2044 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2045 if (Subtarget.hasStdExtZbs())
2046 return X.getValueType().isScalarInteger();
2047 auto *C = dyn_cast<ConstantSDNode>(Y);
2048 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2049 if (Subtarget.hasVendorXTHeadBs())
2050 return C != nullptr;
2051 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2052 return C && C->getAPIntValue().ule(10);
2053}
2054
2056 EVT VT) const {
2057 // Only enable for rvv.
2058 if (!VT.isVector() || !Subtarget.hasVInstructions())
2059 return false;
2060
2061 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2062 return false;
2063
2064 return true;
2065}
2066
2068 Type *Ty) const {
2069 assert(Ty->isIntegerTy());
2070
2071 unsigned BitSize = Ty->getIntegerBitWidth();
2072 if (BitSize > Subtarget.getXLen())
2073 return false;
2074
2075 // Fast path, assume 32-bit immediates are cheap.
2076 int64_t Val = Imm.getSExtValue();
2077 if (isInt<32>(Val))
2078 return true;
2079
2080 // A constant pool entry may be more aligned than the load we're trying to
2081 // replace. If we don't support unaligned scalar mem, prefer the constant
2082 // pool.
2083 // TODO: Can the caller pass down the alignment?
2084 if (!Subtarget.enableUnalignedScalarMem())
2085 return true;
2086
2087 // Prefer to keep the load if it would require many instructions.
2088 // This uses the same threshold we use for constant pools but doesn't
2089 // check useConstantPoolForLargeInts.
2090 // TODO: Should we keep the load only when we're definitely going to emit a
2091 // constant pool?
2092
2094 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2095}
2096
2100 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2101 SelectionDAG &DAG) const {
2102 // One interesting pattern that we'd want to form is 'bit extract':
2103 // ((1 >> Y) & 1) ==/!= 0
2104 // But we also need to be careful not to try to reverse that fold.
2105
2106 // Is this '((1 >> Y) & 1)'?
2107 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2108 return false; // Keep the 'bit extract' pattern.
2109
2110 // Will this be '((1 >> Y) & 1)' after the transform?
2111 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2112 return true; // Do form the 'bit extract' pattern.
2113
2114 // If 'X' is a constant, and we transform, then we will immediately
2115 // try to undo the fold, thus causing endless combine loop.
2116 // So only do the transform if X is not a constant. This matches the default
2117 // implementation of this function.
2118 return !XC;
2119}
2120
2122 unsigned Opc = VecOp.getOpcode();
2123
2124 // Assume target opcodes can't be scalarized.
2125 // TODO - do we have any exceptions?
2126 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2127 return false;
2128
2129 // If the vector op is not supported, try to convert to scalar.
2130 EVT VecVT = VecOp.getValueType();
2131 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2132 return true;
2133
2134 // If the vector op is supported, but the scalar op is not, the transform may
2135 // not be worthwhile.
2136 // Permit a vector binary operation can be converted to scalar binary
2137 // operation which is custom lowered with illegal type.
2138 EVT ScalarVT = VecVT.getScalarType();
2139 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2140 isOperationCustom(Opc, ScalarVT);
2141}
2142
2144 const GlobalAddressSDNode *GA) const {
2145 // In order to maximise the opportunity for common subexpression elimination,
2146 // keep a separate ADD node for the global address offset instead of folding
2147 // it in the global address node. Later peephole optimisations may choose to
2148 // fold it back in when profitable.
2149 return false;
2150}
2151
2152// Returns 0-31 if the fli instruction is available for the type and this is
2153// legal FP immediate for the type. Returns -1 otherwise.
2155 if (!Subtarget.hasStdExtZfa())
2156 return -1;
2157
2158 bool IsSupportedVT = false;
2159 if (VT == MVT::f16) {
2160 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2161 } else if (VT == MVT::f32) {
2162 IsSupportedVT = true;
2163 } else if (VT == MVT::f64) {
2164 assert(Subtarget.hasStdExtD() && "Expect D extension");
2165 IsSupportedVT = true;
2166 }
2167
2168 if (!IsSupportedVT)
2169 return -1;
2170
2171 return RISCVLoadFPImm::getLoadFPImm(Imm);
2172}
2173
2175 bool ForCodeSize) const {
2176 bool IsLegalVT = false;
2177 if (VT == MVT::f16)
2178 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2179 else if (VT == MVT::f32)
2180 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2181 else if (VT == MVT::f64)
2182 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2183 else if (VT == MVT::bf16)
2184 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2185
2186 if (!IsLegalVT)
2187 return false;
2188
2189 if (getLegalZfaFPImm(Imm, VT) >= 0)
2190 return true;
2191
2192 // Cannot create a 64 bit floating-point immediate value for rv32.
2193 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2194 // td can handle +0.0 or -0.0 already.
2195 // -0.0 can be created by fmv + fneg.
2196 return Imm.isZero();
2197 }
2198
2199 // Special case: fmv + fneg
2200 if (Imm.isNegZero())
2201 return true;
2202
2203 // Building an integer and then converting requires a fmv at the end of
2204 // the integer sequence. The fmv is not required for Zfinx.
2205 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2206 const int Cost =
2207 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2208 Subtarget.getXLen(), Subtarget);
2209 return Cost <= FPImmCost;
2210}
2211
2212// TODO: This is very conservative.
2214 unsigned Index) const {
2216 return false;
2217
2218 // Only support extracting a fixed from a fixed vector for now.
2219 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2220 return false;
2221
2222 EVT EltVT = ResVT.getVectorElementType();
2223 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2224
2225 // The smallest type we can slide is i8.
2226 // TODO: We can extract index 0 from a mask vector without a slide.
2227 if (EltVT == MVT::i1)
2228 return false;
2229
2230 unsigned ResElts = ResVT.getVectorNumElements();
2231 unsigned SrcElts = SrcVT.getVectorNumElements();
2232
2233 unsigned MinVLen = Subtarget.getRealMinVLen();
2234 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2235
2236 // If we're extracting only data from the first VLEN bits of the source
2237 // then we can always do this with an m1 vslidedown.vx. Restricting the
2238 // Index ensures we can use a vslidedown.vi.
2239 // TODO: We can generalize this when the exact VLEN is known.
2240 if (Index + ResElts <= MinVLMAX && Index < 31)
2241 return true;
2242
2243 // Convervatively only handle extracting half of a vector.
2244 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2245 // the upper half of a vector until we have more test coverage.
2246 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2247 // a cheap extract. However, this case is important in practice for
2248 // shuffled extracts of longer vectors. How resolve?
2249 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2250}
2251
2254 EVT VT) const {
2255 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2256 // We might still end up using a GPR but that will be decided based on ABI.
2257 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2258 !Subtarget.hasStdExtZfhminOrZhinxmin())
2259 return MVT::f32;
2260
2262
2263 return PartVT;
2264}
2265
2266unsigned
2268 std::optional<MVT> RegisterVT) const {
2269 // Pair inline assembly operand
2270 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2271 *RegisterVT == MVT::Untyped)
2272 return 1;
2273
2274 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2275}
2276
2279 EVT VT) const {
2280 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2281 // We might still end up using a GPR but that will be decided based on ABI.
2282 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2283 !Subtarget.hasStdExtZfhminOrZhinxmin())
2284 return 1;
2285
2287}
2288
2290 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2291 unsigned &NumIntermediates, MVT &RegisterVT) const {
2293 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2294
2295 return NumRegs;
2296}
2297
2298// Changes the condition code and swaps operands if necessary, so the SetCC
2299// operation matches one of the comparisons supported directly by branches
2300// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2301// with 1/-1.
2302static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2303 ISD::CondCode &CC, SelectionDAG &DAG) {
2304 // If this is a single bit test that can't be handled by ANDI, shift the
2305 // bit to be tested to the MSB and perform a signed compare with 0.
2306 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2307 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2308 isa<ConstantSDNode>(LHS.getOperand(1))) {
2309 uint64_t Mask = LHS.getConstantOperandVal(1);
2310 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2311 unsigned ShAmt = 0;
2312 if (isPowerOf2_64(Mask)) {
2314 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2315 } else {
2316 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2317 }
2318
2319 LHS = LHS.getOperand(0);
2320 if (ShAmt != 0)
2321 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2322 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2323 return;
2324 }
2325 }
2326
2327 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2328 int64_t C = RHSC->getSExtValue();
2329 switch (CC) {
2330 default: break;
2331 case ISD::SETGT:
2332 // Convert X > -1 to X >= 0.
2333 if (C == -1) {
2334 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2335 CC = ISD::SETGE;
2336 return;
2337 }
2338 break;
2339 case ISD::SETLT:
2340 // Convert X < 1 to 0 >= X.
2341 if (C == 1) {
2342 RHS = LHS;
2343 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2344 CC = ISD::SETGE;
2345 return;
2346 }
2347 break;
2348 }
2349 }
2350
2351 switch (CC) {
2352 default:
2353 break;
2354 case ISD::SETGT:
2355 case ISD::SETLE:
2356 case ISD::SETUGT:
2357 case ISD::SETULE:
2359 std::swap(LHS, RHS);
2360 break;
2361 }
2362}
2363
// Maps a vector MVT to its RVV register-group multiplier (LMUL).
// NOTE(review): the doxygen extraction dropped the signature line (file line
// 2364, presumably `RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {`)
// and the `return` lines of the scalar switch cases (file lines 2395, 2397,
// 2399, 2401, 2403, 2405, 2407) — confirm against the upstream file. All
// surviving tokens are unchanged below.
2365 if (VT.isRISCVVectorTuple()) {
// Tuple types encode LMUL via their per-field i8 element count: the
// smallest packing (nxv1i8xN) maps to LMUL=1/8, doubling at each range.
2366 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2367 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2368 return RISCVII::LMUL_F8;
2369 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2370 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2371 return RISCVII::LMUL_F4;
2372 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2373 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2374 return RISCVII::LMUL_F2;
2375 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2376 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2377 return RISCVII::LMUL_1;
2378 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2379 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2380 return RISCVII::LMUL_2;
2381 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2382 return RISCVII::LMUL_4;
2383 llvm_unreachable("Invalid vector tuple type LMUL.");
2384 }
2385
2386 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2387 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
// i1 vectors occupy one bit per element; scale to an equivalent byte-element
// size so the switch below can treat all element types uniformly.
2388 if (VT.getVectorElementType() == MVT::i1)
2389 KnownSize *= 8;
2390
2391 switch (KnownSize) {
2392 default:
2393 llvm_unreachable("Invalid LMUL.");
// NOTE(review): each case below lost its `return RISCVII::LMUL_*;` line in
// extraction (8 -> fractional 1/8 ... 512 -> 8, by powers of two).
2394 case 8:
2396 case 16:
2398 case 32:
2400 case 64:
2402 case 128:
2404 case 256:
2406 case 512:
2408 }
2409}
2410
// Selects the vector register class ID for a given LMUL.
// NOTE(review): extraction dropped the signature (file line 2411) and the
// `case RISCVII::LMUL_*:` labels (file lines 2415-2418, 2420, 2422, 2424);
// the four returns correspond to LMUL<=1, 2, 4 and 8 respectively — confirm
// against the upstream file. Surviving tokens are unchanged below.
2412 switch (LMul) {
2413 default:
2414 llvm_unreachable("Invalid LMUL.");
2419 return RISCV::VRRegClassID;
2421 return RISCV::VRM2RegClassID;
2423 return RISCV::VRM4RegClassID;
2425 return RISCV::VRM8RegClassID;
2426 }
2427}
2428
2429unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2430 RISCVII::VLMUL LMUL = getLMUL(VT);
2431 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2432 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2433 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2434 LMUL == RISCVII::VLMUL::LMUL_1) {
2435 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2436 "Unexpected subreg numbering");
2437 return RISCV::sub_vrm1_0 + Index;
2438 }
2439 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2440 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2441 "Unexpected subreg numbering");
2442 return RISCV::sub_vrm2_0 + Index;
2443 }
2444 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2445 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2446 "Unexpected subreg numbering");
2447 return RISCV::sub_vrm4_0 + Index;
2448 }
2449 llvm_unreachable("Invalid vector type.");
2450}
2451
// Returns the register class ID for a (possibly tuple) vector MVT.
// NOTE(review): extraction dropped the signature line (file line 2452,
// presumably `unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {`)
// — confirm against the upstream file. Code tokens below are unchanged.
2453 if (VT.isRISCVVectorTuple()) {
2454 unsigned NF = VT.getRISCVVectorTupleNumFields();
// Registers consumed by each tuple field: total known-min bits divided by
// NF fields of RVVBitsPerBlock each, clamped to at least one register.
2455 unsigned RegsPerField =
2456 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2457 (NF * RISCV::RVVBitsPerBlock));
2458 switch (RegsPerField) {
2459 case 1:
2460 if (NF == 2)
2461 return RISCV::VRN2M1RegClassID;
2462 if (NF == 3)
2463 return RISCV::VRN3M1RegClassID;
2464 if (NF == 4)
2465 return RISCV::VRN4M1RegClassID;
2466 if (NF == 5)
2467 return RISCV::VRN5M1RegClassID;
2468 if (NF == 6)
2469 return RISCV::VRN6M1RegClassID;
2470 if (NF == 7)
2471 return RISCV::VRN7M1RegClassID;
2472 if (NF == 8)
2473 return RISCV::VRN8M1RegClassID;
2474 break;
2475 case 2:
2476 if (NF == 2)
2477 return RISCV::VRN2M2RegClassID;
2478 if (NF == 3)
2479 return RISCV::VRN3M2RegClassID;
2480 if (NF == 4)
2481 return RISCV::VRN4M2RegClassID;
2482 break;
2483 case 4:
// M4 groups only support 2 fields (2 x 4 = the full 8-register bank).
2484 assert(NF == 2);
2485 return RISCV::VRN2M4RegClassID;
2486 default:
2487 break;
2488 }
2489 llvm_unreachable("Invalid vector tuple type RegClass.");
2490 }
2491
// Mask vectors always fit in a single VR regardless of element count.
2492 if (VT.getVectorElementType() == MVT::i1)
2493 return RISCV::VRRegClassID;
2494 return getRegClassIDForLMUL(getLMUL(VT));
2495}
2496
2497// Attempt to decompose a subvector insert/extract between VecVT and
2498// SubVecVT via subregister indices. Returns the subregister index that
2499// can perform the subvector insert/extract with the given element index, as
2500// well as the index corresponding to any leftover subvectors that must be
2501// further inserted/extracted within the register class for SubVecVT.
// NOTE(review): extraction dropped the qualified function-name line (file
// line 2503, presumably
// `RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(`) —
// confirm against the upstream file. Code tokens below are unchanged.
2502std::pair<unsigned, unsigned>
2504 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2505 const RISCVRegisterInfo *TRI) {
// The descent loop below relies on the class IDs being ordered by LMUL.
2506 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2507 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2508 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2509 "Register classes not ordered");
2510 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2511 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2512
2513 // If VecVT is a vector tuple type, either it's the tuple type with same
2514 // RegClass with SubVecVT or SubVecVT is a actually a subvector of the VecVT.
2515 if (VecVT.isRISCVVectorTuple()) {
2516 if (VecRegClassID == SubRegClassID)
2517 return {RISCV::NoSubRegister, 0};
2518
2519 assert(SubVecVT.isScalableVector() &&
2520 "Only allow scalable vector subvector.");
2521 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2522 "Invalid vector tuple insert/extract for vector and subvector with "
2523 "different LMUL.");
2524 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2525 }
2526
2527 // Try to compose a subregister index that takes us from the incoming
2528 // LMUL>1 register class down to the outgoing one. At each step we half
2529 // the LMUL:
2530 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2531 // Note that this is not guaranteed to find a subregister index, such as
2532 // when we are extracting from one VR type to another.
2533 unsigned SubRegIdx = RISCV::NoSubRegister;
2534 for (const unsigned RCID :
2535 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2536 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
// Halve the type and decide whether the target index lives in the low or
// high half; adjust the residual element index when descending into the
// high half.
2537 VecVT = VecVT.getHalfNumVectorElementsVT();
2538 bool IsHi =
2539 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2540 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2541 getSubregIndexByMVT(VecVT, IsHi));
2542 if (IsHi)
2543 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2544 }
2545 return {SubRegIdx, InsertExtractIdx};
2546}
2547
2548// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2549// stores for those types.
2550bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2551 return !Subtarget.useRVVForFixedLengthVectors() ||
2552 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2553}
2554
// Returns whether a scalar type is a legal RVV vector element type on this
// subtarget. NOTE(review): extraction dropped the signature line (file line
// 2555) — confirm the function name/qualifiers against the upstream file.
// Code tokens below are unchanged.
2556 if (!ScalarTy.isSimple())
2557 return false;
2558 switch (ScalarTy.getSimpleVT().SimpleTy) {
2559 case MVT::iPTR:
// Pointers are 64-bit on RV64, so they need I64 vector support there.
2560 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2561 case MVT::i8:
2562 case MVT::i16:
2563 case MVT::i32:
2564 return true;
2565 case MVT::i64:
2566 return Subtarget.hasVInstructionsI64();
2567 case MVT::f16:
2568 return Subtarget.hasVInstructionsF16Minimal();
2569 case MVT::bf16:
2570 return Subtarget.hasVInstructionsBF16Minimal();
2571 case MVT::f32:
2572 return Subtarget.hasVInstructionsF32();
2573 case MVT::f64:
2574 return Subtarget.hasVInstructionsF64();
2575 default:
2576 return false;
2577 }
2578}
2579
2580
2581unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2582 return NumRepeatedDivisors;
2583}
2584
// Extracts the VL operand of a RISC-V vector intrinsic node, or SDValue()
// when the intrinsic has no entry in the intrinsics table.
// NOTE(review): extraction dropped the signature (file line 2585) and the
// declaration head of `II` (file line 2591, presumably
// `const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =`) — confirm
// against the upstream file. Code tokens below are unchanged.
2586 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2587 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2588 "Unexpected opcode");
// Chained intrinsics carry the chain at operand 0, shifting the ID by one.
2589 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2590 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2592 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2593 if (!II)
2594 return SDValue();
// +1 skips the intrinsic-ID operand; +HasChain skips the chain if present.
2595 return Op.getOperand(II->VLOperand + 1 + HasChain);
2596}
2597
// File-local predicate: should this fixed-length vector type be lowered via
// RVV scalable containers? NOTE(review): extraction dropped the first
// signature line (file line 2598, presumably
// `static bool useRVVForFixedLengthVectorVT(MVT VT,`) — confirm against the
// upstream file. Code tokens below are unchanged.
2599 const RISCVSubtarget &Subtarget) {
2600 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2601 if (!Subtarget.useRVVForFixedLengthVectors())
2602 return false;
2603
2604 // We only support a set of vector types with a consistent maximum fixed size
2605 // across all supported vector element types to avoid legalization issues.
2606 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2607 // fixed-length vector type we support is 1024 bytes.
2608 if (VT.getFixedSizeInBits() > 1024 * 8)
2609 return false;
2610
2611 unsigned MinVLen = Subtarget.getRealMinVLen();
2612
2613 MVT EltVT = VT.getVectorElementType();
2614
2615 // Don't use RVV for vectors we cannot scalarize if required.
2616 switch (EltVT.SimpleTy) {
2617 // i1 is supported but has different rules.
2618 default:
2619 return false;
2620 case MVT::i1:
2621 // Masks can only use a single register.
2622 if (VT.getVectorNumElements() > MinVLen)
2623 return false;
// Masks pack 8 elements per byte; shrink the effective VLEN accordingly
// for the LMUL computation below.
2624 MinVLen /= 8;
2625 break;
2626 case MVT::i8:
2627 case MVT::i16:
2628 case MVT::i32:
2629 break;
2630 case MVT::i64:
2631 if (!Subtarget.hasVInstructionsI64())
2632 return false;
2633 break;
2634 case MVT::f16:
2635 if (!Subtarget.hasVInstructionsF16Minimal())
2636 return false;
2637 break;
2638 case MVT::bf16:
2639 if (!Subtarget.hasVInstructionsBF16Minimal())
2640 return false;
2641 break;
2642 case MVT::f32:
2643 if (!Subtarget.hasVInstructionsF32())
2644 return false;
2645 break;
2646 case MVT::f64:
2647 if (!Subtarget.hasVInstructionsF64())
2648 return false;
2649 break;
2650 }
2651
2652 // Reject elements larger than ELEN.
2653 if (EltVT.getSizeInBits() > Subtarget.getELen())
2654 return false;
2655
// Registers needed to hold VT at the minimum guaranteed VLEN.
2656 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2657 // Don't use RVV for types that don't fit.
2658 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2659 return false;
2660
2661 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2662 // the base fixed length RVV support in place.
2663 if (!VT.isPow2VectorType())
2664 return false;
2665
2666 return true;
2667}
2668
// Member wrapper: forwards to the file-local helper, supplying the subtarget
// owned by this lowering object.
2669bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2670 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2671}
2672
2673// Return the largest legal scalable vector type that matches VT's element type.
// NOTE(review): extraction dropped the signature line (file line 2674,
// presumably
// `static MVT getContainerForFixedLengthVector(const TargetLowering &TLI,`
// `MVT VT, const RISCVSubtarget &Subtarget) {`) and the initializer of
// `NumElts` (file line 2701) — confirm against the upstream file. Code
// tokens below are unchanged.
2675 const RISCVSubtarget &Subtarget) {
2676 // This may be called before legal types are setup.
2677 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2678 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2679 "Expected legal fixed length vector!");
2680
2681 unsigned MinVLen = Subtarget.getRealMinVLen();
2682 unsigned MaxELen = Subtarget.getELen();
2683
2684 MVT EltVT = VT.getVectorElementType();
2685 switch (EltVT.SimpleTy) {
2686 default:
2687 llvm_unreachable("unexpected element type for RVV container");
2688 case MVT::i1:
2689 case MVT::i8:
2690 case MVT::i16:
2691 case MVT::i32:
2692 case MVT::i64:
2693 case MVT::bf16:
2694 case MVT::f16:
2695 case MVT::f32:
2696 case MVT::f64: {
2697 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2698 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2699 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2700 unsigned NumElts =
2702 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2703 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2704 return MVT::getScalableVectorVT(EltVT, NumElts);
2705 }
2706 }
2707}
2708
// NOTE(review): overload of getContainerForFixedLengthVector taking a
// SelectionDAG; extraction dropped its first signature line (file line 2709)
// and the first line of the forwarding call (file line 2711) — confirm
// against the upstream file. Remaining tokens are unchanged.
2710 const RISCVSubtarget &Subtarget) {
2712 Subtarget);
2713}
2714
// Member wrapper: forwards to the file-local helper with this lowering and
// its subtarget. NOTE(review): extraction dropped the signature line (file
// line 2715) — confirm against the upstream file.
2716 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2717}
2718
2719// Grow V to consume an entire RVV register.
// NOTE(review): extraction dropped the first signature line (file line 2720,
// presumably
// `static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,`)
// — confirm against the upstream file. Code tokens below are unchanged.
2721 const RISCVSubtarget &Subtarget) {
2722 assert(VT.isScalableVector() &&
2723 "Expected to convert into a scalable vector!");
2724 assert(V.getValueType().isFixedLengthVector() &&
2725 "Expected a fixed length vector operand!");
2726 SDLoc DL(V);
2727 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
// Insert the fixed-length value at element 0 of an undef scalable vector.
2728 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2729}
2730
2731// Shrink V so it's just big enough to maintain a VT's worth of data.
// NOTE(review): extraction dropped the first signature line (file line 2732)
// and the head of the first assert (file line 2734, presumably
// `assert(VT.isFixedLengthVector() &&`) — confirm against the upstream file.
// Code tokens below are unchanged.
2733 const RISCVSubtarget &Subtarget) {
2735 "Expected to convert into a fixed length vector!");
2736 assert(V.getValueType().isScalableVector() &&
2737 "Expected a scalable vector operand!");
2738 SDLoc DL(V);
2739 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
// Extract the leading fixed-length portion from the scalable vector.
2740 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2741}
2742
2743/// Return the type of the mask type suitable for masking the provided
2744/// vector type. This is simply an i1 element type vector of the same
2745/// (possibly scalable) length.
// NOTE(review): extraction dropped the declaration of `EC` (file line 2748,
// presumably `ElementCount EC = VecVT.getVectorElementCount();`) — confirm
// against the upstream file. Code tokens below are unchanged.
2746static MVT getMaskTypeFor(MVT VecVT) {
2747 assert(VecVT.isVector());
2749 return MVT::getVectorVT(MVT::i1, EC);
2750}
2751
2752/// Creates an all ones mask suitable for masking a vector of type VecTy with
2753/// vector length VL. .
2754static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2755 SelectionDAG &DAG) {
2756 MVT MaskVT = getMaskTypeFor(VecVT);
2757 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2758}
2759
// Returns the default (Mask, VL) pair for a scalable vector: an all-ones
// mask and VL = X0 (i.e. VLMAX). NOTE(review): extraction dropped the name
// line (file line 2761, presumably
// `getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,`)
// — confirm against the upstream file. Code tokens below are unchanged.
2760static std::pair<SDValue, SDValue>
2762 const RISCVSubtarget &Subtarget) {
2763 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
// X0 as the AVL operand requests VLMAX.
2764 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2765 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2766 return {Mask, VL};
2767}
2768
2769static std::pair<SDValue, SDValue>
2770getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2771 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2772 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2773 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2774 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2775 return {Mask, VL};
2776}
2777
2778// Gets the two common "VL" operands: an all-ones mask and the vector length.
2779// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2780// the vector type that the fixed-length vector is contained in. Otherwise if
2781// VecVT is scalable, then ContainerVT should be the same as VecVT.
2782static std::pair<SDValue, SDValue>
2783getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2784 const RISCVSubtarget &Subtarget) {
2785 if (VecVT.isFixedLengthVector())
2786 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2787 Subtarget);
2788 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2789 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2790}
2791
// Builds an XLenVT node holding the (possibly scalable) element count of
// VecVT. NOTE(review): extraction dropped the first signature line (file
// line 2792, presumably
// `SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,`) —
// confirm against the upstream file. Code tokens below are unchanged.
2793 SelectionDAG &DAG) const {
2794 assert(VecVT.isScalableVector() && "Expected scalable vector");
2795 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2796 VecVT.getVectorElementCount());
2797}
2798
// Computes {MinVLMAX, MaxVLMAX} for VecVT from the subtarget's real min/max
// VLEN bounds. NOTE(review): extraction dropped the qualified name line
// (file line 2800, presumably
// `RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,`) — confirm against
// the upstream file. Code tokens below are unchanged.
2799std::pair<unsigned, unsigned>
2801 const RISCVSubtarget &Subtarget) {
2802 assert(VecVT.isScalableVector() && "Expected scalable vector");
2803
2804 unsigned EltSize = VecVT.getScalarSizeInBits();
2805 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2806
2807 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2808 unsigned MaxVLMAX =
2809 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2810
2811 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2812 unsigned MinVLMAX =
2813 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2814
2815 return std::make_pair(MinVLMAX, MaxVLMAX);
2816}
2817
2818// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2819// of either is (currently) supported. This can get us into an infinite loop
2820// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2821// as a ..., etc.
2822// Until either (or both) of these can reliably lower any node, reporting that
2823// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2824// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2825// which is not desirable.
// NOTE(review): extraction dropped the first signature line (file line 2826,
// presumably
// `bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(`) —
// confirm against the upstream file. Code tokens below are unchanged.
2827 EVT VT, unsigned DefinedValues) const {
2828 return false; // Unconditionally disabled; see the rationale above.
2829}
2830
// Estimates the per-instruction cost implied by the LMUL of VT, scaled by
// the subtarget's DLEN factor. NOTE(review): extraction dropped the
// signature (file line 2831), the non-vector early return (file line 2835),
// and the right-hand side of the std::tie assignment (file line 2842,
// presumably `RISCVVType::decodeVLMUL(getLMUL(VT));`) — confirm against the
// upstream file. Code tokens below are unchanged.
2832 // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
2833 // implementation-defined.
2834 if (!VT.isVector())
2836 unsigned DLenFactor = Subtarget.getDLenFactor();
2837 unsigned Cost;
2838 if (VT.isScalableVector()) {
2839 unsigned LMul;
2840 bool Fractional;
2841 std::tie(LMul, Fractional) =
// Fractional LMULs below the datapath width still cost proportionally more
// on narrow-DLEN implementations; whole LMULs scale linearly.
2843 if (Fractional)
2844 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2845 else
2846 Cost = (LMul * DLenFactor);
2847 } else {
2848 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2849 }
2850 return Cost;
2851}
2852
2853
2854/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2855/// is generally quadratic in the number of vreg implied by LMUL. Note that
2856/// operand (index and possibly mask) are handled separately.
// NOTE(review): extraction dropped the signature line (file line 2857) —
// confirm against the upstream file. The quadratic model is expressed as
// LMUL-cost squared below.
2858 return getLMULCost(VT) * getLMULCost(VT);
2859}
2860
2861/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2862/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2863/// or may track the vrgather.vv cost. It is implementation-dependent.
// NOTE(review): extraction dropped the signature line (file line 2864) —
// confirm against the upstream file.
2865 return getLMULCost(VT);
2866}
2867
2868/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2869/// for the type VT. (This does not cover the vslide1up or vslide1down
2870/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2871/// or may track the vrgather.vv cost. It is implementation-dependent.
// NOTE(review): extraction dropped the signature line (file line 2872) —
// confirm against the upstream file.
2873 return getLMULCost(VT);
2874}
2875
2876/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2877/// for the type VT. (This does not cover the vslide1up or vslide1down
2878/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2879/// or may track the vrgather.vv cost. It is implementation-dependent.
// NOTE(review): extraction dropped the signature line (file line 2880) —
// confirm against the upstream file.
2881 return getLMULCost(VT);
2882}
2883
// Promotes a scalar f16 (without Zfh/Zhinx) or bf16 operation to f32, then
// rounds the result back; other value types are returned unchanged.
// NOTE(review): extraction dropped the first signature line (file line 2884)
// — confirm the helper's name against the upstream file. Code tokens below
// are unchanged.
2885 const RISCVSubtarget &Subtarget) {
2886 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2887 // bf16 conversions are always promoted to f32.
2888 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2889 Op.getValueType() == MVT::bf16) {
2890 bool IsStrict = Op->isStrictFPOpcode();
2891
2892 SDLoc DL(Op);
// Strict ops must thread the chain through both the f32 op and the
// final STRICT_FP_ROUND.
2893 if (IsStrict) {
2894 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
2895 {Op.getOperand(0), Op.getOperand(1)});
2896 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
2897 {Op.getValueType(), MVT::Other},
2898 {Val.getValue(1), Val.getValue(0),
2899 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
2900 }
2901 return DAG.getNode(
2902 ISD::FP_ROUND, DL, Op.getValueType(),
2903 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
2904 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
2905 }
2906
2907 // Other operations are legal.
2908 return Op;
2909}
2910
// Lowers FP_TO_SINT_SAT / FP_TO_UINT_SAT for scalars and vectors.
// NOTE(review): extraction dropped the first signature line (file line 2911)
// plus several interior lines: the RV64 W-conversion opcode assignment
// (2935), the rounding-mode operand of the scalar conversion (2943), the
// NaN-compare condcode operand (2950), the RVV conversion-opcode selection
// (3000), and the unsigned clip opcode (3008) — confirm each against the
// upstream file. Code tokens below are unchanged.
2912 const RISCVSubtarget &Subtarget) {
2913 // RISC-V FP-to-int conversions saturate to the destination register size, but
2914 // don't produce 0 for nan. We can use a conversion instruction and fix the
2915 // nan case with a compare and a select.
2916 SDValue Src = Op.getOperand(0);
2917
2918 MVT DstVT = Op.getSimpleValueType();
2919 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2920
2921 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2922
2923 if (!DstVT.isVector()) {
2924 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2925 // the result.
2926 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2927 Src.getValueType() == MVT::bf16) {
2928 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2929 }
2930
2931 unsigned Opc;
2932 if (SatVT == DstVT)
2933 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2934 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2935 else
2936 return SDValue();
2937 // FIXME: Support other SatVTs by clamping before or after the conversion.
2938
2939 SDLoc DL(Op);
2940 SDValue FpToInt = DAG.getNode(
2941 Opc, DL, DstVT, Src,
2944
// FCVT_WU sign-extends its 32-bit result on RV64; mask back to zext.
2945 if (Opc == RISCVISD::FCVT_WU_RV64)
2946 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2947
// Src != Src detects NaN; select 0 in that case per the SAT semantics.
2948 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2949 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2951 }
2952
2953 // Vectors.
2954
2955 MVT DstEltVT = DstVT.getVectorElementType();
2956 MVT SrcVT = Src.getSimpleValueType();
2957 MVT SrcEltVT = SrcVT.getVectorElementType();
2958 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2959 unsigned DstEltSize = DstEltVT.getSizeInBits();
2960
2961 // Only handle saturating to the destination type.
2962 if (SatVT != DstEltVT)
2963 return SDValue();
2964
2965 MVT DstContainerVT = DstVT;
2966 MVT SrcContainerVT = SrcVT;
2967 if (DstVT.isFixedLengthVector()) {
2968 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2969 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2970 assert(DstContainerVT.getVectorElementCount() ==
2971 SrcContainerVT.getVectorElementCount() &&
2972 "Expected same element count");
2973 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2974 }
2975
2976 SDLoc DL(Op);
2977
2978 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2979
// Per-lane NaN detection (x != x) for the final merge-with-zero.
2980 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2981 {Src, Src, DAG.getCondCode(ISD::SETNE),
2982 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2983
2984 // Need to widen by more than 1 step, promote the FP type, then do a widening
2985 // convert.
2986 if (DstEltSize > (2 * SrcEltSize)) {
2987 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2988 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2989 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2990 }
2991
2992 MVT CvtContainerVT = DstContainerVT;
2993 MVT CvtEltVT = DstEltVT;
// Narrowing by more than one step: convert at half the source width first,
// then clip down iteratively below.
2994 if (SrcEltSize > (2 * DstEltSize)) {
2995 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2996 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2997 }
2998
2999 unsigned RVVOpc =
3001 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3002
3003 while (CvtContainerVT != DstContainerVT) {
3004 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3005 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3006 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3007 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3009 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3010 }
3011
// Replace NaN lanes with zero, as required by the SAT node semantics.
3012 SDValue SplatZero = DAG.getNode(
3013 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3014 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3015 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3016 Res, DAG.getUNDEF(DstContainerVT), VL);
3017
3018 if (DstVT.isFixedLengthVector())
3019 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3020
3021 return Res;
3022}
3023
// Promotes the FP source operand of a (possibly strict) conversion from f16
// (without Zfh/Zhinx) or bf16 up to f32 before performing the operation;
// other source types are returned unchanged.
// NOTE(review): extraction dropped the first signature line (file line 3024)
// — confirm the helper's name against the upstream file. Code tokens below
// are unchanged.
3025 const RISCVSubtarget &Subtarget) {
3026 bool IsStrict = Op->isStrictFPOpcode();
// Strict ops carry the chain at operand 0; the FP source follows it.
3027 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3028
3029 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3030 // bf16 conversions are always promoted to f32.
3031 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3032 SrcVal.getValueType() == MVT::bf16) {
3033 SDLoc DL(Op);
3034 if (IsStrict) {
3035 SDValue Ext =
3036 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3037 {Op.getOperand(0), SrcVal});
3038 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3039 {Ext.getValue(1), Ext.getValue(0)});
3040 }
3041 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3042 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3043 }
3044
3045 // Other operations are legal.
3046 return Op;
3047}
3048
// Maps an FP rounding-style ISD opcode to the RISC-V static rounding mode
// used by vfcvt. NOTE(review): extraction dropped the signature (file line
// 3049), the `case ISD::STRICT_FROUNDEVEN:` label (3052), and the fallthrough
// return (3076, presumably `return RISCVFPRndMode::Invalid;`) — confirm
// against the upstream file. Code tokens below are unchanged.
3050 switch (Opc) {
3051 case ISD::FROUNDEVEN:
3053 case ISD::VP_FROUNDEVEN:
3054 return RISCVFPRndMode::RNE;
3055 case ISD::FTRUNC:
3056 case ISD::STRICT_FTRUNC:
3057 case ISD::VP_FROUNDTOZERO:
3058 return RISCVFPRndMode::RTZ;
3059 case ISD::FFLOOR:
3060 case ISD::STRICT_FFLOOR:
3061 case ISD::VP_FFLOOR:
3062 return RISCVFPRndMode::RDN;
3063 case ISD::FCEIL:
3064 case ISD::STRICT_FCEIL:
3065 case ISD::VP_FCEIL:
3066 return RISCVFPRndMode::RUP;
3067 case ISD::FROUND:
3068 case ISD::STRICT_FROUND:
3069 case ISD::VP_FROUND:
3070 return RISCVFPRndMode::RMM;
3071 case ISD::FRINT:
3072 case ISD::VP_FRINT:
3073 return RISCVFPRndMode::DYN;
3074 }
3075
3077}
3078
3079// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3080// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3081// the integer domain and back. Taking care to avoid converting values that are
3082// nan or already correct.
// NOTE(review): extraction dropped the function-name line (file line 3084,
// presumably `lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op,
// SelectionDAG &DAG,`) and the rounding-mode selection lines inside the
// switch (3155-3156) — confirm against the upstream file. Code tokens below
// are unchanged.
3083static SDValue
3085 const RISCVSubtarget &Subtarget) {
3086 MVT VT = Op.getSimpleValueType();
3087 assert(VT.isVector() && "Unexpected type");
3088
3089 SDLoc DL(Op);
3090
3091 SDValue Src = Op.getOperand(0);
3092
3093 MVT ContainerVT = VT;
3094 if (VT.isFixedLengthVector()) {
3095 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3096 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3097 }
3098
3099 SDValue Mask, VL;
// VP ops carry their own mask/EVL operands; non-VP ops use defaults.
3100 if (Op->isVPOpcode()) {
3101 Mask = Op.getOperand(1);
3102 if (VT.isFixedLengthVector())
3103 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3104 Subtarget);
3105 VL = Op.getOperand(2);
3106 } else {
3107 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3108 }
3109
3110 // Freeze the source since we are increasing the number of uses.
3111 Src = DAG.getFreeze(Src);
3112
3113 // We do the conversion on the absolute value and fix the sign at the end.
3114 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3115
3116 // Determine the largest integer that can be represented exactly. This and
3117 // values larger than it don't have any fractional bits so don't need to
3118 // be converted.
3119 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3120 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3121 APFloat MaxVal = APFloat(FltSem);
3122 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3123 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3124 SDValue MaxValNode =
3125 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3126 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3127 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3128
3129 // If abs(Src) was larger than MaxVal or nan, keep it.
3130 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3131 Mask =
3132 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3133 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3134 Mask, Mask, VL});
3135
3136 // Truncate to integer and convert back to FP.
3137 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3138 MVT XLenVT = Subtarget.getXLenVT();
3139 SDValue Truncated;
3140
3141 switch (Op.getOpcode()) {
3142 default:
3143 llvm_unreachable("Unexpected opcode");
3144 case ISD::FRINT:
3145 case ISD::VP_FRINT:
3146 case ISD::FCEIL:
3147 case ISD::VP_FCEIL:
3148 case ISD::FFLOOR:
3149 case ISD::VP_FFLOOR:
3150 case ISD::FROUND:
3151 case ISD::FROUNDEVEN:
3152 case ISD::VP_FROUND:
3153 case ISD::VP_FROUNDEVEN:
3154 case ISD::VP_FROUNDTOZERO: {
// NOTE(review): the FRM value used below came from the dropped lines
// (3155-3156), presumably `matchRoundingOp(Op.getOpcode())`.
3157 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3158 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3159 break;
3160 }
3161 case ISD::FTRUNC:
3162 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3163 Mask, VL);
3164 break;
3165 case ISD::FNEARBYINT:
3166 case ISD::VP_FNEARBYINT:
3167 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3168 Mask, VL);
3169 break;
3170 }
3171
3172 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3173 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3174 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3175 Mask, VL);
3176
3177 // Restore the original sign so that -0.0 is preserved.
3178 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3179 Src, Src, Mask, VL);
3180
3181 if (!VT.isFixedLengthVector())
3182 return Truncated;
3183
3184 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3185}
3186
3187// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3188// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
3189// qNan and converting the new source to integer and back to FP.
// NOTE(review): extraction dropped the function-name line (file line 3191)
// and several interior lines: the SETCC/FADD node heads producing `Unorder`
// and the quieted `Src` (3211, 3216), the STRICT_FROUNDEVEN case label plus
// FRM selection (3253-3255), the STRICT_VFCVT_RTZ node head (3263), and the
// STRICT_FNEARBYINT case head (3266) — confirm each against the upstream
// file. Code tokens below are unchanged.
3190static SDValue
3192 const RISCVSubtarget &Subtarget) {
3193 SDLoc DL(Op);
3194 MVT VT = Op.getSimpleValueType();
3195 SDValue Chain = Op.getOperand(0);
3196 SDValue Src = Op.getOperand(1);
3197
3198 MVT ContainerVT = VT;
3199 if (VT.isFixedLengthVector()) {
3200 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3201 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3202 }
3203
3204 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3205
3206 // Freeze the source since we are increasing the number of uses.
3207 Src = DAG.getFreeze(Src);
3208
3209 // Convert sNan to qNan by executing x + x for all unordered element x in Src.
3210 MVT MaskVT = Mask.getSimpleValueType();
3212 DAG.getVTList(MaskVT, MVT::Other),
3213 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3214 DAG.getUNDEF(MaskVT), Mask, VL});
3215 Chain = Unorder.getValue(1);
3217 DAG.getVTList(ContainerVT, MVT::Other),
3218 {Chain, Src, Src, Src, Unorder, VL});
3219 Chain = Src.getValue(1);
3220
3221 // We do the conversion on the absolute value and fix the sign at the end.
3222 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3223
3224 // Determine the largest integer that can be represented exactly. This and
3225 // values larger than it don't have any fractional bits so don't need to
3226 // be converted.
3227 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3228 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3229 APFloat MaxVal = APFloat(FltSem);
3230 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3231 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3232 SDValue MaxValNode =
3233 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3234 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3235 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3236
3237 // If abs(Src) was larger than MaxVal or nan, keep it.
3238 Mask = DAG.getNode(
3239 RISCVISD::SETCC_VL, DL, MaskVT,
3240 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3241
3242 // Truncate to integer and convert back to FP.
3243 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3244 MVT XLenVT = Subtarget.getXLenVT();
3245 SDValue Truncated;
3246
3247 switch (Op.getOpcode()) {
3248 default:
3249 llvm_unreachable("Unexpected opcode");
3250 case ISD::STRICT_FCEIL:
3251 case ISD::STRICT_FFLOOR:
3252 case ISD::STRICT_FROUND:
3256 Truncated = DAG.getNode(
3257 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3258 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3259 break;
3260 }
3261 case ISD::STRICT_FTRUNC:
3262 Truncated =
3264 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3265 break;
3267 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3268 Mask, VL);
3269 break;
3270 }
3271 Chain = Truncated.getValue(1);
3272
3273 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3274 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3275 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3276 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3277 Truncated, Mask, VL);
3278 Chain = Truncated.getValue(1);
3279 }
3280
3281 // Restore the original sign so that -0.0 is preserved.
3282 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3283 Src, Src, Mask, VL);
3284
3285 if (VT.isFixedLengthVector())
3286 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3287 return DAG.getMergeValues({Truncated, Chain}, DL);
3288}
3290
// Lowers scalar FTRUNC/FCEIL/FFLOOR/FROUND-style nodes via the target FROUND
// node (vectors are dispatched to the vector expansion above; skipped when
// optimizing for size). NOTE(review): extraction dropped the function-name
// line (file line 3292) and the FRM selection line (3314, presumably
// `RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());`) —
// confirm against the upstream file. Code tokens below are unchanged.
3291static SDValue
3293 const RISCVSubtarget &Subtarget) {
3294 MVT VT = Op.getSimpleValueType();
3295 if (VT.isVector())
3296 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3297
// The FROUND expansion is larger than a libcall; avoid it at -Os/-Oz.
3298 if (DAG.shouldOptForSize())
3299 return SDValue();
3300
3301 SDLoc DL(Op);
3302 SDValue Src = Op.getOperand(0);
3303
3304 // Create an integer the size of the mantissa with the MSB set. This and all
3305 // values larger than it don't have any fractional bits so don't need to be
3306 // converted.
3307 const fltSemantics &FltSem = VT.getFltSemantics();
3308 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3309 APFloat MaxVal = APFloat(FltSem);
3310 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3311 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3312 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3313
3315 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3316 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3317}
3318
3319// Expand vector LRINT and LLRINT by converting to the integer domain.
// NOTE(review): extraction dropped the signature line (file line 3320,
// presumably `static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,`)
// and the rounding-mode operand line of the conversion node (3337, which
// presumably passed a DYN rounding-mode target constant) — confirm against
// the upstream file. Code tokens below are unchanged.
3321 const RISCVSubtarget &Subtarget) {
3322 MVT VT = Op.getSimpleValueType();
3323 assert(VT.isVector() && "Unexpected type");
3324
3325 SDLoc DL(Op);
3326 SDValue Src = Op.getOperand(0);
3327 MVT ContainerVT = VT;
3328
3329 if (VT.isFixedLengthVector()) {
3330 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3331 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3332 }
3333
3334 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3335 SDValue Truncated = DAG.getNode(
3336 RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
3338 VL);
3339
3340 if (!VT.isFixedLengthVector())
3341 return Truncated;
3342
3343 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3344}
3345
// Build a RISCVISD::VSLIDEDOWN_VL node with operand order
// {Passthru, Op, Offset, Mask, VL, PolicyOp}.
// NOTE(review): original lines 3347 (function name), 3350 (the trailing
// Policy parameter) and 3352 (the undef-passthru policy adjustment) are
// missing from this extraction.
3346 static SDValue
3348 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3349 SDValue Offset, SDValue Mask, SDValue VL,
// When the passthru is undef the tail/mask policy can be relaxed (done on
// the missing line).
3351 if (Passthru.isUndef())
// The policy is passed to the node as an XLen-sized target constant.
3353 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3354 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3355 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3356}
3357
// Build a RISCVISD::VSLIDEUP_VL node; mirrors getVSlidedown above with the
// same operand order {Passthru, Op, Offset, Mask, VL, PolicyOp}.
// NOTE(review): original lines 3362 (the trailing Policy parameter) and
// 3364 (the undef-passthru policy adjustment) are missing from this
// extraction.
3358 static SDValue
3359 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3360 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3361 SDValue VL,
// Policy relaxation for an undef passthru is applied on the missing line.
3363 if (Passthru.isUndef())
3365 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3366 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3367 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3368}
3369
// Map a vector MVT to its single-register (LMUL=1) equivalent.
// NOTE(review): original lines 3371 and 3373-3375 (the assert condition and
// the actual type computation) are missing from this extraction — only the
// signature and the assert's message text remain. Confirm the body against
// the upstream file.
3370 static MVT getLMUL1VT(MVT VT) {
3372 "Unexpected vector MVT");
3376}
3377
// Tail of struct VIDSequence. Per the aggregate construction at the end of
// isSimpleVIDSequence below — VIDSequence{StepNum, StepDenom, Addend} — the
// two preceding fields (missing from this extraction, original lines
// 3378-3380) hold the sequence's step numerator and denominator.
// Addend is the constant offset added to every element of the sequence.
3381 int64_t Addend;
3382};
3383
// Convert APF to a signed integer iff the conversion is exact; otherwise
// return std::nullopt.
// NOTE(review): original lines 3385 (the BitWidth parameter), 3393 (the
// local rounding-mode variable used below) and 3398 (the invalid-operation
// comparison in the condition) are missing from this extraction.
3384 static std::optional<APInt> getExactInteger(const APFloat &APF,
3386 // We will use a SINT_TO_FP to materialize this constant so we should use a
3387 // signed APSInt here.
3388 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3389 // We use an arbitrary rounding mode here. If a floating-point is an exact
3390 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3391 // the rounding mode changes the output value, then it is not an exact
3392 // integer.
3394 bool IsExact;
3395 // If it is out of signed integer range, it will return an invalid operation.
3396 // If it is not an exact integer, IsExact is false.
3397 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3399 !IsExact)
3400 return std::nullopt;
// Success: return the exact value as a BitWidth-wide APInt.
3401 return ValInt.extractBits(BitWidth, 0);
3402}
3403
3404 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3405 // to the (non-zero) step S and start value X. This can be then lowered as the
3406 // RVV sequence (VID * S) + X, for example.
3407 // The step S is represented as an integer numerator divided by a positive
3408 // denominator. Note that the implementation currently only identifies
3409 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
3410 // cannot detect 2/3, for example.
3411 // Note that this method will also match potentially unappealing index
3412 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3413 // determine whether this is worth generating code for.
3414 //
3415 // EltSizeInBits is the size of the type that the sequence will be calculated
3416 // in, i.e. SEW for build_vectors or XLEN for address calculations.
3417 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3418 unsigned EltSizeInBits) {
3419 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
// Only fully-constant build_vectors can be analyzed as VID sequences.
3420 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3421 return std::nullopt;
3422 bool IsInteger = Op.getValueType().isInteger();
3423
3424 std::optional<unsigned> SeqStepDenom;
3425 std::optional<APInt> SeqStepNum;
3426 std::optional<APInt> SeqAddend;
// Last non-undef element seen, paired with its index (for interpolation
// across undef gaps).
3427 std::optional<std::pair<APInt, unsigned>> PrevElt;
3428 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3429
3430 // First extract the ops into a list of constant integer values. This may not
3431 // be possible for floats if they're not all representable as integers.
// NOTE(review): original line 3432 — the declaration of `Elts` (a container
// of std::optional<APInt>, one slot per operand) — is missing from this
// extraction.
3433 const unsigned OpSize = Op.getScalarValueSizeInBits();
3434 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3435 if (Elt.isUndef()) {
3436 Elts[Idx] = std::nullopt;
3437 continue;
3438 }
3439 if (IsInteger) {
// Truncate to the operand's own width, then zero-extend into the width the
// sequence is evaluated in.
3440 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3441 } else {
3442 auto ExactInteger =
3443 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3444 if (!ExactInteger)
3445 return std::nullopt;
3446 Elts[Idx] = *ExactInteger;
3447 }
3448 }
3449
3450 for (auto [Idx, Elt] : enumerate(Elts)) {
3451 // Assume undef elements match the sequence; we just have to be careful
3452 // when interpolating across them.
3453 if (!Elt)
3454 continue;
3455
3456 if (PrevElt) {
3457 // Calculate the step since the last non-undef element, and ensure
3458 // it's consistent across the entire sequence.
3459 unsigned IdxDiff = Idx - PrevElt->second;
3460 APInt ValDiff = *Elt - PrevElt->first;
3461
3462 // A zero-value value difference means that we're somewhere in the middle
3463 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3464 // step change before evaluating the sequence.
3465 if (ValDiff == 0)
3466 continue;
3467
3468 int64_t Remainder = ValDiff.srem(IdxDiff);
3469 // Normalize the step if it's greater than 1.
3470 if (Remainder != ValDiff.getSExtValue()) {
3471 // The difference must cleanly divide the element span.
3472 if (Remainder != 0)
3473 return std::nullopt;
3474 ValDiff = ValDiff.sdiv(IdxDiff);
3475 IdxDiff = 1;
3476 }
3477
// Steps must agree across the whole vector; record the first one seen and
// reject any inconsistency.
3478 if (!SeqStepNum)
3479 SeqStepNum = ValDiff;
3480 else if (ValDiff != SeqStepNum)
3481 return std::nullopt;
3482
3483 if (!SeqStepDenom)
3484 SeqStepDenom = IdxDiff;
3485 else if (IdxDiff != *SeqStepDenom)
3486 return std::nullopt;
3487 }
3488
3489 // Record this non-undef element for later.
3490 if (!PrevElt || PrevElt->first != *Elt)
3491 PrevElt = std::make_pair(*Elt, Idx);
3492 }
3493
3494 // We need to have logged a step for this to count as a legal index sequence.
3495 if (!SeqStepNum || !SeqStepDenom)
3496 return std::nullopt;
3497
3498 // Loop back through the sequence and validate elements we might have skipped
3499 // while waiting for a valid step. While doing this, log any sequence addend.
3500 for (auto [Idx, Elt] : enumerate(Elts)) {
3501 if (!Elt)
3502 continue;
// Expected value at Idx is (Idx * StepNum) / StepDenom; the addend is the
// per-element deviation and must be identical everywhere.
3503 APInt ExpectedVal =
3504 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3505 *SeqStepNum)
3506 .sdiv(*SeqStepDenom);
3507
3508 APInt Addend = *Elt - ExpectedVal;
3509 if (!SeqAddend)
3510 SeqAddend = Addend;
3511 else if (Addend != SeqAddend)
3512 return std::nullopt;
3513 }
3514
3515 assert(SeqAddend && "Must have an addend if we have a step");
3516
3517 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3518 SeqAddend->getSExtValue()};
3519}
3520
3521// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3522// and lower it as a VRGATHER_VX_VL from the source vector.
3523static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3524 SelectionDAG &DAG,
3525 const RISCVSubtarget &Subtarget) {
3526 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3527 return SDValue();
3528 SDValue Src = SplatVal.getOperand(0);
3529 // Don't perform this optimization for i1 vectors, or if the element types are
3530 // different
3531 // FIXME: Support i1 vectors, maybe by promoting to i8?
3532 MVT EltTy = VT.getVectorElementType();
3533 MVT SrcVT = Src.getSimpleValueType();
3534 if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType())
3535 return SDValue();
3536 SDValue Idx = SplatVal.getOperand(1);
3537 // The index must be a legal type.
3538 if (Idx.getValueType() != Subtarget.getXLenVT())
3539 return SDValue();
3540
3541 // Check that we know Idx lies within VT
3542 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3543 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3544 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3545 return SDValue();
3546 }
3547
3548 // Convert fixed length vectors to scalable
3549 MVT ContainerVT = VT;
3550 if (VT.isFixedLengthVector())
3551 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3552
3553 MVT SrcContainerVT = SrcVT;
3554 if (SrcVT.isFixedLengthVector()) {
3555 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3556 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3557 }
3558
3559 // Put Vec in a VT sized vector
3560 if (SrcContainerVT.getVectorMinNumElements() <
3561 ContainerVT.getVectorMinNumElements())
3562 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3563 DAG.getUNDEF(ContainerVT), Src,
3564 DAG.getVectorIdxConstant(0, DL));
3565 else
3566 Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Src,
3567 DAG.getVectorIdxConstant(0, DL));
3568
3569 // We checked that Idx fits inside VT earlier
3570 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3571 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3572 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3573 if (VT.isFixedLengthVector())
3574 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3575 return Gather;
3576}
3577
3578 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3579 /// which constitute a large proportion of the elements. In such cases we can
3580 /// splat a vector with the dominant element and make up the shortfall with
3581 /// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3582 /// Note that this includes vectors of 2 elements by association. The
3583 /// upper-most element is the "dominant" one, allowing us to use a splat to
3584 /// "insert" the upper element, and an insert of the lower element at position
3585 /// 0, which improves codegen.
// NOTE(review): original line 3586 (the function name and leading
// parameters) is missing from this extraction.
3587 const RISCVSubtarget &Subtarget) {
3588 MVT VT = Op.getSimpleValueType();
3589 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3590
3591 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3592
3593 SDLoc DL(Op);
3594 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3595
3596 MVT XLenVT = Subtarget.getXLenVT();
3597 unsigned NumElts = Op.getNumOperands();
3598
3599 SDValue DominantValue;
3600 unsigned MostCommonCount = 0;
3601 DenseMap<SDValue, unsigned> ValueCounts;
3602 unsigned NumUndefElts =
3603 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3604
3605 // Track the number of scalar loads we know we'd be inserting, estimated as
3606 // any non-zero floating-point constant. Other kinds of element are either
3607 // already in registers or are materialized on demand. The threshold at which
3608 // a vector load is more desirable than several scalar materializion and
3609 // vector-insertion instructions is not known.
3610 unsigned NumScalarLoads = 0;
3611
// Histogram pass: count occurrences of each distinct non-undef element and
// track the most common one.
3612 for (SDValue V : Op->op_values()) {
3613 if (V.isUndef())
3614 continue;
3615
3616 unsigned &Count = ValueCounts[V];
// Only charge a scalar-load cost the first time a value is seen.
3617 if (0 == Count)
3618 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3619 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3620
3621 // Is this value dominant? In case of a tie, prefer the highest element as
3622 // it's cheaper to insert near the beginning of a vector than it is at the
3623 // end.
3624 if (++Count >= MostCommonCount) {
3625 DominantValue = V;
3626 MostCommonCount = Count;
3627 }
3628 }
3629
3630 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3631 unsigned NumDefElts = NumElts - NumUndefElts;
3632 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3633
3634 // Don't perform this optimization when optimizing for size, since
3635 // materializing elements and inserting them tends to cause code bloat.
3636 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3637 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3638 ((MostCommonCount > DominantValueCountThreshold) ||
3639 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3640 // Start by splatting the most common element.
3641 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3642
3643 DenseSet<SDValue> Processed{DominantValue};
3644
3645 // We can handle an insert into the last element (of a splat) via
3646 // v(f)slide1down. This is slightly better than the vslideup insert
3647 // lowering as it avoids the need for a vector group temporary. It
3648 // is also better than using vmerge.vx as it avoids the need to
3649 // materialize the mask in a vector register.
3650 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3651 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3652 LastOp != DominantValue) {
3653 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
// NOTE(review): original line 3655 — the opcode selection (floating-point
// vs integer slide1down variant) initializing OpCode — is missing from this
// extraction.
3654 auto OpCode =
3656 if (!VT.isFloatingPoint())
3657 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3658 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3659 LastOp, Mask, VL);
3660 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3661 Processed.insert(LastOp);
3662 }
3663
3664 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3665 for (const auto &OpIdx : enumerate(Op->ops())) {
3666 const SDValue &V = OpIdx.value();
// Skip undefs and values already blended/inserted.
3667 if (V.isUndef() || !Processed.insert(V).second)
3668 continue;
3669 if (ValueCounts[V] == 1) {
// A unique value: a single INSERT_VECTOR_ELT suffices.
3670 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3671 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3672 } else {
3673 // Blend in all instances of this value using a VSELECT, using a
3674 // mask where each bit signals whether that element is the one
3675 // we're after.
// NOTE(review): original line 3676 — the declaration of `Ops` (the vector
// collecting the select-mask constants) — is missing from this extraction.
3677 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3678 return DAG.getConstant(V == V1, DL, XLenVT);
3679 });
3680 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3681 DAG.getBuildVector(SelMaskTy, DL, Ops),
3682 DAG.getSplatBuildVector(VT, DL, V), Vec);
3683 }
3684 }
3685
3686 return Vec;
3687 }
3688
3689 return SDValue();
3690}
3691
// Lower a fixed-length BUILD_VECTOR whose operands are all constants. Tries,
// in order: i1-mask special cases, a plain splat, a VID arithmetic sequence,
// a single-scalar bitcast for tiny vectors, a "hidden splat" at a wider
// element type, narrowing to <N x i8> with sign-extension, and finally the
// dominant-values strategy. Returns SDValue() to fall back to constant-pool
// lowering.
// NOTE(review): original line 3692 (the function name and leading
// parameters) is missing from this extraction.
3693 const RISCVSubtarget &Subtarget) {
3694 MVT VT = Op.getSimpleValueType();
3695 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3696
3697 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3698
3699 SDLoc DL(Op);
3700 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3701
3702 MVT XLenVT = Subtarget.getXLenVT();
3703 unsigned NumElts = Op.getNumOperands();
3704
// i1 masks: all-zeros/all-ones become vmclr/vmset; otherwise build the mask
// bits in integer chunks.
3705 if (VT.getVectorElementType() == MVT::i1) {
3706 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3707 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3708 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3709 }
3710
3711 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3712 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3713 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3714 }
3715
3716 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3717 // scalar integer chunks whose bit-width depends on the number of mask
3718 // bits and XLEN.
3719 // First, determine the most appropriate scalar integer type to use. This
3720 // is at most XLenVT, but may be shrunk to a smaller vector element type
3721 // according to the size of the final vector - use i8 chunks rather than
3722 // XLenVT if we're producing a v8i1. This results in more consistent
3723 // codegen across RV32 and RV64.
3724 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3725 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3726 // If we have to use more than one INSERT_VECTOR_ELT then this
3727 // optimization is likely to increase code size; avoid performing it in
3728 // such a case. We can use a load from a constant pool in this case.
3729 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3730 return SDValue();
3731 // Now we can create our integer vector type. Note that it may be larger
3732 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3733 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3734 MVT IntegerViaVecVT =
3735 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3736 IntegerViaVecElts);
3737
3738 uint64_t Bits = 0;
3739 unsigned BitPos = 0, IntegerEltIdx = 0;
3740 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3741
// Pack the i1 operands LSB-first into integer chunks; undef bits become 0.
3742 for (unsigned I = 0; I < NumElts;) {
3743 SDValue V = Op.getOperand(I);
3744 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3745 Bits |= ((uint64_t)BitValue << BitPos);
3746 ++BitPos;
3747 ++I;
3748
3749 // Once we accumulate enough bits to fill our scalar type or process the
3750 // last element, insert into our vector and clear our accumulated data.
3751 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3752 if (NumViaIntegerBits <= 32)
3753 Bits = SignExtend64<32>(Bits);
3754 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3755 Elts[IntegerEltIdx] = Elt;
3756 Bits = 0;
3757 BitPos = 0;
3758 IntegerEltIdx++;
3759 }
3760 }
3761
3762 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3763
3764 if (NumElts < NumViaIntegerBits) {
3765 // If we're producing a smaller vector than our minimum legal integer
3766 // type, bitcast to the equivalent (known-legal) mask type, and extract
3767 // our final mask.
3768 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3769 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3770 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3771 DAG.getConstant(0, DL, XLenVT));
3772 } else {
3773 // Else we must have produced an integer type with the same size as the
3774 // mask type; bitcast for the final result.
3775 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3776 Vec = DAG.getBitcast(VT, Vec);
3777 }
3778
3779 return Vec;
3780 }
3781
// Plain splat of one constant: vmv.v.x / vfmv.v.f.
3782 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
// NOTE(review): original line 3784 — the integer-opcode alternative of this
// conditional (the ':' arm) — is missing from this extraction.
3783 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3785 if (!VT.isFloatingPoint())
3786 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3787 Splat =
3788 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3789 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3790 }
3791
3792 // Try and match index sequences, which we can lower to the vid instruction
3793 // with optional modifications. An all-undef vector is matched by
3794 // getSplatValue, above.
3795 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3796 int64_t StepNumerator = SimpleVID->StepNumerator;
3797 unsigned StepDenominator = SimpleVID->StepDenominator;
3798 int64_t Addend = SimpleVID->Addend;
3799
3800 assert(StepNumerator != 0 && "Invalid step");
3801 bool Negate = false;
3802 int64_t SplatStepVal = StepNumerator;
3803 unsigned StepOpcode = ISD::MUL;
3804 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3805 // anyway as the shift of 63 won't fit in uimm5.
// Power-of-two steps become shifts; a negative step is handled by negating
// at the end (SUB instead of ADD below).
3806 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3807 isPowerOf2_64(std::abs(StepNumerator))) {
3808 Negate = StepNumerator < 0;
3809 StepOpcode = ISD::SHL;
3810 SplatStepVal = Log2_64(std::abs(StepNumerator));
3811 }
3812
3813 // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
3814 // threshold since it's the immediate value many RVV instructions accept.
3815 // There is no vmul.vi instruction so ensure multiply constant can fit in
3816 // a single addi instruction.
3817 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3818 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3819 isPowerOf2_32(StepDenominator) &&
3820 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
// NOTE(review): original line 3822 — the initializer of VIDVT (the integer
// type the VID computation is performed in) — is missing from this
// extraction.
3821 MVT VIDVT =
3823 MVT VIDContainerVT =
3824 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3825 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3826 // Convert right out of the scalable type so we can use standard ISD
3827 // nodes for the rest of the computation. If we used scalable types with
3828 // these, we'd lose the fixed-length vector info and generate worse
3829 // vsetvli code.
3830 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3831 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3832 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3833 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3834 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3835 }
3836 if (StepDenominator != 1) {
3837 SDValue SplatStep =
3838 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3839 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3840 }
3841 if (Addend != 0 || Negate) {
3842 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
// SUB (Addend - VID) realizes the negated step; ADD realizes the addend.
3843 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3844 VID);
3845 }
3846 if (VT.isFloatingPoint()) {
3847 // TODO: Use vfwcvt to reduce register pressure.
3848 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3849 }
3850 return VID;
3851 }
3852 }
3853
3854 // For very small build_vectors, use a single scalar insert of a constant.
3855 // TODO: Base this on constant rematerialization cost, not size.
3856 const unsigned EltBitSize = VT.getScalarSizeInBits();
// NOTE(review): original line 3858 — the second clause of this condition —
// is missing from this extraction.
3857 if (VT.getSizeInBits() <= 32 &&
3859 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3860 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3861 "Unexpected sequence type");
3862 // If we can use the original VL with the modified element type, this
3863 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3864 // be moved into InsertVSETVLI?
3865 unsigned ViaVecLen =
3866 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3867 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3868
3869 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3870 uint64_t SplatValue = 0;
3871 // Construct the amalgamated value at this larger vector type.
3872 for (const auto &OpIdx : enumerate(Op->op_values())) {
3873 const auto &SeqV = OpIdx.value();
3874 if (!SeqV.isUndef())
3875 SplatValue |=
3876 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3877 }
3878
3879 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3880 // achieve better constant materializion.
3881 // On RV32, we need to sign-extend to use getSignedConstant.
3882 if (ViaIntVT == MVT::i32)
3883 SplatValue = SignExtend64<32>(SplatValue);
3884
3885 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3886 DAG.getUNDEF(ViaVecVT),
3887 DAG.getSignedConstant(SplatValue, DL, XLenVT),
3888 DAG.getVectorIdxConstant(0, DL));
// NOTE(review): original line 3890 — the node construction this
// continuation belongs to (extracting the single-element subvector) — is
// missing from this extraction.
3889 if (ViaVecLen != 1)
3891 MVT::getVectorVT(ViaIntVT, 1), Vec,
3892 DAG.getConstant(0, DL, XLenVT));
3893 return DAG.getBitcast(VT, Vec);
3894 }
3895
3896
3897 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3898 // when re-interpreted as a vector with a larger element type. For example,
3899 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3900 // could be instead splat as
3901 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3902 // TODO: This optimization could also work on non-constant splats, but it
3903 // would require bit-manipulation instructions to construct the splat value.
3904 SmallVector<SDValue> Sequence;
3905 const auto *BV = cast<BuildVectorSDNode>(Op);
// NOTE(review): original line 3907 — one clause of this condition — is
// missing from this extraction.
3906 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3908 BV->getRepeatedSequence(Sequence) &&
3909 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3910 unsigned SeqLen = Sequence.size();
3911 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3912 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3913 ViaIntVT == MVT::i64) &&
3914 "Unexpected sequence type");
3915
3916 // If we can use the original VL with the modified element type, this
3917 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3918 // be moved into InsertVSETVLI?
3919 const unsigned RequiredVL = NumElts / SeqLen;
3920 const unsigned ViaVecLen =
3921 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3922 NumElts : RequiredVL;
3923 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3924
3925 unsigned EltIdx = 0;
3926 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3927 uint64_t SplatValue = 0;
3928 // Construct the amalgamated value which can be splatted as this larger
3929 // vector type.
3930 for (const auto &SeqV : Sequence) {
3931 if (!SeqV.isUndef())
3932 SplatValue |=
3933 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3934 EltIdx++;
3935 }
3936
3937 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3938 // achieve better constant materializion.
3939 // On RV32, we need to sign-extend to use getSignedConstant.
3940 if (ViaIntVT == MVT::i32)
3941 SplatValue = SignExtend64<32>(SplatValue);
3942
3943 // Since we can't introduce illegal i64 types at this stage, we can only
3944 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3945 // way we can use RVV instructions to splat.
3946 assert((ViaIntVT.bitsLE(XLenVT) ||
3947 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3948 "Unexpected bitcast sequence");
3949 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3950 SDValue ViaVL =
3951 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3952 MVT ViaContainerVT =
3953 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3954 SDValue Splat =
3955 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3956 DAG.getUNDEF(ViaContainerVT),
3957 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3958 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
// NOTE(review): original line 3960 — the node construction this
// continuation belongs to (shrinking the splat back to RequiredVL) — is
// missing from this extraction.
3959 if (ViaVecLen != RequiredVL)
3961 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3962 DAG.getConstant(0, DL, XLenVT));
3963 return DAG.getBitcast(VT, Splat);
3964 }
3965 }
3966
3967 // If the number of signbits allows, see if we can lower as a <N x i8>.
3968 // Our main goal here is to reduce LMUL (and thus work) required to
3969 // build the constant, but we will also narrow if the resulting
3970 // narrow vector is known to materialize cheaply.
3971 // TODO: We really should be costing the smaller vector. There are
3972 // profitable cases this misses.
3973 if (EltBitSize > 8 && VT.isInteger() &&
3974 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3975 DAG.ComputeMaxSignificantBits(Op) <= 8) {
3976 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3977 DL, Op->ops());
3978 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3979 Source, DAG, Subtarget);
3980 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3981 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3982 }
3983
3984 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3985 return Res;
3986
3987 // For constant vectors, use generic constant pool lowering. Otherwise,
3988 // we'd have to materialize constants in GPRs just to move them into the
3989 // vector.
3990 return SDValue();
3991}
3992
3993static unsigned getPACKOpcode(unsigned DestBW,
3994 const RISCVSubtarget &Subtarget) {
3995 switch (DestBW) {
3996 default:
3997 llvm_unreachable("Unsupported pack size");
3998 case 16:
3999 return RISCV::PACKH;
4000 case 32:
4001 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4002 case 64:
4003 assert(Subtarget.is64Bit());
4004 return RISCV::PACK;
4005 }
4006}
4007
4008 /// Double the element size of the build vector to reduce the number
4009 /// of vslide1down in the build vector chain. In the worst case, this
4010 /// trades three scalar operations for 1 vector operation. Scalar
4011 /// operations are generally lower latency, and for out-of-order cores
4012 /// we also benefit from additional parallelism.
// NOTE(review): original line 4013 (the function name and leading
// parameters) is missing from this extraction.
4014 const RISCVSubtarget &Subtarget) {
4015 SDLoc DL(Op);
4016 MVT VT = Op.getSimpleValueType();
4017 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4018 MVT ElemVT = VT.getVectorElementType();
// Only integer elements can be bit-packed into a wider integer element.
4019 if (!ElemVT.isInteger())
4020 return SDValue();
4021
4022 // TODO: Relax these architectural restrictions, possibly with costing
4023 // of the actual instructions required.
4024 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4025 return SDValue();
4026
4027 unsigned NumElts = VT.getVectorNumElements();
4028 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
// The doubled element must still fit in both ELEN and XLEN, and we need an
// even element count to pair elements up.
4029 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4030 NumElts % 2 != 0)
4031 return SDValue();
4032
4033 // Produce [B,A] packed into a type twice as wide. Note that all
4034 // scalars are XLenVT, possibly masked (see below).
4035 MVT XLenVT = Subtarget.getXLenVT();
4036 SDValue Mask = DAG.getConstant(
4037 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4038 auto pack = [&](SDValue A, SDValue B) {
4039 // Bias the scheduling of the inserted operations to near the
4040 // definition of the element - this tends to reduce register
4041 // pressure overall.
4042 SDLoc ElemDL(B);
4043 if (Subtarget.hasStdExtZbkb())
4044 // Note that we're relying on the high bits of the result being
4045 // don't care. For PACKW, the result is *sign* extended.
4046 return SDValue(
4047 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4048 ElemDL, XLenVT, A, B),
4049 0);
4050
// Fallback without Zbkb: mask both halves and combine with shift+or.
4051 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4052 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4053 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
// NOTE(review): original line 4056 — the final operand of this OR node
// (appears to carry node flags) — is missing from this extraction.
4054 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4055 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4057 };
4058
// Pair up adjacent elements, then rebuild as a vector with half the lanes
// and double-width elements, bitcast back to the original type.
4059 SmallVector<SDValue> NewOperands;
4060 NewOperands.reserve(NumElts / 2);
4061 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4062 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4063 assert(NumElts == NewOperands.size() * 2);
4064 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4065 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4066 return DAG.getNode(ISD::BITCAST, DL, VT,
4067 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4068}
4069
4071 const RISCVSubtarget &Subtarget) {
4072 MVT VT = Op.getSimpleValueType();
4073 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4074
4075 MVT EltVT = VT.getVectorElementType();
4076 MVT XLenVT = Subtarget.getXLenVT();
4077
4078 SDLoc DL(Op);
4079
4080 // Proper support for f16 requires Zvfh. bf16 always requires special
4081 // handling. We need to cast the scalar to integer and create an integer
4082 // build_vector.
4083 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4084 MVT IVT = VT.changeVectorElementType(MVT::i16);
4086 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4087 SDValue Elem = Op.getOperand(I);
4088 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4089 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4090 // Called by LegalizeDAG, we need to use XLenVT operations since we
4091 // can't create illegal types.
4092 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4093 // Manually constant fold so the integer build_vector can be lowered
4094 // better. Waiting for DAGCombine will be too late.
4095 APInt V =
4096 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4097 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4098 } else {
4099 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4100 }
4101 } else {
4102 // Called by scalar type legalizer, we can use i16.
4103 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4104 }
4105 }
4106 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4107 return DAG.getBitcast(VT, Res);
4108 }
4109
4110 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4112 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4113
4114 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4115
4116 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4117
4118 if (VT.getVectorElementType() == MVT::i1) {
4119 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4120 // vector type, we have a legal equivalently-sized i8 type, so we can use
4121 // that.
4122 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4123 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4124
4125 SDValue WideVec;
4126 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4127 // For a splat, perform a scalar truncate before creating the wider
4128 // vector.
4129 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4130 DAG.getConstant(1, DL, Splat.getValueType()));
4131 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4132 } else {
4133 SmallVector<SDValue, 8> Ops(Op->op_values());
4134 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4135 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4136 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4137 }
4138
4139 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4140 }
4141
4142 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4143 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4144 return Gather;
4145 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4147 if (!VT.isFloatingPoint())
4148 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4149 Splat =
4150 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4151 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4152 }
4153
4154 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4155 return Res;
4156
4157 // If we're compiling for an exact VLEN value, we can split our work per
4158 // register in the register group.
4159 if (const auto VLen = Subtarget.getRealVLen();
4160 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4161 MVT ElemVT = VT.getVectorElementType();
4162 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4163 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4164 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4165 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4166 assert(M1VT == getLMUL1VT(M1VT));
4167
4168 // The following semantically builds up a fixed length concat_vector
4169 // of the component build_vectors. We eagerly lower to scalable and
4170 // insert_subvector here to avoid DAG combining it back to a large
4171 // build_vector.
4172 SmallVector<SDValue> BuildVectorOps(Op->ops());
4173 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4174 SDValue Vec = DAG.getUNDEF(ContainerVT);
4175 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4176 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4177 SDValue SubBV =
4178 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4179 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4180 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4181 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4182 DAG.getVectorIdxConstant(InsertIdx, DL));
4183 }
4184 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4185 }
4186
4187 // If we're about to resort to vslide1down (or stack usage), pack our
4188 // elements into the widest scalar type we can. This will force a VL/VTYPE
4189 // toggle, but reduces the critical path, the number of vslide1down ops
4190 // required, and possibly enables scalar folds of the values.
4191 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4192 return Res;
4193
4194 // For m1 vectors, if we have non-undef values in both halves of our vector,
4195 // split the vector into low and high halves, build them separately, then
4196 // use a vselect to combine them. For long vectors, this cuts the critical
4197 // path of the vslide1down sequence in half, and gives us an opportunity
4198 // to special case each half independently. Note that we don't change the
4199 // length of the sub-vectors here, so if both fallback to the generic
4200 // vslide1down path, we should be able to fold the vselect into the final
4201 // vslidedown (for the undef tail) for the first half w/ masking.
4202 unsigned NumElts = VT.getVectorNumElements();
4203 unsigned NumUndefElts =
4204 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4205 unsigned NumDefElts = NumElts - NumUndefElts;
4206 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4207 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4208 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4209 SmallVector<SDValue> MaskVals;
4210 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4211 SubVecAOps.reserve(NumElts);
4212 SubVecBOps.reserve(NumElts);
4213 for (unsigned i = 0; i < NumElts; i++) {
4214 SDValue Elem = Op->getOperand(i);
4215 if (i < NumElts / 2) {
4216 SubVecAOps.push_back(Elem);
4217 SubVecBOps.push_back(UndefElem);
4218 } else {
4219 SubVecAOps.push_back(UndefElem);
4220 SubVecBOps.push_back(Elem);
4221 }
4222 bool SelectMaskVal = (i < NumElts / 2);
4223 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4224 }
4225 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4226 MaskVals.size() == NumElts);
4227
4228 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4229 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4230 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4231 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4232 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4233 }
4234
4235 // Cap the cost at a value linear to the number of elements in the vector.
4236 // The default lowering is to use the stack. The vector store + scalar loads
4237 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4238 // being (at least) linear in LMUL. As a result, using the vslidedown
4239 // lowering for every element ends up being VL*LMUL..
4240 // TODO: Should we be directly costing the stack alternative? Doing so might
4241 // give us a more accurate upper bound.
4242 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4243
4244 // TODO: unify with TTI getSlideCost.
4245 InstructionCost PerSlideCost = 1;
4246 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4247 default: break;
4249 PerSlideCost = 2;
4250 break;
4252 PerSlideCost = 4;
4253 break;
4255 PerSlideCost = 8;
4256 break;
4257 }
4258
4259 // TODO: Should we be using the build instseq then cost + evaluate scheme
4260 // we use for integer constants here?
4261 unsigned UndefCount = 0;
4262 for (const SDValue &V : Op->ops()) {
4263 if (V.isUndef()) {
4264 UndefCount++;
4265 continue;
4266 }
4267 if (UndefCount) {
4268 LinearBudget -= PerSlideCost;
4269 UndefCount = 0;
4270 }
4271 LinearBudget -= PerSlideCost;
4272 }
4273 if (UndefCount) {
4274 LinearBudget -= PerSlideCost;
4275 }
4276
4277 if (LinearBudget < 0)
4278 return SDValue();
4279
4280 assert((!VT.isFloatingPoint() ||
4281 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4282 "Illegal type which will result in reserved encoding");
4283
4284 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4285
4286 SDValue Vec;
4287 UndefCount = 0;
4288 for (SDValue V : Op->ops()) {
4289 if (V.isUndef()) {
4290 UndefCount++;
4291 continue;
4292 }
4293
4294 // Start our sequence with a TA splat in the hopes that hardware is able to
4295 // recognize there's no dependency on the prior value of our temporary
4296 // register.
4297 if (!Vec) {
4298 Vec = DAG.getSplatVector(VT, DL, V);
4299 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4300 UndefCount = 0;
4301 continue;
4302 }
4303
4304 if (UndefCount) {
4305 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4306 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4307 Vec, Offset, Mask, VL, Policy);
4308 UndefCount = 0;
4309 }
4310 auto OpCode =
4312 if (!VT.isFloatingPoint())
4313 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4314 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4315 V, Mask, VL);
4316 }
4317 if (UndefCount) {
4318 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4319 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4320 Vec, Offset, Mask, VL, Policy);
4321 }
4322 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4323}
4324
// Splat the pair (Lo, Hi) of i32 halves of an i64 value into a vector of
// 64-bit elements (RV32 only), honoring the requested vector length VL.
// Cheaper encodings are tried first; the last resort stores both halves to
// the stack and reloads with a stride-x0 vector load.
4325 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4327 SelectionDAG &DAG) {
4328 if (!Passthru)
4329 Passthru = DAG.getUNDEF(VT);
// Constant halves: try to fold into a single vmv.v.x.
4330 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4331 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4332 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4333 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4334 // node in order to try and match RVV vector/scalar instructions.
4335 if ((LoC >> 31) == HiC)
4336 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4337 
4338 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4339 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4340 // vlmax vsetvli or vsetivli to change the VL.
4341 // FIXME: Support larger constants?
4342 // FIXME: Support non-constant VLs by saturating?
4343 if (LoC == HiC) {
4344 SDValue NewVL;
// VL of all-ones (immediate VLMAX) or the X0 register both mean VLMAX.
4345 if (isAllOnesConstant(VL) ||
4346 (isa<RegisterSDNode>(VL) &&
4347 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4348 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4349 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
// EEW=32 elements are half width, so twice as many are needed; VL+VL
// still fits the 5-bit vsetivli immediate because VL is uimm4.
4350 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4351 
4352 if (NewVL) {
4353 MVT InterVT =
4354 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4355 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4356 DAG.getUNDEF(InterVT), Lo, NewVL);
4357 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4358 }
4359 }
4360 }
4361 
4362 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4363 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4364 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4365 Hi.getConstantOperandVal(1) == 31)
4366 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4367 
4368 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4369 // even if it might be sign extended.
4370 if (Hi.isUndef())
4371 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4372 
4373 // Fall back to a stack store and stride x0 vector load.
4374 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4375 Hi, VL);
4376 }
4377
4378// Called by type legalization to handle splat of i64 on RV32.
4379// FIXME: We can optimize this when the type has sign or zero bits in one
4380// of the halves.
4381static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4382 SDValue Scalar, SDValue VL,
4383 SelectionDAG &DAG) {
4384 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4385 SDValue Lo, Hi;
4386 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4387 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4388}
4389
4390// This function lowers a splat of a scalar operand Splat with the vector
4391// length VL. It ensures the final sequence is type legal, which is useful when
4392// lowering a splat after type legalization.
4393static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4394 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4395 const RISCVSubtarget &Subtarget) {
4396 bool HasPassthru = Passthru && !Passthru.isUndef();
4397 if (!HasPassthru && !Passthru)
4398 Passthru = DAG.getUNDEF(VT);
4399
4400 MVT EltVT = VT.getVectorElementType();
4401 MVT XLenVT = Subtarget.getXLenVT();
4402
4403 if (VT.isFloatingPoint()) {
4404 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4405 EltVT == MVT::bf16) {
4406 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4407 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4408 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4409 else
4410 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4411 MVT IVT = VT.changeVectorElementType(MVT::i16);
4412 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4413 SDValue Splat =
4414 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4415 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4416 }
4417 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4418 }
4419
4420 // Simplest case is that the operand needs to be promoted to XLenVT.
4421 if (Scalar.getValueType().bitsLE(XLenVT)) {
4422 // If the operand is a constant, sign extend to increase our chances
4423 // of being able to use a .vi instruction. ANY_EXTEND would become a
4424 // a zero extend and the simm5 check in isel would fail.
4425 // FIXME: Should we ignore the upper bits in isel instead?
4426 unsigned ExtOpc =
4427 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4428 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4429 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4430 }
4431
4432 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4433 "Unexpected scalar for splat lowering!");
4434
4435 if (isOneConstant(VL) && isNullConstant(Scalar))
4436 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4437 DAG.getConstant(0, DL, XLenVT), VL);
4438
4439 // Otherwise use the more complicated splatting algorithm.
4440 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4441}
4442
4443 // This function lowers an insert of a scalar operand Scalar into lane
4444 // 0 of the vector regardless of the value of VL. The contents of the
4445 // remaining lanes of the result vector are unspecified. VL is assumed
4446 // to be non-zero.
4448 const SDLoc &DL, SelectionDAG &DAG,
4449 const RISCVSubtarget &Subtarget) {
4450 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4451 
4452 const MVT XLenVT = Subtarget.getXLenVT();
4453 SDValue Passthru = DAG.getUNDEF(VT);
4454 
// If Scalar itself was just extracted from lane 0 of a vector, reuse that
// vector (converted to a scalable container if needed) via subvector
// insert/extract instead of round-tripping through a scalar register.
4455 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4456 isNullConstant(Scalar.getOperand(1))) {
4457 SDValue ExtractedVal = Scalar.getOperand(0);
4458 // The element types must be the same.
4459 if (ExtractedVal.getValueType().getVectorElementType() ==
4460 VT.getVectorElementType()) {
4461 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4462 MVT ExtractedContainerVT = ExtractedVT;
4463 if (ExtractedContainerVT.isFixedLengthVector()) {
4464 ExtractedContainerVT = getContainerForFixedLengthVector(
4465 DAG, ExtractedContainerVT, Subtarget);
4466 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4467 ExtractedVal, DAG, Subtarget);
4468 }
// Either the source fits inside the result type (insert it at index 0)
// or the result is a prefix of the source (extract it at index 0).
4469 if (ExtractedContainerVT.bitsLE(VT))
4470 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4471 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4472 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4473 DAG.getVectorIdxConstant(0, DL));
4474 }
4475 }
4476 
4477 
4478 if (VT.isFloatingPoint())
4479 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4480 DAG.getUNDEF(VT), Scalar, VL);
4481 
4482 // Avoid the tricky legalization cases by falling back to using the
4483 // splat code which already handles it gracefully.
4484 if (!Scalar.getValueType().bitsLE(XLenVT))
4485 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4486 DAG.getConstant(1, DL, XLenVT),
4487 VT, DL, DAG, Subtarget);
4488 
4489 // If the operand is a constant, sign extend to increase our chances
4490 // of being able to use a .vi instruction. ANY_EXTEND would become a
4491 // a zero extend and the simm5 check in isel would fail.
4492 // FIXME: Should we ignore the upper bits in isel instead?
4493 unsigned ExtOpc =
4494 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4495 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4496 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4497 VL);
4498 }
4499
4500 // Can this shuffle be performed on exactly one (possibly larger) input?
// Returns V1 when V2 is unused, or the common (twice-as-wide) source when V1
// and V2 are the low and high halves of the same vector; otherwise SDValue().
4501 static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
4502 SDValue V2) {
4503 
4504 if (V2.isUndef() &&
4506 return V1;
4507 
4508 // Both input must be extracts.
4509 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4510 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4511 return SDValue();
4512 
4513 // Extracting from the same source.
4514 SDValue Src = V1.getOperand(0);
4515 if (Src != V2.getOperand(0))
4516 return SDValue();
4517 
4518 // Src needs to have twice the number of elements.
4519 unsigned NumElts = VT.getVectorNumElements();
4520 if (!Src.getValueType().isFixedLengthVector() ||
4521 Src.getValueType().getVectorNumElements() != (NumElts * 2))
4522 return SDValue();
4523 
4524 // The extracts must extract the two halves of the source.
// i.e. V1 starts at element 0 and V2 starts exactly at the midpoint.
4525 if (V1.getConstantOperandVal(1) != 0 ||
4526 V2.getConstantOperandVal(1) != NumElts)
4527 return SDValue();
4528 
4529 return Src;
4530 }
4531
4532/// Is this shuffle interleaving contiguous elements from one vector into the
4533/// even elements and contiguous elements from another vector into the odd
4534/// elements. \p EvenSrc will contain the element that should be in the first
4535/// even element. \p OddSrc will contain the element that should be in the first
4536/// odd element. These can be the first element in a source or the element half
4537/// way through the source.
4538static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4539 int &OddSrc, const RISCVSubtarget &Subtarget) {
4540 // We need to be able to widen elements to the next larger integer type.
4541 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4542 return false;
4543
4544 int Size = Mask.size();
4545 int NumElts = VT.getVectorNumElements();
4546 assert(Size == (int)NumElts && "Unexpected mask size");
4547
4548 SmallVector<unsigned, 2> StartIndexes;
4549 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4550 return false;
4551
4552 EvenSrc = StartIndexes[0];
4553 OddSrc = StartIndexes[1];
4554
4555 // One source should be low half of first vector.
4556 if (EvenSrc != 0 && OddSrc != 0)
4557 return false;
4558
4559 // Subvectors will be subtracted from either at the start of the two input
4560 // vectors, or at the start and middle of the first vector if it's an unary
4561 // interleave.
4562 // In both cases, HalfNumElts will be extracted.
4563 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4564 // we'll create an illegal extract_subvector.
4565 // FIXME: We could support other values using a slidedown first.
4566 int HalfNumElts = NumElts / 2;
4567 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4568}
4569
4570/// Match shuffles that concatenate two vectors, rotate the concatenation,
4571/// and then extract the original number of elements from the rotated result.
4572/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4573/// returned rotation amount is for a rotate right, where elements move from
4574/// higher elements to lower elements. \p LoSrc indicates the first source
4575/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4576/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4577/// 0 or 1 if a rotation is found.
4578///
4579/// NOTE: We talk about rotate to the right which matches how bit shift and
4580/// rotate instructions are described where LSBs are on the right, but LLVM IR
4581/// and the table below write vectors with the lowest elements on the left.
4582static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4583 int Size = Mask.size();
4584
4585 // We need to detect various ways of spelling a rotation:
4586 // [11, 12, 13, 14, 15, 0, 1, 2]
4587 // [-1, 12, 13, 14, -1, -1, 1, -1]
4588 // [-1, -1, -1, -1, -1, -1, 1, 2]
4589 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4590 // [-1, 4, 5, 6, -1, -1, 9, -1]
4591 // [-1, 4, 5, 6, -1, -1, -1, -1]
4592 int Rotation = 0;
4593 LoSrc = -1;
4594 HiSrc = -1;
4595 for (int i = 0; i != Size; ++i) {
4596 int M = Mask[i];
4597 if (M < 0)
4598 continue;
4599
4600 // Determine where a rotate vector would have started.
4601 int StartIdx = i - (M % Size);
4602 // The identity rotation isn't interesting, stop.
4603 if (StartIdx == 0)
4604 return -1;
4605
4606 // If we found the tail of a vector the rotation must be the missing
4607 // front. If we found the head of a vector, it must be how much of the
4608 // head.
4609 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4610
4611 if (Rotation == 0)
4612 Rotation = CandidateRotation;
4613 else if (Rotation != CandidateRotation)
4614 // The rotations don't match, so we can't match this mask.
4615 return -1;
4616
4617 // Compute which value this mask is pointing at.
4618 int MaskSrc = M < Size ? 0 : 1;
4619
4620 // Compute which of the two target values this index should be assigned to.
4621 // This reflects whether the high elements are remaining or the low elements
4622 // are remaining.
4623 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4624
4625 // Either set up this value if we've not encountered it before, or check
4626 // that it remains consistent.
4627 if (TargetSrc < 0)
4628 TargetSrc = MaskSrc;
4629 else if (TargetSrc != MaskSrc)
4630 // This may be a rotation, but it pulls from the inputs in some
4631 // unsupported interleaving.
4632 return -1;
4633 }
4634
4635 // Check that we successfully analyzed the mask, and normalize the results.
4636 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4637 assert((LoSrc >= 0 || HiSrc >= 0) &&
4638 "Failed to find a rotated input vector!");
4639
4640 return Rotation;
4641}
4642
4643 // Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4644 // 2, 4, 8 and the integer type Factor-times larger than VT's
4645 // element type must be a legal element type.
4646 // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4647 // -> [p, q, r, s] (Factor=2, Index=1)
4649 SDValue Src, unsigned Factor,
4650 unsigned Index, SelectionDAG &DAG) {
4651 unsigned EltBits = VT.getScalarSizeInBits();
4652 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
// View Src as a vector of (EltBits * Factor)-bit integers, so that each wide
// element holds one whole interleave group of Factor narrow elements.
4653 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4654 SrcEC.divideCoefficientBy(Factor));
4655 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4656 SrcEC.divideCoefficientBy(Factor));
4657 Src = DAG.getBitcast(WideSrcVT, Src);
4658 
// Shift the Index-th field of each group down to bit 0, then truncate to
// keep just that field.
4659 unsigned Shift = Index * EltBits;
4660 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4661 DAG.getConstant(Shift, DL, WideSrcVT));
4662 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
// Widen the (shorter) result back into a vector with VT's element count; the
// tail past the deinterleaved elements is undef.
4664 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res,
4665 DAG.getVectorIdxConstant(0, DL));
4666 return DAG.getBitcast(VT, Res);
4667 }
4668
4669 // Lower the following shuffle to vslidedown.
4670 // a)
4671 // t49: v8i8 = extract_subvector t13, Constant:i64<0>
4672 // t109: v8i8 = extract_subvector t13, Constant:i64<8>
4673 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4674 // b)
4675 // t69: v16i16 = extract_subvector t68, Constant:i64<0>
4676 // t23: v8i16 = extract_subvector t69, Constant:i64<0>
4677 // t29: v4i16 = extract_subvector t23, Constant:i64<4>
4678 // t26: v8i16 = extract_subvector t69, Constant:i64<8>
4679 // t30: v4i16 = extract_subvector t26, Constant:i64<0>
4680 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4682 SDValue V1, SDValue V2,
4683 ArrayRef<int> Mask,
4684 const RISCVSubtarget &Subtarget,
4685 SelectionDAG &DAG) {
// Walk up through nested fixed-length EXTRACT_SUBVECTORs, accumulating the
// total element offset from the underlying source vector.
4686 auto findNonEXTRACT_SUBVECTORParent =
4687 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4688 uint64_t Offset = 0;
4689 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4690 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4691 // a scalable vector. But we don't want to match the case.
4692 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4693 Offset += Parent.getConstantOperandVal(1);
4694 Parent = Parent.getOperand(0);
4695 }
4696 return std::make_pair(Parent, Offset);
4697 };
4698 
4699 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4700 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4701 
4702 // Extracting from the same source.
4703 SDValue Src = V1Src;
4704 if (Src != V2Src)
4705 return SDValue();
4706 
4707 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4708 SmallVector<int, 16> NewMask(Mask);
4709 for (size_t i = 0; i != NewMask.size(); ++i) {
4710 if (NewMask[i] == -1)
4711 continue;
4712 
4713 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4714 NewMask[i] = NewMask[i] + V1IndexOffset;
4715 } else {
4716 // Minus NewMask.size() is needed. Otherwise, the b case would be
4717 // <5,6,7,12> instead of <5,6,7,8>.
4718 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4719 }
4720 }
4721 
4722 // First index must be known and non-zero. It will be used as the slidedown
4723 // amount.
4724 if (NewMask[0] <= 0)
4725 return SDValue();
4726 
4727 // NewMask is also continuous.
4728 for (unsigned i = 1; i != NewMask.size(); ++i)
4729 if (NewMask[i - 1] + 1 != NewMask[i])
4730 return SDValue();
4731 
// The shuffle is a contiguous window of Src starting at NewMask[0]: slide
// the whole source down by that amount and keep the leading elements.
4732 MVT XLenVT = Subtarget.getXLenVT();
4733 MVT SrcVT = Src.getSimpleValueType();
4734 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4735 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4736 SDValue Slidedown =
4737 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4738 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4739 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4740 return DAG.getNode(
4742 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4743 DAG.getConstant(0, DL, XLenVT));
4744 }
4745
4746 // Because vslideup leaves the destination elements at the start intact, we can
4747 // use it to perform shuffles that insert subvectors:
4748 //
4749 // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4750 // ->
4751 // vsetvli zero, 8, e8, mf2, ta, ma
4752 // vslideup.vi v8, v9, 4
4753 //
4754 // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4755 // ->
4756 // vsetvli zero, 5, e8, mf2, tu, ma
4757 // vslideup.vi v8, v9, 2
4759 SDValue V1, SDValue V2,
4760 ArrayRef<int> Mask,
4761 const RISCVSubtarget &Subtarget,
4762 SelectionDAG &DAG) {
4763 unsigned NumElts = VT.getVectorNumElements();
4764 int NumSubElts, Index;
4765 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4766 Index))
4767 return SDValue();
4768 
// Work out which operand stays in place and which is inserted into it.
4769 bool OpsSwapped = Mask[Index] < (int)NumElts;
4770 SDValue InPlace = OpsSwapped ? V2 : V1;
4771 SDValue ToInsert = OpsSwapped ? V1 : V2;
4772 
4773 MVT XLenVT = Subtarget.getXLenVT();
4774 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4775 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4776 // We slide up by the index that the subvector is being inserted at, and set
4777 // VL to the index + the number of elements being inserted.
4779 // If we're adding a suffix to the in place vector, i.e. inserting right
4780 // up to the very end of it, then we don't actually care about the tail.
4781 if (NumSubElts + Index >= (int)NumElts)
4782 Policy |= RISCVII::TAIL_AGNOSTIC;
4783 
4784 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4785 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4786 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4787 
4788 SDValue Res;
4789 // If we're inserting into the lowest elements, use a tail undisturbed
4790 // vmv.v.v.
4791 if (Index == 0)
4792 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4793 VL);
4794 else
4795 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4796 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4797 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4798 }
4799
4800 /// Match v(f)slide1up/down idioms. These operations involve sliding
4801 /// N-1 elements to make room for an inserted scalar at one end.
4803 SDValue V1, SDValue V2,
4804 ArrayRef<int> Mask,
4805 const RISCVSubtarget &Subtarget,
4806 SelectionDAG &DAG) {
// The inserted scalar must come from a splat build_vector; canonicalize it
// into V1, remembering whether the operands were swapped.
4807 bool OpsSwapped = false;
4808 if (!isa<BuildVectorSDNode>(V1)) {
4809 if (!isa<BuildVectorSDNode>(V2))
4810 return SDValue();
4811 std::swap(V1, V2);
4812 OpsSwapped = true;
4813 }
4814 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4815 if (!Splat)
4816 return SDValue();
4817 
4818 // Return true if the mask could describe a slide of Mask.size() - 1
4819 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4820 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4821 const unsigned S = (Offset > 0) ? 0 : -Offset;
4822 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4823 for (unsigned i = S; i != E; ++i)
4824 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4825 return false;
4826 return true;
4827 };
4828 
4829 const unsigned NumElts = VT.getVectorNumElements();
4830 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4831 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4832 return SDValue();
4833 
// The lane opened up by the slide (last lane for slidedown, first for
// slideup) must be filled from the splat operand.
4834 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4835 // Inserted lane must come from splat, undef scalar is legal but not profitable.
4836 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4837 return SDValue();
4838 
4839 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4840 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4841 
4842 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
4843 // vslide1{down,up}.vx instead.
4844 if (VT.getVectorElementType() == MVT::bf16 ||
4845 (VT.getVectorElementType() == MVT::f16 &&
4846 !Subtarget.hasVInstructionsF16())) {
4847 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
4848 Splat =
4849 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
4850 V2 = DAG.getBitcast(
4851 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
4852 SDValue Vec = DAG.getNode(
4854 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
4855 Vec = DAG.getBitcast(ContainerVT, Vec);
4856 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4857 }
4858 
4859 auto OpCode = IsVSlidedown ?
// Integer slide1 instructions take the scalar in an XLen register.
4862 if (!VT.isFloatingPoint())
4863 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4864 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4865 DAG.getUNDEF(ContainerVT),
4866 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4867 Splat, TrueMask, VL);
4868 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4869 }
4870
4871// Match a mask which "spreads" the leading elements of a vector evenly
4872// across the result. Factor is the spread amount, and Index is the
4873// offset applied. (on success, Index < Factor) This is the inverse
4874// of a deinterleave with the same Factor and Index. This is analogous
4875// to an interleave, except that all but one lane is undef.
4876static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
4877 SmallVector<bool> LaneIsUndef(Factor, true);
4878 for (unsigned i = 0; i < Mask.size(); i++)
4879 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4880
4881 bool Found = false;
4882 for (unsigned i = 0; i < Factor; i++) {
4883 if (LaneIsUndef[i])
4884 continue;
4885 if (Found)
4886 return false;
4887 Index = i;
4888 Found = true;
4889 }
4890 if (!Found)
4891 return false;
4892
4893 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
4894 unsigned j = i * Factor + Index;
4895 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
4896 return false;
4897 }
4898 return true;
4899}
4900
4901 // Given a vector a, b, c, d return a vector Factor times longer
4902 // with Factor-1 undef's between elements. Ex:
4903 // a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4904 // undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4905 static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4906 const SDLoc &DL, SelectionDAG &DAG) {
4907 
4908 MVT VT = V.getSimpleValueType();
4909 unsigned EltBits = VT.getScalarSizeInBits();
// Work on the integer bit pattern: zero-extend each element to Factor times
// its width, which places the value in the low field of each wide element.
4911 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4912 
4913 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4914 
4915 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4916 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
4917 // allow the SHL to fold away if Index is 0.
// Shift left so the value lands in the Index-th field of each wide element.
4918 if (Index != 0)
4919 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4920 DAG.getConstant(EltBits * Index, DL, WideVT));
4921 // Make sure to use original element type
4923 EC.multiplyCoefficientBy(Factor));
4924 return DAG.getBitcast(ResultVT, Result);
4925 }
4926
4927 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4928 // to create an interleaved vector of <[vscale x] n*2 x ty>.
4929 // This requires that the size of ty is less than the subtarget's maximum ELEN.
// NOTE(review): extraction dropped the opening signature line (presumably
// "static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV," —
// consistent with the call site below, original line 4930) as well as
// continuation lines 4957 and 5012-5013. Verify against upstream LLVM.
4931 const SDLoc &DL, SelectionDAG &DAG,
4932 const RISCVSubtarget &Subtarget) {
4933
4934 // FIXME: Not only does this optimize the code, it fixes some correctness
4935 // issues because MIR does not have freeze.
// If one side is provably undef, this degenerates to a spread(2) of the
// other operand.
4936 if (EvenV.isUndef())
4937 return getWideningSpread(OddV, 2, 1, DL, DAG);
4938 if (OddV.isUndef())
4939 return getWideningSpread(EvenV, 2, 0, DL, DAG);
4940
4941 MVT VecVT = EvenV.getSimpleValueType();
4942 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4943 // Convert fixed vectors to scalable if needed
4944 if (VecContainerVT.isFixedLengthVector()) {
4945 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4946 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4947 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4948 }
4949
4950 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4951
4952 // We're working with a vector of the same size as the resulting
4953 // interleaved vector, but with half the number of elements and
4954 // twice the SEW (Hence the restriction on not using the maximum
4955 // ELEN)
4956 MVT WideVT =
// NOTE(review): the first argument of this getVectorVT call (the doubled-SEW
// integer element type) was dropped by extraction (original line 4957).
4958 VecVT.getVectorElementCount());
4959 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4960 if (WideContainerVT.isFixedLengthVector())
4961 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4962
4963 // Bitcast the input vectors to integers in case they are FP
4964 VecContainerVT = VecContainerVT.changeTypeToInteger();
4965 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4966 OddV = DAG.getBitcast(VecContainerVT, OddV);
4967
4968 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4969 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4970
4971 SDValue Interleaved;
// With Zvbb, a widening shift-left-logical plus a widening add does the
// interleave directly.
4972 if (Subtarget.hasStdExtZvbb()) {
4973 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4974 SDValue OffsetVec =
4975 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4976 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4977 OffsetVec, Passthru, Mask, VL);
4978 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4979 Interleaved, EvenV, Passthru, Mask, VL);
4980 } else {
4981 // FIXME: We should freeze the odd vector here. We already handled the case
4982 // of provably undef/poison above.
4983
4984 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4985 // vwaddu.vv
4986 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4987 OddV, Passthru, Mask, VL);
4988
4989 // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
// (i.e. a widening unsigned multiply of OddV by the all-ones splat,
// whose value is 2^SEW - 1)
4990 SDValue AllOnesVec = DAG.getSplatVector(
4991 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4992 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4993 OddV, AllOnesVec, Passthru, Mask, VL);
4994
4995 // Add the two together so we get
4996 // (OddV * 0xff...ff) + (OddV + EvenV)
4997 // = (OddV * 0x100...00) + EvenV
4998 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4999 // Note the ADD_VL and VLMULU_VL should get selected as vwmaccu.vx
5000 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5001 Interleaved, OddsMul, Passthru, Mask, VL);
5002 }
5003
5004 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
5005 MVT ResultContainerVT = MVT::getVectorVT(
5006 VecVT.getVectorElementType(), // Make sure to use original type
5007 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5008 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5009
5010 // Convert back to a fixed vector if needed
5011 MVT ResultVT =
// NOTE(review): the initializer of ResultVT (original lines 5012-5013) was
// dropped by extraction.
5014 if (ResultVT.isFixedLengthVector())
5015 Interleaved =
5016 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5017
5018 return Interleaved;
5019 }
5020
5021 // If we have a vector of bits that we want to reverse, we can use a vbrev on a
5022 // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
// NOTE(review): extraction dropped the signature line (presumably
// "static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN," —
// consistent with the call site later in this file, original line 5023) and
// the two condition lines 5033 and 5046. Verify against upstream LLVM.
5024 SelectionDAG &DAG,
5025 const RISCVSubtarget &Subtarget) {
5026 SDLoc DL(SVN);
5027 MVT VT = SVN->getSimpleValueType(0);
5028 SDValue V = SVN->getOperand(0);
5029 unsigned NumElts = VT.getVectorNumElements();
5030
5031 assert(VT.getVectorElementType() == MVT::i1);
5032
// Only a single-source reverse shuffle is handled here (second operand must
// be undef); the start of this condition was lost in extraction.
5034 SVN->getMask().size()) ||
5035 !SVN->getOperand(1).isUndef())
5036 return SDValue();
5037
// ViaEltSize is the smallest power of two >= NumElts, and at least 8, so the
// whole i1 vector fits in a single wide integer element.
5038 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5039 EVT ViaVT = EVT::getVectorVT(
5040 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5041 EVT ViaBitVT =
5042 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5043
5044 // If we don't have zvbb or the larger element type > ELEN, the operation will
5045 // be illegal.
5047 ViaVT) ||
5048 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5049 return SDValue();
5050
5051 // If the bit vector doesn't fit exactly into the larger element type, we need
5052 // to insert it into the larger vector and then shift up the reversed bits
5053 // afterwards to get rid of the gap introduced.
5054 if (ViaEltSize > NumElts)
5055 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
5056 V, DAG.getVectorIdxConstant(0, DL));
5057
5058 SDValue Res =
5059 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5060
5061 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5062 // element type.
5063 if (ViaEltSize > NumElts)
5064 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5065 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5066
5067 Res = DAG.getBitcast(ViaBitVT, Res);
5068
// Extract the original-width i1 vector back out if we had to pad above.
5069 if (ViaEltSize > NumElts)
5070 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
5071 DAG.getVectorIdxConstant(0, DL));
5072 return Res;
5073 }
5074
// Decide whether this shuffle can be reinterpreted as a bit-rotate of wider
// elements, and if so compute the wide type (RotateVT) and rotate amount
// (RotateAmt) as out-parameters.
// NOTE(review): extraction dropped the signature line (presumably
// "static bool isLegalBitRotate(ShuffleVectorSDNode *SVN," — consistent with
// the call sites later in this file, original line 5075). Verify upstream.
5076 SelectionDAG &DAG,
5077 const RISCVSubtarget &Subtarget,
5078 MVT &RotateVT, unsigned &RotateAmt) {
5079 SDLoc DL(SVN);
5080
5081 EVT VT = SVN->getValueType(0);
5082 unsigned NumElts = VT.getVectorNumElements();
5083 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5084 unsigned NumSubElts;
// Generic matcher: does the mask rotate groups of NumSubElts elements?
5085 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
5086 NumElts, NumSubElts, RotateAmt))
5087 return false;
// Each group of NumSubElts narrow elements becomes one wide integer element.
5088 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5089 NumElts / NumSubElts);
5090
5091 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5092 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5093 }
5094
5095 // Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5096 // reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5097 // as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
// NOTE(review): extraction dropped the signature line (presumably
// "static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN," —
// consistent with the call sites later in this file, original line 5098).
5099 SelectionDAG &DAG,
5100 const RISCVSubtarget &Subtarget) {
5101 SDLoc DL(SVN);
5102
5103 EVT VT = SVN->getValueType(0);
5104 unsigned RotateAmt;
5105 MVT RotateVT;
// Bail out unless the mask is a legal wide-element bit rotate; on success
// RotateVT/RotateAmt are filled in by the helper.
5106 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5107 return SDValue();
5108
5109 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5110
5111 SDValue Rotate;
5112 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5113 // so canonicalize to vrev8.
5114 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5115 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5116 else
5117 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5118 DAG.getConstant(RotateAmt, DL, RotateVT));
5119
// Return in the original narrow-element type.
5120 return DAG.getBitcast(VT, Rotate);
5121 }
5122
5123 // If compiling with an exactly known VLEN, see if we can split a
5124 // shuffle on m2 or larger into a small number of m1 sized shuffles
5125 // which write each destination registers exactly once.
// NOTE(review): extraction dropped several lines from this listing — the
// signature (presumably "static SDValue lowerShuffleViaVRegSplitting(
// ShuffleVectorSDNode *SVN," per the call site, original line 5126), the
// declaration introducing 'Operands' (5164), the call that begins the
// mask-decomposition (5166, presumably processShuffleMasks), a lambda
// parameter line (5212), and the declaration of the 'Values' map (5220).
// Verify against upstream LLVM before relying on this copy.
5127 SelectionDAG &DAG,
5128 const RISCVSubtarget &Subtarget) {
5129 SDLoc DL(SVN);
5130 MVT VT = SVN->getSimpleValueType(0);
5131 SDValue V1 = SVN->getOperand(0);
5132 SDValue V2 = SVN->getOperand(1);
5133 ArrayRef<int> Mask = SVN->getMask();
5134
5135 // If we don't know exact data layout, not much we can do. If this
5136 // is already m1 or smaller, no point in splitting further.
5137 const auto VLen = Subtarget.getRealVLen();
5138 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5139 return SDValue();
5140
5141 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5142 // expansion for.
5143 unsigned RotateAmt;
5144 MVT RotateVT;
5145 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5146 return SDValue();
5147
5148 MVT ElemVT = VT.getVectorElementType();
5149 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5150
5151 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5152 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5153 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5154 assert(M1VT == getLMUL1VT(M1VT));
5155 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5156 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5157 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5158 unsigned NumOfDestRegs = NumElts / NumOpElts;
5159 // The following semantically builds up a fixed length concat_vector
5160 // of the component shuffle_vectors. We eagerly lower to scalable here
5161 // to avoid DAG combining it back to a large shuffle_vector again.
5162 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5163 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
// Decompose the big mask into one entry per destination register; each entry
// records (source register index, optional second source, per-register mask).
5165 Operands;
5167 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5168 [&]() { Operands.emplace_back(); },
5169 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5170 Operands.emplace_back().emplace_back(
5171 SrcVecIdx, UINT_MAX,
5172 SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5173 },
5174 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5175 if (NewReg)
5176 Operands.emplace_back();
5177 Operands.back().emplace_back(
5178 Idx1, Idx2, SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5179 });
5180 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5181 // Note: check that we do not emit too many shuffles here to prevent code
5182 // size explosion.
5183 // TODO: investigate, if it can be improved by extra analysis of the masks to
5184 // check if the code is more profitable.
// Count emitted m1 shuffles: two-source entries cost an extra shuffle,
// identity single-source entries cost none.
5185 unsigned NumShuffles = std::accumulate(
5186 Operands.begin(), Operands.end(), 0u,
5187 [&](unsigned N,
5188 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5189 if (Data.empty())
5190 return N;
5191 N += Data.size();
5192 for (const auto &P : Data) {
5193 unsigned Idx2 = std::get<1>(P);
5194 ArrayRef<int> Mask = std::get<2>(P);
5195 if (Idx2 != UINT_MAX)
5196 ++N;
5197 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5198 --N;
5199 }
5200 return N;
5201 });
// Give up when the split would cost more shuffles than it is worth.
5202 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5203 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5204 return SDValue();
// Pull one m1-sized register out of a scalable source and convert it back to
// a fixed vector so generic shuffle lowering can be reused on it.
5205 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5206 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5207 DAG.getVectorIdxConstant(ExtractIdx, DL));
5208 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5209 return SubVec;
5210 };
5211 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5213 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5214 return SubVec;
5215 };
5216 SDValue Vec = DAG.getUNDEF(ContainerVT);
5217 for (auto [I, Data] : enumerate(Operands)) {
5218 if (Data.empty())
5219 continue;
// First materialize (and cache) every source register this destination needs.
5221 for (unsigned I : seq<unsigned>(Data.size())) {
5222 const auto &[Idx1, Idx2, _] = Data[I];
5223 // If the shuffle contains permutation of odd number of elements,
5224 // Idx1 might be used already in the first iteration.
5225 //
5226 // Idx1 = shuffle Idx1, Idx2
5227 // Idx1 = shuffle Idx1, Idx3
5228 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
5229 if (!V)
5230 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5231 (Idx1 % NumOfSrcRegs) * NumOpElts);
5232 if (Idx2 != UINT_MAX) {
5233 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
5234 if (!V)
5235 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5236 (Idx2 % NumOfSrcRegs) * NumOpElts);
5237 }
5238 }
// Then chain the per-register shuffles, feeding each result back in under
// its first index so later entries can reuse it.
5239 SDValue V;
5240 for (const auto &[Idx1, Idx2, Mask] : Data) {
5241 SDValue V1 = Values.at(Idx1);
5242 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5243 V = PerformShuffle(V1, V2, Mask);
5244 Values[Idx1] = V;
5245 }
5246
// Finally insert the finished m1 register at its destination slot.
5247 unsigned InsertIdx = I * NumOpElts;
5248 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5249 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, V,
5250 DAG.getVectorIdxConstant(InsertIdx, DL));
5251 }
5252 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5253 }
5254
5255 // Matches a subset of compress masks with a contiguous prefix of output
5256 // elements. This could be extended to allow gaps by deciding which
5257 // source elements to spuriously demand.
// NOTE(review): extraction dropped the signature line (presumably
// "static bool isCompressMask(ArrayRef<int> Mask) {" — consistent with the
// single-argument call site later in this file, original line 5258).
5259 int Last = -1;
5260 bool SawUndef = false;
5261 for (unsigned i = 0; i < Mask.size(); i++) {
5262 if (Mask[i] == -1) {
5263 SawUndef = true;
5264 continue;
5265 }
// Defined elements must form a contiguous prefix: none may follow an undef.
5266 if (SawUndef)
5267 return false;
// A compress only moves elements toward the front, so each source index
// must be at least its destination position...
5268 if (i > (unsigned)Mask[i])
5269 return false;
// ...and the source indices must be strictly increasing.
5270 if (Mask[i] <= Last)
5271 return false;
5272 Last = Mask[i];
5273 }
5274 return true;
5275 }
5276
5277 /// Given a shuffle where the indices are disjoint between the two sources,
5278 /// e.g.:
5279 ///
5280 /// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5281 ///
5282 /// Merge the two sources into one and do a single source shuffle:
5283 ///
5284 /// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5285 /// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5286 ///
5287 /// A vselect will either be merged into a masked instruction or be lowered as a
5288 /// vmerge.vvm, which is cheaper than a vrgather.vv.
// NOTE(review): extraction dropped the signature line (presumably
// "static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN," —
// original line 5289). Verify against upstream LLVM.
5290 SelectionDAG &DAG,
5291 const RISCVSubtarget &Subtarget) {
5292 MVT VT = SVN->getSimpleValueType(0);
5293 MVT XLenVT = Subtarget.getXLenVT();
5294 SDLoc DL(SVN);
5295
5296 const ArrayRef<int> Mask = SVN->getMask();
5297
5298 // Work out which source each lane will come from.
// Srcs[lane] is -1 (unused), 0 (first source) or 1 (second source).
5299 SmallVector<int, 16> Srcs(Mask.size(), -1);
5300
5301 for (int Idx : Mask) {
5302 if (Idx == -1)
5303 continue;
5304 unsigned SrcIdx = Idx % Mask.size();
5305 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5306 if (Srcs[SrcIdx] == -1)
5307 // Mark this source as using this lane.
5308 Srcs[SrcIdx] = Src;
5309 else if (Srcs[SrcIdx] != Src)
5310 // The other source is using this lane: not disjoint.
5311 return SDValue();
5312 }
5313
// Build the i1 select mask: true picks operand 0, false picks operand 1.
5314 SmallVector<SDValue> SelectMaskVals;
5315 for (int Lane : Srcs) {
5316 if (Lane == -1)
5317 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5318 else
5319 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5320 }
5321 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5322 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5323 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5324 SVN->getOperand(0), SVN->getOperand(1));
5325
5326 // Move all indices relative to the first source.
5327 SmallVector<int> NewMask(Mask.size());
5328 for (unsigned I = 0; I < Mask.size(); I++) {
5329 if (Mask[I] == -1)
5330 NewMask[I] = -1;
5331 else
5332 NewMask[I] = Mask[I] % Mask.size();
5333 }
5334
// Single-source shuffle of the merged vector.
5335 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5336 }
5337
5338/// Is this mask local (i.e. elements only move within their local span), and
5339/// repeating (that is, the same rearrangement is being done within each span)?
5340static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5341 SmallVector<int> LowSpan(Span, -1);
5342 for (auto [I, M] : enumerate(Mask)) {
5343 if (M == -1)
5344 continue;
5345 if ((M / Span) != (int)(I / Span))
5346 return false;
5347 int SpanIdx = I % Span;
5348 int Expected = M % Span;
5349 if (LowSpan[SpanIdx] == -1)
5350 LowSpan[SpanIdx] = Expected;
5351 if (LowSpan[SpanIdx] != Expected)
5352 return false;
5353 }
5354 return true;
5355}
5356
5357/// Is this mask only using elements from the first span of the input?
5358static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
5359 return all_of(Mask,
5360 [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
5361}
5362
5363 /// Try to widen element type to get a new mask value for a better permutation
5364 /// sequence. This doesn't try to inspect the widened mask for profitability;
5365 /// we speculate the widened form is equal or better. This has the effect of
5366 /// reducing mask constant sizes - allowing cheaper materialization sequences
5367 /// - and index sequence sizes - reducing register pressure and materialization
5368 /// cost, at the cost of (possibly) an extra VTYPE toggle.
// NOTE(review): extraction dropped the signature line (presumably
// "static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {" —
// consistent with the call site later in this file, original line 5369).
5370 SDLoc DL(Op);
5371 MVT VT = Op.getSimpleValueType();
5372 MVT ScalarVT = VT.getVectorElementType();
5373 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5374 SDValue V0 = Op.getOperand(0);
5375 SDValue V1 = Op.getOperand(1);
5376 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5377
5378 // Avoid wasted work leading to isTypeLegal check failing below
5379 if (ElementSize > 32)
5380 return SDValue();
5381
// The mask must pair up into moves of adjacent element pairs for a 2x
// widening to be expressible.
5382 SmallVector<int, 8> NewMask;
5383 if (!widenShuffleMaskElts(Mask, NewMask))
5384 return SDValue();
5385
// Double the element width (keeping FP-ness) and halve the element count.
5386 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5387 : MVT::getIntegerVT(ElementSize * 2);
5388 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5389 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5390 return SDValue();
5391 V0 = DAG.getBitcast(NewVT, V0);
5392 V1 = DAG.getBitcast(NewVT, V1);
5393 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5394 }
5395
5397 const RISCVSubtarget &Subtarget) {
5398 SDValue V1 = Op.getOperand(0);
5399 SDValue V2 = Op.getOperand(1);
5400 SDLoc DL(Op);
5401 MVT XLenVT = Subtarget.getXLenVT();
5402 MVT VT = Op.getSimpleValueType();
5403 unsigned NumElts = VT.getVectorNumElements();
5404 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5405
5406 if (VT.getVectorElementType() == MVT::i1) {
5407 // Lower to a vror.vi of a larger element type if possible before we promote
5408 // i1s to i8s.
5409 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5410 return V;
5411 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5412 return V;
5413
5414 // Promote i1 shuffle to i8 shuffle.
5415 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5416 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5417 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5418 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5419 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5420 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5421 ISD::SETNE);
5422 }
5423
5424 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5425
5426 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5427
5428 if (SVN->isSplat()) {
5429 const int Lane = SVN->getSplatIndex();
5430 if (Lane >= 0) {
5431 MVT SVT = VT.getVectorElementType();
5432
5433 // Turn splatted vector load into a strided load with an X0 stride.
5434 SDValue V = V1;
5435 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5436 // with undef.
5437 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5438 int Offset = Lane;
5439 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5440 int OpElements =
5441 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5442 V = V.getOperand(Offset / OpElements);
5443 Offset %= OpElements;
5444 }
5445
5446 // We need to ensure the load isn't atomic or volatile.
5447 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5448 auto *Ld = cast<LoadSDNode>(V);
5449 Offset *= SVT.getStoreSize();
5450 SDValue NewAddr = DAG.getMemBasePlusOffset(
5451 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5452
5453 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5454 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5455 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5456 SDValue IntID =
5457 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5458 SDValue Ops[] = {Ld->getChain(),
5459 IntID,
5460 DAG.getUNDEF(ContainerVT),
5461 NewAddr,
5462 DAG.getRegister(RISCV::X0, XLenVT),
5463 VL};
5464 SDValue NewLoad = DAG.getMemIntrinsicNode(
5465 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5467 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5468 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5469 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5470 }
5471
5472 MVT SplatVT = ContainerVT;
5473
5474 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5475 if (SVT == MVT::bf16 ||
5476 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5477 SVT = MVT::i16;
5478 SplatVT = ContainerVT.changeVectorElementType(SVT);
5479 }
5480
5481 // Otherwise use a scalar load and splat. This will give the best
5482 // opportunity to fold a splat into the operation. ISel can turn it into
5483 // the x0 strided load if we aren't able to fold away the select.
5484 if (SVT.isFloatingPoint())
5485 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5486 Ld->getPointerInfo().getWithOffset(Offset),
5487 Ld->getOriginalAlign(),
5488 Ld->getMemOperand()->getFlags());
5489 else
5490 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5491 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5492 Ld->getOriginalAlign(),
5493 Ld->getMemOperand()->getFlags());
5495
5496 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5498 SDValue Splat =
5499 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5500 Splat = DAG.getBitcast(ContainerVT, Splat);
5501 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5502 }
5503
5504 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5505 assert(Lane < (int)NumElts && "Unexpected lane!");
5506 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5507 V1, DAG.getConstant(Lane, DL, XLenVT),
5508 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5509 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5510 }
5511 }
5512
5513 // For exact VLEN m2 or greater, try to split to m1 operations if we
5514 // can split cleanly.
5515 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5516 return V;
5517
5518 ArrayRef<int> Mask = SVN->getMask();
5519
5520 if (SDValue V =
5521 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5522 return V;
5523
5524 if (SDValue V =
5525 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5526 return V;
5527
5528 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5529 // available.
5530 if (Subtarget.hasStdExtZvkb())
5531 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5532 return V;
5533
5534 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5535 // be undef which can be handled with a single SLIDEDOWN/UP.
5536 int LoSrc, HiSrc;
5537 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5538 if (Rotation > 0) {
5539 SDValue LoV, HiV;
5540 if (LoSrc >= 0) {
5541 LoV = LoSrc == 0 ? V1 : V2;
5542 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5543 }
5544 if (HiSrc >= 0) {
5545 HiV = HiSrc == 0 ? V1 : V2;
5546 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5547 }
5548
5549 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5550 // to slide LoV up by (NumElts - Rotation).
5551 unsigned InvRotate = NumElts - Rotation;
5552
5553 SDValue Res = DAG.getUNDEF(ContainerVT);
5554 if (HiV) {
5555 // Even though we could use a smaller VL, don't to avoid a vsetivli
5556 // toggle.
5557 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5558 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5559 }
5560 if (LoV)
5561 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5562 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5564
5565 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5566 }
5567
5568 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5569 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5570
5571 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5572 // use shift and truncate to perform the shuffle.
5573 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5574 // shift-and-trunc reducing total cost for everything except an mf8 result.
5575 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5576 // to do the entire operation.
5577 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5578 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5579 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5580 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5581 unsigned Index = 0;
5582 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5583 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5584 if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2))
5585 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5586 }
5587 }
5588 }
5589
5590 if (SDValue V =
5591 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5592 return V;
5593
5594 // Detect an interleave shuffle and lower to
5595 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5596 int EvenSrc, OddSrc;
5597 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5598 // Extract the halves of the vectors.
5599 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5600
5601 // Recognize if one half is actually undef; the matching above will
5602 // otherwise reuse the even stream for the undef one. This improves
5603 // spread(2) shuffles.
5604 bool LaneIsUndef[2] = { true, true};
5605 for (unsigned i = 0; i < Mask.size(); i++)
5606 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5607
5608 int Size = Mask.size();
5609 SDValue EvenV, OddV;
5610 if (LaneIsUndef[0]) {
5611 EvenV = DAG.getUNDEF(HalfVT);
5612 } else {
5613 assert(EvenSrc >= 0 && "Undef source?");
5614 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5615 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5616 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5617 }
5618
5619 if (LaneIsUndef[1]) {
5620 OddV = DAG.getUNDEF(HalfVT);
5621 } else {
5622 assert(OddSrc >= 0 && "Undef source?");
5623 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5624 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5625 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5626 }
5627
5628 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5629 }
5630
5631
5632 // Handle any remaining single source shuffles
5633 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5634 if (V2.isUndef()) {
5635 // We might be able to express the shuffle as a bitrotate. But even if we
5636 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5637 // shifts and a vor will have a higher throughput than a vrgather.
5638 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5639 return V;
5640
5641 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
5642 // is fully covered in interleave(2) above, so it is ignored here.
5643 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5644 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5645 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5646 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5647 unsigned Index;
5648 if (isSpreadMask(Mask, Factor, Index)) {
5649 MVT NarrowVT =
5650 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
5651 SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1,
5652 DAG.getVectorIdxConstant(0, DL));
5653 return getWideningSpread(Src, Factor, Index, DL, DAG);
5654 }
5655 }
5656 }
5657
5658 // If only a prefix of the source elements influence a prefix of the
5659 // destination elements, try to see if we can reduce the required LMUL
5660 unsigned MinVLen = Subtarget.getRealMinVLen();
5661 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
5662 if (NumElts > MinVLMAX) {
5663 unsigned MaxIdx = 0;
5664 for (auto [I, M] : enumerate(Mask)) {
5665 if (M == -1)
5666 continue;
5667 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
5668 }
5669 unsigned NewNumElts =
5670 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
5671 if (NewNumElts != NumElts) {
5672 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
5673 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
5674 V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewVT, V1, ZeroIdx);
5675 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
5676 Mask.take_front(NewNumElts));
5677 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Res,
5678 ZeroIdx);
5679 }
5680 }
5681
5682 // Before hitting generic lowering fallbacks, try to widen the mask
5683 // to a wider SEW.
5684 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5685 return V;
5686
5687 // Can we generate a vcompress instead of a vrgather? These scale better
5688 // at high LMUL, at the cost of not being able to fold a following select
5689 // into them. The mask constants are also smaller than the index vector
5690 // constants, and thus easier to materialize.
5691 if (isCompressMask(Mask)) {
5692 SmallVector<SDValue> MaskVals(NumElts,
5693 DAG.getConstant(false, DL, XLenVT));
5694 for (auto Idx : Mask) {
5695 if (Idx == -1)
5696 break;
5697 assert(Idx >= 0 && (unsigned)Idx < NumElts);
5698 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
5699 }
5700 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5701 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5702 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
5703 DAG.getUNDEF(VT));
5704 }
5705
5706 if (VT.getScalarSizeInBits() == 8 &&
5707 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5708 // On such a vector we're unable to use i8 as the index type.
5709 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5710 // may involve vector splitting if we're already at LMUL=8, or our
5711 // user-supplied maximum fixed-length LMUL.
5712 return SDValue();
5713 }
5714
5715 // Base case for the two operand recursion below - handle the worst case
5716 // single source shuffle.
5717 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5718 MVT IndexVT = VT.changeTypeToInteger();
5719 // Since we can't introduce illegal index types at this stage, use i16 and
5720 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5721 // than XLenVT.
5722 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5723 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5724 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5725 }
5726
5727 // If the mask allows, we can do all the index computation in 16 bits. This
5728 // requires less work and less register pressure at high LMUL, and creates
5729 // smaller constants which may be cheaper to materialize.
5730 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5731 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5732 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5733 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5734 }
5735
5736 MVT IndexContainerVT =
5737 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5738
5739 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5740 SmallVector<SDValue> GatherIndicesLHS;
5741 for (int MaskIndex : Mask) {
5742 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5743 GatherIndicesLHS.push_back(IsLHSIndex
5744 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5745 : DAG.getUNDEF(XLenVT));
5746 }
5747 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5748 LHSIndices =
5749 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
5750
5751 SDValue Gather;
5752 if (NumElts > MinVLMAX && isLocalRepeatingShuffle(Mask, MinVLMAX)) {
5753 // If we have a locally repeating mask, then we can reuse the first
5754 // register in the index register group for all registers within the
5755 // source register group. TODO: This generalizes to m2, and m4.
5756 const MVT M1VT = getLMUL1VT(ContainerVT);
5757 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
5758 SDValue SubIndex =
5759 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubIndexVT, LHSIndices,
5760 DAG.getVectorIdxConstant(0, DL));
5761 auto [InnerTrueMask, InnerVL] =
5762 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
5763 int N = ContainerVT.getVectorMinNumElements() /
5765 assert(isPowerOf2_32(N) && N <= 8);
5766 Gather = DAG.getUNDEF(ContainerVT);
5767 for (int i = 0; i < N; i++) {
5768 SDValue SubIdx =
5770 SDValue SubV1 =
5771 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, V1, SubIdx);
5772 SDValue SubVec =
5773 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
5774 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
5775 Gather = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Gather,
5776 SubVec, SubIdx);
5777 }
5778 } else if (NumElts > MinVLMAX && isLowSourceShuffle(Mask, MinVLMAX)) {
5779 // If we have a shuffle which only uses the first register in our
5780 // source register group, we can do a linear number of m1 vrgathers
5781 // reusing the same source register (but with different indices)
5782 // TODO: This can be generalized for m2 or m4, or for any shuffle
5783 // for which we can do a vslidedown followed by this expansion.
5784 const MVT M1VT = getLMUL1VT(ContainerVT);
5785 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
5786 auto [InnerTrueMask, InnerVL] =
5787 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
5788 int N = ContainerVT.getVectorMinNumElements() /
5790 assert(isPowerOf2_32(N) && N <= 8);
5791 Gather = DAG.getUNDEF(ContainerVT);
5792 SDValue SlideAmt =
5793 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
5794 SDValue SubV1 =
5795 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, V1,
5796 DAG.getVectorIdxConstant(0, DL));
5797 for (int i = 0; i < N; i++) {
5798 if (i != 0)
5799 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
5800 DAG.getUNDEF(IndexContainerVT), LHSIndices,
5801 SlideAmt, TrueMask, VL);
5802 SDValue SubIndex =
5803 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubIndexVT, LHSIndices,
5804 DAG.getVectorIdxConstant(0, DL));
5805 SDValue SubVec =
5806 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
5807 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
5808 SDValue SubIdx =
5810 Gather = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Gather,
5811 SubVec, SubIdx);
5812 }
5813 } else {
5814 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5815 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5816 }
5817 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5818 }
5819
5820 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5821 // merged with a second vrgather.
5822 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5823
5824 // Now construct the mask that will be used by the blended vrgather operation.
5825 // Construct the appropriate indices into each vector.
5826 for (int MaskIndex : Mask) {
5827 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5828 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5829 ? MaskIndex : -1);
5830 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5831 }
5832
5833 // If the mask indices are disjoint between the two sources, we can lower it
5834 // as a vselect + a single source vrgather.vv. Don't do this if we think the
5835 // operands may end up being lowered to something cheaper than a vrgather.vv.
5836 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
5837 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
5838 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
5839 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
5840 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
5841 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
5842 return V;
5843
5844 // Before hitting generic lowering fallbacks, try to widen the mask
5845 // to a wider SEW.
5846 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5847 return V;
5848
5849 // Try to pick a profitable operand order.
5850 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5851 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5852
5853 // Recursively invoke lowering for each operand if we had two
5854 // independent single source shuffles, and then combine the result via a
5855 // vselect. Note that the vselect will likely be folded back into the
5856 // second permute (vrgather, or other) by the post-isel combine.
5857 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5858 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5859
5860 SmallVector<SDValue> MaskVals;
5861 for (int MaskIndex : Mask) {
5862 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5863 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5864 }
5865
5866 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5867 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5868 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5869
5870 if (SwapOps)
5871 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5872 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5873}
5874
// NOTE(review): the opening signature line (5875) is missing from this view;
// from the body this appears to be RISCVTargetLowering's shuffle-mask
// legality hook (mask `M`, value type `VT`) — TODO confirm against the header.
5876 // Only support legal VTs for other shuffles for now.
5877 if (!isTypeLegal(VT))
5878 return false;
5879
5880 // Support splats for any type. These should type legalize well.
// NOTE(review): the condition guarding this `return true` (line 5881) was
// lost in extraction; presumably a splat-mask check — verify upstream.
5882 return true;
5883
5884 const unsigned NumElts = M.size();
5885 MVT SVT = VT.getSimpleVT();
5886
5887 // Not for i1 vectors.
5888 if (SVT.getScalarType() == MVT::i1)
5889 return false;
5890
// Accept masks this backend lowers directly: whole-vector reverse,
// element rotations, and (de)interleave patterns the subtarget supports.
// Dummy1/Dummy2 receive rotation amounts / interleave factors we discard.
5891 int Dummy1, Dummy2;
5892 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
5893 (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5894 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5895}
5896
5897// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5898// the exponent.
// Strategy visible below: an unsigned int->float conversion places
// floor(log2(x)) in the float's exponent field; shifting the exponent down
// and removing the bias yields the bit index, from which both leading- and
// trailing-zero counts are derived. Handles both plain and VP opcodes.
5899 SDValue
5900 RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5901 SelectionDAG &DAG) const {
5902 MVT VT = Op.getSimpleValueType();
5903 unsigned EltSize = VT.getScalarSizeInBits();
5904 SDValue Src = Op.getOperand(0);
5905 SDLoc DL(Op);
5906 MVT ContainerVT = VT;
5907
// VP forms carry an explicit mask (operand 1) and EVL (operand 2).
5908 SDValue Mask, VL;
5909 if (Op->isVPOpcode()) {
5910 Mask = Op.getOperand(1);
5911 if (VT.isFixedLengthVector())
5912 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5913 Subtarget);
5914 VL = Op.getOperand(2);
5915 }
5916
5917 // We choose FP type that can represent the value if possible. Otherwise, we
5918 // use rounding to zero conversion for correct exponent of the result.
5919 // TODO: Use f16 for i8 when possible?
5920 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5921 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5922 FloatEltVT = MVT::f32;
5923 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5924
5925 // Legal types should have been checked in the RISCVTargetLowering
5926 // constructor.
5927 // TODO: Splitting may make sense in some cases.
5928 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5929 "Expected legal float type!");
5930
5931 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5932 // The trailing zero count is equal to log2 of this single bit value.
5933 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5934 SDValue Neg = DAG.getNegative(Src, DL, VT);
5935 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5936 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5937 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5938 Src, Mask, VL);
5939 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5940 }
5941
5942 // We have a legal FP type, convert to it.
5943 SDValue FloatVal;
5944 if (FloatVT.bitsGT(VT)) {
// FP type is wide enough to hold every source value exactly; a plain
// unsigned conversion preserves the exponent we need.
5945 if (Op->isVPOpcode())
5946 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5947 else
5948 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5949 } else {
5950 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5951 if (VT.isFixedLengthVector()) {
5952 ContainerVT = getContainerForFixedLengthVector(VT);
5953 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5954 }
5955 if (!Op->isVPOpcode())
5956 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5957 SDValue RTZRM =
// NOTE(review): the initializer of RTZRM (line 5958) was lost in
// extraction; presumably a target constant selecting the round-toward-zero
// rounding mode for VFCVT_RM_F_XU_VL below — verify upstream.
5959 MVT ContainerFloatVT =
5960 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5961 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5962 Src, Mask, RTZRM, VL);
5963 if (VT.isFixedLengthVector())
5964 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5965 }
5966 // Bitcast to integer and shift the exponent to the LSB.
5967 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5968 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
// 52/23 = mantissa widths of f64/f32: shifting by them isolates the exponent.
5969 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5970
5971 SDValue Exp;
5972 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5973 if (Op->isVPOpcode()) {
5974 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5975 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5976 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5977 } else {
5978 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5979 DAG.getConstant(ShiftAmt, DL, IntVT));
5980 if (IntVT.bitsLT(VT))
5981 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5982 else if (IntVT.bitsGT(VT))
5983 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5984 }
5985
5986 // The exponent contains log2 of the value in biased form.
5987 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5988 // For trailing zeros, we just need to subtract the bias.
5989 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5990 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5991 DAG.getConstant(ExponentBias, DL, VT));
5992 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5993 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5994 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5995
5996 // For leading zeros, we need to remove the bias and convert from log2 to
5997 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5998 unsigned Adjust = ExponentBias + (EltSize - 1);
5999 SDValue Res;
6000 if (Op->isVPOpcode())
6001 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
6002 Mask, VL);
6003 else
6004 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
6005
6006 // The above result with zero input equals to Adjust which is greater than
6007 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
6008 if (Op.getOpcode() == ISD::CTLZ)
6009 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
6010 else if (Op.getOpcode() == ISD::VP_CTLZ)
6011 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
6012 DAG.getConstant(EltSize, DL, VT), Mask, VL);
6013 return Res;
6014}
6015
6016SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6017 SelectionDAG &DAG) const {
6018 SDLoc DL(Op);
6019 MVT XLenVT = Subtarget.getXLenVT();
6020 SDValue Source = Op->getOperand(0);
6021 MVT SrcVT = Source.getSimpleValueType();
6022 SDValue Mask = Op->getOperand(1);
6023 SDValue EVL = Op->getOperand(2);
6024
6025 if (SrcVT.isFixedLengthVector()) {
6026 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
6027 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
6028 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6029 Subtarget);
6030 SrcVT = ContainerVT;
6031 }
6032
6033 // Convert to boolean vector.
6034 if (SrcVT.getScalarType() != MVT::i1) {
6035 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
6036 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
6037 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
6038 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
6039 DAG.getUNDEF(SrcVT), Mask, EVL});
6040 }
6041
6042 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
6043 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6044 // In this case, we can interpret poison as -1, so nothing to do further.
6045 return Res;
6046
6047 // Convert -1 to VL.
6048 SDValue SetCC =
6049 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
6050 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
6051 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
6052}
6053
6054// While RVV has alignment restrictions, we should always be able to load as a
6055// legal equivalently-sized byte-typed vector instead. This method is
6056// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
6057// the load is already correctly-aligned, it returns SDValue().
6058 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6059 SelectionDAG &DAG) const {
6060 auto *Load = cast<LoadSDNode>(Op);
6061 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6062
// NOTE(review): the opening of this condition (line 6063) was lost in
// extraction; presumably an allowsMemoryAccessForAlignment(...) query that
// bails out (returns SDValue()) when the access is already legal — verify.
6064 Load->getMemoryVT(),
6065 *Load->getMemOperand()))
6066 return SDValue();
6067
6068 SDLoc DL(Op);
6069 MVT VT = Op.getSimpleValueType();
6070 unsigned EltSizeBits = VT.getScalarSizeInBits();
6071 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6072 "Unexpected unaligned RVV load type");
// Re-express as an i8 vector with the same total size: byte vectors carry
// no alignment requirement beyond 1.
6073 MVT NewVT =
6074 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6075 assert(NewVT.isValid() &&
6076 "Expecting equally-sized RVV vector types to be legal");
6077 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6078 Load->getPointerInfo(), Load->getOriginalAlign(),
6079 Load->getMemOperand()->getFlags());
// Return both the bitcast value and the load's chain.
6080 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6081}
6082
6083// While RVV has alignment restrictions, we should always be able to store as a
6084// legal equivalently-sized byte-typed vector instead. This method is
6085// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
6086// returns SDValue() if the store is already correctly aligned.
6087 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6088 SelectionDAG &DAG) const {
6089 auto *Store = cast<StoreSDNode>(Op);
6090 assert(Store && Store->getValue().getValueType().isVector() &&
6091 "Expected vector store");
6092
// NOTE(review): the opening of this condition (line 6093) was lost in
// extraction; presumably an allowsMemoryAccessForAlignment(...) query that
// bails out (returns SDValue()) when the access is already legal — verify.
6094 Store->getMemoryVT(),
6095 *Store->getMemOperand()))
6096 return SDValue();
6097
6098 SDLoc DL(Op);
6099 SDValue StoredVal = Store->getValue();
6100 MVT VT = StoredVal.getSimpleValueType();
6101 unsigned EltSizeBits = VT.getScalarSizeInBits();
6102 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6103 "Unexpected unaligned RVV store type");
// Same trick as the load path: bitcast to a same-sized i8 vector, which
// tolerates any alignment.
6104 MVT NewVT =
6105 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6106 assert(NewVT.isValid() &&
6107 "Expecting equally-sized RVV vector types to be legal");
6108 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6109 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
6110 Store->getPointerInfo(), Store->getOriginalAlign(),
6111 Store->getMemOperand()->getFlags());
6112}
6113
// NOTE(review): the opening signature line (6114) is missing from this view;
// from the body this is a static helper deciding whether a 64-bit integer
// constant stays as-is (isel materializes it) or is returned as SDValue() to
// force constant-pool lowering — confirm against upstream.
6115 const RISCVSubtarget &Subtarget) {
6116 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
6117
6118 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
6119
6120 // All simm32 constants should be handled by isel.
6121 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
6122 // this check redundant, but small immediates are common so this check
6123 // should have better compile time.
6124 if (isInt<32>(Imm))
6125 return Op;
6126
6127 // We only need to cost the immediate, if constant pool lowering is enabled.
6128 if (!Subtarget.useConstantPoolForLargeInts())
6129 return Op;
// NOTE(review): the materialization-sequence computation (line 6131,
// presumably RISCVMatInt::generateInstSeq producing `Seq`) was lost in
// extraction — verify upstream.
6131
6132 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6133 return Op;
6134
6135 // Optimizations below are disabled for opt size. If we're optimizing for
6136 // size, use a constant pool.
6137 if (DAG.shouldOptForSize())
6138 return SDValue();
6139
6140 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
6141 // that if it will avoid a constant pool.
6142 // It will require an extra temporary register though.
6143 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6144 // low and high 32 bits are the same and bit 31 and 63 are set.
6145 unsigned ShiftAmt, AddOpc;
6146 RISCVMatInt::InstSeq SeqLo =
6147 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
// +2 accounts for the extra shift and add needed on top of the low-half
// sequence.
6148 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6149 return Op;
6150
// Too expensive to build inline; caller should emit a constant-pool load.
6151 return SDValue();
6152}
6153
6154SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6155 SelectionDAG &DAG) const {
6156 MVT VT = Op.getSimpleValueType();
6157 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6158
6159 // Can this constant be selected by a Zfa FLI instruction?
6160 bool Negate = false;
6161 int Index = getLegalZfaFPImm(Imm, VT);
6162
6163 // If the constant is negative, try negating.
6164 if (Index < 0 && Imm.isNegative()) {
6165 Index = getLegalZfaFPImm(-Imm, VT);
6166 Negate = true;
6167 }
6168
6169 // If we couldn't find a FLI lowering, fall back to generic code.
6170 if (Index < 0)
6171 return SDValue();
6172
6173 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6174 SDLoc DL(Op);
6175 SDValue Const =
6176 DAG.getNode(RISCVISD::FLI, DL, VT,
6177 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6178 if (!Negate)
6179 return Const;
6180
6181 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6182}
6183
// NOTE(review): the opening signature line (6184) is missing from this view;
// from the body this is a static ATOMIC_FENCE lowering helper taking
// (SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &) — confirm upstream.
6185 const RISCVSubtarget &Subtarget) {
6186 SDLoc dl(Op);
// Operand 1 encodes the atomic ordering, operand 2 the synchronization scope.
6187 AtomicOrdering FenceOrdering =
6188 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6189 SyncScope::ID FenceSSID =
6190 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6191
// Under Ztso (total store ordering) most fences are implicit in hardware.
6192 if (Subtarget.hasStdExtZtso()) {
6193 // The only fence that needs an instruction is a sequentially-consistent
6194 // cross-thread fence.
6195 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6196 FenceSSID == SyncScope::System)
6197 return Op;
6198
6199 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6200 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6201 }
6202
6203 // singlethread fences only synchronize with signal handlers on the same
6204 // thread and thus only need to preserve instruction order, not actually
6205 // enforce memory ordering.
6206 if (FenceSSID == SyncScope::SingleThread)
6207 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6208 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6209
// Everything else keeps the fence node; it selects to a real fence insn.
6210 return Op;
6211}
6212
// Lower ISD::IS_FPCLASS / ISD::VP_IS_FPCLASS using the fclass/vfclass
// instructions: translate the IR fp-class test mask into the RISC-V fclass
// bit layout, classify, then test the relevant bits.
6213 SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6214 SelectionDAG &DAG) const {
6215 SDLoc DL(Op);
6216 MVT VT = Op.getSimpleValueType();
6217 MVT XLenVT = Subtarget.getXLenVT();
// Operand 1 holds the fcXXX class-test bitmask from the intrinsic.
6218 unsigned Check = Op.getConstantOperandVal(1);
6219 unsigned TDCMask = 0;
6220 if (Check & fcSNan)
6221 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6222 if (Check & fcQNan)
6223 TDCMask |= RISCV::FPMASK_Quiet_NaN;
// NOTE(review): the `TDCMask |= RISCV::FPMASK_...` lines for the six cases
// below (original lines 6225/6227/6229/6231/6233/6235: +/-inf, +/-normal,
// +/-subnormal) were lost in extraction — verify against upstream.
6224 if (Check & fcPosInf)
6226 if (Check & fcNegInf)
6228 if (Check & fcPosNormal)
6230 if (Check & fcNegNormal)
6232 if (Check & fcPosSubnormal)
6234 if (Check & fcNegSubnormal)
6236 if (Check & fcPosZero)
6237 TDCMask |= RISCV::FPMASK_Positive_Zero;
6238 if (Check & fcNegZero)
6239 TDCMask |= RISCV::FPMASK_Negative_Zero;
6240
// A single-bit mask allows an equality compare instead of and+setne.
6241 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6242
6243 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6244
6245 if (VT.isVector()) {
6246 SDValue Op0 = Op.getOperand(0);
6247 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6248
6249 if (VT.isScalableVector()) {
// NOTE(review): the declaration of DstVT (original line 6250, presumably
// the integer counterpart of VT0) was lost in extraction — verify.
6251 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6252 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6253 Mask = Op.getOperand(2);
6254 VL = Op.getOperand(3);
6255 }
6256 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6257 VL, Op->getFlags());
6258 if (IsOneBitMask)
6259 return DAG.getSetCC(DL, VT, FPCLASS,
6260 DAG.getConstant(TDCMask, DL, DstVT),
// NOTE(review): the condition code argument (line 6261, presumably
// ISD::SETEQ) was lost in extraction — verify.
6262 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6263 DAG.getConstant(TDCMask, DL, DstVT));
6264 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6265 ISD::SETNE);
6266 }
6267
// Fixed-length path: operate in scalable container types throughout.
6268 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6269 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6270 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6271 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6272 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6273 Mask = Op.getOperand(2);
6274 MVT MaskContainerVT =
6275 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6276 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6277 VL = Op.getOperand(3);
6278 }
6279 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6280
6281 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6282 Mask, VL, Op->getFlags());
6283
// Splat the class mask so it can be compared/anded lane-wise.
6284 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6285 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6286 if (IsOneBitMask) {
6287 SDValue VMSEQ =
6288 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6289 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6290 DAG.getUNDEF(ContainerVT), Mask, VL});
6291 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6292 }
6293 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6294 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6295
6296 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6297 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6298 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6299
6300 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6301 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6302 DAG.getUNDEF(ContainerVT), Mask, VL});
6303 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6304 }
6305
// Scalar path: fclass into an XLen register, test the mask bits.
6306 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6307 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
// NOTE(review): the final getSetCC argument (line 6309, presumably
// ISD::SETNE) was lost in extraction — verify.
6308 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6310 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6311}
6312
6313// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6314// operations propagate nans.
// NOTE(review): the opening signature line (6315) is missing from this view;
// from the body this is a static helper taking (SDValue Op, SelectionDAG &,
// const RISCVSubtarget &) handling FMAXIMUM/FMINIMUM and their VP forms —
// confirm upstream.
6316 const RISCVSubtarget &Subtarget) {
6317 SDLoc DL(Op);
6318 MVT VT = Op.getSimpleValueType();
6319
6320 SDValue X = Op.getOperand(0);
6321 SDValue Y = Op.getOperand(1);
6322
6323 if (!VT.isVector()) {
6324 MVT XLenVT = Subtarget.getXLenVT();
6325
6326 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6327 // ensures that when one input is a nan, the other will also be a nan
6328 // allowing the nan to propagate. If both inputs are nan, this will swap the
6329 // inputs which is harmless.
6330
6331 SDValue NewY = Y;
// X==X (SETOEQ against itself) is false exactly when X is a NaN.
6332 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6333 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6334 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6335 }
6336
6337 SDValue NewX = X;
6338 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6339 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6340 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6341 }
6342
6343 unsigned Opc =
6344 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6345 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6346 }
6347
6348 // Check no NaNs before converting to fixed vector scalable.
6349 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6350 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6351
6352 MVT ContainerVT = VT;
6353 if (VT.isFixedLengthVector()) {
6354 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6355 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6356 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6357 }
6358
// VP forms supply an explicit mask/EVL; otherwise use the defaults.
6359 SDValue Mask, VL;
6360 if (Op->isVPOpcode()) {
6361 Mask = Op.getOperand(2);
6362 if (VT.isFixedLengthVector())
6363 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6364 Subtarget);
6365 VL = Op.getOperand(3);
6366 } else {
6367 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6368 }
6369
// Same NaN-swapping trick as the scalar path, done lane-wise with
// SETCC_VL + VMERGE_VL.
6370 SDValue NewY = Y;
6371 if (!XIsNeverNan) {
6372 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6373 {X, X, DAG.getCondCode(ISD::SETOEQ),
6374 DAG.getUNDEF(ContainerVT), Mask, VL});
6375 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6376 DAG.getUNDEF(ContainerVT), VL);
6377 }
6378
6379 SDValue NewX = X;
6380 if (!YIsNeverNan) {
6381 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6382 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6383 DAG.getUNDEF(ContainerVT), Mask, VL});
6384 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6385 DAG.getUNDEF(ContainerVT), VL);
6386 }
6387
6388 unsigned Opc =
6389 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
// NOTE(review): the two branches of this conditional (lines 6390-6391,
// presumably RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL) were lost in
// extraction — verify upstream.
6392 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6393 DAG.getUNDEF(ContainerVT), Mask, VL);
6394 if (VT.isFixedLengthVector())
6395 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6396 return Res;
6397}
6398
// NOTE(review): the opening signature line (6399) is missing from this view;
// from the body this is a static helper lowering scalar f16/bf16 FABS/FNEG
// via integer bit manipulation — confirm upstream.
6400 const RISCVSubtarget &Subtarget) {
6401 bool IsFABS = Op.getOpcode() == ISD::FABS;
6402 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6403 "Wrong opcode for lowering FABS or FNEG.");
6404
6405 MVT XLenVT = Subtarget.getXLenVT();
6406 MVT VT = Op.getSimpleValueType();
6407 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6408
6409 SDLoc DL(Op);
// Move the half-precision bits into an integer register (any-extended).
6410 SDValue Fmv =
6411 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6412
// FABS clears the sign bit (AND with 0x7FFF); FNEG flips it (XOR with
// 0x8000). Sign-extension makes the constant XLen-wide.
6413 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6414 Mask = Mask.sext(Subtarget.getXLen());
6415
6416 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6417 SDValue Logic =
6418 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
// Move the adjusted bits back into the FP register file.
6419 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6420}
6421
// NOTE(review): the opening signature line (6422) is missing from this view;
// from the body this is a static helper lowering scalar f16/bf16 FCOPYSIGN
// (magnitude from operand 0, sign from operand 1) via integer ops — confirm.
6423 const RISCVSubtarget &Subtarget) {
6424 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6425
6426 MVT XLenVT = Subtarget.getXLenVT();
6427 MVT VT = Op.getSimpleValueType();
6428 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6429
6430 SDValue Mag = Op.getOperand(0);
6431 SDValue Sign = Op.getOperand(1);
6432
6433 SDLoc DL(Op);
6434
6435 // Get sign bit into an integer value.
// The sign operand may be any scalar FP width; pick the move that matches.
6436 SDValue SignAsInt;
6437 unsigned SignSize = Sign.getValueSizeInBits();
6438 if (SignSize == Subtarget.getXLen()) {
6439 SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
6440 } else if (SignSize == 16) {
6441 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
6442 } else if (SignSize == 32) {
6443 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
6444 } else if (SignSize == 64) {
6445 assert(XLenVT == MVT::i32 && "Unexpected type");
6446 // Copy the upper word to integer.
6447 SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
6448 .getValue(1);
6449 SignSize = 32;
6450 } else
6451 llvm_unreachable("Unexpected sign size");
6452
6453 // Get the signbit at the right position for MagAsInt.
6454 int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
6455 if (ShiftAmount > 0) {
6456 SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
6457 DAG.getConstant(ShiftAmount, DL, XLenVT));
6458 } else if (ShiftAmount < 0) {
6459 SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
6460 DAG.getConstant(-ShiftAmount, DL, XLenVT));
6461 }
6462
6463 // Mask the sign bit and any bits above it. The extra bits will be dropped
6464 // when we convert back to FP.
6465 SDValue SignMask = DAG.getConstant(
6466 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
6467 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
6468
6469 // Transform Mag value to integer, and clear the sign bit.
6470 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
6471 SDValue ClearSignMask = DAG.getConstant(
6472 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
6473 SDValue ClearedSign =
6474 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
6475
// NOTE(review): the trailing argument of this getNode call (line 6477,
// presumably an SDNodeFlags "disjoint" annotation for the OR) was lost in
// extraction — verify upstream.
6476 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
6478
// Reassemble the FP value from the merged integer bits.
6479 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
6480}
6481
6482 /// Get a RISC-V target specified VL op for a given SDNode.
// Translates a generic ISD opcode -- or its VP_* (vector-predicated)
// variant -- into the matching RISCVISD::*_VL opcode used for RVV lowering.
// Both the plain and the VP_ form of an operation map to the same *_VL node.
// Hits llvm_unreachable for opcodes without a VL equivalent.
6483 static unsigned getRISCVVLOp(SDValue Op) {
// OP_CASE(NODE): maps ISD::NODE    -> RISCVISD::NODE_VL.
// VP_CASE(NODE): maps ISD::VP_NODE -> RISCVISD::NODE_VL.
6484 #define OP_CASE(NODE)                                                          \
6485   case ISD::NODE:                                                              \
6486     return RISCVISD::NODE##_VL;
6487 #define VP_CASE(NODE)                                                          \
6488   case ISD::VP_##NODE:                                                         \
6489     return RISCVISD::NODE##_VL;
6490   // clang-format off
6491   switch (Op.getOpcode()) {
6492   default:
6493     llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
6494   OP_CASE(ADD)
6495   OP_CASE(SUB)
6496   OP_CASE(MUL)
6497   OP_CASE(MULHS)
6498   OP_CASE(MULHU)
6499   OP_CASE(SDIV)
6500   OP_CASE(SREM)
6501   OP_CASE(UDIV)
6502   OP_CASE(UREM)
6503   OP_CASE(SHL)
6504   OP_CASE(SRA)
6505   OP_CASE(SRL)
6506   OP_CASE(ROTL)
6507   OP_CASE(ROTR)
6508   OP_CASE(BSWAP)
6509   OP_CASE(CTTZ)
6510   OP_CASE(CTLZ)
6511   OP_CASE(CTPOP)
6512   OP_CASE(BITREVERSE)
6513   OP_CASE(SADDSAT)
6514   OP_CASE(UADDSAT)
6515   OP_CASE(SSUBSAT)
6516   OP_CASE(USUBSAT)
6517   OP_CASE(AVGFLOORS)
6518   OP_CASE(AVGFLOORU)
6519   OP_CASE(AVGCEILS)
6520   OP_CASE(AVGCEILU)
6521   OP_CASE(FADD)
6522   OP_CASE(FSUB)
6523   OP_CASE(FMUL)
6524   OP_CASE(FDIV)
6525   OP_CASE(FNEG)
6526   OP_CASE(FABS)
6527   OP_CASE(FSQRT)
6528   OP_CASE(SMIN)
6529   OP_CASE(SMAX)
6530   OP_CASE(UMIN)
6531   OP_CASE(UMAX)
6532   OP_CASE(STRICT_FADD)
6533   OP_CASE(STRICT_FSUB)
6534   OP_CASE(STRICT_FMUL)
6535   OP_CASE(STRICT_FDIV)
6536   OP_CASE(STRICT_FSQRT)
6537   VP_CASE(ADD)        // VP_ADD
6538   VP_CASE(SUB)        // VP_SUB
6539   VP_CASE(MUL)        // VP_MUL
6540   VP_CASE(SDIV)       // VP_SDIV
6541   VP_CASE(SREM)       // VP_SREM
6542   VP_CASE(UDIV)       // VP_UDIV
6543   VP_CASE(UREM)       // VP_UREM
6544   VP_CASE(SHL)        // VP_SHL
6545   VP_CASE(FADD)       // VP_FADD
6546   VP_CASE(FSUB)       // VP_FSUB
6547   VP_CASE(FMUL)       // VP_FMUL
6548   VP_CASE(FDIV)       // VP_FDIV
6549   VP_CASE(FNEG)       // VP_FNEG
6550   VP_CASE(FABS)       // VP_FABS
6551   VP_CASE(SMIN)       // VP_SMIN
6552   VP_CASE(SMAX)       // VP_SMAX
6553   VP_CASE(UMIN)       // VP_UMIN
6554   VP_CASE(UMAX)       // VP_UMAX
6555   VP_CASE(FCOPYSIGN)  // VP_FCOPYSIGN
6556   VP_CASE(SETCC)      // VP_SETCC
6557   VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
6558   VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
6559   VP_CASE(BITREVERSE) // VP_BITREVERSE
6560   VP_CASE(SADDSAT)    // VP_SADDSAT
6561   VP_CASE(UADDSAT)    // VP_UADDSAT
6562   VP_CASE(SSUBSAT)    // VP_SSUBSAT
6563   VP_CASE(USUBSAT)    // VP_USUBSAT
6564   VP_CASE(BSWAP)      // VP_BSWAP
6565   VP_CASE(CTLZ)       // VP_CTLZ
6566   VP_CASE(CTTZ)       // VP_CTTZ
6567   VP_CASE(CTPOP)      // VP_CTPOP
// The ZERO_UNDEF counting variants share the plain CTLZ_VL/CTTZ_VL nodes.
// NOTE(review): original line 6568 appears elided in this dump.
6569   case ISD::VP_CTLZ_ZERO_UNDEF:
6570     return RISCVISD::CTLZ_VL;
// NOTE(review): original line 6571 appears elided in this dump.
6572   case ISD::VP_CTTZ_ZERO_UNDEF:
6573     return RISCVISD::CTTZ_VL;
6574   case ISD::FMA:
6575   case ISD::VP_FMA:
6576     return RISCVISD::VFMADD_VL;
6577   case ISD::STRICT_FMA:
// NOTE(review): the return for STRICT_FMA (original line 6578) appears
// elided in this dump -- presumably the strict VFMADD node; confirm upstream.
6579   case ISD::AND:
6580   case ISD::VP_AND:
// i1 vectors are RVV mask registers; logical ops on them must use the
// VMAND/VMOR/VMXOR mask forms rather than the integer *_VL forms.
6581     if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6582       return RISCVISD::VMAND_VL;
6583     return RISCVISD::AND_VL;
6584   case ISD::OR:
6585   case ISD::VP_OR:
6586     if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6587       return RISCVISD::VMOR_VL;
6588     return RISCVISD::OR_VL;
6589   case ISD::XOR:
6590   case ISD::VP_XOR:
6591     if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6592       return RISCVISD::VMXOR_VL;
6593     return RISCVISD::XOR_VL;
6594   case ISD::VP_SELECT:
6595   case ISD::VP_MERGE:
6596     return RISCVISD::VMERGE_VL;
6597   case ISD::VP_SRA:
6598     return RISCVISD::SRA_VL;
6599   case ISD::VP_SRL:
6600     return RISCVISD::SRL_VL;
6601   case ISD::VP_SQRT:
6602     return RISCVISD::FSQRT_VL;
6603   case ISD::VP_SIGN_EXTEND:
6604     return RISCVISD::VSEXT_VL;
6605   case ISD::VP_ZERO_EXTEND:
6606     return RISCVISD::VZEXT_VL;
6607   case ISD::VP_FP_TO_SINT:
// NOTE(review): the return for VP_FP_TO_SINT (original line 6608) appears
// elided in this dump.
6609   case ISD::VP_FP_TO_UINT:
// NOTE(review): the return for VP_FP_TO_UINT (original line 6610) appears
// elided in this dump.
6611   case ISD::FMINNUM:
6612   case ISD::VP_FMINNUM:
6613     return RISCVISD::VFMIN_VL;
6614   case ISD::FMAXNUM:
6615   case ISD::VP_FMAXNUM:
6616     return RISCVISD::VFMAX_VL;
6617   case ISD::LRINT:
6618   case ISD::VP_LRINT:
6619   case ISD::LLRINT:
6620   case ISD::VP_LLRINT:
// NOTE(review): the shared return for the [L]LRINT cases (original line
// 6621) appears elided in this dump.
6622   }
6623   // clang-format on
6624 #undef OP_CASE
6625 #undef VP_CASE
6626 }
6627
6628 /// Return true if a RISC-V target specified op has a passthru operand.
// The passthru (merge) operand supplies the result values for lanes the
// operation leaves undisturbed; presumably lanes that are masked off or
// beyond VL -- confirm against RISCVInstrInfoVVLPatterns.td.
6629 static bool hasPassthruOp(unsigned Opcode) {
6630   assert(Opcode > RISCVISD::FIRST_NUMBER &&
// NOTE(review): part of this assert's condition (original line 6631) appears
// elided in this dump.
6632          "not a RISC-V target specific op");
// Compile-time guard so that adding new RISCVISD opcodes forces a review of
// the enum-range checks below.
6633   static_assert(
// NOTE(review): the static_assert condition (original lines 6634-6635)
// appears elided in this dump.
6636       "adding target specific op should update this function");
// These checks rely on the declaration order of the RISCVISD enum: whole
// contiguous ranges plus a few individual opcodes carry a passthru operand.
6637   if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
6638     return true;
6639   if (Opcode == RISCVISD::FCOPYSIGN_VL)
6640     return true;
6641   if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
6642     return true;
6643   if (Opcode == RISCVISD::SETCC_VL)
6644     return true;
6645   if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6646     return true;
6647   if (Opcode == RISCVISD::VMERGE_VL)
6648     return true;
6649   return false;
6650 }
6651
6652 /// Return true if a RISC-V target specified op has a mask operand.
6653 static bool hasMaskOp(unsigned Opcode) {
6654   assert(Opcode > RISCVISD::FIRST_NUMBER &&
// NOTE(review): part of this assert's condition (original line 6655) appears
// elided in this dump.
6656          "not a RISC-V target specific op");
// Compile-time guard so that adding new RISCVISD opcodes forces a review of
// the enum-range checks below.
6657   static_assert(
// NOTE(review): the static_assert condition (original lines 6658-6659)
// appears elided in this dump.
6660       "adding target specific op should update this function");
// These checks rely on the declaration order of the RISCVISD enum: opcodes
// within the listed contiguous ranges take a mask operand.
6661   if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6662     return true;
6663   if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6664     return true;
6665   if (Opcode >= RISCVISD::STRICT_FADD_VL &&
// NOTE(review): the upper bound of this range check (original line 6666)
// appears elided in this dump.
6667     return true;
6668   return false;
6669 }
6670
// NOTE(review): the first line of this signature (original line 6671)
// appears elided in this dump; from its callers in LowerOperation this is
// isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget).
// Returns true when Op's vector type must be split before lowering:
// nxv32f16 with only minimal (zvfhmin-style) f16 vector support, or
// nxv32bf16 -- presumably because these are promoted to f32, which would
// exceed the largest legal vector type; confirm against the promotion code.
6672                                     const RISCVSubtarget &Subtarget) {
6673   if (Op.getValueType() == MVT::nxv32f16 &&
// Minimal f16 support without full f16 arithmetic: the op cannot be done
// directly in f16 vectors.
6674       (Subtarget.hasVInstructionsF16Minimal() &&
6675        !Subtarget.hasVInstructionsF16()))
6676     return true;
// bf16 vectors always take the promoted path.
6677   if (Op.getValueType() == MVT::nxv32bf16)
6678     return true;
6679   return false;
6680 }
6681
// NOTE(review): this function's signature line (original line 6682) appears
// elided in this dump; from its callers this is
// SplitVectorOp(SDValue Op, SelectionDAG &DAG).
// Lowers a vector op by splitting it into lo/hi halves, re-emitting the
// same opcode on each half, and concatenating the results.
6683   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6684   SDLoc DL(Op);
6685 
// NOTE(review): original lines 6686-6687 appear elided here; they declare
// the LoOperands/HiOperands arrays (sized Op.getNumOperands()) filled below.
6688 
// Scalar operands are shared unchanged by both halves; vector operands are
// split into their lo/hi parts.
6689   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6690     if (!Op.getOperand(j).getValueType().isVector()) {
6691       LoOperands[j] = Op.getOperand(j);
6692       HiOperands[j] = Op.getOperand(j);
6693       continue;
6694     }
6695     std::tie(LoOperands[j], HiOperands[j]) =
6696         DAG.SplitVector(Op.getOperand(j), DL);
6697   }
6698 
// Re-emit the opcode on each half, preserving the original node flags.
6699   SDValue LoRes =
6700       DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6701   SDValue HiRes =
6702       DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6703 
6704   return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6705 }
6706
// NOTE(review): this function's signature line (original line 6707) appears
// elided in this dump. This is the VP counterpart of SplitVectorOp: it
// splits a vector-predicated op into lo/hi halves, additionally splitting
// the explicit-vector-length (EVL) operand via SplitEVL.
6708   assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6709   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6710   SDLoc DL(Op);
6711 
// NOTE(review): original lines 6712-6713 appear elided here; they declare
// the LoOperands/HiOperands arrays (sized Op.getNumOperands()) filled below.
6714 
6715   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
// The EVL operand is scalar but still needs splitting: SplitEVL computes
// the effective lengths covered by the lo and hi halves.
6716     if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6717       std::tie(LoOperands[j], HiOperands[j]) =
6718           DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6719       continue;
6720     }
// Other scalar operands are shared; vector operands (including the mask)
// are split into lo/hi parts.
6721     if (!Op.getOperand(j).getValueType().isVector()) {
6722       LoOperands[j] = Op.getOperand(j);
6723       HiOperands[j] = Op.getOperand(j);
6724       continue;
6725     }
6726     std::tie(LoOperands[j], HiOperands[j]) =
6727         DAG.SplitVector(Op.getOperand(j), DL);
6728   }
6729 
// Re-emit the opcode on each half, preserving flags, then rejoin.
6730   SDValue LoRes =
6731       DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6732   SDValue HiRes =
6733       DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6734 
6735   return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6736 }
6737
// NOTE(review): this function's signature line (original line 6738) appears
// elided in this dump; from its caller (the VP_REDUCE_* lowering in
// LowerOperation) this is SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG).
// Splits a VP reduction whose operand layout is (start, vector, mask, evl).
6739   SDLoc DL(Op);
6740 
6741   auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6742   auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6743   auto [EVLLo, EVLHi] =
6744       DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6745 
// Chain the two halves: reduce the low half with the original start value,
// then feed its result in as the start value of the high-half reduction.
6746   SDValue ResLo =
6747       DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6748                   {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6749   return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6750                      {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6751 }
6752
// NOTE(review): this function's signature line (original line 6753) appears
// elided in this dump. It splits a strict-FP vector op into lo/hi halves,
// threading the FP chain from the low half into the high half so the two
// halves stay ordered.
6754 
6755   assert(Op->isStrictFPOpcode());
6756 
6757   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6758 
// Each half produces (vector result, chain): pair the split result type
// with the node's original second result type (the chain).
6759   SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6760   SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6761 
6762   SDLoc DL(Op);
6763 
// NOTE(review): original lines 6764-6765 appear elided here; they declare
// the LoOperands/HiOperands arrays (sized Op.getNumOperands()) filled below.
6766 
// Scalar operands (including the incoming chain at index 0) are shared;
// vector operands are split into lo/hi parts.
6767   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6768     if (!Op.getOperand(j).getValueType().isVector()) {
6769       LoOperands[j] = Op.getOperand(j);
6770       HiOperands[j] = Op.getOperand(j);
6771       continue;
6772     }
6773     std::tie(LoOperands[j], HiOperands[j]) =
6774         DAG.SplitVector(Op.getOperand(j), DL);
6775   }
6776 
6777   SDValue LoRes =
6778       DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
// Serialize on the chain: operand 0 of a strict-FP node is the chain, so
// the high half consumes the low half's output chain.
6779   HiOperands[0] = LoRes.getValue(1);
6780   SDValue HiRes =
6781       DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6782 
// Concatenate the vector results and return them together with the final
// chain produced by the high half.
6783   SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6784                           LoRes.getValue(0), HiRes.getValue(0));
6785   return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6786 }
6787
6789 SelectionDAG &DAG) const {
6790 switch (Op.getOpcode()) {
6791 default:
6792 report_fatal_error("unimplemented operand");
6793 case ISD::ATOMIC_FENCE:
6794 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6795 case ISD::GlobalAddress:
6796 return lowerGlobalAddress(Op, DAG);
6797 case ISD::BlockAddress:
6798 return lowerBlockAddress(Op, DAG);
6799 case ISD::ConstantPool:
6800 return lowerConstantPool(Op, DAG);
6801 case ISD::JumpTable:
6802 return lowerJumpTable(Op, DAG);
6804 return lowerGlobalTLSAddress(Op, DAG);
6805 case ISD::Constant:
6806 return lowerConstant(Op, DAG, Subtarget);
6807 case ISD::ConstantFP:
6808 return lowerConstantFP(Op, DAG);
6809 case ISD::SELECT:
6810 return lowerSELECT(Op, DAG);
6811 case ISD::BRCOND:
6812 return lowerBRCOND(Op, DAG);
6813 case ISD::VASTART:
6814 return lowerVASTART(Op, DAG);
6815 case ISD::FRAMEADDR:
6816 return lowerFRAMEADDR(Op, DAG);
6817 case ISD::RETURNADDR:
6818 return lowerRETURNADDR(Op, DAG);
6819 case ISD::SHL_PARTS:
6820 return lowerShiftLeftParts(Op, DAG);
6821 case ISD::SRA_PARTS:
6822 return lowerShiftRightParts(Op, DAG, true);
6823 case ISD::SRL_PARTS:
6824 return lowerShiftRightParts(Op, DAG, false);
6825 case ISD::ROTL:
6826 case ISD::ROTR:
6827 if (Op.getValueType().isFixedLengthVector()) {
6828 assert(Subtarget.hasStdExtZvkb());
6829 return lowerToScalableOp(Op, DAG);
6830 }
6831 assert(Subtarget.hasVendorXTHeadBb() &&
6832 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6833 "Unexpected custom legalization");
6834 // XTHeadBb only supports rotate by constant.
6835 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6836 return SDValue();
6837 return Op;
6838 case ISD::BITCAST: {
6839 SDLoc DL(Op);
6840 EVT VT = Op.getValueType();
6841 SDValue Op0 = Op.getOperand(0);
6842 EVT Op0VT = Op0.getValueType();
6843 MVT XLenVT = Subtarget.getXLenVT();
6844 if (Op0VT == MVT::i16 &&
6845 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
6846 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6847 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6848 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
6849 }
6850 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6851 Subtarget.hasStdExtFOrZfinx()) {
6852 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6853 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6854 }
6855 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
6856 Subtarget.hasStdExtDOrZdinx()) {
6857 SDValue Lo, Hi;
6858 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6859 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6860 }
6861
6862 // Consider other scalar<->scalar casts as legal if the types are legal.
6863 // Otherwise expand them.
6864 if (!VT.isVector() && !Op0VT.isVector()) {
6865 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6866 return Op;
6867 return SDValue();
6868 }
6869
6870 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6871 "Unexpected types");
6872
6873 if (VT.isFixedLengthVector()) {
6874 // We can handle fixed length vector bitcasts with a simple replacement
6875 // in isel.
6876 if (Op0VT.isFixedLengthVector())
6877 return Op;
6878 // When bitcasting from scalar to fixed-length vector, insert the scalar
6879 // into a one-element vector of the result type, and perform a vector
6880 // bitcast.
6881 if (!Op0VT.isVector()) {
6882 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6883 if (!isTypeLegal(BVT))
6884 return SDValue();
6885 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6886 DAG.getUNDEF(BVT), Op0,
6887 DAG.getVectorIdxConstant(0, DL)));
6888 }
6889 return SDValue();
6890 }
6891 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6892 // thus: bitcast the vector to a one-element vector type whose element type
6893 // is the same as the result type, and extract the first element.
6894 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6895 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6896 if (!isTypeLegal(BVT))
6897 return SDValue();
6898 SDValue BVec = DAG.getBitcast(BVT, Op0);
6899 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6900 DAG.getVectorIdxConstant(0, DL));
6901 }
6902 return SDValue();
6903 }
6905 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6907 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6909 return LowerINTRINSIC_VOID(Op, DAG);
6910 case ISD::IS_FPCLASS:
6911 return LowerIS_FPCLASS(Op, DAG);
6912 case ISD::BITREVERSE: {
6913 MVT VT = Op.getSimpleValueType();
6914 if (VT.isFixedLengthVector()) {
6915 assert(Subtarget.hasStdExtZvbb());
6916 return lowerToScalableOp(Op, DAG);
6917 }
6918 SDLoc DL(Op);
6919 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6920 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6921 // Expand bitreverse to a bswap(rev8) followed by brev8.
6922 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6923 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6924 }
6925 case ISD::TRUNCATE:
6928 // Only custom-lower vector truncates
6929 if (!Op.getSimpleValueType().isVector())
6930 return Op;
6931 return lowerVectorTruncLike(Op, DAG);
6932 case ISD::ANY_EXTEND:
6933 case ISD::ZERO_EXTEND:
6934 if (Op.getOperand(0).getValueType().isVector() &&
6935 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6936 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6937 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6938 case ISD::SIGN_EXTEND:
6939 if (Op.getOperand(0).getValueType().isVector() &&
6940 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6941 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6942 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6944 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6946 return lowerINSERT_VECTOR_ELT(Op, DAG);
6948 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6949 case ISD::SCALAR_TO_VECTOR: {
6950 MVT VT = Op.getSimpleValueType();
6951 SDLoc DL(Op);
6952 SDValue Scalar = Op.getOperand(0);
6953 if (VT.getVectorElementType() == MVT::i1) {
6954 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6955 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6956 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6957 }
6958 MVT ContainerVT = VT;
6959 if (VT.isFixedLengthVector())
6960 ContainerVT = getContainerForFixedLengthVector(VT);
6961 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6962
6963 SDValue V;
6964 if (VT.isFloatingPoint()) {
6965 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
6966 DAG.getUNDEF(ContainerVT), Scalar, VL);
6967 } else {
6968 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6969 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6970 DAG.getUNDEF(ContainerVT), Scalar, VL);
6971 }
6972 if (VT.isFixedLengthVector())
6973 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6974 return V;
6975 }
6976 case ISD::VSCALE: {
6977 MVT XLenVT = Subtarget.getXLenVT();
6978 MVT VT = Op.getSimpleValueType();
6979 SDLoc DL(Op);
6980 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6981 // We define our scalable vector types for lmul=1 to use a 64 bit known
6982 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6983 // vscale as VLENB / 8.
6984 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6985 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6986 report_fatal_error("Support for VLEN==32 is incomplete.");
6987 // We assume VLENB is a multiple of 8. We manually choose the best shift
6988 // here because SimplifyDemandedBits isn't always able to simplify it.
6989 uint64_t Val = Op.getConstantOperandVal(0);
6990 if (isPowerOf2_64(Val)) {
6991 uint64_t Log2 = Log2_64(Val);
6992 if (Log2 < 3)
6993 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6994 DAG.getConstant(3 - Log2, DL, VT));
6995 else if (Log2 > 3)
6996 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6997 DAG.getConstant(Log2 - 3, DL, XLenVT));
6998 } else if ((Val % 8) == 0) {
6999 // If the multiplier is a multiple of 8, scale it down to avoid needing
7000 // to shift the VLENB value.
7001 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
7002 DAG.getConstant(Val / 8, DL, XLenVT));
7003 } else {
7004 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7005 DAG.getConstant(3, DL, XLenVT));
7006 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
7007 DAG.getConstant(Val, DL, XLenVT));
7008 }
7009 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7010 }
7011 case ISD::FPOWI: {
7012 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
7013 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
7014 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
7015 Op.getOperand(1).getValueType() == MVT::i32) {
7016 SDLoc DL(Op);
7017 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7018 SDValue Powi =
7019 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
7020 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
7021 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7022 }
7023 return SDValue();
7024 }
7025 case ISD::FMAXIMUM:
7026 case ISD::FMINIMUM:
7027 if (isPromotedOpNeedingSplit(Op, Subtarget))
7028 return SplitVectorOp(Op, DAG);
7029 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7030 case ISD::FP_EXTEND:
7031 case ISD::FP_ROUND:
7032 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7035 return lowerStrictFPExtendOrRoundLike(Op, DAG);
7036 case ISD::SINT_TO_FP:
7037 case ISD::UINT_TO_FP:
7038 if (Op.getValueType().isVector() &&
7039 ((Op.getValueType().getScalarType() == MVT::f16 &&
7040 (Subtarget.hasVInstructionsF16Minimal() &&
7041 !Subtarget.hasVInstructionsF16())) ||
7042 Op.getValueType().getScalarType() == MVT::bf16)) {
7043 if (isPromotedOpNeedingSplit(Op, Subtarget))
7044 return SplitVectorOp(Op, DAG);
7045 // int -> f32
7046 SDLoc DL(Op);
7047 MVT NVT =
7048 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7049 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7050 // f32 -> [b]f16
7051 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7052 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7053 }
7054 [[fallthrough]];
7055 case ISD::FP_TO_SINT:
7056 case ISD::FP_TO_UINT:
7057 if (SDValue Op1 = Op.getOperand(0);
7058 Op1.getValueType().isVector() &&
7059 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7060 (Subtarget.hasVInstructionsF16Minimal() &&
7061 !Subtarget.hasVInstructionsF16())) ||
7062 Op1.getValueType().getScalarType() == MVT::bf16)) {
7063 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7064 return SplitVectorOp(Op, DAG);
7065 // [b]f16 -> f32
7066 SDLoc DL(Op);
7067 MVT NVT = MVT::getVectorVT(MVT::f32,
7068 Op1.getValueType().getVectorElementCount());
7069 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7070 // f32 -> int
7071 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
7072 }
7073 [[fallthrough]];
7078 // RVV can only do fp<->int conversions to types half/double the size as
7079 // the source. We custom-lower any conversions that do two hops into
7080 // sequences.
7081 MVT VT = Op.getSimpleValueType();
7082 if (VT.isScalarInteger())
7083 return lowerFP_TO_INT(Op, DAG, Subtarget);
7084 bool IsStrict = Op->isStrictFPOpcode();
7085 SDValue Src = Op.getOperand(0 + IsStrict);
7086 MVT SrcVT = Src.getSimpleValueType();
7087 if (SrcVT.isScalarInteger())
7088 return lowerINT_TO_FP(Op, DAG, Subtarget);
7089 if (!VT.isVector())
7090 return Op;
7091 SDLoc DL(Op);
7092 MVT EltVT = VT.getVectorElementType();
7093 MVT SrcEltVT = SrcVT.getVectorElementType();
7094 unsigned EltSize = EltVT.getSizeInBits();
7095 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
7096 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
7097 "Unexpected vector element types");
7098
7099 bool IsInt2FP = SrcEltVT.isInteger();
7100 // Widening conversions
7101 if (EltSize > (2 * SrcEltSize)) {
7102 if (IsInt2FP) {
7103 // Do a regular integer sign/zero extension then convert to float.
7104 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
7106 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
7107 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
7110 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
7111 if (IsStrict)
7112 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
7113 Op.getOperand(0), Ext);
7114 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
7115 }
7116 // FP2Int
7117 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
7118 // Do one doubling fp_extend then complete the operation by converting
7119 // to int.
7120 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7121 if (IsStrict) {
7122 auto [FExt, Chain] =
7123 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
7124 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
7125 }
7126 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7127 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7128 }
7129
7130 // Narrowing conversions
7131 if (SrcEltSize > (2 * EltSize)) {
7132 if (IsInt2FP) {
7133 // One narrowing int_to_fp, then an fp_round.
7134 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7135 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7136 if (IsStrict) {
7137 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7138 DAG.getVTList(InterimFVT, MVT::Other),
7139 Op.getOperand(0), Src);
7140 SDValue Chain = Int2FP.getValue(1);
7141 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7142 }
7143 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7144 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7145 }
7146 // FP2Int
7147 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7148 // representable by the integer, the result is poison.
7149 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7151 if (IsStrict) {
7152 SDValue FP2Int =
7153 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7154 Op.getOperand(0), Src);
7155 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7156 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7157 }
7158 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7159 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7160 }
7161
7162 // Scalable vectors can exit here. Patterns will handle equally-sized
7163 // conversions halving/doubling ones.
7164 if (!VT.isFixedLengthVector())
7165 return Op;
7166
7167 // For fixed-length vectors we lower to a custom "VL" node.
7168 unsigned RVVOpc = 0;
7169 switch (Op.getOpcode()) {
7170 default:
7171 llvm_unreachable("Impossible opcode");
7172 case ISD::FP_TO_SINT:
7174 break;
7175 case ISD::FP_TO_UINT:
7177 break;
7178 case ISD::SINT_TO_FP:
7179 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7180 break;
7181 case ISD::UINT_TO_FP:
7182 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7183 break;
7186 break;
7189 break;
7192 break;
7195 break;
7196 }
7197
7198 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7199 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7200 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7201 "Expected same element count");
7202
7203 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7204
7205 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7206 if (IsStrict) {
7207 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7208 Op.getOperand(0), Src, Mask, VL);
7209 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7210 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7211 }
7212 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7213 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7214 }
7217 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7218 case ISD::FP_TO_BF16: {
7219 // Custom lower to ensure the libcall return is passed in an FPR on hard
7220 // float ABIs.
7221 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7222 SDLoc DL(Op);
7223 MakeLibCallOptions CallOptions;
7224 RTLIB::Libcall LC =
7225 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7226 SDValue Res =
7227 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7228 if (Subtarget.is64Bit())
7229 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7230 return DAG.getBitcast(MVT::i32, Res);
7231 }
7232 case ISD::BF16_TO_FP: {
7233 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7234 MVT VT = Op.getSimpleValueType();
7235 SDLoc DL(Op);
7236 Op = DAG.getNode(
7237 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7238 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7239 SDValue Res = Subtarget.is64Bit()
7240 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7241 : DAG.getBitcast(MVT::f32, Op);
7242 // fp_extend if the target VT is bigger than f32.
7243 if (VT != MVT::f32)
7244 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7245 return Res;
7246 }
7248 case ISD::FP_TO_FP16: {
7249 // Custom lower to ensure the libcall return is passed in an FPR on hard
7250 // float ABIs.
7251 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7252 SDLoc DL(Op);
7253 MakeLibCallOptions CallOptions;
7254 bool IsStrict = Op->isStrictFPOpcode();
7255 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7256 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7257 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7258 SDValue Res;
7259 std::tie(Res, Chain) =
7260 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7261 if (Subtarget.is64Bit())
7262 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7263 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7264 if (IsStrict)
7265 return DAG.getMergeValues({Result, Chain}, DL);
7266 return Result;
7267 }
7269 case ISD::FP16_TO_FP: {
7270 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7271 // float ABIs.
7272 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7273 SDLoc DL(Op);
7274 MakeLibCallOptions CallOptions;
7275 bool IsStrict = Op->isStrictFPOpcode();
7276 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7277 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7278 SDValue Arg = Subtarget.is64Bit()
7279 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7280 : DAG.getBitcast(MVT::f32, Op0);
7281 SDValue Res;
7282 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7283 CallOptions, DL, Chain);
7284 if (IsStrict)
7285 return DAG.getMergeValues({Res, Chain}, DL);
7286 return Res;
7287 }
7288 case ISD::FTRUNC:
7289 case ISD::FCEIL:
7290 case ISD::FFLOOR:
7291 case ISD::FNEARBYINT:
7292 case ISD::FRINT:
7293 case ISD::FROUND:
7294 case ISD::FROUNDEVEN:
7295 if (isPromotedOpNeedingSplit(Op, Subtarget))
7296 return SplitVectorOp(Op, DAG);
7297 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7298 case ISD::LRINT:
7299 case ISD::LLRINT:
7300 if (Op.getValueType().isVector())
7301 return lowerVectorXRINT(Op, DAG, Subtarget);
7302 [[fallthrough]];
7303 case ISD::LROUND:
7304 case ISD::LLROUND: {
7305 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7306 "Unexpected custom legalisation");
7307 SDLoc DL(Op);
7308 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7309 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7310 }
7311 case ISD::STRICT_LRINT:
7312 case ISD::STRICT_LLRINT:
7313 case ISD::STRICT_LROUND:
7314 case ISD::STRICT_LLROUND: {
7315 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7316 "Unexpected custom legalisation");
7317 SDLoc DL(Op);
7318 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7319 {Op.getOperand(0), Op.getOperand(1)});
7320 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7321 {Ext.getValue(1), Ext.getValue(0)});
7322 }
7323 case ISD::VECREDUCE_ADD:
7328 return lowerVECREDUCE(Op, DAG);
7329 case ISD::VECREDUCE_AND:
7330 case ISD::VECREDUCE_OR:
7331 case ISD::VECREDUCE_XOR:
7332 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7333 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7334 return lowerVECREDUCE(Op, DAG);
7341 return lowerFPVECREDUCE(Op, DAG);
7342 case ISD::VP_REDUCE_ADD:
7343 case ISD::VP_REDUCE_UMAX:
7344 case ISD::VP_REDUCE_SMAX:
7345 case ISD::VP_REDUCE_UMIN:
7346 case ISD::VP_REDUCE_SMIN:
7347 case ISD::VP_REDUCE_FADD:
7348 case ISD::VP_REDUCE_SEQ_FADD:
7349 case ISD::VP_REDUCE_FMIN:
7350 case ISD::VP_REDUCE_FMAX:
7351 case ISD::VP_REDUCE_FMINIMUM:
7352 case ISD::VP_REDUCE_FMAXIMUM:
7353 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7354 return SplitVectorReductionOp(Op, DAG);
7355 return lowerVPREDUCE(Op, DAG);
7356 case ISD::VP_REDUCE_AND:
7357 case ISD::VP_REDUCE_OR:
7358 case ISD::VP_REDUCE_XOR:
7359 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7360 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7361 return lowerVPREDUCE(Op, DAG);
7362 case ISD::VP_CTTZ_ELTS:
7363 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7364 return lowerVPCttzElements(Op, DAG);
7365 case ISD::UNDEF: {
7366 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7367 return convertFromScalableVector(Op.getSimpleValueType(),
7368 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7369 }
7371 return lowerINSERT_SUBVECTOR(Op, DAG);
7373 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7375 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7377 return lowerVECTOR_INTERLEAVE(Op, DAG);
7378 case ISD::STEP_VECTOR:
7379 return lowerSTEP_VECTOR(Op, DAG);
7381 return lowerVECTOR_REVERSE(Op, DAG);
7382 case ISD::VECTOR_SPLICE:
7383 return lowerVECTOR_SPLICE(Op, DAG);
7384 case ISD::BUILD_VECTOR:
7385 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7386 case ISD::SPLAT_VECTOR: {
7387 MVT VT = Op.getSimpleValueType();
7388 MVT EltVT = VT.getVectorElementType();
7389 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7390 EltVT == MVT::bf16) {
7391 SDLoc DL(Op);
7392 SDValue Elt;
7393 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7394 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7395 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7396 Op.getOperand(0));
7397 else
7398 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7399 MVT IVT = VT.changeVectorElementType(MVT::i16);
7400 return DAG.getNode(ISD::BITCAST, DL, VT,
7401 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7402 }
7403
7404 if (EltVT == MVT::i1)
7405 return lowerVectorMaskSplat(Op, DAG);
7406 return SDValue();
7407 }
7409 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7410 case ISD::CONCAT_VECTORS: {
7411 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7412 // better than going through the stack, as the default expansion does.
7413 SDLoc DL(Op);
7414 MVT VT = Op.getSimpleValueType();
7415 MVT ContainerVT = VT;
7416 if (VT.isFixedLengthVector())
7417 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7418
7419 // Recursively split concat_vectors with more than 2 operands:
7420 //
7421 // concat_vector op1, op2, op3, op4
7422 // ->
7423 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7424 //
7425 // This reduces the length of the chain of vslideups and allows us to
7426 // perform the vslideups at a smaller LMUL, limited to MF2.
7427 if (Op.getNumOperands() > 2 &&
7428 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
7429 MVT HalfVT = VT.getHalfNumVectorElementsVT();
7431 size_t HalfNumOps = Op.getNumOperands() / 2;
7432 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7433 Op->ops().take_front(HalfNumOps));
7434 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7435 Op->ops().drop_front(HalfNumOps));
7436 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7437 }
7438
7439 unsigned NumOpElts =
7440 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7441 SDValue Vec = DAG.getUNDEF(VT);
7442 for (const auto &OpIdx : enumerate(Op->ops())) {
7443 SDValue SubVec = OpIdx.value();
7444 // Don't insert undef subvectors.
7445 if (SubVec.isUndef())
7446 continue;
7447 Vec =
7448 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
7449 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
7450 }
7451 return Vec;
7452 }
7453 case ISD::LOAD: {
7454 auto *Load = cast<LoadSDNode>(Op);
7455 EVT VecTy = Load->getMemoryVT();
7456 // Handle normal vector tuple load.
7457 if (VecTy.isRISCVVectorTuple()) {
7458 SDLoc DL(Op);
7459 MVT XLenVT = Subtarget.getXLenVT();
7460 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7461 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7462 unsigned NumElts = Sz / (NF * 8);
7463 int Log2LMUL = Log2_64(NumElts) - 3;
7464
7465 auto Flag = SDNodeFlags();
7466 Flag.setNoUnsignedWrap(true);
7467 SDValue Ret = DAG.getUNDEF(VecTy);
7468 SDValue BasePtr = Load->getBasePtr();
7469 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7470 VROffset =
7471 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7472 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7473 SmallVector<SDValue, 8> OutChains;
7474
7475 // Load NF vector registers and combine them to a vector tuple.
7476 for (unsigned i = 0; i < NF; ++i) {
7477 SDValue LoadVal = DAG.getLoad(
7478 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
7479 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
7480 OutChains.push_back(LoadVal.getValue(1));
7481 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal,
7482 DAG.getVectorIdxConstant(i, DL));
7483 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7484 }
7485 return DAG.getMergeValues(
7486 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
7487 }
7488
7489 if (auto V = expandUnalignedRVVLoad(Op, DAG))
7490 return V;
7491 if (Op.getValueType().isFixedLengthVector())
7492 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7493 return Op;
7494 }
7495 case ISD::STORE: {
7496 auto *Store = cast<StoreSDNode>(Op);
7497 SDValue StoredVal = Store->getValue();
7498 EVT VecTy = StoredVal.getValueType();
7499 // Handle normal vector tuple store.
7500 if (VecTy.isRISCVVectorTuple()) {
7501 SDLoc DL(Op);
7502 MVT XLenVT = Subtarget.getXLenVT();
7503 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7504 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7505 unsigned NumElts = Sz / (NF * 8);
7506 int Log2LMUL = Log2_64(NumElts) - 3;
7507
7508 auto Flag = SDNodeFlags();
7509 Flag.setNoUnsignedWrap(true);
7510 SDValue Ret;
7511 SDValue Chain = Store->getChain();
7512 SDValue BasePtr = Store->getBasePtr();
7513 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7514 VROffset =
7515 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7516 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7517
7518 // Extract subregisters in a vector tuple and store them individually.
7519 for (unsigned i = 0; i < NF; ++i) {
7520 auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
7521 MVT::getScalableVectorVT(MVT::i8, NumElts),
7522 StoredVal, DAG.getVectorIdxConstant(i, DL));
7523 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
7524 MachinePointerInfo(Store->getAddressSpace()),
7525 Store->getOriginalAlign(),
7526 Store->getMemOperand()->getFlags());
7527 Chain = Ret.getValue(0);
7528 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7529 }
7530 return Ret;
7531 }
7532
7533 if (auto V = expandUnalignedRVVStore(Op, DAG))
7534 return V;
7535 if (Op.getOperand(1).getValueType().isFixedLengthVector())
7536 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
7537 return Op;
7538 }
7539 case ISD::MLOAD:
7540 case ISD::VP_LOAD:
7541 return lowerMaskedLoad(Op, DAG);
7542 case ISD::MSTORE:
7543 case ISD::VP_STORE:
7544 return lowerMaskedStore(Op, DAG);
7546 return lowerVectorCompress(Op, DAG);
7547 case ISD::SELECT_CC: {
7548 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
7549 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
7550 // into separate SETCC+SELECT just like LegalizeDAG.
7551 SDValue Tmp1 = Op.getOperand(0);
7552 SDValue Tmp2 = Op.getOperand(1);
7553 SDValue True = Op.getOperand(2);
7554 SDValue False = Op.getOperand(3);
7555 EVT VT = Op.getValueType();
7556 SDValue CC = Op.getOperand(4);
7557 EVT CmpVT = Tmp1.getValueType();
7558 EVT CCVT =
7559 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
7560 SDLoc DL(Op);
7561 SDValue Cond =
7562 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
7563 return DAG.getSelect(DL, VT, Cond, True, False);
7564 }
7565 case ISD::SETCC: {
7566 MVT OpVT = Op.getOperand(0).getSimpleValueType();
7567 if (OpVT.isScalarInteger()) {
7568 MVT VT = Op.getSimpleValueType();
7569 SDValue LHS = Op.getOperand(0);
7570 SDValue RHS = Op.getOperand(1);
7571 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7572 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
7573 "Unexpected CondCode");
7574
7575 SDLoc DL(Op);
7576
7577 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
7578 // convert this to the equivalent of (set(u)ge X, C+1) by using
7579 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
7580 // in a register.
7581 if (isa<ConstantSDNode>(RHS)) {
7582 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
7583 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
7584 // If this is an unsigned compare and the constant is -1, incrementing
7585 // the constant would change behavior. The result should be false.
7586 if (CCVal == ISD::SETUGT && Imm == -1)
7587 return DAG.getConstant(0, DL, VT);
7588 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
7589 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7590 SDValue SetCC = DAG.getSetCC(
7591 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
7592 return DAG.getLogicalNOT(DL, SetCC, VT);
7593 }
7594 }
7595
7596 // Not a constant we could handle, swap the operands and condition code to
7597 // SETLT/SETULT.
7598 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7599 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
7600 }
7601
7602 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7603 return SplitVectorOp(Op, DAG);
7604
7605 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
7606 }
7607 case ISD::ADD:
7608 case ISD::SUB:
7609 case ISD::MUL:
7610 case ISD::MULHS:
7611 case ISD::MULHU:
7612 case ISD::AND:
7613 case ISD::OR:
7614 case ISD::XOR:
7615 case ISD::SDIV:
7616 case ISD::SREM:
7617 case ISD::UDIV:
7618 case ISD::UREM:
7619 case ISD::BSWAP:
7620 case ISD::CTPOP:
7621 return lowerToScalableOp(Op, DAG);
7622 case ISD::SHL:
7623 case ISD::SRA:
7624 case ISD::SRL:
7625 if (Op.getSimpleValueType().isFixedLengthVector())
7626 return lowerToScalableOp(Op, DAG);
7627 // This can be called for an i32 shift amount that needs to be promoted.
7628 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7629 "Unexpected custom legalisation");
7630 return SDValue();
7631 case ISD::FABS:
7632 case ISD::FNEG:
7633 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7634 return lowerFABSorFNEG(Op, DAG, Subtarget);
7635 [[fallthrough]];
7636 case ISD::FADD:
7637 case ISD::FSUB:
7638 case ISD::FMUL:
7639 case ISD::FDIV:
7640 case ISD::FSQRT:
7641 case ISD::FMA:
7642 case ISD::FMINNUM:
7643 case ISD::FMAXNUM:
7644 if (isPromotedOpNeedingSplit(Op, Subtarget))
7645 return SplitVectorOp(Op, DAG);
7646 [[fallthrough]];
7647 case ISD::AVGFLOORS:
7648 case ISD::AVGFLOORU:
7649 case ISD::AVGCEILS:
7650 case ISD::AVGCEILU:
7651 case ISD::SMIN:
7652 case ISD::SMAX:
7653 case ISD::UMIN:
7654 case ISD::UMAX:
7655 case ISD::UADDSAT:
7656 case ISD::USUBSAT:
7657 case ISD::SADDSAT:
7658 case ISD::SSUBSAT:
7659 return lowerToScalableOp(Op, DAG);
7660 case ISD::ABDS:
7661 case ISD::ABDU: {
7662 SDLoc dl(Op);
7663 EVT VT = Op->getValueType(0);
7664 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7665 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7666 bool IsSigned = Op->getOpcode() == ISD::ABDS;
7667
7668 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7669 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
7670 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7671 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7672 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7673 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7674 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7675 }
7676 case ISD::ABS:
7677 case ISD::VP_ABS:
7678 return lowerABS(Op, DAG);
7679 case ISD::CTLZ:
7681 case ISD::CTTZ:
7683 if (Subtarget.hasStdExtZvbb())
7684 return lowerToScalableOp(Op, DAG);
7685 assert(Op.getOpcode() != ISD::CTTZ);
7686 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7687 case ISD::VSELECT:
7688 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7689 case ISD::FCOPYSIGN:
7690 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7691 return lowerFCOPYSIGN(Op, DAG, Subtarget);
7692 if (isPromotedOpNeedingSplit(Op, Subtarget))
7693 return SplitVectorOp(Op, DAG);
7694 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7695 case ISD::STRICT_FADD:
7696 case ISD::STRICT_FSUB:
7697 case ISD::STRICT_FMUL:
7698 case ISD::STRICT_FDIV:
7699 case ISD::STRICT_FSQRT:
7700 case ISD::STRICT_FMA:
7701 if (isPromotedOpNeedingSplit(Op, Subtarget))
7702 return SplitStrictFPVectorOp(Op, DAG);
7703 return lowerToScalableOp(Op, DAG);
7704 case ISD::STRICT_FSETCC:
7706 return lowerVectorStrictFSetcc(Op, DAG);
7707 case ISD::STRICT_FCEIL:
7708 case ISD::STRICT_FRINT:
7709 case ISD::STRICT_FFLOOR:
7710 case ISD::STRICT_FTRUNC:
7712 case ISD::STRICT_FROUND:
7714 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7715 case ISD::MGATHER:
7716 case ISD::VP_GATHER:
7717 return lowerMaskedGather(Op, DAG);
7718 case ISD::MSCATTER:
7719 case ISD::VP_SCATTER:
7720 return lowerMaskedScatter(Op, DAG);
7721 case ISD::GET_ROUNDING:
7722 return lowerGET_ROUNDING(Op, DAG);
7723 case ISD::SET_ROUNDING:
7724 return lowerSET_ROUNDING(Op, DAG);
7725 case ISD::EH_DWARF_CFA:
7726 return lowerEH_DWARF_CFA(Op, DAG);
7727 case ISD::VP_MERGE:
7728 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7729 return lowerVPMergeMask(Op, DAG);
7730 [[fallthrough]];
7731 case ISD::VP_SELECT:
7732 case ISD::VP_ADD:
7733 case ISD::VP_SUB:
7734 case ISD::VP_MUL:
7735 case ISD::VP_SDIV:
7736 case ISD::VP_UDIV:
7737 case ISD::VP_SREM:
7738 case ISD::VP_UREM:
7739 case ISD::VP_UADDSAT:
7740 case ISD::VP_USUBSAT:
7741 case ISD::VP_SADDSAT:
7742 case ISD::VP_SSUBSAT:
7743 case ISD::VP_LRINT:
7744 case ISD::VP_LLRINT:
7745 return lowerVPOp(Op, DAG);
7746 case ISD::VP_AND:
7747 case ISD::VP_OR:
7748 case ISD::VP_XOR:
7749 return lowerLogicVPOp(Op, DAG);
7750 case ISD::VP_FADD:
7751 case ISD::VP_FSUB:
7752 case ISD::VP_FMUL:
7753 case ISD::VP_FDIV:
7754 case ISD::VP_FNEG:
7755 case ISD::VP_FABS:
7756 case ISD::VP_SQRT:
7757 case ISD::VP_FMA:
7758 case ISD::VP_FMINNUM:
7759 case ISD::VP_FMAXNUM:
7760 case ISD::VP_FCOPYSIGN:
7761 if (isPromotedOpNeedingSplit(Op, Subtarget))
7762 return SplitVPOp(Op, DAG);
7763 [[fallthrough]];
7764 case ISD::VP_SRA:
7765 case ISD::VP_SRL:
7766 case ISD::VP_SHL:
7767 return lowerVPOp(Op, DAG);
7768 case ISD::VP_IS_FPCLASS:
7769 return LowerIS_FPCLASS(Op, DAG);
7770 case ISD::VP_SIGN_EXTEND:
7771 case ISD::VP_ZERO_EXTEND:
7772 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7773 return lowerVPExtMaskOp(Op, DAG);
7774 return lowerVPOp(Op, DAG);
7775 case ISD::VP_TRUNCATE:
7776 return lowerVectorTruncLike(Op, DAG);
7777 case ISD::VP_FP_EXTEND:
7778 case ISD::VP_FP_ROUND:
7779 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7780 case ISD::VP_SINT_TO_FP:
7781 case ISD::VP_UINT_TO_FP:
7782 if (Op.getValueType().isVector() &&
7783 ((Op.getValueType().getScalarType() == MVT::f16 &&
7784 (Subtarget.hasVInstructionsF16Minimal() &&
7785 !Subtarget.hasVInstructionsF16())) ||
7786 Op.getValueType().getScalarType() == MVT::bf16)) {
7787 if (isPromotedOpNeedingSplit(Op, Subtarget))
7788 return SplitVectorOp(Op, DAG);
7789 // int -> f32
7790 SDLoc DL(Op);
7791 MVT NVT =
7792 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7793 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7794 // f32 -> [b]f16
7795 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7796 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7797 }
7798 [[fallthrough]];
7799 case ISD::VP_FP_TO_SINT:
7800 case ISD::VP_FP_TO_UINT:
7801 if (SDValue Op1 = Op.getOperand(0);
7802 Op1.getValueType().isVector() &&
7803 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7804 (Subtarget.hasVInstructionsF16Minimal() &&
7805 !Subtarget.hasVInstructionsF16())) ||
7806 Op1.getValueType().getScalarType() == MVT::bf16)) {
7807 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7808 return SplitVectorOp(Op, DAG);
7809 // [b]f16 -> f32
7810 SDLoc DL(Op);
7811 MVT NVT = MVT::getVectorVT(MVT::f32,
7812 Op1.getValueType().getVectorElementCount());
7813 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7814 // f32 -> int
7815 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7816 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7817 }
7818 return lowerVPFPIntConvOp(Op, DAG);
7819 case ISD::VP_SETCC:
7820 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7821 return SplitVPOp(Op, DAG);
7822 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7823 return lowerVPSetCCMaskOp(Op, DAG);
7824 [[fallthrough]];
7825 case ISD::VP_SMIN:
7826 case ISD::VP_SMAX:
7827 case ISD::VP_UMIN:
7828 case ISD::VP_UMAX:
7829 case ISD::VP_BITREVERSE:
7830 case ISD::VP_BSWAP:
7831 return lowerVPOp(Op, DAG);
7832 case ISD::VP_CTLZ:
7833 case ISD::VP_CTLZ_ZERO_UNDEF:
7834 if (Subtarget.hasStdExtZvbb())
7835 return lowerVPOp(Op, DAG);
7836 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7837 case ISD::VP_CTTZ:
7838 case ISD::VP_CTTZ_ZERO_UNDEF:
7839 if (Subtarget.hasStdExtZvbb())
7840 return lowerVPOp(Op, DAG);
7841 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7842 case ISD::VP_CTPOP:
7843 return lowerVPOp(Op, DAG);
7844 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7845 return lowerVPStridedLoad(Op, DAG);
7846 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7847 return lowerVPStridedStore(Op, DAG);
7848 case ISD::VP_FCEIL:
7849 case ISD::VP_FFLOOR:
7850 case ISD::VP_FRINT:
7851 case ISD::VP_FNEARBYINT:
7852 case ISD::VP_FROUND:
7853 case ISD::VP_FROUNDEVEN:
7854 case ISD::VP_FROUNDTOZERO:
7855 if (isPromotedOpNeedingSplit(Op, Subtarget))
7856 return SplitVPOp(Op, DAG);
7857 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7858 case ISD::VP_FMAXIMUM:
7859 case ISD::VP_FMINIMUM:
7860 if (isPromotedOpNeedingSplit(Op, Subtarget))
7861 return SplitVPOp(Op, DAG);
7862 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7863 case ISD::EXPERIMENTAL_VP_SPLICE:
7864 return lowerVPSpliceExperimental(Op, DAG);
7865 case ISD::EXPERIMENTAL_VP_REVERSE:
7866 return lowerVPReverseExperimental(Op, DAG);
7867 case ISD::EXPERIMENTAL_VP_SPLAT:
7868 return lowerVPSplatExperimental(Op, DAG);
7869 case ISD::CLEAR_CACHE: {
7870 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7871 "llvm.clear_cache only needs custom lower on Linux targets");
7872 SDLoc DL(Op);
7873 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7874 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7875 Op.getOperand(2), Flags, DL);
7876 }
7878 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7880 return lowerINIT_TRAMPOLINE(Op, DAG);
7882 return lowerADJUST_TRAMPOLINE(Op, DAG);
7883 }
7884}
7885
7886SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7887 SDValue Start, SDValue End,
7888 SDValue Flags, SDLoc DL) const {
7889 MakeLibCallOptions CallOptions;
7890 std::pair<SDValue, SDValue> CallResult =
7891 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7892 {Start, End, Flags}, CallOptions, DL, InChain);
7893
7894 // This function returns void so only the out chain matters.
7895 return CallResult.second;
7896}
7897
SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Lower llvm.init.trampoline: write four fixed instruction words plus two
  // 8-byte data slots (static chain, function address) into the caller-
  // provided buffer (operand 1), then emit CLEAR_CACHE over the code bytes.
  if (!Subtarget.is64Bit())
    llvm::report_fatal_error("Trampolines only implemented for RV64");

  // Create an MCCodeEmitter to encode instructions.
  // NOTE(review): the declaration of TLO is not visible in this view
  // (extraction dropped a line) — confirm against the upstream source.
  assert(TLO);
  MCContext &MCCtx = TLO->getContext();

  std::unique_ptr<MCCodeEmitter> CodeEmitter(
      createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));

  SDValue Root = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDLoc dl(Op);

  // IR-level pointer value of the trampoline buffer, used only to build
  // MachinePointerInfo for the stores below.
  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();

  // We store in the trampoline buffer the following instructions and data.
  // Offset:
  // 0: auipc t2, 0
  // 4: ld t0, 24(t2)
  // 8: ld t2, 16(t2)
  // 12: jalr t0
  // 16: <StaticChainOffset>
  // 24: <FunctionAddressOffset>
  // 32:

  constexpr unsigned StaticChainOffset = 16;
  constexpr unsigned FunctionAddressOffset = 24;

  // NOTE(review): the declarations of STI, Fixups and CB are not visible in
  // this view (extraction dropped lines) — confirm against upstream.
  assert(STI);
  auto GetEncoding = [&](const MCInst &MC) {
    // Encode MC into CB and read the 4-byte little-endian instruction word.
    CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
    uint32_t Encoding = support::endian::read32le(CB.data());
    return Encoding;
  };

  // One chain per store: four instruction words plus two data slots.
  SDValue OutChains[6];

  uint32_t Encodings[] = {
      // auipc t2, 0
      // Loads the current PC into t2.
      GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
      // ld t0, 24(t2)
      // Loads the function address into t0. Note that we are using offsets
      // pc-relative to the first instruction of the trampoline.
      GetEncoding(
          MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
              FunctionAddressOffset)),
      // ld t2, 16(t2)
      // Load the value of the static chain.
      GetEncoding(
          MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
              StaticChainOffset)),
      // jalr t0
      // Jump to the function.
      GetEncoding(MCInstBuilder(RISCV::JALR)
                      .addReg(RISCV::X0)
                      .addReg(RISCV::X5)
                      .addImm(0))};

  // Store encoded instructions.
  for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
    // First word goes at Trmp itself; subsequent words at Trmp + Idx*4.
    SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                                         DAG.getConstant(Idx * 4, dl, MVT::i64))
                           : Trmp;
    // Each instruction word is stored as a 32-bit truncating store.
    OutChains[Idx] = DAG.getTruncStore(
        Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
        MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32);
  }

  // Now store the variable part of the trampoline.
  SDValue FunctionAddress = Op.getOperand(2);
  SDValue StaticChain = Op.getOperand(3);

  // Store the given static chain and function pointer in the trampoline buffer.
  struct OffsetValuePair {
    const unsigned Offset;
    const SDValue Value;
    SDValue Addr = SDValue(); // Used to cache the address.
  } OffsetValues[] = {
      {StaticChainOffset, StaticChain},
      {FunctionAddressOffset, FunctionAddress},
  };
  for (auto [Idx, OffsetValue] : llvm::enumerate(OffsetValues)) {
    SDValue Addr =
        DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                    DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
    // Cache the computed address; it is reused below as EndOfTrmp.
    OffsetValue.Addr = Addr;
    OutChains[Idx + 4] =
        DAG.getStore(Root, dl, OffsetValue.Value, Addr,
                     MachinePointerInfo(TrmpAddr, OffsetValue.Offset));
  }

  // Join all six store chains before flushing the icache.
  SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);

  // The end of instructions of trampoline is the same as the static chain
  // address that we computed earlier.
  SDValue EndOfTrmp = OffsetValues[0].Addr;

  // Call clear cache on the trampoline instructions.
  SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
                              Trmp, EndOfTrmp);

  return Chain;
}
8009
8010SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
8011 SelectionDAG &DAG) const {
8012 if (!Subtarget.is64Bit())
8013 llvm::report_fatal_error("Trampolines only implemented for RV64");
8014
8015 return Op.getOperand(0);
8016}
8017
                             SelectionDAG &DAG, unsigned Flags) {
  // Wrap the global in a target-specific address node (offset 0) carrying
  // the given relocation Flags.
  // NOTE(review): this overload's leading signature line is missing from this
  // view (extraction dropped it); N is evidently a GlobalAddressSDNode* —
  // confirm against the upstream source.
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}
8022
                             SelectionDAG &DAG, unsigned Flags) {
  // Wrap the block address (preserving its offset) in a target node with the
  // given relocation Flags.
  // NOTE(review): this overload's leading signature line is missing from this
  // view; N is evidently a BlockAddressSDNode* — confirm upstream.
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}
8028
                             SelectionDAG &DAG, unsigned Flags) {
  // Wrap the constant-pool entry (value, alignment, offset) in a target node
  // with the given relocation Flags.
  // NOTE(review): this overload's leading signature line is missing from this
  // view; N is evidently a ConstantPoolSDNode* — confirm upstream.
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}
8034
                             SelectionDAG &DAG, unsigned Flags) {
  // Wrap the jump-table index in a target node with the given relocation
  // Flags.
  // NOTE(review): this overload's leading signature line is missing from this
  // view; N is evidently a JumpTableSDNode* — confirm upstream.
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
8039
                                 EVT Ty, SelectionDAG &DAG) {
  // Large code model: place the address in the constant pool, materialize the
  // pool entry's address with LLA (PC-relative), and load the address from it.
  // NOTE(review): this helper's leading signature line and the creation of
  // CPV are missing from this view (extraction dropped lines) — confirm
  // against the upstream source.
  SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
  SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
  return DAG.getLoad(
      Ty, DL, DAG.getEntryNode(), LC,
      // NOTE(review): the MachinePointerInfo argument line of this getLoad
      // call is missing from this view — confirm upstream.
}
8049
                                  EVT Ty, SelectionDAG &DAG) {
  // Large code model: place the external symbol's address in the constant
  // pool, materialize the pool entry's address with LLA, and load from it.
  // NOTE(review): this helper's leading signature line and the start of the
  // CPV declaration are missing from this view (extraction dropped lines) —
  // confirm against the upstream source.
      RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
  SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
  SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
  return DAG.getLoad(
      Ty, DL, DAG.getEntryNode(), LC,
      // NOTE(review): the MachinePointerInfo argument line of this getLoad
      // call is missing from this view — confirm upstream.
}
8060
// Materialize the address of a symbol-like node N (global, block address,
// constant pool, jump table) according to PIC-ness, HWASAN tagged-globals
// mode, and the active code model. IsLocal/IsExternWeak refine the choice
// between direct PC-relative and GOT-indirect addressing.
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal, bool IsExternWeak) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  // When HWASAN is used and tagging of global variables is enabled
  // they should be accessed via the GOT, since the tagged address of a global
  // is incompatible with existing code models. This also applies to non-pic
  // mode.
  if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal && !Subtarget.allowTaggedGlobals())
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLGA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
    // NOTE(review): the lines creating MemOp (a MachineMemOperand for the GOT
    // load) are missing from this view; the orphan line below is a
    // continuation of that call — confirm against the upstream source.
                    LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
    return Load;
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
    return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
  }
  case CodeModel::Medium: {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsExternWeak) {
      // An extern weak symbol may be undefined, i.e. have value 0, which may
      // not be within 2GiB of PC, so use GOT-indirect addressing to access the
      // symbol. This generates the pattern (PseudoLGA sym), which expands to
      // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
      SDValue Load =
          SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
      // NOTE(review): the lines creating MemOp are missing from this view;
      // the orphan line below is a continuation — confirm upstream.
                    LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
      DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
      return Load;
    }

    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
  }
  case CodeModel::Large: {
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
      return getLargeGlobalAddress(G, DL, Ty, DAG);

    // Using pc-relative mode for other node type.
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
  }
  }
}
8139
8140SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8141 SelectionDAG &DAG) const {
8142 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8143 assert(N->getOffset() == 0 && "unexpected offset in global node");
8144 const GlobalValue *GV = N->getGlobal();
8145 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8146}
8147
8148SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8149 SelectionDAG &DAG) const {
8150 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8151
8152 return getAddr(N, DAG);
8153}
8154
8155SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8156 SelectionDAG &DAG) const {
8157 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8158
8159 return getAddr(N, DAG);
8160}
8161
8162SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8163 SelectionDAG &DAG) const {
8164 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8165
8166 return getAddr(N, DAG);
8167}
8168
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  // Lower a TLS global under a static TLS model: with UseGOT the address is
  // loaded from the GOT (PseudoLA_TLS_IE) and added to the thread pointer;
  // without it the address is computed from %tprel_* relocations.
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
    // NOTE(review): the lines creating MemOp (a MachineMemOperand for the GOT
    // load) are missing from this view; the orphan line below is a
    // continuation of that call — confirm against the upstream source.
                    LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  // NOTE(review): the initializers of AddrHi/AddrAdd/AddrLo are missing from
  // this view (extraction dropped lines); presumably getTargetGlobalAddress
  // calls carrying the %tprel_* relocation flags — confirm upstream.
  SDValue AddrHi =
  SDValue AddrAdd =
  SDValue AddrLo =

  SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd =
      DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
  return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
}
8215
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  // Lower a TLS global dynamically: materialize the GOT slot address with
  // PseudoLA_TLS_GD and pass it to a __tls_get_addr call.
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // Integer type of pointer width, used as the libcall's argument/return type.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  // NOTE(review): the declaration of Args (an ArgListTy) is missing from this
  // view (extraction dropped a line) — confirm against the upstream source.
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  // NOTE(review): the declaration of CLI (a CallLoweringInfo) is missing from
  // this view — confirm against the upstream source.
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  // The call's first result (the returned value) is the variable's address.
  return LowerCallTo(CLI).first;
}
8247
8248SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8249 SelectionDAG &DAG) const {
8250 SDLoc DL(N);
8251 EVT Ty = getPointerTy(DAG.getDataLayout());
8252 const GlobalValue *GV = N->getGlobal();
8253
8254 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8255 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8256 //
8257 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
8258 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8259 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
8260 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
8261 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8262 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
8263}
8264
SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // Dispatch a TLS global-address node to the lowering helper matching the
  // target's TLS model (emulated, static, or dynamic/TLSDESC).
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(N, DAG);

  // NOTE(review): the declaration of Model (presumably the TLSModel::Model
  // obtained from the target machine) and the guard condition preceding the
  // GHC error below are missing from this view (extraction dropped lines) —
  // confirm against the upstream source.

    report_fatal_error("In GHC calling convention TLS is not supported");

  SDValue Addr;
  switch (Model) {
  // NOTE(review): the case labels of this switch are missing from this view;
  // presumably LocalExec, InitialExec, and the dynamic models in that order —
  // confirm upstream.
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
    Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
                                        : getDynamicTLSAddr(N, DAG);
    break;
  }

  return Addr;
}
8296
8297// Return true if Val is equal to (setcc LHS, RHS, CC).
8298// Return false if Val is the inverse of (setcc LHS, RHS, CC).
8299// Otherwise, return std::nullopt.
8300static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
8301 ISD::CondCode CC, SDValue Val) {
8302 assert(Val->getOpcode() == ISD::SETCC);
8303 SDValue LHS2 = Val.getOperand(0);
8304 SDValue RHS2 = Val.getOperand(1);
8305 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
8306
8307 if (LHS == LHS2 && RHS == RHS2) {
8308 if (CC == CC2)
8309 return true;
8310 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8311 return false;
8312 } else if (LHS == RHS2 && RHS == LHS2) {
8314 if (CC == CC2)
8315 return true;
8316 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8317 return false;
8318 }
8319
8320 return std::nullopt;
8321}
8322
                                     const RISCVSubtarget &Subtarget) {
  // NOTE(review): this helper's leading signature line is missing from this
  // view (extraction dropped it); the body reads a select-like node N plus
  // DAG and Subtarget — confirm against the upstream source.
  SDValue CondV = N->getOperand(0);
  SDValue TrueV = N->getOperand(1);
  SDValue FalseV = N->getOperand(2);
  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  // Fold selects whose arms are all-ones/zero constants into bitwise
  // arithmetic on the condition value. Guarded out when the subtarget has
  // conditional-move fusion.
  if (!Subtarget.hasConditionalMoveFusion()) {
    // (select c, -1, y) -> -c | y
    if (isAllOnesConstant(TrueV)) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
    }
    // (select c, y, -1) -> (c-1) | y
    if (isAllOnesConstant(FalseV)) {
      SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
    }

    // (select c, 0, y) -> (c-1) & y
    if (isNullConstant(TrueV)) {
      SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
    }
    // (select c, y, 0) -> -c & y
    if (isNullConstant(FalseV)) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
    }
  }

  // select c, ~x, x --> xor -c, x
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (~TrueVal == FalseVal) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
    }
  }

  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
  // when both truev and falsev are also setcc.
  if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
      FalseV.getOpcode() == ISD::SETCC) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    // (select x, x, y) -> x | y
    // (select !x, x, y) -> x & y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
      return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
                         DAG.getFreeze(FalseV));
    }
    // (select x, y, x) -> x & y
    // (select !x, y, x) -> x | y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
                         DAG.getFreeze(TrueV), FalseV);
    }
  }

  // No profitable fold found; leave the select as-is.
  return SDValue();
}
8391
8392// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
8393// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
8394// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
8395// being `0` or `-1`. In such cases we can replace `select` with `and`.
8396// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
8397// than `c0`?
8398static SDValue
8400 const RISCVSubtarget &Subtarget) {
8401 if (Subtarget.hasShortForwardBranchOpt())
8402 return SDValue();
8403
8404 unsigned SelOpNo = 0;
8405 SDValue Sel = BO->getOperand(0);
8406 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
8407 SelOpNo = 1;
8408 Sel = BO->getOperand(1);
8409 }
8410
8411 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
8412 return SDValue();
8413
8414 unsigned ConstSelOpNo = 1;
8415 unsigned OtherSelOpNo = 2;
8416 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
8417 ConstSelOpNo = 2;
8418 OtherSelOpNo = 1;
8419 }
8420 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
8421 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
8422 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
8423 return SDValue();
8424
8425 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
8426 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
8427 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
8428 return SDValue();
8429
8430 SDLoc DL(Sel);
8431 EVT VT = BO->getValueType(0);
8432
8433 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
8434 if (SelOpNo == 1)
8435 std::swap(NewConstOps[0], NewConstOps[1]);
8436
8437 SDValue NewConstOp =
8438 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
8439 if (!NewConstOp)
8440 return SDValue();
8441
8442 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
8443 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
8444 return SDValue();
8445
8446 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
8447 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
8448 if (SelOpNo == 1)
8449 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
8450 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
8451
8452 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
8453 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
8454 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
8455}
8456
8457SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
8458 SDValue CondV = Op.getOperand(0);
8459 SDValue TrueV = Op.getOperand(1);
8460 SDValue FalseV = Op.getOperand(2);
8461 SDLoc DL(Op);
8462 MVT VT = Op.getSimpleValueType();
8463 MVT XLenVT = Subtarget.getXLenVT();
8464
8465 // Lower vector SELECTs to VSELECTs by splatting the condition.
8466 if (VT.isVector()) {
8467 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
8468 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
8469 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
8470 }
8471
8472 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
8473 // nodes to implement the SELECT. Performing the lowering here allows for
8474 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
8475 // sequence or RISCVISD::SELECT_CC node (branch-based select).
8476 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
8477 VT.isScalarInteger()) {
8478 // (select c, t, 0) -> (czero_eqz t, c)
8479 if (isNullConstant(FalseV))
8480 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
8481 // (select c, 0, f) -> (czero_nez f, c)
8482 if (isNullConstant(TrueV))
8483 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
8484
8485 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
8486 if (TrueV.getOpcode() == ISD::AND &&
8487 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
8488 return DAG.getNode(
8489 ISD::OR, DL, VT, TrueV,
8490 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8491 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
8492 if (FalseV.getOpcode() == ISD::AND &&
8493 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
8494 return DAG.getNode(
8495 ISD::OR, DL, VT, FalseV,
8496 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
8497
8498 // Try some other optimizations before falling back to generic lowering.
8499 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8500 return V;
8501
8502 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
8503 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
8504 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8505 const APInt &TrueVal = TrueV->getAsAPIntVal();
8506 const APInt &FalseVal = FalseV->getAsAPIntVal();
8507
8508 // Prefer these over Zicond to avoid materializing an immediate:
8509 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
8510 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
8511 if (CondV.getOpcode() == ISD::SETCC &&
8512 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
8513 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8514 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
8515 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
8516 int64_t TrueImm = TrueVal.getSExtValue();
8517 int64_t FalseImm = FalseVal.getSExtValue();
8518 if (CCVal == ISD::SETGT)
8519 std::swap(TrueImm, FalseImm);
8520 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
8521 isInt<12>(TrueImm - FalseImm)) {
8522 SDValue SRA =
8523 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
8524 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
8525 SDValue AND =
8526 DAG.getNode(ISD::AND, DL, VT, SRA,
8527 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
8528 return DAG.getNode(ISD::ADD, DL, VT, AND,
8529 DAG.getSignedConstant(FalseImm, DL, VT));
8530 }
8531 }
8532 }
8533
8534 const int TrueValCost = RISCVMatInt::getIntMatCost(
8535 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8536 const int FalseValCost = RISCVMatInt::getIntMatCost(
8537 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8538 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
8539 SDValue LHSVal = DAG.getConstant(
8540 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
8541 SDValue RHSVal =
8542 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
8543 SDValue CMOV =
8545 DL, VT, LHSVal, CondV);
8546 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
8547 }
8548
8549 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
8550 // Unless we have the short forward branch optimization.
8551 if (!Subtarget.hasConditionalMoveFusion())
8552 return DAG.getNode(
8553 ISD::OR, DL, VT,
8554 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
8555 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8556 }
8557
8558 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8559 return V;
8560
8561 if (Op.hasOneUse()) {
8562 unsigned UseOpc = Op->user_begin()->getOpcode();
8563 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
8564 SDNode *BinOp = *Op->user_begin();
8565 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
8566 DAG, Subtarget)) {
8567 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
8568 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
8569 // may return a constant node and cause crash in lowerSELECT.
8570 if (NewSel.getOpcode() == ISD::SELECT)
8571 return lowerSELECT(NewSel, DAG);
8572 return NewSel;
8573 }
8574 }
8575 }
8576
8577 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
8578 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
8579 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
8580 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
8581 if (FPTV && FPFV) {
8582 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
8583 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
8584 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
8585 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
8586 DAG.getConstant(1, DL, XLenVT));
8587 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
8588 }
8589 }
8590
8591 // If the condition is not an integer SETCC which operates on XLenVT, we need
8592 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
8593 // (select condv, truev, falsev)
8594 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
8595 if (CondV.getOpcode() != ISD::SETCC ||
8596 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
8597 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8598 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
8599
8600 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
8601
8602 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8603 }
8604
8605 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
8606 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
8607 // advantage of the integer compare+branch instructions. i.e.:
8608 // (select (setcc lhs, rhs, cc), truev, falsev)
8609 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
8610 SDValue LHS = CondV.getOperand(0);
8611 SDValue RHS = CondV.getOperand(1);
8612 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8613
8614 // Special case for a select of 2 constants that have a difference of 1.
8615 // Normally this is done by DAGCombine, but if the select is introduced by
8616 // type legalization or op legalization, we miss it. Restricting to SETLT
8617 // case for now because that is what signed saturating add/sub need.
8618 // FIXME: We don't need the condition to be SETLT or even a SETCC,
8619 // but we would probably want to swap the true/false values if the condition
8620 // is SETGE/SETLE to avoid an XORI.
8621 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
8622 CCVal == ISD::SETLT) {
8623 const APInt &TrueVal = TrueV->getAsAPIntVal();
8624 const APInt &FalseVal = FalseV->getAsAPIntVal();
8625 if (TrueVal - 1 == FalseVal)
8626 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
8627 if (TrueVal + 1 == FalseVal)
8628 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
8629 }
8630
8631 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8632 // 1 < x ? x : 1 -> 0 < x ? x : 1
8633 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
8634 RHS == TrueV && LHS == FalseV) {
8635 LHS = DAG.getConstant(0, DL, VT);
8636 // 0 <u x is the same as x != 0.
8637 if (CCVal == ISD::SETULT) {
8638 std::swap(LHS, RHS);
8639 CCVal = ISD::SETNE;
8640 }
8641 }
8642
8643 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
8644 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
8645 RHS == FalseV) {
8646 RHS = DAG.getConstant(0, DL, VT);
8647 }
8648
8649 SDValue TargetCC = DAG.getCondCode(CCVal);
8650
8651 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
8652 // (select (setcc lhs, rhs, CC), constant, falsev)
8653 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
8654 std::swap(TrueV, FalseV);
8655 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
8656 }
8657
8658 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
8659 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8660}
8661
8662SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
8663 SDValue CondV = Op.getOperand(1);
8664 SDLoc DL(Op);
8665 MVT XLenVT = Subtarget.getXLenVT();
8666
8667 if (CondV.getOpcode() == ISD::SETCC &&
8668 CondV.getOperand(0).getValueType() == XLenVT) {
8669 SDValue LHS = CondV.getOperand(0);
8670 SDValue RHS = CondV.getOperand(1);
8671 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8672
8673 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8674
8675 SDValue TargetCC = DAG.getCondCode(CCVal);
8676 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8677 LHS, RHS, TargetCC, Op.getOperand(2));
8678 }
8679
8680 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8681 CondV, DAG.getConstant(0, DL, XLenVT),
8682 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
8683}
8684
8685SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
8688
8689 SDLoc DL(Op);
8690 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
8692
8693 // vastart just stores the address of the VarArgsFrameIndex slot into the
8694 // memory location argument.
8695 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
8696 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
8697 MachinePointerInfo(SV));
8698}
8699
8700SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
8701 SelectionDAG &DAG) const {
8702 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8704 MachineFrameInfo &MFI = MF.getFrameInfo();
8705 MFI.setFrameAddressIsTaken(true);
8706 Register FrameReg = RI.getFrameRegister(MF);
8707 int XLenInBytes = Subtarget.getXLen() / 8;
8708
8709 EVT VT = Op.getValueType();
8710 SDLoc DL(Op);
8711 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
8712 unsigned Depth = Op.getConstantOperandVal(0);
8713 while (Depth--) {
8714 int Offset = -(XLenInBytes * 2);
8715 SDValue Ptr = DAG.getNode(
8716 ISD::ADD, DL, VT, FrameAddr,
8718 FrameAddr =
8719 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
8720 }
8721 return FrameAddr;
8722}
8723
8724SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
8725 SelectionDAG &DAG) const {
8726 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8728 MachineFrameInfo &MFI = MF.getFrameInfo();
8729 MFI.setReturnAddressIsTaken(true);
8730 MVT XLenVT = Subtarget.getXLenVT();
8731 int XLenInBytes = Subtarget.getXLen() / 8;
8732
8734 return SDValue();
8735
8736 EVT VT = Op.getValueType();
8737 SDLoc DL(Op);
8738 unsigned Depth = Op.getConstantOperandVal(0);
8739 if (Depth) {
8740 int Off = -XLenInBytes;
8741 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
8742 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
8743 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
8744 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
8746 }
8747
8748 // Return the value of the return address register, marking it an implicit
8749 // live-in.
8750 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
8751 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
8752}
8753
8754SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
8755 SelectionDAG &DAG) const {
8756 SDLoc DL(Op);
8757 SDValue Lo = Op.getOperand(0);
8758 SDValue Hi = Op.getOperand(1);
8759 SDValue Shamt = Op.getOperand(2);
8760 EVT VT = Lo.getValueType();
8761
8762 // if Shamt-XLEN < 0: // Shamt < XLEN
8763 // Lo = Lo << Shamt
8764 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
8765 // else:
8766 // Lo = 0
8767 // Hi = Lo << (Shamt-XLEN)
8768
8769 SDValue Zero = DAG.getConstant(0, DL, VT);
8770 SDValue One = DAG.getConstant(1, DL, VT);
8771 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8772 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8773 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8774 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8775
8776 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
8777 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
8778 SDValue ShiftRightLo =
8779 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
8780 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
8781 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
8782 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
8783
8784 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8785
8786 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
8787 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8788
8789 SDValue Parts[2] = {Lo, Hi};
8790 return DAG.getMergeValues(Parts, DL);
8791}
8792
8793SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8794 bool IsSRA) const {
8795 SDLoc DL(Op);
8796 SDValue Lo = Op.getOperand(0);
8797 SDValue Hi = Op.getOperand(1);
8798 SDValue Shamt = Op.getOperand(2);
8799 EVT VT = Lo.getValueType();
8800
8801 // SRA expansion:
8802 // if Shamt-XLEN < 0: // Shamt < XLEN
8803 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8804 // Hi = Hi >>s Shamt
8805 // else:
8806 // Lo = Hi >>s (Shamt-XLEN);
8807 // Hi = Hi >>s (XLEN-1)
8808 //
8809 // SRL expansion:
8810 // if Shamt-XLEN < 0: // Shamt < XLEN
8811 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8812 // Hi = Hi >>u Shamt
8813 // else:
8814 // Lo = Hi >>u (Shamt-XLEN);
8815 // Hi = 0;
8816
8817 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8818
8819 SDValue Zero = DAG.getConstant(0, DL, VT);
8820 SDValue One = DAG.getConstant(1, DL, VT);
8821 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8822 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8823 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8824 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8825
8826 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8827 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8828 SDValue ShiftLeftHi =
8829 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8830 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8831 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8832 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8833 SDValue HiFalse =
8834 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8835
8836 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8837
8838 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8839 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8840
8841 SDValue Parts[2] = {Lo, Hi};
8842 return DAG.getMergeValues(Parts, DL);
8843}
8844
8845// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8846// legal equivalently-sized i8 type, so we can use that as a go-between.
8847SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8848 SelectionDAG &DAG) const {
8849 SDLoc DL(Op);
8850 MVT VT = Op.getSimpleValueType();
8851 SDValue SplatVal = Op.getOperand(0);
8852 // All-zeros or all-ones splats are handled specially.
8853 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8854 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8855 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8856 }
8857 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8858 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8859 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8860 }
8861 MVT InterVT = VT.changeVectorElementType(MVT::i8);
8862 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8863 DAG.getConstant(1, DL, SplatVal.getValueType()));
8864 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8865 SDValue Zero = DAG.getConstant(0, DL, InterVT);
8866 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8867}
8868
8869// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8870// illegal (currently only vXi64 RV32).
8871// FIXME: We could also catch non-constant sign-extended i32 values and lower
8872// them to VMV_V_X_VL.
8873SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8874 SelectionDAG &DAG) const {
8875 SDLoc DL(Op);
8876 MVT VecVT = Op.getSimpleValueType();
8877 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8878 "Unexpected SPLAT_VECTOR_PARTS lowering");
8879
8880 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8881 SDValue Lo = Op.getOperand(0);
8882 SDValue Hi = Op.getOperand(1);
8883
8884 MVT ContainerVT = VecVT;
8885 if (VecVT.isFixedLengthVector())
8886 ContainerVT = getContainerForFixedLengthVector(VecVT);
8887
8888 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8889
8890 SDValue Res =
8891 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8892
8893 if (VecVT.isFixedLengthVector())
8894 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8895
8896 return Res;
8897}
8898
8899// Custom-lower extensions from mask vectors by using a vselect either with 1
8900// for zero/any-extension or -1 for sign-extension:
8901// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8902// Note that any-extension is lowered identically to zero-extension.
8903SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8904 int64_t ExtTrueVal) const {
8905 SDLoc DL(Op);
8906 MVT VecVT = Op.getSimpleValueType();
8907 SDValue Src = Op.getOperand(0);
8908 // Only custom-lower extensions from mask types
8909 assert(Src.getValueType().isVector() &&
8910 Src.getValueType().getVectorElementType() == MVT::i1);
8911
8912 if (VecVT.isScalableVector()) {
8913 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8914 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
8915 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8916 }
8917
8918 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8919 MVT I1ContainerVT =
8920 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8921
8922 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8923
8924 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8925
8926 MVT XLenVT = Subtarget.getXLenVT();
8927 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8928 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
8929
8930 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8931 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8932 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8933 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8934 SDValue Select =
8935 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8936 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8937
8938 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8939}
8940
8941SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8942 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8943 MVT ExtVT = Op.getSimpleValueType();
8944 // Only custom-lower extensions from fixed-length vector types.
8945 if (!ExtVT.isFixedLengthVector())
8946 return Op;
8947 MVT VT = Op.getOperand(0).getSimpleValueType();
8948 // Grab the canonical container type for the extended type. Infer the smaller
8949 // type from that to ensure the same number of vector elements, as we know
8950 // the LMUL will be sufficient to hold the smaller type.
8951 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8952 // Get the extended container type manually to ensure the same number of
8953 // vector elements between source and dest.
8954 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8955 ContainerExtVT.getVectorElementCount());
8956
8957 SDValue Op1 =
8958 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8959
8960 SDLoc DL(Op);
8961 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8962
8963 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8964
8965 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8966}
8967
8968// Custom-lower truncations from vectors to mask vectors by using a mask and a
8969// setcc operation:
8970// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8971SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8972 SelectionDAG &DAG) const {
8973 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8974 SDLoc DL(Op);
8975 EVT MaskVT = Op.getValueType();
8976 // Only expect to custom-lower truncations to mask types
8977 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8978 "Unexpected type for vector mask lowering");
8979 SDValue Src = Op.getOperand(0);
8980 MVT VecVT = Src.getSimpleValueType();
8981 SDValue Mask, VL;
8982 if (IsVPTrunc) {
8983 Mask = Op.getOperand(1);
8984 VL = Op.getOperand(2);
8985 }
8986 // If this is a fixed vector, we need to convert it to a scalable vector.
8987 MVT ContainerVT = VecVT;
8988
8989 if (VecVT.isFixedLengthVector()) {
8990 ContainerVT = getContainerForFixedLengthVector(VecVT);
8991 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8992 if (IsVPTrunc) {
8993 MVT MaskContainerVT =
8994 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8995 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8996 }
8997 }
8998
8999 if (!IsVPTrunc) {
9000 std::tie(Mask, VL) =
9001 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9002 }
9003
9004 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
9005 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
9006
9007 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9008 DAG.getUNDEF(ContainerVT), SplatOne, VL);
9009 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9010 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9011
9012 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
9013 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
9014 DAG.getUNDEF(ContainerVT), Mask, VL);
9015 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
9016 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
9017 DAG.getUNDEF(MaskContainerVT), Mask, VL});
9018 if (MaskVT.isFixedLengthVector())
9019 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
9020 return Trunc;
9021}
9022
9023SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
9024 SelectionDAG &DAG) const {
9025 unsigned Opc = Op.getOpcode();
9026 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
9027 SDLoc DL(Op);
9028
9029 MVT VT = Op.getSimpleValueType();
9030 // Only custom-lower vector truncates
9031 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9032
9033 // Truncates to mask types are handled differently
9034 if (VT.getVectorElementType() == MVT::i1)
9035 return lowerVectorMaskTruncLike(Op, DAG);
9036
9037 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
9038 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
9039 // truncate by one power of two at a time.
9040 MVT DstEltVT = VT.getVectorElementType();
9041
9042 SDValue Src = Op.getOperand(0);
9043 MVT SrcVT = Src.getSimpleValueType();
9044 MVT SrcEltVT = SrcVT.getVectorElementType();
9045
9046 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
9047 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
9048 "Unexpected vector truncate lowering");
9049
9050 MVT ContainerVT = SrcVT;
9051 SDValue Mask, VL;
9052 if (IsVPTrunc) {
9053 Mask = Op.getOperand(1);
9054 VL = Op.getOperand(2);
9055 }
9056 if (SrcVT.isFixedLengthVector()) {
9057 ContainerVT = getContainerForFixedLengthVector(SrcVT);
9058 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9059 if (IsVPTrunc) {
9060 MVT MaskVT = getMaskTypeFor(ContainerVT);
9061 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9062 }
9063 }
9064
9065 SDValue Result = Src;
9066 if (!IsVPTrunc) {
9067 std::tie(Mask, VL) =
9068 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9069 }
9070
9071 unsigned NewOpc;
9072 if (Opc == ISD::TRUNCATE_SSAT_S)
9074 else if (Opc == ISD::TRUNCATE_USAT_U)
9076 else
9078
9079 do {
9080 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
9081 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
9082 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
9083 } while (SrcEltVT != DstEltVT);
9084
9085 if (SrcVT.isFixedLengthVector())
9086 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9087
9088 return Result;
9089}
9090
9091SDValue
9092RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
9093 SelectionDAG &DAG) const {
9094 SDLoc DL(Op);
9095 SDValue Chain = Op.getOperand(0);
9096 SDValue Src = Op.getOperand(1);
9097 MVT VT = Op.getSimpleValueType();
9098 MVT SrcVT = Src.getSimpleValueType();
9099 MVT ContainerVT = VT;
9100 if (VT.isFixedLengthVector()) {
9101 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9102 ContainerVT =
9103 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9104 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9105 }
9106
9107 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9108
9109 // RVV can only widen/truncate fp to types double/half the size as the source.
9110 if ((VT.getVectorElementType() == MVT::f64 &&
9111 (SrcVT.getVectorElementType() == MVT::f16 ||
9112 SrcVT.getVectorElementType() == MVT::bf16)) ||
9113 ((VT.getVectorElementType() == MVT::f16 ||
9114 VT.getVectorElementType() == MVT::bf16) &&
9115 SrcVT.getVectorElementType() == MVT::f64)) {
9116 // For double rounding, the intermediate rounding should be round-to-odd.
9117 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
9120 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9121 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
9122 Chain, Src, Mask, VL);
9123 Chain = Src.getValue(1);
9124 }
9125
9126 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
9129 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
9130 Chain, Src, Mask, VL);
9131 if (VT.isFixedLengthVector()) {
9132 // StrictFP operations have two result values. Their lowered result should
9133 // have same result count.
9134 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
9135 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
9136 }
9137 return Res;
9138}
9139
9140SDValue
9141RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
9142 SelectionDAG &DAG) const {
9143 bool IsVP =
9144 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
9145 bool IsExtend =
9146 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
9147 // RVV can only do truncate fp to types half the size as the source. We
9148 // custom-lower f64->f16 rounds via RVV's round-to-odd float
9149 // conversion instruction.
9150 SDLoc DL(Op);
9151 MVT VT = Op.getSimpleValueType();
9152
9153 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9154
9155 SDValue Src = Op.getOperand(0);
9156 MVT SrcVT = Src.getSimpleValueType();
9157
9158 bool IsDirectExtend =
9159 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
9160 (SrcVT.getVectorElementType() != MVT::f16 &&
9161 SrcVT.getVectorElementType() != MVT::bf16));
9162 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
9163 VT.getVectorElementType() != MVT::bf16) ||
9164 SrcVT.getVectorElementType() != MVT::f64);
9165
9166 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
9167
9168 // We have regular SD node patterns for direct non-VL extends.
9169 if (VT.isScalableVector() && IsDirectConv && !IsVP)
9170 return Op;
9171
9172 // Prepare any fixed-length vector operands.
9173 MVT ContainerVT = VT;
9174 SDValue Mask, VL;
9175 if (IsVP) {
9176 Mask = Op.getOperand(1);
9177 VL = Op.getOperand(2);
9178 }
9179 if (VT.isFixedLengthVector()) {
9180 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9181 ContainerVT =
9182 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9183 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9184 if (IsVP) {
9185 MVT MaskVT = getMaskTypeFor(ContainerVT);
9186 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9187 }
9188 }
9189
9190 if (!IsVP)
9191 std::tie(Mask, VL) =
9192 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9193
9194 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
9195
9196 if (IsDirectConv) {
9197 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
9198 if (VT.isFixedLengthVector())
9199 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
9200 return Src;
9201 }
9202
9203 unsigned InterConvOpc =
9205
9206 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9207 SDValue IntermediateConv =
9208 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
9209 SDValue Result =
9210 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
9211 if (VT.isFixedLengthVector())
9212 return convertFromScalableVector(VT, Result, DAG, Subtarget);
9213 return Result;
9214}
9215
9216// Given a scalable vector type and an index into it, returns the type for the
9217// smallest subvector that the index fits in. This can be used to reduce LMUL
9218// for operations like vslidedown.
9219//
9220// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
9221static std::optional<MVT>
9222getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
9223 const RISCVSubtarget &Subtarget) {
9224 assert(VecVT.isScalableVector());
9225 const unsigned EltSize = VecVT.getScalarSizeInBits();
9226 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
9227 const unsigned MinVLMAX = VectorBitsMin / EltSize;
9228 MVT SmallerVT;
9229 if (MaxIdx < MinVLMAX)
9230 SmallerVT = getLMUL1VT(VecVT);
9231 else if (MaxIdx < MinVLMAX * 2)
9232 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
9233 else if (MaxIdx < MinVLMAX * 4)
9234 SmallerVT = getLMUL1VT(VecVT)
9237 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
9238 return std::nullopt;
9239 return SmallerVT;
9240}
9241
9242// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
9243// first position of a vector, and that vector is slid up to the insert index.
9244// By limiting the active vector length to index+1 and merging with the
9245// original vector (with an undisturbed tail policy for elements >= VL), we
9246// achieve the desired result of leaving all elements untouched except the one
9247// at VL-1, which is replaced with the desired value.
9248SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
9249 SelectionDAG &DAG) const {
9250 SDLoc DL(Op);
9251 MVT VecVT = Op.getSimpleValueType();
9252 MVT XLenVT = Subtarget.getXLenVT();
9253 SDValue Vec = Op.getOperand(0);
9254 SDValue Val = Op.getOperand(1);
9255 MVT ValVT = Val.getSimpleValueType();
9256 SDValue Idx = Op.getOperand(2);
9257
9258 if (VecVT.getVectorElementType() == MVT::i1) {
9259 // FIXME: For now we just promote to an i8 vector and insert into that,
9260 // but this is probably not optimal.
9261 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9262 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9263 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
9264 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
9265 }
9266
9267 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9268 ValVT == MVT::bf16) {
9269 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
9270 MVT IntVT = VecVT.changeTypeToInteger();
9271 SDValue IntInsert = DAG.getNode(
9272 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
9273 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
9274 return DAG.getBitcast(VecVT, IntInsert);
9275 }
9276
9277 MVT ContainerVT = VecVT;
9278 // If the operand is a fixed-length vector, convert to a scalable one.
9279 if (VecVT.isFixedLengthVector()) {
9280 ContainerVT = getContainerForFixedLengthVector(VecVT);
9281 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9282 }
9283
9284 // If we know the index we're going to insert at, we can shrink Vec so that
9285 // we're performing the scalar inserts and slideup on a smaller LMUL.
9286 MVT OrigContainerVT = ContainerVT;
9287 SDValue OrigVec = Vec;
9288 SDValue AlignedIdx;
9289 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
9290 const unsigned OrigIdx = IdxC->getZExtValue();
9291 // Do we know an upper bound on LMUL?
9292 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
9293 DL, DAG, Subtarget)) {
9294 ContainerVT = *ShrunkVT;
9295 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
9296 }
9297
9298 // If we're compiling for an exact VLEN value, we can always perform
9299 // the insert in m1 as we can determine the register corresponding to
9300 // the index in the register group.
9301 const MVT M1VT = getLMUL1VT(ContainerVT);
9302 if (auto VLEN = Subtarget.getRealVLen();
9303 VLEN && ContainerVT.bitsGT(M1VT)) {
9304 EVT ElemVT = VecVT.getVectorElementType();
9305 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
9306 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9307 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9308 unsigned ExtractIdx =
9309 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9310 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
9311 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9312 ContainerVT = M1VT;
9313 }
9314
9315 if (AlignedIdx)
9316 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9317 AlignedIdx);
9318 }
9319
9320 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
9321 // Even i64-element vectors on RV32 can be lowered without scalar
9322 // legalization if the most-significant 32 bits of the value are not affected
9323 // by the sign-extension of the lower 32 bits.
9324 // TODO: We could also catch sign extensions of a 32-bit value.
9325 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
9326 const auto *CVal = cast<ConstantSDNode>(Val);
9327 if (isInt<32>(CVal->getSExtValue())) {
9328 IsLegalInsert = true;
9329 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
9330 }
9331 }
9332
9333 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9334
9335 SDValue ValInVec;
9336
9337 if (IsLegalInsert) {
9338 unsigned Opc =
9340 if (isNullConstant(Idx)) {
9341 if (!VecVT.isFloatingPoint())
9342 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
9343 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
9344
9345 if (AlignedIdx)
9346 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9347 Vec, AlignedIdx);
9348 if (!VecVT.isFixedLengthVector())
9349 return Vec;
9350 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
9351 }
9352 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
9353 } else {
9354 // On RV32, i64-element vectors must be specially handled to place the
9355 // value at element 0, by using two vslide1down instructions in sequence on
9356 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
9357 // this.
9358 SDValue ValLo, ValHi;
9359 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
9360 MVT I32ContainerVT =
9361 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
9362 SDValue I32Mask =
9363 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
9364 // Limit the active VL to two.
9365 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
9366 // If the Idx is 0 we can insert directly into the vector.
9367 if (isNullConstant(Idx)) {
9368 // First slide in the lo value, then the hi in above it. We use slide1down
9369 // to avoid the register group overlap constraint of vslide1up.
9370 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9371 Vec, Vec, ValLo, I32Mask, InsertI64VL);
9372 // If the source vector is undef don't pass along the tail elements from
9373 // the previous slide1down.
9374 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
9375 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9376 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
9377 // Bitcast back to the right container type.
9378 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9379
9380 if (AlignedIdx)
9381 ValInVec =
9382 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9383 ValInVec, AlignedIdx);
9384 if (!VecVT.isFixedLengthVector())
9385 return ValInVec;
9386 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
9387 }
9388
9389 // First slide in the lo value, then the hi in above it. We use slide1down
9390 // to avoid the register group overlap constraint of vslide1up.
9391 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9392 DAG.getUNDEF(I32ContainerVT),
9393 DAG.getUNDEF(I32ContainerVT), ValLo,
9394 I32Mask, InsertI64VL);
9395 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9396 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
9397 I32Mask, InsertI64VL);
9398 // Bitcast back to the right container type.
9399 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9400 }
9401
9402 // Now that the value is in a vector, slide it into position.
9403 SDValue InsertVL =
9404 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
9405
9406 // Use tail agnostic policy if Idx is the last index of Vec.
9408 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
9409 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
9410 Policy = RISCVII::TAIL_AGNOSTIC;
9411 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
9412 Idx, Mask, InsertVL, Policy);
9413
9414 if (AlignedIdx)
9415 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9416 Slideup, AlignedIdx);
9417 if (!VecVT.isFixedLengthVector())
9418 return Slideup;
9419 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
9420}
9421
9422// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
9423// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
9424// types this is done using VMV_X_S to allow us to glean information about the
9425// sign bits of the result.
9426SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
9427 SelectionDAG &DAG) const {
9428 SDLoc DL(Op);
9429 SDValue Idx = Op.getOperand(1);
9430 SDValue Vec = Op.getOperand(0);
9431 EVT EltVT = Op.getValueType();
9432 MVT VecVT = Vec.getSimpleValueType();
9433 MVT XLenVT = Subtarget.getXLenVT();
9434
9435 if (VecVT.getVectorElementType() == MVT::i1) {
9436 // Use vfirst.m to extract the first bit.
9437 if (isNullConstant(Idx)) {
9438 MVT ContainerVT = VecVT;
9439 if (VecVT.isFixedLengthVector()) {
9440 ContainerVT = getContainerForFixedLengthVector(VecVT);
9441 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9442 }
9443 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9444 SDValue Vfirst =
9445 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
9446 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
9447 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9448 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9449 }
9450 if (VecVT.isFixedLengthVector()) {
9451 unsigned NumElts = VecVT.getVectorNumElements();
9452 if (NumElts >= 8) {
9453 MVT WideEltVT;
9454 unsigned WidenVecLen;
9455 SDValue ExtractElementIdx;
9456 SDValue ExtractBitIdx;
9457 unsigned MaxEEW = Subtarget.getELen();
9458 MVT LargestEltVT = MVT::getIntegerVT(
9459 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
9460 if (NumElts <= LargestEltVT.getSizeInBits()) {
9461 assert(isPowerOf2_32(NumElts) &&
9462 "the number of elements should be power of 2");
9463 WideEltVT = MVT::getIntegerVT(NumElts);
9464 WidenVecLen = 1;
9465 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
9466 ExtractBitIdx = Idx;
9467 } else {
9468 WideEltVT = LargestEltVT;
9469 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
9470 // extract element index = index / element width
9471 ExtractElementIdx = DAG.getNode(
9472 ISD::SRL, DL, XLenVT, Idx,
9473 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
9474 // mask bit index = index % element width
9475 ExtractBitIdx = DAG.getNode(
9476 ISD::AND, DL, XLenVT, Idx,
9477 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
9478 }
9479 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
9480 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
9481 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
9482 Vec, ExtractElementIdx);
9483 // Extract the bit from GPR.
9484 SDValue ShiftRight =
9485 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
9486 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
9487 DAG.getConstant(1, DL, XLenVT));
9488 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9489 }
9490 }
9491 // Otherwise, promote to an i8 vector and extract from that.
9492 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9493 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9494 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
9495 }
9496
9497 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9498 EltVT == MVT::bf16) {
9499 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
9500 MVT IntVT = VecVT.changeTypeToInteger();
9501 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
9502 SDValue IntExtract =
9503 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
9504 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
9505 }
9506
9507 // If this is a fixed vector, we need to convert it to a scalable vector.
9508 MVT ContainerVT = VecVT;
9509 if (VecVT.isFixedLengthVector()) {
9510 ContainerVT = getContainerForFixedLengthVector(VecVT);
9511 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9512 }
9513
9514 // If we're compiling for an exact VLEN value and we have a known
9515 // constant index, we can always perform the extract in m1 (or
9516 // smaller) as we can determine the register corresponding to
9517 // the index in the register group.
9518 const auto VLen = Subtarget.getRealVLen();
9519 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
9520 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
9521 MVT M1VT = getLMUL1VT(ContainerVT);
9522 unsigned OrigIdx = IdxC->getZExtValue();
9523 EVT ElemVT = VecVT.getVectorElementType();
9524 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
9525 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9526 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9527 unsigned ExtractIdx =
9528 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9529 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
9530 DAG.getVectorIdxConstant(ExtractIdx, DL));
9531 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9532 ContainerVT = M1VT;
9533 }
9534
9535 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
9536 // contains our index.
9537 std::optional<uint64_t> MaxIdx;
9538 if (VecVT.isFixedLengthVector())
9539 MaxIdx = VecVT.getVectorNumElements() - 1;
9540 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
9541 MaxIdx = IdxC->getZExtValue();
9542 if (MaxIdx) {
9543 if (auto SmallerVT =
9544 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
9545 ContainerVT = *SmallerVT;
9546 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9547 DAG.getConstant(0, DL, XLenVT));
9548 }
9549 }
9550
9551 // If after narrowing, the required slide is still greater than LMUL2,
9552 // fallback to generic expansion and go through the stack. This is done
9553 // for a subtle reason: extracting *all* elements out of a vector is
9554 // widely expected to be linear in vector size, but because vslidedown
9555 // is linear in LMUL, performing N extracts using vslidedown becomes
9556 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
9557 // seems to have the same problem (the store is linear in LMUL), but the
9558 // generic expansion *memoizes* the store, and thus for many extracts of
9559 // the same vector we end up with one store and a bunch of loads.
9560 // TODO: We don't have the same code for insert_vector_elt because we
9561 // have BUILD_VECTOR and handle the degenerate case there. Should we
9562 // consider adding an inverse BUILD_VECTOR node?
9563 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
9564 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
9565 return SDValue();
9566
9567 // If the index is 0, the vector is already in the right position.
9568 if (!isNullConstant(Idx)) {
9569 // Use a VL of 1 to avoid processing more elements than we need.
9570 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
9571 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9572 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
9573 }
9574
9575 if (!EltVT.isInteger()) {
9576 // Floating-point extracts are handled in TableGen.
9577 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
9578 DAG.getVectorIdxConstant(0, DL));
9579 }
9580
9581 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
9582 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
9583}
9584
9585// Some RVV intrinsics may claim that they want an integer operand to be
9586// promoted or expanded.
9588 const RISCVSubtarget &Subtarget) {
9589 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
9590 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
9591 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
9592 "Unexpected opcode");
9593
9594 if (!Subtarget.hasVInstructions())
9595 return SDValue();
9596
9597 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9598 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9599 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9600
9601 SDLoc DL(Op);
9602
9604 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9605 if (!II || !II->hasScalarOperand())
9606 return SDValue();
9607
9608 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
9609 assert(SplatOp < Op.getNumOperands());
9610
9612 SDValue &ScalarOp = Operands[SplatOp];
9613 MVT OpVT = ScalarOp.getSimpleValueType();
9614 MVT XLenVT = Subtarget.getXLenVT();
9615
9616 // If this isn't a scalar, or its type is XLenVT we're done.
9617 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9618 return SDValue();
9619
9620 // Simplest case is that the operand needs to be promoted to XLenVT.
9621 if (OpVT.bitsLT(XLenVT)) {
9622 // If the operand is a constant, sign extend to increase our chances
9623 // of being able to use a .vi instruction. ANY_EXTEND would become a
9624 // a zero extend and the simm5 check in isel would fail.
9625 // FIXME: Should we ignore the upper bits in isel instead?
9626 unsigned ExtOpc =
9627 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9628 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9629 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9630 }
9631
9632 // Use the previous operand to get the vXi64 VT. The result might be a mask
9633 // VT for compares. Using the previous operand assumes that the previous
9634 // operand will never have a smaller element size than a scalar operand and
9635 // that a widening operation never uses SEW=64.
9636 // NOTE: If this fails the below assert, we can probably just find the
9637 // element count from any operand or result and use it to construct the VT.
9638 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
9639 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
9640
9641 // The more complex case is when the scalar is larger than XLenVT.
9642 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
9643 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
9644
9645 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
9646 // instruction to sign-extend since SEW>XLEN.
9647 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
9648 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
9649 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9650 }
9651
9652 switch (IntNo) {
9653 case Intrinsic::riscv_vslide1up:
9654 case Intrinsic::riscv_vslide1down:
9655 case Intrinsic::riscv_vslide1up_mask:
9656 case Intrinsic::riscv_vslide1down_mask: {
9657 // We need to special case these when the scalar is larger than XLen.
9658 unsigned NumOps = Op.getNumOperands();
9659 bool IsMasked = NumOps == 7;
9660
9661 // Convert the vector source to the equivalent nxvXi32 vector.
9662 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
9663 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
9664 SDValue ScalarLo, ScalarHi;
9665 std::tie(ScalarLo, ScalarHi) =
9666 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
9667
9668 // Double the VL since we halved SEW.
9669 SDValue AVL = getVLOperand(Op);
9670 SDValue I32VL;
9671
9672 // Optimize for constant AVL
9673 if (isa<ConstantSDNode>(AVL)) {
9674 const auto [MinVLMAX, MaxVLMAX] =
9676
9677 uint64_t AVLInt = AVL->getAsZExtVal();
9678 if (AVLInt <= MinVLMAX) {
9679 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
9680 } else if (AVLInt >= 2 * MaxVLMAX) {
9681 // Just set vl to VLMAX in this situation
9682 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
9683 } else {
9684 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
9685 // is related to the hardware implementation.
9686 // So let the following code handle
9687 }
9688 }
9689 if (!I32VL) {
9691 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
9692 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
9693 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
9694 SDValue SETVL =
9695 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
9696 // Using vsetvli instruction to get actually used length which related to
9697 // the hardware implementation
9698 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
9699 SEW, LMUL);
9700 I32VL =
9701 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
9702 }
9703
9704 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
9705
9706 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
9707 // instructions.
9708 SDValue Passthru;
9709 if (IsMasked)
9710 Passthru = DAG.getUNDEF(I32VT);
9711 else
9712 Passthru = DAG.getBitcast(I32VT, Operands[1]);
9713
9714 if (IntNo == Intrinsic::riscv_vslide1up ||
9715 IntNo == Intrinsic::riscv_vslide1up_mask) {
9716 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9717 ScalarHi, I32Mask, I32VL);
9718 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9719 ScalarLo, I32Mask, I32VL);
9720 } else {
9721 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9722 ScalarLo, I32Mask, I32VL);
9723 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9724 ScalarHi, I32Mask, I32VL);
9725 }
9726
9727 // Convert back to nxvXi64.
9728 Vec = DAG.getBitcast(VT, Vec);
9729
9730 if (!IsMasked)
9731 return Vec;
9732 // Apply mask after the operation.
9733 SDValue Mask = Operands[NumOps - 3];
9734 SDValue MaskedOff = Operands[1];
9735 // Assume Policy operand is the last operand.
9736 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
9737 // We don't need to select maskedoff if it's undef.
9738 if (MaskedOff.isUndef())
9739 return Vec;
9740 // TAMU
9741 if (Policy == RISCVII::TAIL_AGNOSTIC)
9742 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9743 DAG.getUNDEF(VT), AVL);
9744 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
9745 // It's fine because vmerge does not care mask policy.
9746 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9747 MaskedOff, AVL);
9748 }
9749 }
9750
9751 // We need to convert the scalar to a splat vector.
9752 SDValue VL = getVLOperand(Op);
9753 assert(VL.getValueType() == XLenVT);
9754 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
9755 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9756}
9757
9758// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
9759// scalable vector llvm.get.vector.length for now.
9760//
9761// We need to convert from a scalable VF to a vsetvli with VLMax equal to
9762// (vscale * VF). The vscale and VF are independent of element width. We use
9763// SEW=8 for the vsetvli because it is the only element width that supports all
9764// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
9765// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
9766// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
9767// SEW and LMUL are better for the surrounding vector instructions.
9769 const RISCVSubtarget &Subtarget) {
9770 MVT XLenVT = Subtarget.getXLenVT();
9771
9772 // The smallest LMUL is only valid for the smallest element width.
9773 const unsigned ElementWidth = 8;
9774
9775 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
9776 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
9777 // We don't support VF==1 with ELEN==32.
9778 [[maybe_unused]] unsigned MinVF =
9779 RISCV::RVVBitsPerBlock / Subtarget.getELen();
9780
9781 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
9782 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
9783 "Unexpected VF");
9784
9785 bool Fractional = VF < LMul1VF;
9786 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9787 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
9788 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
9789
9790 SDLoc DL(N);
9791
9792 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
9793 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
9794
9795 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
9796
9797 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
9798 SDValue Res =
9799 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
9800 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
9801}
9802
9804 const RISCVSubtarget &Subtarget) {
9805 SDValue Op0 = N->getOperand(1);
9806 MVT OpVT = Op0.getSimpleValueType();
9807 MVT ContainerVT = OpVT;
9808 if (OpVT.isFixedLengthVector()) {
9809 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
9810 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
9811 }
9812 MVT XLenVT = Subtarget.getXLenVT();
9813 SDLoc DL(N);
9814 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
9815 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
9816 if (isOneConstant(N->getOperand(2)))
9817 return Res;
9818
9819 // Convert -1 to VL.
9820 SDValue Setcc =
9821 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9822 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9823 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9824}
9825
9826static inline void promoteVCIXScalar(const SDValue &Op,
9828 SelectionDAG &DAG) {
9829 const RISCVSubtarget &Subtarget =
9831
9832 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9833 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9834 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9835 SDLoc DL(Op);
9836
9838 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9839 if (!II || !II->hasScalarOperand())
9840 return;
9841
9842 unsigned SplatOp = II->ScalarOperand + 1;
9843 assert(SplatOp < Op.getNumOperands());
9844
9845 SDValue &ScalarOp = Operands[SplatOp];
9846 MVT OpVT = ScalarOp.getSimpleValueType();
9847 MVT XLenVT = Subtarget.getXLenVT();
9848
9849 // The code below is partially copied from lowerVectorIntrinsicScalars.
9850 // If this isn't a scalar, or its type is XLenVT we're done.
9851 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9852 return;
9853
9854 // Manually emit promote operation for scalar operation.
9855 if (OpVT.bitsLT(XLenVT)) {
9856 unsigned ExtOpc =
9857 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9858 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9859 }
9860}
9861
9862static void processVCIXOperands(SDValue &OrigOp,
9864 SelectionDAG &DAG) {
9865 promoteVCIXScalar(OrigOp, Operands, DAG);
9866 const RISCVSubtarget &Subtarget =
9868 for (SDValue &V : Operands) {
9869 EVT ValType = V.getValueType();
9870 if (ValType.isVector() && ValType.isFloatingPoint()) {
9871 MVT InterimIVT =
9872 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9873 ValType.getVectorElementCount());
9874 V = DAG.getBitcast(InterimIVT, V);
9875 }
9876 if (ValType.isFixedLengthVector()) {
9877 MVT OpContainerVT = getContainerForFixedLengthVector(
9878 DAG, V.getSimpleValueType(), Subtarget);
9879 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9880 }
9881 }
9882}
9883
9884// LMUL * VLEN should be greater than or equal to EGS * SEW
9885static inline bool isValidEGW(int EGS, EVT VT,
9886 const RISCVSubtarget &Subtarget) {
9887 return (Subtarget.getRealMinVLen() *
9889 EGS * VT.getScalarSizeInBits();
9890}
9891
9892SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9893 SelectionDAG &DAG) const {
9894 unsigned IntNo = Op.getConstantOperandVal(0);
9895 SDLoc DL(Op);
9896 MVT XLenVT = Subtarget.getXLenVT();
9897
9898 switch (IntNo) {
9899 default:
9900 break; // Don't custom lower most intrinsics.
9901 case Intrinsic::riscv_tuple_insert: {
9902 SDValue Vec = Op.getOperand(1);
9903 SDValue SubVec = Op.getOperand(2);
9904 SDValue Index = Op.getOperand(3);
9905
9906 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
9907 SubVec, Index);
9908 }
9909 case Intrinsic::riscv_tuple_extract: {
9910 SDValue Vec = Op.getOperand(1);
9911 SDValue Index = Op.getOperand(2);
9912
9913 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
9914 Index);
9915 }
9916 case Intrinsic::thread_pointer: {
9917 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9918 return DAG.getRegister(RISCV::X4, PtrVT);
9919 }
9920 case Intrinsic::riscv_orc_b:
9921 case Intrinsic::riscv_brev8:
9922 case Intrinsic::riscv_sha256sig0:
9923 case Intrinsic::riscv_sha256sig1:
9924 case Intrinsic::riscv_sha256sum0:
9925 case Intrinsic::riscv_sha256sum1:
9926 case Intrinsic::riscv_sm3p0:
9927 case Intrinsic::riscv_sm3p1: {
9928 unsigned Opc;
9929 switch (IntNo) {
9930 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
9931 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
9932 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9933 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9934 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9935 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9936 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
9937 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
9938 }
9939
9940 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9941 }
9942 case Intrinsic::riscv_sm4ks:
9943 case Intrinsic::riscv_sm4ed: {
9944 unsigned Opc =
9945 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9946
9947 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9948 Op.getOperand(3));
9949 }
9950 case Intrinsic::riscv_zip:
9951 case Intrinsic::riscv_unzip: {
9952 unsigned Opc =
9953 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9954 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9955 }
9956 case Intrinsic::riscv_mopr:
9957 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9958 Op.getOperand(2));
9959
9960 case Intrinsic::riscv_moprr: {
9961 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9962 Op.getOperand(2), Op.getOperand(3));
9963 }
9964 case Intrinsic::riscv_clmul:
9965 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9966 Op.getOperand(2));
9967 case Intrinsic::riscv_clmulh:
9968 case Intrinsic::riscv_clmulr: {
9969 unsigned Opc =
9970 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9971 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9972 }
9973 case Intrinsic::experimental_get_vector_length:
9974 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9975 case Intrinsic::experimental_cttz_elts:
9976 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9977 case Intrinsic::riscv_vmv_x_s: {
9978 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9979 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9980 }
9981 case Intrinsic::riscv_vfmv_f_s:
9982 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9983 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9984 case Intrinsic::riscv_vmv_v_x:
9985 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9986 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9987 Subtarget);
9988 case Intrinsic::riscv_vfmv_v_f:
9989 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9990 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9991 case Intrinsic::riscv_vmv_s_x: {
9992 SDValue Scalar = Op.getOperand(2);
9993
9994 if (Scalar.getValueType().bitsLE(XLenVT)) {
9995 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9996 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9997 Op.getOperand(1), Scalar, Op.getOperand(3));
9998 }
9999
10000 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
10001
10002 // This is an i64 value that lives in two scalar registers. We have to
10003 // insert this in a convoluted way. First we build vXi64 splat containing
10004 // the two values that we assemble using some bit math. Next we'll use
10005 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
10006 // to merge element 0 from our splat into the source vector.
10007 // FIXME: This is probably not the best way to do this, but it is
10008 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
10009 // point.
10010 // sw lo, (a0)
10011 // sw hi, 4(a0)
10012 // vlse vX, (a0)
10013 //
10014 // vid.v vVid
10015 // vmseq.vx mMask, vVid, 0
10016 // vmerge.vvm vDest, vSrc, vVal, mMask
10017 MVT VT = Op.getSimpleValueType();
10018 SDValue Vec = Op.getOperand(1);
10019 SDValue VL = getVLOperand(Op);
10020
10021 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
10022 if (Op.getOperand(1).isUndef())
10023 return SplattedVal;
10024 SDValue SplattedIdx =
10025 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10026 DAG.getConstant(0, DL, MVT::i32), VL);
10027
10028 MVT MaskVT = getMaskTypeFor(VT);
10029 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
10030 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10031 SDValue SelectCond =
10032 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10033 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
10034 DAG.getUNDEF(MaskVT), Mask, VL});
10035 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
10036 Vec, DAG.getUNDEF(VT), VL);
10037 }
10038 case Intrinsic::riscv_vfmv_s_f:
10039 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
10040 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10041 // EGS * EEW >= 128 bits
10042 case Intrinsic::riscv_vaesdf_vv:
10043 case Intrinsic::riscv_vaesdf_vs:
10044 case Intrinsic::riscv_vaesdm_vv:
10045 case Intrinsic::riscv_vaesdm_vs:
10046 case Intrinsic::riscv_vaesef_vv:
10047 case Intrinsic::riscv_vaesef_vs:
10048 case Intrinsic::riscv_vaesem_vv:
10049 case Intrinsic::riscv_vaesem_vs:
10050 case Intrinsic::riscv_vaeskf1:
10051 case Intrinsic::riscv_vaeskf2:
10052 case Intrinsic::riscv_vaesz_vs:
10053 case Intrinsic::riscv_vsm4k:
10054 case Intrinsic::riscv_vsm4r_vv:
10055 case Intrinsic::riscv_vsm4r_vs: {
10056 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10057 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10058 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10059 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
10060 return Op;
10061 }
10062 // EGS * EEW >= 256 bits
10063 case Intrinsic::riscv_vsm3c:
10064 case Intrinsic::riscv_vsm3me: {
10065 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
10066 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
10067 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
10068 return Op;
10069 }
10070 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
10071 case Intrinsic::riscv_vsha2ch:
10072 case Intrinsic::riscv_vsha2cl:
10073 case Intrinsic::riscv_vsha2ms: {
10074 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
10075 !Subtarget.hasStdExtZvknhb())
10076 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
10077 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10078 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10079 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10080 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
10081 return Op;
10082 }
10083 case Intrinsic::riscv_sf_vc_v_x:
10084 case Intrinsic::riscv_sf_vc_v_i:
10085 case Intrinsic::riscv_sf_vc_v_xv:
10086 case Intrinsic::riscv_sf_vc_v_iv:
10087 case Intrinsic::riscv_sf_vc_v_vv:
10088 case Intrinsic::riscv_sf_vc_v_fv:
10089 case Intrinsic::riscv_sf_vc_v_xvv:
10090 case Intrinsic::riscv_sf_vc_v_ivv:
10091 case Intrinsic::riscv_sf_vc_v_vvv:
10092 case Intrinsic::riscv_sf_vc_v_fvv:
10093 case Intrinsic::riscv_sf_vc_v_xvw:
10094 case Intrinsic::riscv_sf_vc_v_ivw:
10095 case Intrinsic::riscv_sf_vc_v_vvw:
10096 case Intrinsic::riscv_sf_vc_v_fvw: {
10097 MVT VT = Op.getSimpleValueType();
10098
10099 SmallVector<SDValue> Operands{Op->op_values()};
10101
10102 MVT RetVT = VT;
10103 if (VT.isFixedLengthVector())
10105 else if (VT.isFloatingPoint())
10108
10109 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
10110
10111 if (VT.isFixedLengthVector())
10112 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
10113 else if (VT.isFloatingPoint())
10114 NewNode = DAG.getBitcast(VT, NewNode);
10115
10116 if (Op == NewNode)
10117 break;
10118
10119 return NewNode;
10120 }
10121 }
10122
10123 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10124}
10125
// Lower a chained SiFive VCIX intrinsic to the target-specific node `Type`,
// carrying the chain through to the replacement node.
// NOTE(review): this extract is missing the first signature line and a few
// statements (gaps in the embedded numbering at 10126, 10133, 10140,
// 10144 and 10147) -- verify the elided details against the upstream file.
10127 unsigned Type) {
10128 SDLoc DL(Op);
 // Copy all operands, then drop operand 1 -- presumably the intrinsic-ID
 // constant, which the target node does not take. TODO confirm.
10129 SmallVector<SDValue> Operands{Op->op_values()};
10130 Operands.erase(Operands.begin() + 1);
10131
10132 const RISCVSubtarget &Subtarget =
10134 MVT VT = Op.getSimpleValueType();
 // RetVT is the type the replacement node is created with; FloatVT records
 // the integer-vector stand-in used for floating-point results.
10135 MVT RetVT = VT;
10136 MVT FloatVT = VT;
10137
 // FP results are produced as same-sized integer vectors and bitcast back.
10138 if (VT.isFloatingPoint()) {
10139 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
10141 FloatVT = RetVT;
10142 }
 // Fixed-length results are computed in their scalable container type.
10143 if (VT.isFixedLengthVector())
10145 Subtarget);
10146
10148
 // Build the replacement node with an extra MVT::Other result so the
 // incoming chain is preserved.
10149 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
10150 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
10151 SDValue Chain = NewNode.getValue(1);
10152
 // Undo the container conversion / bitcast on the value result.
10153 if (VT.isFixedLengthVector())
10154 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
10155 if (VT.isFloatingPoint())
10156 NewNode = DAG.getBitcast(VT, NewNode);
10157
 // Re-pair the converted value with the chain.
10158 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
10159
10160 return NewNode;
10161}
10162
// Lower an unchained VCIX intrinsic to the target-specific node `Type`,
// keeping the original result type.
// NOTE(review): the first signature line (10163) and one statement (10167)
// are elided in this extract -- verify against the upstream file.
10164 unsigned Type) {
 // Copy all operands, then drop operand 1 -- presumably the intrinsic-ID
 // constant, which the target node does not take. TODO confirm.
10165 SmallVector<SDValue> Operands{Op->op_values()};
10166 Operands.erase(Operands.begin() + 1);
10168
10169 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
10170}
10171
// Custom-lower ISD::INTRINSIC_W_CHAIN nodes. Fixed-length segment loads
// (riscv.segN.load) are expanded to the matching vlsegN intrinsic operating
// on a RISC-V vector tuple type; the SiFive sf.vc.v.*.se intrinsics are
// handled via elided per-case code; anything unhandled falls through to
// lowerVectorIntrinsicScalars.
10172SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
10173 SelectionDAG &DAG) const {
 // Operand 0 is the chain; operand 1 carries the intrinsic ID.
10174 unsigned IntNo = Op.getConstantOperandVal(1);
10175 switch (IntNo) {
10176 default:
10177 break;
10178 case Intrinsic::riscv_seg2_load:
10179 case Intrinsic::riscv_seg3_load:
10180 case Intrinsic::riscv_seg4_load:
10181 case Intrinsic::riscv_seg5_load:
10182 case Intrinsic::riscv_seg6_load:
10183 case Intrinsic::riscv_seg7_load:
10184 case Intrinsic::riscv_seg8_load: {
10185 SDLoc DL(Op);
 // vlsegN intrinsic IDs, indexed by NF - 2.
10186 static const Intrinsic::ID VlsegInts[7] = {
10187 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
10188 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
10189 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
10190 Intrinsic::riscv_vlseg8};
 // The node produces NF vector results plus the chain.
10191 unsigned NF = Op->getNumValues() - 1;
10192 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10193 MVT XLenVT = Subtarget.getXLenVT();
10194 MVT VT = Op->getSimpleValueType(0);
10195 MVT ContainerVT = getContainerForFixedLengthVector(VT);
 // Tuple size in bits: NF fields of (min elements x element width) each.
10196 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10197 ContainerVT.getScalarSizeInBits();
10198 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10199
10200 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10201 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
10202 auto *Load = cast<MemIntrinsicSDNode>(Op);
10203
10204 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
 // (chain, vlsegN id, undef passthru tuple, pointer operand, VL, log2(SEW)).
10205 SDValue Ops[] = {
10206 Load->getChain(),
10207 IntID,
10208 DAG.getUNDEF(VecTupTy),
10209 Op.getOperand(2),
10210 VL,
10211 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
 // NOTE(review): the getMemIntrinsicNode call and the declaration of
 // `Results` are elided here (numbering jumps 10212->10214->10216).
10212 SDValue Result =
10214 Load->getMemoryVT(), Load->getMemOperand());
 // Unpack each tuple field back into a fixed-length vector result.
10216 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
10217 SDValue SubVec =
10218 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
10219 Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL));
10220 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
10221 }
 // Re-append the chain as the final result.
10222 Results.push_back(Result.getValue(1));
10223 return DAG.getMergeValues(Results, DL);
10224 }
 // NOTE(review): the body of each sf_vc case (a return through the VCIX
 // lowering helper) is elided in this extract -- only the labels remain.
10225 case Intrinsic::riscv_sf_vc_v_x_se:
10227 case Intrinsic::riscv_sf_vc_v_i_se:
10229 case Intrinsic::riscv_sf_vc_v_xv_se:
10231 case Intrinsic::riscv_sf_vc_v_iv_se:
10233 case Intrinsic::riscv_sf_vc_v_vv_se:
10235 case Intrinsic::riscv_sf_vc_v_fv_se:
10237 case Intrinsic::riscv_sf_vc_v_xvv_se:
10239 case Intrinsic::riscv_sf_vc_v_ivv_se:
10241 case Intrinsic::riscv_sf_vc_v_vvv_se:
10243 case Intrinsic::riscv_sf_vc_v_fvv_se:
10245 case Intrinsic::riscv_sf_vc_v_xvw_se:
10247 case Intrinsic::riscv_sf_vc_v_ivw_se:
10249 case Intrinsic::riscv_sf_vc_v_vvw_se:
10251 case Intrinsic::riscv_sf_vc_v_fvw_se:
10253 }
10254
 // Not handled above: fall back to the generic scalar-operand legalization.
10255 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10256}
10257
// Custom-lower ISD::INTRINSIC_VOID nodes. Fixed-length segment stores
// (riscv.segN.store) are packed into a RISC-V vector tuple and expanded to
// the matching vssegN intrinsic; the SiFive sf.vc.*.se side-effecting
// intrinsics are handled via elided per-case code; anything unhandled falls
// through to lowerVectorIntrinsicScalars.
10258SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10259 SelectionDAG &DAG) const {
 // Operand 0 is the chain; operand 1 carries the intrinsic ID.
10260 unsigned IntNo = Op.getConstantOperandVal(1);
10261 switch (IntNo) {
10262 default:
10263 break;
10264 case Intrinsic::riscv_seg2_store:
10265 case Intrinsic::riscv_seg3_store:
10266 case Intrinsic::riscv_seg4_store:
10267 case Intrinsic::riscv_seg5_store:
10268 case Intrinsic::riscv_seg6_store:
10269 case Intrinsic::riscv_seg7_store:
10270 case Intrinsic::riscv_seg8_store: {
10271 SDLoc DL(Op);
 // vssegN intrinsic IDs, indexed by NF - 2.
10272 static const Intrinsic::ID VssegInts[] = {
10273 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
10274 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
10275 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
10276 Intrinsic::riscv_vsseg8};
10277 // Operands are (chain, int_id, vec*, ptr, vl)
10278 unsigned NF = Op->getNumOperands() - 4;
10279 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10280 MVT XLenVT = Subtarget.getXLenVT();
10281 MVT VT = Op->getOperand(2).getSimpleValueType();
10282 MVT ContainerVT = getContainerForFixedLengthVector(VT);
 // Tuple size in bits: NF fields of (min elements x element width) each.
10283 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10284 ContainerVT.getScalarSizeInBits();
10285 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10286
10287 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10288 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
10289 SDValue Ptr = Op->getOperand(NF + 2);
10290
10291 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
10292
 // Pack the NF fixed-length source vectors into one tuple value.
 // NOTE(review): the line between TUPLE_INSERT's operands (presumably a
 // convertToScalableVector call, 10297) is elided in this extract.
10293 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
10294 for (unsigned i = 0; i < NF; i++)
10295 StoredVal = DAG.getNode(
10296 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
10298 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
10299 DAG.getVectorIdxConstant(i, DL));
10300
 // (chain, vssegN id, tuple value, pointer, VL, log2(SEW)).
10301 SDValue Ops[] = {
10302 FixedIntrinsic->getChain(),
10303 IntID,
10304 StoredVal,
10305 Ptr,
10306 VL,
10307 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10308
10309 return DAG.getMemIntrinsicNode(
10310 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
10311 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
10312 }
 // NOTE(review): the body of each sf_vc case (a return through the VCIX
 // lowering helper) is elided in this extract -- only the labels remain.
10313 case Intrinsic::riscv_sf_vc_xv_se:
10315 case Intrinsic::riscv_sf_vc_iv_se:
10317 case Intrinsic::riscv_sf_vc_vv_se:
10319 case Intrinsic::riscv_sf_vc_fv_se:
10321 case Intrinsic::riscv_sf_vc_xvv_se:
10323 case Intrinsic::riscv_sf_vc_ivv_se:
10325 case Intrinsic::riscv_sf_vc_vvv_se:
10327 case Intrinsic::riscv_sf_vc_fvv_se:
10329 case Intrinsic::riscv_sf_vc_xvw_se:
10331 case Intrinsic::riscv_sf_vc_ivw_se:
10333 case Intrinsic::riscv_sf_vc_vvw_se:
10335 case Intrinsic::riscv_sf_vc_fvw_se:
10337 }
10338
 // Not handled above: fall back to the generic scalar-operand legalization.
10339 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10340}
10341
// Map an ISD VECREDUCE_* / VP_REDUCE_* opcode to the corresponding RISCVISD
// VL-predicated reduction opcode. Unhandled opcodes are unreachable.
// NOTE(review): the `return RISCVISD::...` line of every case is elided in
// this extract (numbering gaps at 10348, 10350-10351, 10353-10354, ...);
// consult the upstream file for the exact opcode mapping.
10342static unsigned getRVVReductionOp(unsigned ISDOpcode) {
10343 switch (ISDOpcode) {
10344 default:
10345 llvm_unreachable("Unhandled reduction");
10346 case ISD::VP_REDUCE_ADD:
10347 case ISD::VECREDUCE_ADD:
10349 case ISD::VP_REDUCE_UMAX:
10352 case ISD::VP_REDUCE_SMAX:
10355 case ISD::VP_REDUCE_UMIN:
10358 case ISD::VP_REDUCE_SMIN:
10361 case ISD::VP_REDUCE_AND:
10362 case ISD::VECREDUCE_AND:
10364 case ISD::VP_REDUCE_OR:
10365 case ISD::VECREDUCE_OR:
10367 case ISD::VP_REDUCE_XOR:
10368 case ISD::VECREDUCE_XOR:
10370 case ISD::VP_REDUCE_FADD:
10372 case ISD::VP_REDUCE_SEQ_FADD:
10374 case ISD::VP_REDUCE_FMAX:
10375 case ISD::VP_REDUCE_FMAXIMUM:
10377 case ISD::VP_REDUCE_FMIN:
10378 case ISD::VP_REDUCE_FMINIMUM:
10380 }
10381
10382}
10383
// Lower AND/OR/XOR reductions (plain and VP forms) over i1 mask vectors.
// Each is reduced to a vcpop.m population count followed by a scalar
// compare: AND -> vcpop(~x) == 0, OR -> vcpop(x) != 0,
// XOR -> (vcpop(x) & 1) != 0.
10384SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
10385 SelectionDAG &DAG,
10386 bool IsVP) const {
10387 SDLoc DL(Op);
 // VP nodes carry the start value in operand 0, the vector in operand 1.
10388 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
10389 MVT VecVT = Vec.getSimpleValueType();
10390 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
10391 Op.getOpcode() == ISD::VECREDUCE_OR ||
10392 Op.getOpcode() == ISD::VECREDUCE_XOR ||
10393 Op.getOpcode() == ISD::VP_REDUCE_AND ||
10394 Op.getOpcode() == ISD::VP_REDUCE_OR ||
10395 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
10396 "Unexpected reduction lowering");
10397
10398 MVT XLenVT = Subtarget.getXLenVT();
10399
 // Fixed-length masks are operated on in their scalable container type.
10400 MVT ContainerVT = VecVT;
10401 if (VecVT.isFixedLengthVector()) {
10402 ContainerVT = getContainerForFixedLengthVector(VecVT);
10403 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10404 }
10405
 // VP forms carry explicit mask/EVL operands; otherwise use the defaults.
10406 SDValue Mask, VL;
10407 if (IsVP) {
10408 Mask = Op.getOperand(2);
10409 VL = Op.getOperand(3);
10410 } else {
10411 std::tie(Mask, VL) =
10412 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10413 }
10414
 // NOTE(review): the declaration of the condition code `CC` assigned below
 // is elided here (numbering jumps 10414->10416) -- verify upstream.
10416 switch (Op.getOpcode()) {
10417 default:
10418 llvm_unreachable("Unhandled reduction");
10419 case ISD::VECREDUCE_AND:
10420 case ISD::VP_REDUCE_AND: {
10421 // vcpop ~x == 0
 // Invert the mask first; the VL-predicated VMXOR form is needed whenever
 // a VL is in play (VP or converted fixed-length input).
10422 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10423 if (IsVP || VecVT.isFixedLengthVector())
10424 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
10425 else
10426 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
10427 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10428 CC = ISD::SETEQ;
10429 break;
10430 }
10431 case ISD::VECREDUCE_OR:
10432 case ISD::VP_REDUCE_OR:
10433 // vcpop x != 0
10434 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10435 CC = ISD::SETNE;
10436 break;
10437 case ISD::VECREDUCE_XOR:
10438 case ISD::VP_REDUCE_XOR: {
10439 // ((vcpop x) & 1) != 0
 // XOR-reduction of i1s is the parity of the set-bit count.
10440 SDValue One = DAG.getConstant(1, DL, XLenVT);
10441 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10442 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
10443 CC = ISD::SETNE;
10444 break;
10445 }
10446 }
10447
 // Compare the (possibly masked) population count against zero and
 // truncate the XLen result down to the i1 result type.
10448 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10449 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
10450 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
10451
10452 if (!IsVP)
10453 return SetCC;
10454
10455 // Now include the start value in the operation.
10456 // Note that we must return the start value when no elements are operated
10457 // upon. The vcpop instructions we've emitted in each case above will return
10458 // 0 for an inactive vector, and so we've already received the neutral value:
10459 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
10460 // can simply include the start value.
10461 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10462 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
10463}
10464
10465static bool isNonZeroAVL(SDValue AVL) {
10466 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
10467 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
10468 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
10469 (ImmAVL && ImmAVL->getZExtValue() >= 1);
10470}
10471
10472/// Helper to lower a reduction sequence of the form:
10473/// scalar = reduce_op vec, scalar_start
10474static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
10475 SDValue StartValue, SDValue Vec, SDValue Mask,
10476 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
10477 const RISCVSubtarget &Subtarget) {
10478 const MVT VecVT = Vec.getSimpleValueType();
10479 const MVT M1VT = getLMUL1VT(VecVT);
10480 const MVT XLenVT = Subtarget.getXLenVT();
10481 const bool NonZeroAVL = isNonZeroAVL(VL);
10482
10483 // The reduction needs an LMUL1 input; do the splat at either LMUL1
10484 // or the original VT if fractional.
10485 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
10486 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
10487 // prove it is non-zero. For the AVL=0 case, we need the scalar to
10488 // be the result of the reduction operation.
10489 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
10490 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
10491 DAG, Subtarget);
10492 if (M1VT != InnerVT)
10493 InitialValue =
10494 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
10495 InitialValue, DAG.getVectorIdxConstant(0, DL));
10496 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
10497 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10498 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
10499 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
10500 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
10501 DAG.getVectorIdxConstant(0, DL));
10502}
10503
// Lower an integer VECREDUCE_* node to an RVV reduction sequence, splitting
// illegally-large vectors down to a legal type first.
10504SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
10505 SelectionDAG &DAG) const {
10506 SDLoc DL(Op);
10507 SDValue Vec = Op.getOperand(0);
10508 EVT VecEVT = Vec.getValueType();
10509
 // The element-wise binary op corresponding to this reduction (ADD, AND, ...).
10510 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10511
10512 // Due to ordering in legalize types we may have a vector type that needs to
10513 // be split. Do that manually so we can get down to a legal type.
 // NOTE(review): the loop condition's right-hand side (presumably
 // TargetLowering::TypeSplitVector, line 10515) is elided in this extract.
10514 while (getTypeAction(*DAG.getContext(), VecEVT) ==
10516 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
10517 VecEVT = Lo.getValueType();
 // Combine the halves element-wise; the reduction of the combined vector
 // equals the reduction of the original.
10518 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
10519 }
10520
10521 // TODO: The type may need to be widened rather than split. Or widened before
10522 // it can be split.
10523 if (!isTypeLegal(VecEVT))
10524 return SDValue();
10525
10526 MVT VecVT = VecEVT.getSimpleVT();
10527 MVT VecEltVT = VecVT.getVectorElementType();
10528 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
10529
 // Fixed-length vectors are reduced inside their scalable container type.
10530 MVT ContainerVT = VecVT;
10531 if (VecVT.isFixedLengthVector()) {
10532 ContainerVT = getContainerForFixedLengthVector(VecVT);
10533 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10534 }
10535
10536 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10537
 // Default start value is the op's neutral element. For min/max/and/or,
 // element 0 of the vector is used instead -- presumably because these ops
 // are idempotent, so a vector element is a valid start value; confirm
 // against upstream rationale.
10538 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
10539 switch (BaseOpc) {
10540 case ISD::AND:
10541 case ISD::OR:
10542 case ISD::UMAX:
10543 case ISD::UMIN:
10544 case ISD::SMAX:
10545 case ISD::SMIN:
10546 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
10547 DAG.getVectorIdxConstant(0, DL));
10548 }
10549 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
10550 Mask, VL, DL, DAG, Subtarget);
10551}
10552
10553// Given a reduction op, this function returns the matching reduction opcode,
10554// the vector SDValue and the scalar SDValue required to lower this to a
10555// RISCVISD node.
// NOTE(review): this extract is missing the signature line carrying the
// function name/parameters (10557) as well as several case labels and the
// ternary's arms (10571, 10574-10576, 10583-10584) -- verify against the
// upstream file.
10556static std::tuple<unsigned, SDValue, SDValue>
10558 const RISCVSubtarget &Subtarget) {
10559 SDLoc DL(Op);
10560 auto Flags = Op->getFlags();
10561 unsigned Opcode = Op.getOpcode();
10562 switch (Opcode) {
10563 default:
10564 llvm_unreachable("Unhandled reduction");
10565 case ISD::VECREDUCE_FADD: {
10566 // Use positive zero if we can. It is cheaper to materialize.
10567 SDValue Zero =
10568 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
10569 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
10570 }
 // Ordered FADD: operand 0 is the start scalar, operand 1 the vector.
10572 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
10573 Op.getOperand(0));
10577 case ISD::VECREDUCE_FMAX: {
 // min/max-style reductions seed the scalar operand with element 0.
10578 SDValue Front =
10579 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
10580 DAG.getVectorIdxConstant(0, DL));
10581 unsigned RVVOpc =
10582 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
10585 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
10586 }
10587 }
10588}
10589
10590SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
10591 SelectionDAG &DAG) const {
10592 SDLoc DL(Op);
10593 MVT VecEltVT = Op.getSimpleValueType();
10594
10595 unsigned RVVOpcode;
10596 SDValue VectorVal, ScalarVal;
10597 std::tie(RVVOpcode, VectorVal, ScalarVal) =
10598 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
10599 MVT VecVT = VectorVal.getSimpleValueType();
10600
10601 MVT ContainerVT = VecVT;
10602 if (VecVT.isFixedLengthVector()) {
10603 ContainerVT = getContainerForFixedLengthVector(VecVT);
10604 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
10605 }
10606
10607 MVT ResVT = Op.getSimpleValueType();
10608 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10609 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
10610 VL, DL, DAG, Subtarget);
10611 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
10612 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
10613 return Res;
10614
10615 if (Op->getFlags().hasNoNaNs())
10616 return Res;
10617
10618 // Force output to NaN if any element is Nan.
10619 SDValue IsNan =
10620 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
10621 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
10622 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
10623 MVT XLenVT = Subtarget.getXLenVT();
10624 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
10625 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
10626 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10627 return DAG.getSelect(
10628 DL, ResVT, NoNaNs, Res,
10629 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10630}
10631
10632SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
10633 SelectionDAG &DAG) const {
10634 SDLoc DL(Op);
10635 unsigned Opc = Op.getOpcode();
10636 SDValue Start = Op.getOperand(0);
10637 SDValue Vec = Op.getOperand(1);
10638 EVT VecEVT = Vec.getValueType();
10639 MVT XLenVT = Subtarget.getXLenVT();
10640
10641 // TODO: The type may need to be widened rather than split. Or widened before
10642 // it can be split.
10643 if (!isTypeLegal(VecEVT))
10644 return SDValue();
10645
10646 MVT VecVT = VecEVT.getSimpleVT();
10647 unsigned RVVOpcode = getRVVReductionOp(Opc);
10648
10649 if (VecVT.isFixedLengthVector()) {
10650 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
10651 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10652 }
10653
10654 SDValue VL = Op.getOperand(3);
10655 SDValue Mask = Op.getOperand(2);
10656 SDValue Res =
10657 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
10658 Vec, Mask, VL, DL, DAG, Subtarget);
10659 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
10660 Op->getFlags().hasNoNaNs())
10661 return Res;
10662
10663 // Propagate NaNs.
10664 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
10665 // Check if any of the elements in Vec is NaN.
10666 SDValue IsNaN = DAG.getNode(
10667 RISCVISD::SETCC_VL, DL, PredVT,
10668 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
10669 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
10670 // Check if the start value is NaN.
10671 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
10672 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
10673 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10674 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10675 MVT ResVT = Res.getSimpleValueType();
10676 return DAG.getSelect(
10677 DL, ResVT, NoNaNs, Res,
10678 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10679}
10680
10681SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
10682 SelectionDAG &DAG) const {
10683 SDValue Vec = Op.getOperand(0);
10684 SDValue SubVec = Op.getOperand(1);
10685 MVT VecVT = Vec.getSimpleValueType();
10686 MVT SubVecVT = SubVec.getSimpleValueType();
10687
10688 SDLoc DL(Op);
10689 MVT XLenVT = Subtarget.getXLenVT();
10690 unsigned OrigIdx = Op.getConstantOperandVal(2);
10691 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10692
10693 if (OrigIdx == 0 && Vec.isUndef())
10694 return Op;
10695
10696 // We don't have the ability to slide mask vectors up indexed by their i1
10697 // elements; the smallest we can do is i8. Often we are able to bitcast to
10698 // equivalent i8 vectors. Note that when inserting a fixed-length vector
10699 // into a scalable one, we might not necessarily have enough scalable
10700 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
10701 if (SubVecVT.getVectorElementType() == MVT::i1) {
10702 if (VecVT.getVectorMinNumElements() >= 8 &&
10703 SubVecVT.getVectorMinNumElements() >= 8) {
10704 assert(OrigIdx % 8 == 0 && "Invalid index");
10705 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10706 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10707 "Unexpected mask vector lowering");
10708 OrigIdx /= 8;
10709 SubVecVT =
10710 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10711 SubVecVT.isScalableVector());
10712 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10713 VecVT.isScalableVector());
10714 Vec = DAG.getBitcast(VecVT, Vec);
10715 SubVec = DAG.getBitcast(SubVecVT, SubVec);
10716 } else {
10717 // We can't slide this mask vector up indexed by its i1 elements.
10718 // This poses a problem when we wish to insert a scalable vector which
10719 // can't be re-expressed as a larger type. Just choose the slow path and
10720 // extend to a larger type, then truncate back down.
10721 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10722 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10723 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10724 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
10725 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
10726 Op.getOperand(2));
10727 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
10728 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
10729 }
10730 }
10731
10732 // If the subvector vector is a fixed-length type and we don't know VLEN
10733 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10734 // don't know which register of a LMUL group contains the specific subvector
10735 // as we only know the minimum register size. Therefore we must slide the
10736 // vector group up the full amount.
10737 const auto VLen = Subtarget.getRealVLen();
10738 if (SubVecVT.isFixedLengthVector() && !VLen) {
10739 MVT ContainerVT = VecVT;
10740 if (VecVT.isFixedLengthVector()) {
10741 ContainerVT = getContainerForFixedLengthVector(VecVT);
10742 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10743 }
10744
10745 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10746 DAG.getUNDEF(ContainerVT), SubVec,
10747 DAG.getVectorIdxConstant(0, DL));
10748
10749 SDValue Mask =
10750 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10751 // Set the vector length to only the number of elements we care about. Note
10752 // that for slideup this includes the offset.
10753 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10754 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
10755
10756 // Use tail agnostic policy if we're inserting over Vec's tail.
10758 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10759 Policy = RISCVII::TAIL_AGNOSTIC;
10760
10761 // If we're inserting into the lowest elements, use a tail undisturbed
10762 // vmv.v.v.
10763 if (OrigIdx == 0) {
10764 SubVec =
10765 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
10766 } else {
10767 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10768 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
10769 SlideupAmt, Mask, VL, Policy);
10770 }
10771
10772 if (VecVT.isFixedLengthVector())
10773 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10774 return DAG.getBitcast(Op.getValueType(), SubVec);
10775 }
10776
10777 MVT ContainerVecVT = VecVT;
10778 if (VecVT.isFixedLengthVector()) {
10779 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10780 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10781 }
10782
10783 MVT ContainerSubVecVT = SubVecVT;
10784 if (SubVecVT.isFixedLengthVector()) {
10785 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10786 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10787 }
10788
10789 unsigned SubRegIdx;
10790 ElementCount RemIdx;
10791 // insert_subvector scales the index by vscale if the subvector is scalable,
10792 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10793 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10794 if (SubVecVT.isFixedLengthVector()) {
10795 assert(VLen);
10796 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10797 auto Decompose =
10799 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10800 SubRegIdx = Decompose.first;
10801 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10802 (OrigIdx % Vscale));
10803 } else {
10804 auto Decompose =
10806 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10807 SubRegIdx = Decompose.first;
10808 RemIdx = ElementCount::getScalable(Decompose.second);
10809 }
10810
10813 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10814 bool ExactlyVecRegSized =
10815 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10816 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10817
10818 // 1. If the Idx has been completely eliminated and this subvector's size is
10819 // a vector register or a multiple thereof, or the surrounding elements are
10820 // undef, then this is a subvector insert which naturally aligns to a vector
10821 // register. These can easily be handled using subregister manipulation.
10822 // 2. If the subvector isn't an exact multiple of a valid register group size,
10823 // then the insertion must preserve the undisturbed elements of the register.
10824 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10825 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10826 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10827 // of that LMUL=1 type back into the larger vector (resolving to another
10828 // subregister operation). See below for how our VSLIDEUP works. We go via a
10829 // LMUL=1 type to avoid allocating a large register group to hold our
10830 // subvector.
10831 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10832 if (SubVecVT.isFixedLengthVector()) {
10833 // We may get NoSubRegister if inserting at index 0 and the subvec
10834 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10835 if (SubRegIdx == RISCV::NoSubRegister) {
10836 assert(OrigIdx == 0);
10837 return Op;
10838 }
10839
10840 // Use a insert_subvector that will resolve to an insert subreg.
10841 assert(VLen);
10842 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10843 SDValue Insert =
10844 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10845 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10846 if (VecVT.isFixedLengthVector())
10847 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10848 return Insert;
10849 }
10850 return Op;
10851 }
10852
10853 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
10854 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10855 // (in our case undisturbed). This means we can set up a subvector insertion
10856 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10857 // size of the subvector.
10858 MVT InterSubVT = ContainerVecVT;
10859 SDValue AlignedExtract = Vec;
10860 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10861 if (SubVecVT.isFixedLengthVector()) {
10862 assert(VLen);
10863 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10864 }
10865 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10866 InterSubVT = getLMUL1VT(ContainerVecVT);
10867 // Extract a subvector equal to the nearest full vector register type. This
10868 // should resolve to a EXTRACT_SUBREG instruction.
10869 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10870 DAG.getVectorIdxConstant(AlignedIdx, DL));
10871 }
10872
10873 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10874 DAG.getUNDEF(InterSubVT), SubVec,
10875 DAG.getVectorIdxConstant(0, DL));
10876
10877 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10878
10879 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10880 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10881
10882 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10884 if (Subtarget.expandVScale(EndIndex) ==
10885 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10886 Policy = RISCVII::TAIL_AGNOSTIC;
10887
10888 // If we're inserting into the lowest elements, use a tail undisturbed
10889 // vmv.v.v.
10890 if (RemIdx.isZero()) {
10891 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10892 SubVec, VL);
10893 } else {
10894 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10895
10896 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10897 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10898
10899 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10900 SlideupAmt, Mask, VL, Policy);
10901 }
10902
10903 // If required, insert this subvector back into the correct vector register.
10904 // This should resolve to an INSERT_SUBREG instruction.
10905 if (ContainerVecVT.bitsGT(InterSubVT))
10906 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10907 DAG.getVectorIdxConstant(AlignedIdx, DL));
10908
10909 if (VecVT.isFixedLengthVector())
10910 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10911
10912 // We might have bitcast from a mask type: cast back to the original type if
10913 // required.
10914 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10915}
10916
10917SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10918 SelectionDAG &DAG) const {
10919 SDValue Vec = Op.getOperand(0);
10920 MVT SubVecVT = Op.getSimpleValueType();
10921 MVT VecVT = Vec.getSimpleValueType();
10922
10923 SDLoc DL(Op);
10924 MVT XLenVT = Subtarget.getXLenVT();
10925 unsigned OrigIdx = Op.getConstantOperandVal(1);
10926 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10927
10928 // With an index of 0 this is a cast-like subvector, which can be performed
10929 // with subregister operations.
10930 if (OrigIdx == 0)
10931 return Op;
10932
10933 // We don't have the ability to slide mask vectors down indexed by their i1
10934 // elements; the smallest we can do is i8. Often we are able to bitcast to
10935 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10936 // from a scalable one, we might not necessarily have enough scalable
10937 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10938 if (SubVecVT.getVectorElementType() == MVT::i1) {
10939 if (VecVT.getVectorMinNumElements() >= 8 &&
10940 SubVecVT.getVectorMinNumElements() >= 8) {
10941 assert(OrigIdx % 8 == 0 && "Invalid index");
10942 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10943 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10944 "Unexpected mask vector lowering");
10945 OrigIdx /= 8;
10946 SubVecVT =
10947 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10948 SubVecVT.isScalableVector());
10949 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10950 VecVT.isScalableVector());
10951 Vec = DAG.getBitcast(VecVT, Vec);
10952 } else {
10953 // We can't slide this mask vector down, indexed by its i1 elements.
10954 // This poses a problem when we wish to extract a scalable vector which
10955 // can't be re-expressed as a larger type. Just choose the slow path and
10956 // extend to a larger type, then truncate back down.
10957 // TODO: We could probably improve this when extracting certain fixed
10958 // from fixed, where we can extract as i8 and shift the correct element
10959 // right to reach the desired subvector?
10960 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10961 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10962 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10963 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10964 Op.getOperand(1));
10965 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10966 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10967 }
10968 }
10969
10970 const auto VLen = Subtarget.getRealVLen();
10971
10972 // If the subvector vector is a fixed-length type and we don't know VLEN
10973 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10974 // don't know which register of a LMUL group contains the specific subvector
10975 // as we only know the minimum register size. Therefore we must slide the
10976 // vector group down the full amount.
10977 if (SubVecVT.isFixedLengthVector() && !VLen) {
10978 MVT ContainerVT = VecVT;
10979 if (VecVT.isFixedLengthVector()) {
10980 ContainerVT = getContainerForFixedLengthVector(VecVT);
10981 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10982 }
10983
10984 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10985 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10986 if (auto ShrunkVT =
10987 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10988 ContainerVT = *ShrunkVT;
10989 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10990 DAG.getVectorIdxConstant(0, DL));
10991 }
10992
10993 SDValue Mask =
10994 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10995 // Set the vector length to only the number of elements we care about. This
10996 // avoids sliding down elements we're going to discard straight away.
10997 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10998 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10999 SDValue Slidedown =
11000 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11001 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
11002 // Now we can use a cast-like subvector extract to get the result.
11003 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
11004 DAG.getVectorIdxConstant(0, DL));
11005 return DAG.getBitcast(Op.getValueType(), Slidedown);
11006 }
11007
11008 if (VecVT.isFixedLengthVector()) {
11009 VecVT = getContainerForFixedLengthVector(VecVT);
11010 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
11011 }
11012
11013 MVT ContainerSubVecVT = SubVecVT;
11014 if (SubVecVT.isFixedLengthVector())
11015 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
11016
11017 unsigned SubRegIdx;
11018 ElementCount RemIdx;
11019 // extract_subvector scales the index by vscale if the subvector is scalable,
11020 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11021 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11022 if (SubVecVT.isFixedLengthVector()) {
11023 assert(VLen);
11024 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11025 auto Decompose =
11027 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
11028 SubRegIdx = Decompose.first;
11029 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
11030 (OrigIdx % Vscale));
11031 } else {
11032 auto Decompose =
11034 VecVT, ContainerSubVecVT, OrigIdx, TRI);
11035 SubRegIdx = Decompose.first;
11036 RemIdx = ElementCount::getScalable(Decompose.second);
11037 }
11038
11039 // If the Idx has been completely eliminated then this is a subvector extract
11040 // which naturally aligns to a vector register. These can easily be handled
11041 // using subregister manipulation. We use an extract_subvector that will
11042 // resolve to an extract subreg.
11043 if (RemIdx.isZero()) {
11044 if (SubVecVT.isFixedLengthVector()) {
11045 assert(VLen);
11046 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11047 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec,
11048 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
11049 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
11050 }
11051 return Op;
11052 }
11053
11054 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
11055 // was > M1 then the index would need to be a multiple of VLMAX, and so would
11056 // divide exactly.
11057 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
11058 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
11059
11060 // If the vector type is an LMUL-group type, extract a subvector equal to the
11061 // nearest full vector register type.
11062 MVT InterSubVT = VecVT;
11063 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
11064 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
11065 // we should have successfully decomposed the extract into a subregister.
11066 // We use an extract_subvector that will resolve to a subreg extract.
11067 assert(SubRegIdx != RISCV::NoSubRegister);
11068 (void)SubRegIdx;
11069 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
11070 if (SubVecVT.isFixedLengthVector()) {
11071 assert(VLen);
11072 Idx /= *VLen / RISCV::RVVBitsPerBlock;
11073 }
11074 InterSubVT = getLMUL1VT(VecVT);
11075 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
11076 DAG.getConstant(Idx, DL, XLenVT));
11077 }
11078
11079 // Slide this vector register down by the desired number of elements in order
11080 // to place the desired subvector starting at element 0.
11081 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
11082 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
11083 if (SubVecVT.isFixedLengthVector())
11084 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
11085 SDValue Slidedown =
11086 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
11087 Vec, SlidedownAmt, Mask, VL);
11088
11089 // Now the vector is in the right position, extract our final subvector. This
11090 // should resolve to a COPY.
11091 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
11092 DAG.getVectorIdxConstant(0, DL));
11093
11094 // We might have bitcast from a mask type: cast back to the original type if
11095 // required.
11096 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
11097}
11098
11099// Widen a vector's operands to i8, then truncate its results back to the
11100// original type, typically i1. All operand and result types must be the same.
11102 SelectionDAG &DAG) {
11103 MVT VT = N.getSimpleValueType();
11104 MVT WideVT = VT.changeVectorElementType(MVT::i8);
11106 for (SDValue Op : N->ops()) {
11107 assert(Op.getSimpleValueType() == VT &&
11108 "Operands and result must be same type");
11109 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
11110 }
11111
11112 unsigned NumVals = N->getNumValues();
11113
11115 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
11116 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
11117 SmallVector<SDValue, 4> TruncVals;
11118 for (unsigned I = 0; I < NumVals; I++) {
11119 TruncVals.push_back(
11120 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
11121 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
11122 }
11123
11124 if (TruncVals.size() > 1)
11125 return DAG.getMergeValues(TruncVals, DL);
11126 return TruncVals.front();
11127}
11128
11129SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
11130 SelectionDAG &DAG) const {
11131 SDLoc DL(Op);
11132 MVT VecVT = Op.getSimpleValueType();
11133
11134 const unsigned Factor = Op->getNumValues();
11135 assert(Factor <= 8);
11136
11137 // 1 bit element vectors need to be widened to e8
11138 if (VecVT.getVectorElementType() == MVT::i1)
11139 return widenVectorOpsToi8(Op, DL, DAG);
11140
11141 // Convert to scalable vectors first.
11142 if (VecVT.isFixedLengthVector()) {
11143 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
11144 SmallVector<SDValue, 8> Ops(Factor);
11145 for (unsigned i = 0U; i < Factor; ++i)
11146 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
11147 Subtarget);
11148
11149 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
11150 SDValue NewDeinterleave =
11151 DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs, Ops);
11152
11153 SmallVector<SDValue, 8> Res(Factor);
11154 for (unsigned i = 0U; i < Factor; ++i)
11155 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
11156 DAG, Subtarget);
11157 return DAG.getMergeValues(Res, DL);
11158 }
11159
11160 // If concatenating would exceed LMUL=8, we need to split.
11161 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
11162 (8 * RISCV::RVVBitsPerBlock)) {
11163 SmallVector<SDValue, 8> Ops(Factor * 2);
11164 for (unsigned i = 0; i != Factor; ++i) {
11165 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
11166 Ops[i * 2] = OpLo;
11167 Ops[i * 2 + 1] = OpHi;
11168 }
11169
11170 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
11171
11173 ArrayRef(Ops).slice(0, Factor));
11175 ArrayRef(Ops).slice(Factor, Factor));
11176
11177 SmallVector<SDValue, 8> Res(Factor);
11178 for (unsigned i = 0; i != Factor; ++i)
11179 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
11180 Hi.getValue(i));
11181
11182 return DAG.getMergeValues(Res, DL);
11183 }
11184
11185 SmallVector<SDValue, 8> Ops(Op->op_values());
11186
11187 // Concatenate the vectors as one vector to deinterleave
11188 MVT ConcatVT =
11191 PowerOf2Ceil(Factor)));
11192 if (Ops.size() < PowerOf2Ceil(Factor))
11193 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
11194 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
11195
11196 if (Factor == 2) {
11197 // We can deinterleave through vnsrl.wi if the element type is smaller than
11198 // ELEN
11199 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11200 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
11201 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
11202 return DAG.getMergeValues({Even, Odd}, DL);
11203 }
11204
11205 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
11206 // possibly mask vector, then extract the required subvector. Doing this
11207 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
11208 // creation to be rematerialized during register allocation to reduce
11209 // register pressure if needed.
11210
11211 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
11212
11213 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
11214 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
11215 SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT,
11216 EvenSplat, DAG.getVectorIdxConstant(0, DL));
11217
11218 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
11219 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
11220 SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, OddSplat,
11221 DAG.getVectorIdxConstant(0, DL));
11222
11223 // vcompress the even and odd elements into two separate vectors
11224 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11225 EvenMask, DAG.getUNDEF(ConcatVT));
11226 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11227 OddMask, DAG.getUNDEF(ConcatVT));
11228
11229 // Extract the result half of the gather for even and odd
11230 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
11231 DAG.getVectorIdxConstant(0, DL));
11232 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
11233 DAG.getVectorIdxConstant(0, DL));
11234
11235 return DAG.getMergeValues({Even, Odd}, DL);
11236 }
11237
11238 // Store with unit-stride store and load it back with segmented load.
11239 MVT XLenVT = Subtarget.getXLenVT();
11240 SDValue VL = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget).second;
11241 SDValue Passthru = DAG.getUNDEF(ConcatVT);
11242
11243 // Allocate a stack slot.
11244 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
11246 DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
11247 auto &MF = DAG.getMachineFunction();
11248 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11249 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11250
11251 SDValue StoreOps[] = {DAG.getEntryNode(),
11252 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
11253 Concat, StackPtr, VL};
11254
11255 SDValue Chain = DAG.getMemIntrinsicNode(
11256 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
11257 ConcatVT.getVectorElementType(), PtrInfo, Alignment,
11259
11260 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
11261 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, Intrinsic::riscv_vlseg4,
11262 Intrinsic::riscv_vlseg5, Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
11263 Intrinsic::riscv_vlseg8};
11264
11265 SDValue LoadOps[] = {
11266 Chain,
11267 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
11268 Passthru,
11269 StackPtr,
11270 VL,
11271 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
11272
11273 unsigned Sz =
11274 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
11275 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
11276
11278 ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
11279 LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
11281
11282 SmallVector<SDValue, 8> Res(Factor);
11283
11284 for (unsigned i = 0U; i < Factor; ++i)
11285 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
11286 DAG.getVectorIdxConstant(i, DL));
11287
11288 return DAG.getMergeValues(Res, DL);
11289}
11290
11291SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
11292 SelectionDAG &DAG) const {
11293 SDLoc DL(Op);
11294 MVT VecVT = Op.getSimpleValueType();
11295
11296 const unsigned Factor = Op.getNumOperands();
11297 assert(Factor <= 8);
11298
11299 // i1 vectors need to be widened to i8
11300 if (VecVT.getVectorElementType() == MVT::i1)
11301 return widenVectorOpsToi8(Op, DL, DAG);
11302
11303 // Convert to scalable vectors first.
11304 if (VecVT.isFixedLengthVector()) {
11305 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
11306 SmallVector<SDValue, 8> Ops(Factor);
11307 for (unsigned i = 0U; i < Factor; ++i)
11308 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
11309 Subtarget);
11310
11311 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
11312 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
11313
11314 SmallVector<SDValue, 8> Res(Factor);
11315 for (unsigned i = 0U; i < Factor; ++i)
11316 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
11317 Subtarget);
11318 return DAG.getMergeValues(Res, DL);
11319 }
11320
11321 MVT XLenVT = Subtarget.getXLenVT();
11322 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
11323
11324 // If the VT is larger than LMUL=8, we need to split and reassemble.
11325 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
11326 (8 * RISCV::RVVBitsPerBlock)) {
11327 SmallVector<SDValue, 8> Ops(Factor * 2);
11328 for (unsigned i = 0; i != Factor; ++i) {
11329 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
11330 Ops[i] = OpLo;
11331 Ops[i + Factor] = OpHi;
11332 }
11333
11334 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
11335
11336 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
11337 ArrayRef(Ops).take_front(Factor)),
11339 ArrayRef(Ops).drop_front(Factor))};
11340
11341 SmallVector<SDValue, 8> Concats(Factor);
11342 for (unsigned i = 0; i != Factor; ++i) {
11343 unsigned IdxLo = 2 * i;
11344 unsigned IdxHi = 2 * i + 1;
11345 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11346 Res[IdxLo / Factor].getValue(IdxLo % Factor),
11347 Res[IdxHi / Factor].getValue(IdxHi % Factor));
11348 }
11349
11350 return DAG.getMergeValues(Concats, DL);
11351 }
11352
11353 SDValue Interleaved;
11354
11355 // Spill to the stack using a segment store for simplicity.
11356 if (Factor != 2) {
11357 EVT MemVT =
11359 VecVT.getVectorElementCount() * Factor);
11360
11361 // Allocate a stack slot.
11362 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
11364 DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11365 EVT PtrVT = StackPtr.getValueType();
11366 auto &MF = DAG.getMachineFunction();
11367 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11368 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11369
11370 static const Intrinsic::ID IntrIds[] = {
11371 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
11372 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
11373 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
11374 Intrinsic::riscv_vsseg8,
11375 };
11376
11377 unsigned Sz =
11378 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
11379 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
11380
11381 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
11382 for (unsigned i = 0; i < Factor; i++)
11383 StoredVal = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
11384 Op.getOperand(i), DAG.getConstant(i, DL, XLenVT));
11385
11386 SDValue Ops[] = {DAG.getEntryNode(),
11387 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
11388 StoredVal,
11389 StackPtr,
11390 VL,
11392 DL, XLenVT)};
11393
11394 SDValue Chain = DAG.getMemIntrinsicNode(
11395 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
11396 VecVT.getVectorElementType(), PtrInfo, Alignment,
11398
11399 SmallVector<SDValue, 8> Loads(Factor);
11400
11402 DAG.getVScale(DL, PtrVT,
11403 APInt(PtrVT.getFixedSizeInBits(),
11404 VecVT.getStoreSize().getKnownMinValue()));
11405 for (unsigned i = 0; i != Factor; ++i) {
11406 if (i != 0)
11407 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
11408
11409 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
11410 }
11411
11412 return DAG.getMergeValues(Loads, DL);
11413 }
11414
11415 // If the element type is smaller than ELEN, then we can interleave with
11416 // vwaddu.vv and vwmaccu.vx
11417 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11418 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
11419 DAG, Subtarget);
11420 } else {
11421 // Otherwise, fallback to using vrgathere16.vv
11422 MVT ConcatVT =
11425 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11426 Op.getOperand(0), Op.getOperand(1));
11427
11428 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
11429
11430 // 0 1 2 3 4 5 6 7 ...
11431 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
11432
11433 // 1 1 1 1 1 1 1 1 ...
11434 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
11435
11436 // 1 0 1 0 1 0 1 0 ...
11437 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
11438 OddMask = DAG.getSetCC(
11439 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
11440 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
11442
11443 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
11444
11445 // Build up the index vector for interleaving the concatenated vector
11446 // 0 0 1 1 2 2 3 3 ...
11447 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
11448 // 0 n 1 n+1 2 n+2 3 n+3 ...
11449 Idx =
11450 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
11451
11452 // Then perform the interleave
11453 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
11454 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
11455 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
11456 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
11457 }
11458
11459 // Extract the two halves from the interleaved result
11460 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11461 DAG.getVectorIdxConstant(0, DL));
11462 SDValue Hi = DAG.getNode(
11463 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11465
11466 return DAG.getMergeValues({Lo, Hi}, DL);
11467}
11468
11469// Lower step_vector to the vid instruction. Any non-identity step value must
11470// be accounted for my manual expansion.
11471SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
11472 SelectionDAG &DAG) const {
11473 SDLoc DL(Op);
11474 MVT VT = Op.getSimpleValueType();
11475 assert(VT.isScalableVector() && "Expected scalable vector");
11476 MVT XLenVT = Subtarget.getXLenVT();
11477 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
11478 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11479 uint64_t StepValImm = Op.getConstantOperandVal(0);
11480 if (StepValImm != 1) {
11481 if (isPowerOf2_64(StepValImm)) {
11482 SDValue StepVal =
11483 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11484 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
11485 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
11486 } else {
11487 SDValue StepVal = lowerScalarSplat(
11488 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
11489 VL, VT, DL, DAG, Subtarget);
11490 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
11491 }
11492 }
11493 return StepVec;
11494}
11495
11496// Implement vector_reverse using vrgather.vv with indices determined by
11497// subtracting the id of each element from (VLMAX-1). This will convert
11498// the indices like so:
11499// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
11500// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11501SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
11502 SelectionDAG &DAG) const {
11503 SDLoc DL(Op);
11504 MVT VecVT = Op.getSimpleValueType();
11505 if (VecVT.getVectorElementType() == MVT::i1) {
11506 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11507 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
11508 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
11509 return DAG.getSetCC(DL, VecVT, Op2,
11510 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
11511 }
11512
11513 MVT ContainerVT = VecVT;
11514 SDValue Vec = Op.getOperand(0);
11515 if (VecVT.isFixedLengthVector()) {
11516 ContainerVT = getContainerForFixedLengthVector(VecVT);
11517 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11518 }
11519
11520 MVT XLenVT = Subtarget.getXLenVT();
11521 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11522
11523 // On some uarchs vrgather.vv will read from every input register for each
11524 // output register, regardless of the indices. However to reverse a vector
11525 // each output register only needs to read from one register. So decompose it
11526 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
11527 // O(LMUL^2).
11528 //
11529 // vsetvli a1, zero, e64, m4, ta, ma
11530 // vrgatherei16.vv v12, v8, v16
11531 // ->
11532 // vsetvli a1, zero, e64, m1, ta, ma
11533 // vrgather.vv v15, v8, v16
11534 // vrgather.vv v14, v9, v16
11535 // vrgather.vv v13, v10, v16
11536 // vrgather.vv v12, v11, v16
11537 if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
11538 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
11539 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11540 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
11541 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
11542 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
11543
11544 // Fixed length vectors might not fit exactly into their container, and so
11545 // leave a gap in the front of the vector after being reversed. Slide this
11546 // away.
11547 //
11548 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
11549 // 0 1 2 3 x x x x <- reverse
11550 // x x x x 0 1 2 3 <- vslidedown.vx
11551 if (VecVT.isFixedLengthVector()) {
11552 SDValue Offset = DAG.getNode(
11553 ISD::SUB, DL, XLenVT,
11554 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
11555 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
11556 Concat =
11557 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11558 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
11559 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
11560 }
11561 return Concat;
11562 }
11563
11564 unsigned EltSize = ContainerVT.getScalarSizeInBits();
11565 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
11566 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11567 unsigned MaxVLMAX =
11568 VecVT.isFixedLengthVector()
11569 ? VecVT.getVectorNumElements()
11570 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11571
11572 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11573 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
11574
11575 // If this is SEW=8 and VLMAX is potentially more than 256, we need
11576 // to use vrgatherei16.vv.
11577 if (MaxVLMAX > 256 && EltSize == 8) {
11578 // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
11579 // Reverse each half, then reassemble them in reverse order.
11580 // NOTE: It's also possible that after splitting that VLMAX no longer
11581 // requires vrgatherei16.vv.
11582 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11583 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11584 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
11585 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11586 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11587 // Reassemble the low and high pieces reversed.
11588 // FIXME: This is a CONCAT_VECTORS.
11589 SDValue Res =
11590 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
11591 DAG.getVectorIdxConstant(0, DL));
11592 return DAG.getNode(
11593 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
11594 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
11595 }
11596
11597 // Just promote the int type to i16 which will double the LMUL.
11598 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
11599 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11600 }
11601
11602 // At LMUL > 1, do the index computation in 16 bits to reduce register
11603 // pressure.
11604 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
11605 IntVT.bitsGT(getLMUL1VT(IntVT))) {
11606 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
11607 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11608 IntVT = IntVT.changeVectorElementType(MVT::i16);
11609 }
11610
11611 // Calculate VLMAX-1 for the desired SEW.
11612 SDValue VLMinus1 = DAG.getNode(
11613 ISD::SUB, DL, XLenVT,
11614 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
11615 DAG.getConstant(1, DL, XLenVT));
11616
11617 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
11618 bool IsRV32E64 =
11619 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
11620 SDValue SplatVL;
11621 if (!IsRV32E64)
11622 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
11623 else
11624 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
11625 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
11626
11627 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
11628 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
11629 DAG.getUNDEF(IntVT), Mask, VL);
11630
11631 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
11632 DAG.getUNDEF(ContainerVT), Mask, VL);
11633 if (VecVT.isFixedLengthVector())
11634 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
11635 return Gather;
11636}
11637
// Lower ISD::VECTOR_SPLICE: slide V1 down by the splice offset so its
// surviving tail starts at element 0, then slide V2 up into the vacated
// elements.
11638SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
11639                                                SelectionDAG &DAG) const {
11640  SDLoc DL(Op);
11641  SDValue V1 = Op.getOperand(0);
11642  SDValue V2 = Op.getOperand(1);
11643  MVT XLenVT = Subtarget.getXLenVT();
11644  MVT VecVT = Op.getSimpleValueType();
11645
11646  SDValue VLMax = computeVLMax(VecVT, DL, DAG);
11647
  // The splice index is a signed immediate: >= 0 counts from the start of
  // V1, < 0 counts back from the end of V1.
11648  int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
11649  SDValue DownOffset, UpOffset;
11650  if (ImmValue >= 0) {
11651    // The operand is a TargetConstant, we need to rebuild it as a regular
11652    // constant.
11653    DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11654    UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
11655  } else {
11656    // The operand is a TargetConstant, we need to rebuild it as a regular
11657    // constant rather than negating the original operand.
11658    UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11659    DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
11660  }
11661
11662  SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
11663
  // Slide V1 down; VL is limited to UpOffset so only the elements that
  // survive the splice are produced.
11664  SDValue SlideDown =
11665      getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
11666                    DownOffset, TrueMask, UpOffset);
  // Slide V2 up on top of the slid-down V1 to complete the splice. X0 as
  // AVL encodes VLMAX.
11667  return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
11668                     TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
11670}
11671
11672SDValue
  // Lower a fixed-length vector load by loading into the scalable
  // container type with VL set to the fixed element count, or by a plain
  // whole-register load when the fixed vector exactly fills the container.
11673RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
11674                                                     SelectionDAG &DAG) const {
11675  SDLoc DL(Op);
11676  auto *Load = cast<LoadSDNode>(Op);
11677
11679                                        Load->getMemoryVT(),
11680                                        *Load->getMemOperand()) &&
11681         "Expecting a correctly-aligned load");
11682
11683  MVT VT = Op.getSimpleValueType();
11684  MVT XLenVT = Subtarget.getXLenVT();
11685  MVT ContainerVT = getContainerForFixedLengthVector(VT);
11686
11687  // If we know the exact VLEN and our fixed length vector completely fills
11688  // the container, use a whole register load instead.
11689  const auto [MinVLMAX, MaxVLMAX] =
11690      RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11691  if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11692      getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11693    MachineMemOperand *MMO = Load->getMemOperand();
11694    SDValue NewLoad =
11695        DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
11696                    MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
11697                    MMO->getAAInfo(), MMO->getRanges());
11698    SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11699    return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11700  }
11701
  // Otherwise emit a vle/vlm intrinsic with VL = number of fixed elements.
11702  SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11703
  // i1 vectors use the mask-register load (vlm), which takes no passthru.
11704  bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11705  SDValue IntID = DAG.getTargetConstant(
11706      IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
11707  SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
11708  if (!IsMaskOp)
11709    Ops.push_back(DAG.getUNDEF(ContainerVT));
11710  Ops.push_back(Load->getBasePtr());
11711  Ops.push_back(VL);
11712  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11713  SDValue NewLoad =
11715                              Load->getMemoryVT(), Load->getMemOperand());
11716
  // Cast the container result back to the fixed-length type and return the
  // value together with the chain.
11717  SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11718  return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11719}
11720
11721SDValue
  // Lower a fixed-length vector store by storing from the scalable
  // container type with VL set to the fixed element count, or by a plain
  // whole-register store when the fixed vector exactly fills the container.
11722RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
11723                                                      SelectionDAG &DAG) const {
11724  SDLoc DL(Op);
11725  auto *Store = cast<StoreSDNode>(Op);
11726
11728                                        Store->getMemoryVT(),
11729                                        *Store->getMemOperand()) &&
11730         "Expecting a correctly-aligned store");
11731
11732  SDValue StoreVal = Store->getValue();
11733  MVT VT = StoreVal.getSimpleValueType();
11734  MVT XLenVT = Subtarget.getXLenVT();
11735
11736  // If the size less than a byte, we need to pad with zeros to make a byte.
11737  if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
11738    VT = MVT::v8i1;
11739    StoreVal =
11740        DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
11741                    StoreVal, DAG.getVectorIdxConstant(0, DL));
11742  }
11743
11744  MVT ContainerVT = getContainerForFixedLengthVector(VT);
11745
11746  SDValue NewValue =
11747      convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11748
11749  // If we know the exact VLEN and our fixed length vector completely fills
11750  // the container, use a whole register store instead.
11751  const auto [MinVLMAX, MaxVLMAX] =
11752      RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11753  if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11754      getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11755    MachineMemOperand *MMO = Store->getMemOperand();
11756    return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
11757                        MMO->getPointerInfo(), MMO->getBaseAlign(),
11758                        MMO->getFlags(), MMO->getAAInfo());
11759  }
11760
  // Otherwise emit a vse/vsm intrinsic with VL = number of fixed elements.
11761  SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11762
  // i1 vectors use the mask-register store (vsm).
11763  bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11764  SDValue IntID = DAG.getTargetConstant(
11765      IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
11766  return DAG.getMemIntrinsicNode(
11767      ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
11768      {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11769      Store->getMemoryVT(), Store->getMemOperand());
11770}
11771
// Lower MLOAD / VP_LOAD to a vle/vle_mask intrinsic. Expanding loads are
// handled by doing a contiguous masked-element load followed by a
// viota+vrgather scatter of the loaded elements into their mask positions.
11772SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11773                                             SelectionDAG &DAG) const {
11774  SDLoc DL(Op);
11775  MVT VT = Op.getSimpleValueType();
11776
11777  const auto *MemSD = cast<MemSDNode>(Op);
11778  EVT MemVT = MemSD->getMemoryVT();
11779  MachineMemOperand *MMO = MemSD->getMemOperand();
11780  SDValue Chain = MemSD->getChain();
11781  SDValue BasePtr = MemSD->getBasePtr();
11782
  // Pull mask/passthru/VL from whichever node flavor we were given; a
  // VP load supplies an explicit EVL, a masked load may be expanding.
11783  SDValue Mask, PassThru, VL;
11784  bool IsExpandingLoad = false;
11785  if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
11786    Mask = VPLoad->getMask();
11787    PassThru = DAG.getUNDEF(VT);
11788    VL = VPLoad->getVectorLength();
11789  } else {
11790    const auto *MLoad = cast<MaskedLoadSDNode>(Op);
11791    Mask = MLoad->getMask();
11792    PassThru = MLoad->getPassThru();
11793    IsExpandingLoad = MLoad->isExpandingLoad();
11794  }
11795
  // An all-ones mask lets us use the unmasked form of the intrinsic.
11796  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11797
11798  MVT XLenVT = Subtarget.getXLenVT();
11799
11800  MVT ContainerVT = VT;
11801  if (VT.isFixedLengthVector()) {
11802    ContainerVT = getContainerForFixedLengthVector(VT);
11803    PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11804    if (!IsUnmasked) {
11805      MVT MaskVT = getMaskTypeFor(ContainerVT);
11806      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11807    }
11808  }
11809
11810  if (!VL)
11811    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11812
  // For an expanding load, load only the active elements contiguously:
  // the load's VL becomes vcpop(mask), and the original VL is kept for
  // the expansion step below.
11813  SDValue ExpandingVL;
11814  if (!IsUnmasked && IsExpandingLoad) {
11815    ExpandingVL = VL;
11816    VL =
11817        DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11818                    getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11819  }
11820
11821  unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11822                                                 : Intrinsic::riscv_vle_mask;
11823  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11824  if (IntID == Intrinsic::riscv_vle)
11825    Ops.push_back(DAG.getUNDEF(ContainerVT));
11826  else
11827    Ops.push_back(PassThru);
11828  Ops.push_back(BasePtr);
11829  if (IntID == Intrinsic::riscv_vle_mask)
11830    Ops.push_back(Mask);
11831  Ops.push_back(VL);
11832  if (IntID == Intrinsic::riscv_vle_mask)
11833    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11834
11835  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11836
11837  SDValue Result =
11838      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11839  Chain = Result.getValue(1);
  // Expansion step: scatter the contiguously-loaded elements into their
  // mask positions using viota (prefix-sum of the mask) as gather indices.
11840  if (ExpandingVL) {
11841    MVT IndexVT = ContainerVT;
11842    if (ContainerVT.isFloatingPoint())
11843      IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11844
11845    MVT IndexEltVT = IndexVT.getVectorElementType();
11846    bool UseVRGATHEREI16 = false;
11847    // If index vector is an i8 vector and the element count exceeds 256, we
11848    // should change the element type of index vector to i16 to avoid
11849    // overflow.
11850    if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11851      // FIXME: We need to do vector splitting manually for LMUL=8 cases.
11852      assert(getLMUL(IndexVT) != RISCVII::LMUL_8);
11853      IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11854      UseVRGATHEREI16 = true;
11855    }
11856
11857    SDValue Iota =
11858        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11859                    DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
11860                    DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
11861    Result =
11862        DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
11864                    DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
11865  }
11866
11867  if (VT.isFixedLengthVector())
11868    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11869
11870  return DAG.getMergeValues({Result, Chain}, DL);
11871}
11872
// Lower MSTORE / VP_STORE to a vse/vse_mask intrinsic. Compressing stores
// first vcompress the active elements to the front and store them
// contiguously with VL = vcpop(mask).
11873SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
11874                                              SelectionDAG &DAG) const {
11875  SDLoc DL(Op);
11876
11877  const auto *MemSD = cast<MemSDNode>(Op);
11878  EVT MemVT = MemSD->getMemoryVT();
11879  MachineMemOperand *MMO = MemSD->getMemOperand();
11880  SDValue Chain = MemSD->getChain();
11881  SDValue BasePtr = MemSD->getBasePtr();
11882  SDValue Val, Mask, VL;
11883
  // Pull value/mask/VL from whichever node flavor we were given; a
  // VP store supplies an explicit EVL, a masked store may be compressing.
11884  bool IsCompressingStore = false;
11885  if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
11886    Val = VPStore->getValue();
11887    Mask = VPStore->getMask();
11888    VL = VPStore->getVectorLength();
11889  } else {
11890    const auto *MStore = cast<MaskedStoreSDNode>(Op);
11891    Val = MStore->getValue();
11892    Mask = MStore->getMask();
11893    IsCompressingStore = MStore->isCompressingStore();
11894  }
11895
  // A compressing store becomes an unmasked store of the compressed data.
11896  bool IsUnmasked =
11897      ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
11898
11899  MVT VT = Val.getSimpleValueType();
11900  MVT XLenVT = Subtarget.getXLenVT();
11901
11902  MVT ContainerVT = VT;
11903  if (VT.isFixedLengthVector()) {
11904    ContainerVT = getContainerForFixedLengthVector(VT);
11905
11906    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11907    if (!IsUnmasked || IsCompressingStore) {
11908      MVT MaskVT = getMaskTypeFor(ContainerVT);
11909      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11910    }
11911  }
11912
11913  if (!VL)
11914    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11915
  // Pack the active elements to the front and shrink VL to the number of
  // active elements before storing.
11916  if (IsCompressingStore) {
11917    Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11918                      DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11919                      DAG.getUNDEF(ContainerVT), Val, Mask, VL);
11920    VL =
11921        DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11922                    getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11923  }
11924
11925  unsigned IntID =
11926      IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11927  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11928  Ops.push_back(Val);
11929  Ops.push_back(BasePtr);
11930  if (!IsUnmasked)
11931    Ops.push_back(Mask);
11932  Ops.push_back(VL);
11933
11935                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11936}
11937
11938SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
11939 SelectionDAG &DAG) const {
11940 SDLoc DL(Op);
11941 SDValue Val = Op.getOperand(0);
11942 SDValue Mask = Op.getOperand(1);
11943 SDValue Passthru = Op.getOperand(2);
11944
11945 MVT VT = Val.getSimpleValueType();
11946 MVT XLenVT = Subtarget.getXLenVT();
11947 MVT ContainerVT = VT;
11948 if (VT.isFixedLengthVector()) {
11949 ContainerVT = getContainerForFixedLengthVector(VT);
11950 MVT MaskVT = getMaskTypeFor(ContainerVT);
11951 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11952 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11953 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
11954 }
11955
11956 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11957 SDValue Res =
11958 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11959 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11960 Passthru, Val, Mask, VL);
11961
11962 if (VT.isFixedLengthVector())
11963 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
11964
11965 return Res;
11966}
11967
11968SDValue
11969RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
11970 SelectionDAG &DAG) const {
11971 MVT InVT = Op.getOperand(0).getSimpleValueType();
11972 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
11973
11974 MVT VT = Op.getSimpleValueType();
11975
11976 SDValue Op1 =
11977 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
11978 SDValue Op2 =
11979 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11980
11981 SDLoc DL(Op);
11982 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
11983 DAG, Subtarget);
11984 MVT MaskVT = getMaskTypeFor(ContainerVT);
11985
11986 SDValue Cmp =
11987 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11988 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11989
11990 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
11991}
11992
// Lower STRICT_FSETCC / STRICT_FSETCCS. Quiet predicates map directly to
// the *_VL nodes; signaling OEQ/UNE are expanded into compare pairs, and
// quiet LT/LE compares are masked to ordered lanes so the signaling
// hardware compares cannot raise spurious invalid-operation exceptions.
11993SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
11994                                                     SelectionDAG &DAG) const {
11995  unsigned Opc = Op.getOpcode();
11996  SDLoc DL(Op);
11997  SDValue Chain = Op.getOperand(0);
11998  SDValue Op1 = Op.getOperand(1);
11999  SDValue Op2 = Op.getOperand(2);
12000  SDValue CC = Op.getOperand(3);
12001  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
12002  MVT VT = Op.getSimpleValueType();
12003  MVT InVT = Op1.getSimpleValueType();
12004
12005  // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE
12006  // condition code.
12007  if (Opc == ISD::STRICT_FSETCCS) {
12008    // Expand strict_fsetccs(x, oeq) to
12009    // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
12010    SDVTList VTList = Op->getVTList();
12011    if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
12012      SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
12013      SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
12014                                 Op2, OLECCVal);
12015      SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
12016                                 Op1, OLECCVal);
12017      SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
12018                                     Tmp1.getValue(1), Tmp2.getValue(1));
12019      // Tmp1 and Tmp2 might be the same node.
12020      if (Tmp1 != Tmp2)
12021        Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
12022      return DAG.getMergeValues({Tmp1, OutChain}, DL);
12023    }
12024
12025    // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
12026    if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
12027      SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
12028      SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
12029                                Op2, OEQCCVal);
12030      SDValue Res = DAG.getNOT(DL, OEQ, VT);
12031      return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
12032    }
12033  }
12034
12035  MVT ContainerInVT = InVT;
12036  if (InVT.isFixedLengthVector()) {
12037    ContainerInVT = getContainerForFixedLengthVector(InVT);
12038    Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
12039    Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
12040  }
12041  MVT MaskVT = getMaskTypeFor(ContainerInVT);
12042
12043  auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
12044
12045  SDValue Res;
12046  if (Opc == ISD::STRICT_FSETCC &&
12047      (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
12048       CCVal == ISD::SETOLE)) {
12049    // VMFLT/VMFLE/VMFGT/VMFGE raise exception for qNan. Generate a mask to only
12050    // active when both input elements are ordered.
12051    SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
    // x == x (OEQ) is the "x is ordered" test for each operand.
12052    SDValue OrderMask1 = DAG.getNode(
12053        RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
12054        {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
12055         True, VL});
12056    SDValue OrderMask2 = DAG.getNode(
12057        RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
12058        {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
12059         True, VL});
12060    Mask =
12061        DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
12062    // Use Mask as the passthru operand to let the result be 0 if either of the
12063    // inputs is unordered.
12065                      DAG.getVTList(MaskVT, MVT::Other),
12066                      {Chain, Op1, Op2, CC, Mask, Mask, VL});
12067  } else {
12068    unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
12070    Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
12071                      {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
12072  }
12073
12074  if (VT.isFixedLengthVector()) {
12075    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
12076    return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
12077  }
12078  return Res;
12079}
12080
12081// Lower vector ABS to smax(X, sub(0, X)).
12082SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
12083 SDLoc DL(Op);
12084 MVT VT = Op.getSimpleValueType();
12085 SDValue X = Op.getOperand(0);
12086
12087 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
12088 "Unexpected type for ISD::ABS");
12089
12090 MVT ContainerVT = VT;
12091 if (VT.isFixedLengthVector()) {
12092 ContainerVT = getContainerForFixedLengthVector(VT);
12093 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
12094 }
12095
12096 SDValue Mask, VL;
12097 if (Op->getOpcode() == ISD::VP_ABS) {
12098 Mask = Op->getOperand(1);
12099 if (VT.isFixedLengthVector())
12100 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
12101 Subtarget);
12102 VL = Op->getOperand(2);
12103 } else
12104 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
12105
12106 SDValue SplatZero = DAG.getNode(
12107 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
12108 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
12109 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
12110 DAG.getUNDEF(ContainerVT), Mask, VL);
12111 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
12112 DAG.getUNDEF(ContainerVT), Mask, VL);
12113
12114 if (VT.isFixedLengthVector())
12115 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
12116 return Max;
12117}
12118
12119SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
12120 SDValue Op, SelectionDAG &DAG) const {
12121 SDLoc DL(Op);
12122 MVT VT = Op.getSimpleValueType();
12123 SDValue Mag = Op.getOperand(0);
12124 SDValue Sign = Op.getOperand(1);
12125 assert(Mag.getValueType() == Sign.getValueType() &&
12126 "Can only handle COPYSIGN with matching types.");
12127
12128 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12129 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
12130 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
12131
12132 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
12133
12134 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
12135 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
12136
12137 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
12138}
12139
12140SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
12141 SDValue Op, SelectionDAG &DAG) const {
12142 MVT VT = Op.getSimpleValueType();
12143 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12144
12145 MVT I1ContainerVT =
12146 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
12147
12148 SDValue CC =
12149 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
12150 SDValue Op1 =
12151 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
12152 SDValue Op2 =
12153 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
12154
12155 SDLoc DL(Op);
12156 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12157
12158 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
12159 Op2, DAG.getUNDEF(ContainerVT), VL);
12160
12161 return convertFromScalableVector(VT, Select, DAG, Subtarget);
12162}
12163
// Generic lowering of a fixed-length vector op to its RISCVISD::*_VL
// equivalent: convert vector operands to the scalable container type,
// append passthru/mask/VL as the VL opcode requires, and convert back.
12164SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
12165                                               SelectionDAG &DAG) const {
12166  unsigned NewOpc = getRISCVVLOp(Op);
12167  bool HasPassthruOp = hasPassthruOp(NewOpc);
12168  bool HasMask = hasMaskOp(NewOpc);
12169
12170  MVT VT = Op.getSimpleValueType();
12171  MVT ContainerVT = getContainerForFixedLengthVector(VT);
12172
12173  // Create list of operands by converting existing ones to scalable types.
12175  for (const SDValue &V : Op->op_values()) {
12176    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
12177
12178    // Pass through non-vector operands.
12179    if (!V.getValueType().isVector()) {
12180      Ops.push_back(V);
12181      continue;
12182    }
12183
12184    // "cast" fixed length vector to a scalable vector.
12185    assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
12186           "Only fixed length vectors are supported!");
12187    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
12188  }
12189
12190  SDLoc DL(Op);
12191  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
12192  if (HasPassthruOp)
12193    Ops.push_back(DAG.getUNDEF(ContainerVT));
12194  if (HasMask)
12195    Ops.push_back(Mask);
12196  Ops.push_back(VL);
12197
12198  // StrictFP operations have two result values. Their lowered result should
12199  // have same result count.
12200  if (Op->isStrictFPOpcode()) {
12201    SDValue ScalableRes =
12202        DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
12203                    Op->getFlags());
12204    SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
12205    return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
12206  }
12207
12208  SDValue ScalableRes =
12209      DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
12210  return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
12211}
12212
12213// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
12214// * Operands of each node are assumed to be in the same order.
12215// * The EVL operand is promoted from i32 to i64 on RV64.
12216// * Fixed-length vectors are converted to their scalable-vector container
12217//   types.
12218SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
12219  unsigned RISCVISDOpc = getRISCVVLOp(Op);
12220  bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
12221
12222  SDLoc DL(Op);
12223  MVT VT = Op.getSimpleValueType();
12225
12226  MVT ContainerVT = VT;
12227  if (VT.isFixedLengthVector())
12228    ContainerVT = getContainerForFixedLengthVector(VT);
12229
12230  for (const auto &OpIdx : enumerate(Op->ops())) {
12231    SDValue V = OpIdx.value();
12232    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
12233    // Add dummy passthru value before the mask. Or if there isn't a mask,
12234    // before EVL.
12235    if (HasPassthruOp) {
12236      auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
12237      if (MaskIdx) {
12238        if (*MaskIdx == OpIdx.index())
12239          Ops.push_back(DAG.getUNDEF(ContainerVT));
12240      } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
12241                 OpIdx.index()) {
12242        if (Op.getOpcode() == ISD::VP_MERGE) {
12243          // For VP_MERGE, copy the false operand instead of an undef value.
12244          Ops.push_back(Ops.back());
12245        } else {
12246          assert(Op.getOpcode() == ISD::VP_SELECT);
12247          // For VP_SELECT, add an undef value.
12248          Ops.push_back(DAG.getUNDEF(ContainerVT));
12249        }
12250      }
12251    }
12252    // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
12253    if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
12254        ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
12256                                         Subtarget.getXLenVT()));
12257    // Pass through operands which aren't fixed-length vectors.
12258    if (!V.getValueType().isFixedLengthVector()) {
12259      Ops.push_back(V);
12260      continue;
12261    }
12262    // "cast" fixed length vector to a scalable vector.
12263    MVT OpVT = V.getSimpleValueType();
12264    MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
12265    assert(useRVVForFixedLengthVectorVT(OpVT) &&
12266           "Only fixed length vectors are supported!");
12267    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
12268  }
12269
  // Scalable result can be returned directly; fixed-length results are
  // computed in the container type and converted back.
12270  if (!VT.isFixedLengthVector())
12271    return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
12272
12273  SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
12274
12275  return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
12276}
12277
12278SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
12279 SelectionDAG &DAG) const {
12280 SDLoc DL(Op);
12281 MVT VT = Op.getSimpleValueType();
12282
12283 SDValue Src = Op.getOperand(0);
12284 // NOTE: Mask is dropped.
12285 SDValue VL = Op.getOperand(2);
12286
12287 MVT ContainerVT = VT;
12288 if (VT.isFixedLengthVector()) {
12289 ContainerVT = getContainerForFixedLengthVector(VT);
12290 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
12291 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
12292 }
12293
12294 MVT XLenVT = Subtarget.getXLenVT();
12295 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
12296 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12297 DAG.getUNDEF(ContainerVT), Zero, VL);
12298
12299 SDValue SplatValue = DAG.getSignedConstant(
12300 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
12301 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12302 DAG.getUNDEF(ContainerVT), SplatValue, VL);
12303
12304 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
12305 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
12306 if (!VT.isFixedLengthVector())
12307 return Result;
12308 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12309}
12310
// Lower a VP_SETCC whose operands are mask (i1) vectors using mask-register
// logic ops (vmxor/vmand). Comparisons on i1 reduce to boolean algebra on
// the two mask operands; the VP mask operand is dropped.
12311SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
12312                                                SelectionDAG &DAG) const {
12313  SDLoc DL(Op);
12314  MVT VT = Op.getSimpleValueType();
12315
12316  SDValue Op1 = Op.getOperand(0);
12317  SDValue Op2 = Op.getOperand(1);
12318  ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
12319  // NOTE: Mask is dropped.
12320  SDValue VL = Op.getOperand(4);
12321
12322  MVT ContainerVT = VT;
12323  if (VT.isFixedLengthVector()) {
12324    ContainerVT = getContainerForFixedLengthVector(VT);
12325    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12326    Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12327  }
12328
  // vmset gives the all-ones mask; XOR with it is mask complement below.
12330  SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12331
12332  switch (Condition) {
12333  default:
12334    break;
12335  // X != Y  --> (X^Y)
12336  case ISD::SETNE:
12337    Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
12338    break;
12339  // X == Y  --> ~(X^Y)
12340  case ISD::SETEQ: {
12341    SDValue Temp =
12342        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
12343    Result =
12344        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
12345    break;
12346  }
12347  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
12348  // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
12349  case ISD::SETGT:
12350  case ISD::SETULT: {
12351    SDValue Temp =
12352        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12353    Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
12354    break;
12355  }
12356  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
12357  // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
12358  case ISD::SETLT:
12359  case ISD::SETUGT: {
12360    SDValue Temp =
12361        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12362    Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
12363    break;
12364  }
12365  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
12366  // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
12367  case ISD::SETGE:
12368  case ISD::SETULE: {
12369    SDValue Temp =
12370        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12371    Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
12372    break;
12373  }
12374  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
12375  // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
12376  case ISD::SETLE:
12377  case ISD::SETUGE: {
12378    SDValue Temp =
12379        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12380    Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
12381    break;
12382  }
12383  }
12384
12385  if (!VT.isFixedLengthVector())
12386    return Result;
12387  return convertFromScalableVector(VT, Result, DAG, Subtarget);
12388}
12389
12390// Lower Floating-Point/Integer Type-Convert VP SDNodes
12391SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
12392 SelectionDAG &DAG) const {
12393 SDLoc DL(Op);
12394
12395 SDValue Src = Op.getOperand(0);
12396 SDValue Mask = Op.getOperand(1);
12397 SDValue VL = Op.getOperand(2);
12398 unsigned RISCVISDOpc = getRISCVVLOp(Op);
12399
12400 MVT DstVT = Op.getSimpleValueType();
12401 MVT SrcVT = Src.getSimpleValueType();
12402 if (DstVT.isFixedLengthVector()) {
12403 DstVT = getContainerForFixedLengthVector(DstVT);
12404 SrcVT = getContainerForFixedLengthVector(SrcVT);
12405 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
12406 MVT MaskVT = getMaskTypeFor(DstVT);
12407 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12408 }
12409
12410 unsigned DstEltSize = DstVT.getScalarSizeInBits();
12411 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
12412
12414 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
12415 if (SrcVT.isInteger()) {
12416 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12417
12418 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
12421
12422 // Do we need to do any pre-widening before converting?
12423 if (SrcEltSize == 1) {
12424 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
12425 MVT XLenVT = Subtarget.getXLenVT();
12426 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
12427 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12428 DAG.getUNDEF(IntVT), Zero, VL);
12429 SDValue One = DAG.getSignedConstant(
12430 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
12431 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12432 DAG.getUNDEF(IntVT), One, VL);
12433 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
12434 ZeroSplat, DAG.getUNDEF(IntVT), VL);
12435 } else if (DstEltSize > (2 * SrcEltSize)) {
12436 // Widen before converting.
12437 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
12438 DstVT.getVectorElementCount());
12439 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
12440 }
12441
12442 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12443 } else {
12444 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12445 "Wrong input/output vector types");
12446
12447 // Convert f16 to f32 then convert f32 to i64.
12448 if (DstEltSize > (2 * SrcEltSize)) {
12449 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12450 MVT InterimFVT =
12451 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12452 Src =
12453 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
12454 }
12455
12456 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12457 }
12458 } else { // Narrowing + Conversion
12459 if (SrcVT.isInteger()) {
12460 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12461 // First do a narrowing convert to an FP type half the size, then round
12462 // the FP type to a small FP type if needed.
12463
12464 MVT InterimFVT = DstVT;
12465 if (SrcEltSize > (2 * DstEltSize)) {
12466 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
12467 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12468 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12469 }
12470
12471 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
12472
12473 if (InterimFVT != DstVT) {
12474 Src = Result;
12475 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
12476 }
12477 } else {
12478 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12479 "Wrong input/output vector types");
12480 // First do a narrowing conversion to an integer half the size, then
12481 // truncate if needed.
12482
12483 if (DstEltSize == 1) {
12484 // First convert to the same size integer, then convert to mask using
12485 // setcc.
12486 assert(SrcEltSize >= 16 && "Unexpected FP type!");
12487 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
12488 DstVT.getVectorElementCount());
12489 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12490
12491 // Compare the integer result to 0. The integer should be 0 or 1/-1,
12492 // otherwise the conversion was undefined.
12493 MVT XLenVT = Subtarget.getXLenVT();
12494 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
12495 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
12496 DAG.getUNDEF(InterimIVT), SplatZero, VL);
12497 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
12498 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
12499 DAG.getUNDEF(DstVT), Mask, VL});
12500 } else {
12501 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12502 DstVT.getVectorElementCount());
12503
12504 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12505
12506 while (InterimIVT != DstVT) {
12507 SrcEltSize /= 2;
12508 Src = Result;
12509 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12510 DstVT.getVectorElementCount());
12511 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
12512 Src, Mask, VL);
12513 }
12514 }
12515 }
12516 }
12517
12518 MVT VT = Op.getSimpleValueType();
12519 if (!VT.isFixedLengthVector())
12520 return Result;
12521 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12522}
12523
12524SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
12525 SelectionDAG &DAG) const {
12526 SDLoc DL(Op);
12527 MVT VT = Op.getSimpleValueType();
12528 MVT XLenVT = Subtarget.getXLenVT();
12529
12530 SDValue Mask = Op.getOperand(0);
12531 SDValue TrueVal = Op.getOperand(1);
12532 SDValue FalseVal = Op.getOperand(2);
12533 SDValue VL = Op.getOperand(3);
12534
12535 // Use default legalization if a vector of EVL type would be legal.
12536 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
12538 if (isTypeLegal(EVLVecVT))
12539 return SDValue();
12540
12541 MVT ContainerVT = VT;
12542 if (VT.isFixedLengthVector()) {
12543 ContainerVT = getContainerForFixedLengthVector(VT);
12544 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
12545 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
12546 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
12547 }
12548
12549 // Promote to a vector of i8.
12550 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
12551
12552 // Promote TrueVal and FalseVal using VLMax.
12553 // FIXME: Is there a better way to do this?
12554 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
12555 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12556 DAG.getUNDEF(PromotedVT),
12557 DAG.getConstant(1, DL, XLenVT), VLMax);
12558 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12559 DAG.getUNDEF(PromotedVT),
12560 DAG.getConstant(0, DL, XLenVT), VLMax);
12561 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
12562 SplatZero, DAG.getUNDEF(PromotedVT), VL);
12563 // Any element past VL uses FalseVal, so use VLMax
12564 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
12565 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
12566
12567 // VP_MERGE the two promoted values.
12568 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
12569 TrueVal, FalseVal, FalseVal, VL);
12570
12571 // Convert back to mask.
12572 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12573 SDValue Result = DAG.getNode(
12574 RISCVISD::SETCC_VL, DL, ContainerVT,
12575 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
12576 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
12577
12578 if (VT.isFixedLengthVector())
12579 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12580 return Result;
12581}
12582
12583SDValue
12584RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
12585 SelectionDAG &DAG) const {
12586 SDLoc DL(Op);
12587
12588 SDValue Op1 = Op.getOperand(0);
12589 SDValue Op2 = Op.getOperand(1);
12590 SDValue Offset = Op.getOperand(2);
12591 SDValue Mask = Op.getOperand(3);
12592 SDValue EVL1 = Op.getOperand(4);
12593 SDValue EVL2 = Op.getOperand(5);
12594
12595 const MVT XLenVT = Subtarget.getXLenVT();
12596 MVT VT = Op.getSimpleValueType();
12597 MVT ContainerVT = VT;
12598 if (VT.isFixedLengthVector()) {
12599 ContainerVT = getContainerForFixedLengthVector(VT);
12600 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12601 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12602 MVT MaskVT = getMaskTypeFor(ContainerVT);
12603 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12604 }
12605
12606 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
12607 if (IsMaskVector) {
12608 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
12609
12610 // Expand input operands
12611 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12612 DAG.getUNDEF(ContainerVT),
12613 DAG.getConstant(1, DL, XLenVT), EVL1);
12614 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12615 DAG.getUNDEF(ContainerVT),
12616 DAG.getConstant(0, DL, XLenVT), EVL1);
12617 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
12618 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
12619
12620 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12621 DAG.getUNDEF(ContainerVT),
12622 DAG.getConstant(1, DL, XLenVT), EVL2);
12623 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12624 DAG.getUNDEF(ContainerVT),
12625 DAG.getConstant(0, DL, XLenVT), EVL2);
12626 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
12627 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
12628 }
12629
12630 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
12631 SDValue DownOffset, UpOffset;
12632 if (ImmValue >= 0) {
12633 // The operand is a TargetConstant, we need to rebuild it as a regular
12634 // constant.
12635 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12636 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
12637 } else {
12638 // The operand is a TargetConstant, we need to rebuild it as a regular
12639 // constant rather than negating the original operand.
12640 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12641 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
12642 }
12643
12644 SDValue SlideDown =
12645 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
12646 Op1, DownOffset, Mask, UpOffset);
12647 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
12648 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
12649
12650 if (IsMaskVector) {
12651 // Truncate Result back to a mask vector (Result has same EVL as Op2)
12652 Result = DAG.getNode(
12653 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
12654 {Result, DAG.getConstant(0, DL, ContainerVT),
12655 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12656 Mask, EVL2});
12657 }
12658
12659 if (!VT.isFixedLengthVector())
12660 return Result;
12661 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12662}
12663
12664SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
12665 SelectionDAG &DAG) const {
12666 SDLoc DL(Op);
12667 SDValue Val = Op.getOperand(0);
12668 SDValue Mask = Op.getOperand(1);
12669 SDValue VL = Op.getOperand(2);
12670 MVT VT = Op.getSimpleValueType();
12671
12672 MVT ContainerVT = VT;
12673 if (VT.isFixedLengthVector()) {
12674 ContainerVT = getContainerForFixedLengthVector(VT);
12675 MVT MaskVT = getMaskTypeFor(ContainerVT);
12676 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12677 }
12678
12679 SDValue Result =
12680 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12681
12682 if (!VT.isFixedLengthVector())
12683 return Result;
12684 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12685}
12686
12687SDValue
12688RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
12689 SelectionDAG &DAG) const {
12690 SDLoc DL(Op);
12691 MVT VT = Op.getSimpleValueType();
12692 MVT XLenVT = Subtarget.getXLenVT();
12693
12694 SDValue Op1 = Op.getOperand(0);
12695 SDValue Mask = Op.getOperand(1);
12696 SDValue EVL = Op.getOperand(2);
12697
12698 MVT ContainerVT = VT;
12699 if (VT.isFixedLengthVector()) {
12700 ContainerVT = getContainerForFixedLengthVector(VT);
12701 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12702 MVT MaskVT = getMaskTypeFor(ContainerVT);
12703 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12704 }
12705
12706 MVT GatherVT = ContainerVT;
12707 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
12708 // Check if we are working with mask vectors
12709 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
12710 if (IsMaskVector) {
12711 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
12712
12713 // Expand input operand
12714 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12715 DAG.getUNDEF(IndicesVT),
12716 DAG.getConstant(1, DL, XLenVT), EVL);
12717 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12718 DAG.getUNDEF(IndicesVT),
12719 DAG.getConstant(0, DL, XLenVT), EVL);
12720 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
12721 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
12722 }
12723
12724 unsigned EltSize = GatherVT.getScalarSizeInBits();
12725 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
12726 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12727 unsigned MaxVLMAX =
12728 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12729
12730 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12731 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
12732 // to use vrgatherei16.vv.
12733 // TODO: It's also possible to use vrgatherei16.vv for other types to
12734 // decrease register width for the index calculation.
12735 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12736 if (MaxVLMAX > 256 && EltSize == 8) {
12737 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
12738 // Split the vector in half and reverse each half using a full register
12739 // reverse.
12740 // Swap the halves and concatenate them.
12741 // Slide the concatenated result by (VLMax - VL).
12742 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12743 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
12744 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
12745
12746 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12747 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12748
12749 // Reassemble the low and high pieces reversed.
12750 // NOTE: this Result is unmasked (because we do not need masks for
12751 // shuffles). If in the future this has to change, we can use a SELECT_VL
12752 // between Result and UNDEF using the mask originally passed to VP_REVERSE
12753 SDValue Result =
12754 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
12755
12756 // Slide off any elements from past EVL that were reversed into the low
12757 // elements.
12758 unsigned MinElts = GatherVT.getVectorMinNumElements();
12759 SDValue VLMax =
12760 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
12761 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
12762
12763 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
12764 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12765
12766 if (IsMaskVector) {
12767 // Truncate Result back to a mask vector
12768 Result =
12769 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
12770 {Result, DAG.getConstant(0, DL, GatherVT),
12772 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12773 }
12774
12775 if (!VT.isFixedLengthVector())
12776 return Result;
12777 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12778 }
12779
12780 // Just promote the int type to i16 which will double the LMUL.
12781 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
12782 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12783 }
12784
12785 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
12786 SDValue VecLen =
12787 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
12788 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12789 DAG.getUNDEF(IndicesVT), VecLen, EVL);
12790 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
12791 DAG.getUNDEF(IndicesVT), Mask, EVL);
12792 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
12793 DAG.getUNDEF(GatherVT), Mask, EVL);
12794
12795 if (IsMaskVector) {
12796 // Truncate Result back to a mask vector
12797 Result = DAG.getNode(
12798 RISCVISD::SETCC_VL, DL, ContainerVT,
12799 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
12800 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12801 }
12802
12803 if (!VT.isFixedLengthVector())
12804 return Result;
12805 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12806}
12807
12808SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
12809 SelectionDAG &DAG) const {
12810 MVT VT = Op.getSimpleValueType();
12811 if (VT.getVectorElementType() != MVT::i1)
12812 return lowerVPOp(Op, DAG);
12813
12814 // It is safe to drop mask parameter as masked-off elements are undef.
12815 SDValue Op1 = Op->getOperand(0);
12816 SDValue Op2 = Op->getOperand(1);
12817 SDValue VL = Op->getOperand(3);
12818
12819 MVT ContainerVT = VT;
12820 const bool IsFixed = VT.isFixedLengthVector();
12821 if (IsFixed) {
12822 ContainerVT = getContainerForFixedLengthVector(VT);
12823 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12824 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12825 }
12826
12827 SDLoc DL(Op);
12828 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
12829 if (!IsFixed)
12830 return Val;
12831 return convertFromScalableVector(VT, Val, DAG, Subtarget);
12832}
12833
12834SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
12835 SelectionDAG &DAG) const {
12836 SDLoc DL(Op);
12837 MVT XLenVT = Subtarget.getXLenVT();
12838 MVT VT = Op.getSimpleValueType();
12839 MVT ContainerVT = VT;
12840 if (VT.isFixedLengthVector())
12841 ContainerVT = getContainerForFixedLengthVector(VT);
12842
12843 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12844
12845 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
12846 // Check if the mask is known to be all ones
12847 SDValue Mask = VPNode->getMask();
12848 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12849
12850 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
12851 : Intrinsic::riscv_vlse_mask,
12852 DL, XLenVT);
12853 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
12854 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
12855 VPNode->getStride()};
12856 if (!IsUnmasked) {
12857 if (VT.isFixedLengthVector()) {
12858 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12859 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12860 }
12861 Ops.push_back(Mask);
12862 }
12863 Ops.push_back(VPNode->getVectorLength());
12864 if (!IsUnmasked) {
12865 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
12866 Ops.push_back(Policy);
12867 }
12868
12869 SDValue Result =
12871 VPNode->getMemoryVT(), VPNode->getMemOperand());
12872 SDValue Chain = Result.getValue(1);
12873
12874 if (VT.isFixedLengthVector())
12875 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12876
12877 return DAG.getMergeValues({Result, Chain}, DL);
12878}
12879
12880SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
12881 SelectionDAG &DAG) const {
12882 SDLoc DL(Op);
12883 MVT XLenVT = Subtarget.getXLenVT();
12884
12885 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
12886 SDValue StoreVal = VPNode->getValue();
12887 MVT VT = StoreVal.getSimpleValueType();
12888 MVT ContainerVT = VT;
12889 if (VT.isFixedLengthVector()) {
12890 ContainerVT = getContainerForFixedLengthVector(VT);
12891 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12892 }
12893
12894 // Check if the mask is known to be all ones
12895 SDValue Mask = VPNode->getMask();
12896 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12897
12898 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
12899 : Intrinsic::riscv_vsse_mask,
12900 DL, XLenVT);
12901 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
12902 VPNode->getBasePtr(), VPNode->getStride()};
12903 if (!IsUnmasked) {
12904 if (VT.isFixedLengthVector()) {
12905 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12906 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12907 }
12908 Ops.push_back(Mask);
12909 }
12910 Ops.push_back(VPNode->getVectorLength());
12911
12912 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
12913 Ops, VPNode->getMemoryVT(),
12914 VPNode->getMemOperand());
12915}
12916
12917// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
12918// matched to a RVV indexed load. The RVV indexed load instructions only
12919// support the "unsigned unscaled" addressing mode; indices are implicitly
12920// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12921// signed or scaled indexing is extended to the XLEN value type and scaled
12922// accordingly.
12923SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
12924 SelectionDAG &DAG) const {
12925 SDLoc DL(Op);
12926 MVT VT = Op.getSimpleValueType();
12927
12928 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12929 EVT MemVT = MemSD->getMemoryVT();
12930 MachineMemOperand *MMO = MemSD->getMemOperand();
12931 SDValue Chain = MemSD->getChain();
12932 SDValue BasePtr = MemSD->getBasePtr();
12933
12934 [[maybe_unused]] ISD::LoadExtType LoadExtType;
12935 SDValue Index, Mask, PassThru, VL;
12936
12937 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
12938 Index = VPGN->getIndex();
12939 Mask = VPGN->getMask();
12940 PassThru = DAG.getUNDEF(VT);
12941 VL = VPGN->getVectorLength();
12942 // VP doesn't support extending loads.
12944 } else {
12945 // Else it must be a MGATHER.
12946 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
12947 Index = MGN->getIndex();
12948 Mask = MGN->getMask();
12949 PassThru = MGN->getPassThru();
12950 LoadExtType = MGN->getExtensionType();
12951 }
12952
12953 MVT IndexVT = Index.getSimpleValueType();
12954 MVT XLenVT = Subtarget.getXLenVT();
12955
12957 "Unexpected VTs!");
12958 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12959 // Targets have to explicitly opt-in for extending vector loads.
12960 assert(LoadExtType == ISD::NON_EXTLOAD &&
12961 "Unexpected extending MGATHER/VP_GATHER");
12962
12963 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12964 // the selection of the masked intrinsics doesn't do this for us.
12965 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12966
12967 MVT ContainerVT = VT;
12968 if (VT.isFixedLengthVector()) {
12969 ContainerVT = getContainerForFixedLengthVector(VT);
12970 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12971 ContainerVT.getVectorElementCount());
12972
12973 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12974
12975 if (!IsUnmasked) {
12976 MVT MaskVT = getMaskTypeFor(ContainerVT);
12977 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12978 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12979 }
12980 }
12981
12982 if (!VL)
12983 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12984
12985 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12986 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12987 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12988 }
12989
12990 unsigned IntID =
12991 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12992 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12993 if (IsUnmasked)
12994 Ops.push_back(DAG.getUNDEF(ContainerVT));
12995 else
12996 Ops.push_back(PassThru);
12997 Ops.push_back(BasePtr);
12998 Ops.push_back(Index);
12999 if (!IsUnmasked)
13000 Ops.push_back(Mask);
13001 Ops.push_back(VL);
13002 if (!IsUnmasked)
13004
13005 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13006 SDValue Result =
13007 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
13008 Chain = Result.getValue(1);
13009
13010 if (VT.isFixedLengthVector())
13011 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13012
13013 return DAG.getMergeValues({Result, Chain}, DL);
13014}
13015
13016// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
13017// matched to a RVV indexed store. The RVV indexed store instructions only
13018// support the "unsigned unscaled" addressing mode; indices are implicitly
13019// zero-extended or truncated to XLEN and are treated as byte offsets. Any
13020// signed or scaled indexing is extended to the XLEN value type and scaled
13021// accordingly.
13022SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
13023 SelectionDAG &DAG) const {
13024 SDLoc DL(Op);
13025 const auto *MemSD = cast<MemSDNode>(Op.getNode());
13026 EVT MemVT = MemSD->getMemoryVT();
13027 MachineMemOperand *MMO = MemSD->getMemOperand();
13028 SDValue Chain = MemSD->getChain();
13029 SDValue BasePtr = MemSD->getBasePtr();
13030
13031 [[maybe_unused]] bool IsTruncatingStore = false;
13032 SDValue Index, Mask, Val, VL;
13033
13034 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
13035 Index = VPSN->getIndex();
13036 Mask = VPSN->getMask();
13037 Val = VPSN->getValue();
13038 VL = VPSN->getVectorLength();
13039 // VP doesn't support truncating stores.
13040 IsTruncatingStore = false;
13041 } else {
13042 // Else it must be a MSCATTER.
13043 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
13044 Index = MSN->getIndex();
13045 Mask = MSN->getMask();
13046 Val = MSN->getValue();
13047 IsTruncatingStore = MSN->isTruncatingStore();
13048 }
13049
13050 MVT VT = Val.getSimpleValueType();
13051 MVT IndexVT = Index.getSimpleValueType();
13052 MVT XLenVT = Subtarget.getXLenVT();
13053
13055 "Unexpected VTs!");
13056 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
13057 // Targets have to explicitly opt-in for extending vector loads and
13058 // truncating vector stores.
13059 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
13060
13061 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
13062 // the selection of the masked intrinsics doesn't do this for us.
13063 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13064
13065 MVT ContainerVT = VT;
13066 if (VT.isFixedLengthVector()) {
13067 ContainerVT = getContainerForFixedLengthVector(VT);
13068 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
13069 ContainerVT.getVectorElementCount());
13070
13071 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
13072 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13073
13074 if (!IsUnmasked) {
13075 MVT MaskVT = getMaskTypeFor(ContainerVT);
13076 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13077 }
13078 }
13079
13080 if (!VL)
13081 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13082
13083 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
13084 IndexVT = IndexVT.changeVectorElementType(XLenVT);
13085 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
13086 }
13087
13088 unsigned IntID =
13089 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
13090 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13091 Ops.push_back(Val);
13092 Ops.push_back(BasePtr);
13093 Ops.push_back(Index);
13094 if (!IsUnmasked)
13095 Ops.push_back(Mask);
13096 Ops.push_back(VL);
13097
13099 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
13100}
13101
13102SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
13103 SelectionDAG &DAG) const {
13104 const MVT XLenVT = Subtarget.getXLenVT();
13105 SDLoc DL(Op);
13106 SDValue Chain = Op->getOperand(0);
13107 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
13108 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
13109 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
13110
13111 // Encoding used for rounding mode in RISC-V differs from that used in
13112 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
13113 // table, which consists of a sequence of 4-bit fields, each representing
13114 // corresponding FLT_ROUNDS mode.
13115 static const int Table =
13121
13122 SDValue Shift =
13123 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
13124 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
13125 DAG.getConstant(Table, DL, XLenVT), Shift);
13126 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
13127 DAG.getConstant(7, DL, XLenVT));
13128
13129 return DAG.getMergeValues({Masked, Chain}, DL);
13130}
13131
13132SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
13133 SelectionDAG &DAG) const {
13134 const MVT XLenVT = Subtarget.getXLenVT();
13135 SDLoc DL(Op);
13136 SDValue Chain = Op->getOperand(0);
13137 SDValue RMValue = Op->getOperand(1);
13138 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
13139
13140 // Encoding used for rounding mode in RISC-V differs from that used in
13141 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
13142 // a table, which consists of a sequence of 4-bit fields, each representing
13143 // corresponding RISC-V mode.
13144 static const unsigned Table =
13150
13151 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
13152
13153 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
13154 DAG.getConstant(2, DL, XLenVT));
13155 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
13156 DAG.getConstant(Table, DL, XLenVT), Shift);
13157 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
13158 DAG.getConstant(0x7, DL, XLenVT));
13159 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
13160 RMValue);
13161}
13162
13163SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
13164 SelectionDAG &DAG) const {
13166
13167 bool isRISCV64 = Subtarget.is64Bit();
13168 EVT PtrVT = getPointerTy(DAG.getDataLayout());
13169
13170 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
13171 return DAG.getFrameIndex(FI, PtrVT);
13172}
13173
13174// Returns the opcode of the target-specific SDNode that implements the 32-bit
13175// form of the given Opcode.
13176static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
13177 switch (Opcode) {
13178 default:
13179 llvm_unreachable("Unexpected opcode");
13180 case ISD::SHL:
13181 return RISCVISD::SLLW;
13182 case ISD::SRA:
13183 return RISCVISD::SRAW;
13184 case ISD::SRL:
13185 return RISCVISD::SRLW;
13186 case ISD::SDIV:
13187 return RISCVISD::DIVW;
13188 case ISD::UDIV:
13189 return RISCVISD::DIVUW;
13190 case ISD::UREM:
13191 return RISCVISD::REMUW;
13192 case ISD::ROTL:
13193 return RISCVISD::ROLW;
13194 case ISD::ROTR:
13195 return RISCVISD::RORW;
13196 }
13197}
13198
13199// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
13200// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
13201// otherwise be promoted to i64, making it difficult to select the
13202// SLLW/DIVUW/.../*W later one because the fact the operation was originally of
13203// type i8/i16/i32 is lost.
13205 unsigned ExtOpc = ISD::ANY_EXTEND) {
13206 SDLoc DL(N);
13207 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
13208 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
13209 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
13210 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
13211 // ReplaceNodeResults requires we maintain the same type for the return value.
13212 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
13213}
13214
// Converts the given 32-bit operation to a i64 operation with signed extension
// semantic to reduce the signed extension instructions.
  SDLoc DL(N);
  // The upper 32 bits of the inputs don't affect the low 32 bits of the
  // result, so widen with ANY_EXTEND and perform the operation at i64.
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  // Sign extend the low 32 bits of the i64 result, matching the "signed
  // extension semantic" promised above so later sign extensions can fold.
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  // ReplaceNodeResults requires the original (i32) result type.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}
13226
// Replace the illegal-typed results of node N with custom-legalized,
// legal-typed equivalents appended to Results (TargetLowering hook).
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    bool IsStrict = N->isStrictFPOpcode();
    bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
                    N->getOpcode() == ISD::STRICT_FP_TO_SINT;
    // Strict nodes carry the chain in operand 0; the FP value follows it.
    SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
      if (!isTypeLegal(Op0.getValueType()))
        return;
      if (IsStrict) {
        SDValue Chain = N->getOperand(0);
        // In absence of Zfh, promote f16 to f32, then convert.
        if (Op0.getValueType() == MVT::f16 &&
            !Subtarget.hasStdExtZfhOrZhinx()) {
          Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
                            {Chain, Op0});
          Chain = Op0.getValue(1);
        }
        unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
        SDValue Res = DAG.getNode(
            Opc, DL, VTs, Chain, Op0,
            DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
        Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
        Results.push_back(Res.getValue(1));
        return;
      }
      // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
      // convert.
      if ((Op0.getValueType() == MVT::f16 &&
           !Subtarget.hasStdExtZfhOrZhinx()) ||
          Op0.getValueType() == MVT::bf16)
        Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);

      unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
      SDValue Res =
          DAG.getNode(Opc, DL, MVT::i64, Op0,
                      DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'. If
    // the FP type doesn't need to be softened just let generic type
    // legalization promote the result type.
    RTLIB::Libcall LC;
    if (IsSigned)
      LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
    else
      LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
    SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    break;
  }
  case ISD::LROUND: {
    SDValue Op0 = N->getOperand(0);
    EVT Op0VT = Op0.getValueType();
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
      if (!isTypeLegal(Op0VT))
        return;

      // In absence of Zfh, promote f16 to f32, then convert.
      if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
        Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);

      SDValue Res =
          DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
                      DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    // If the FP type needs to be softened, emit a library call to lround. We'll
    // need to truncate the result. We assume any value that doesn't fit in i32
    // is allowed to return an unspecified value.
    RTLIB::Libcall LC =
        Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
    SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
    Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
    Results.push_back(Result);
    break;
  }
    assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
                                   "has custom type legalization on riscv32");

    // Read the low and high halves of the 64-bit counter via the paired CSRs
    // and reassemble them into a single i64 with BUILD_PAIR.
    SDValue LoCounter, HiCounter;
    MVT XLenVT = Subtarget.getXLenVT();
    if (N->getOpcode() == ISD::READCYCLECOUNTER) {
      LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
      HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
    } else {
      LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
      HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
    }
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
                              N->getOperand(0), LoCounter, HiCounter);

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
    Results.push_back(RCW.getValue(2));
    break;
  }
  case ISD::LOAD: {
    if (!ISD::isNON_EXTLoad(N))
      return;

    // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
    // sext_inreg we emit for ADD/SUB/MUL/SLLI.
    LoadSDNode *Ld = cast<LoadSDNode>(N);

    SDLoc dl(N);
    SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
                                 Ld->getBasePtr(), Ld->getMemoryVT(),
                                 Ld->getMemOperand());
    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
    Results.push_back(Res.getValue(1));
    return;
  }
  case ISD::MUL: {
    unsigned Size = N->getSimpleValueType(0).getSizeInBits();
    unsigned XLen = Subtarget.getXLen();
    // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
    if (Size > XLen) {
      assert(Size == (XLen * 2) && "Unexpected custom legalisation");
      SDValue LHS = N->getOperand(0);
      SDValue RHS = N->getOperand(1);
      APInt HighMask = APInt::getHighBitsSet(Size, XLen);

      bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
      bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
      // We need exactly one side to be unsigned.
      if (LHSIsU == RHSIsU)
        return;

      // Build the double-width product from a signed operand S and an
      // unsigned operand U: low half via MUL, high half via MULHSU.
      auto MakeMULPair = [&](SDValue S, SDValue U) {
        MVT XLenVT = Subtarget.getXLenVT();
        S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
        U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
        SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
        SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
        return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
      };

      bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
      bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;

      // The other operand should be signed, but still prefer MULH when
      // possible.
      if (RHSIsU && LHSIsS && !RHSIsS)
        Results.push_back(MakeMULPair(LHS, RHS));
      else if (LHSIsU && RHSIsS && !LHSIsS)
        Results.push_back(MakeMULPair(RHS, LHS));

      return;
    }
    [[fallthrough]];
  }
  case ISD::ADD:
  case ISD::SUB:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() != ISD::Constant) {
      // If we can use a BSET instruction, allow default promotion to apply.
      if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
          isOneConstant(N->getOperand(0)))
        break;
      Results.push_back(customLegalizeToWOp(N, DAG));
      break;
    }

    // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
    // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
    // shift amount.
    if (N->getOpcode() == ISD::SHL) {
      SDLoc DL(N);
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
      SDValue NewOp1 =
          DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
      SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                                   DAG.getValueType(MVT::i32));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    }

    break;
  case ISD::ROTL:
  case ISD::ROTR:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
            Subtarget.hasVendorXTHeadBb()) &&
           "Unexpected custom legalization");
    if (!isa<ConstantSDNode>(N->getOperand(1)) &&
        !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::CTTZ:
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");

    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    bool IsCTZ =
        N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
    unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
    SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    return;
  }
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM: {
    MVT VT = N->getSimpleValueType(0);
    assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
           Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
           "Unexpected custom legalisation");
    // Don't promote division/remainder by constant since we should expand those
    // to multiply by magic constant.
    if (N->getOperand(1).getOpcode() == ISD::Constant &&
        !isIntDivCheap(N->getValueType(0), Attr))
      return;

    // If the input is i32, use ANY_EXTEND since the W instructions don't read
    // the upper 32 bits. For other types we need to sign or zero extend
    // based on the opcode.
    unsigned ExtOpc = ISD::ANY_EXTEND;
    if (VT != MVT::i32)
      ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND

    Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
    break;
  }
  case ISD::SADDO: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");

    // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
    // use the default legalization.
    if (!isa<ConstantSDNode>(N->getOperand(1)))
      return;

    SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
                      DAG.getValueType(MVT::i32));

    SDValue Zero = DAG.getConstant(0, DL, MVT::i64);

    // For an addition, the result should be less than one of the operands (LHS)
    // if and only if the other operand (RHS) is negative, otherwise there will
    // be overflow.
    // For a subtraction, the result should be less than one of the operands
    // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
    // otherwise there will be overflow.
    EVT OType = N->getValueType(1);
    SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
    SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);

    SDValue Overflow =
        DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    Results.push_back(Overflow);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    bool IsAdd = N->getOpcode() == ISD::UADDO;
    // Create an ADDW or SUBW.
    SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue Res =
        DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
                      DAG.getValueType(MVT::i32));

    SDValue Overflow;
    if (IsAdd && isOneConstant(RHS)) {
      // Special case uaddo X, 1 overflowed if the addition result is 0.
      // The general case (X + C) < C is not necessarily beneficial. Although we
      // reduce the live range of X, we may introduce the materialization of
      // constant C, especially when the setcc result is used by branch. We have
      // no compare with constant and branch instructions.
      Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
                              DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
    } else if (IsAdd && isAllOnesConstant(RHS)) {
      // Special case uaddo X, -1 overflowed if X != 0.
      Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
                              DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
    } else {
      // Sign extend the LHS and perform an unsigned compare with the ADDW
      // result. Since the inputs are sign extended from i32, this is equivalent
      // to comparing the lower 32 bits.
      LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
      Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
                              IsAdd ? ISD::SETULT : ISD::SETUGT);
    }

    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    Results.push_back(Overflow);
    return;
  }
  case ISD::UADDSAT:
  case ISD::USUBSAT: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
    // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
    // promotion for UADDO/USUBO.
    Results.push_back(expandAddSubSat(N, DAG));
    return;
  }
  case ISD::SADDSAT:
  case ISD::SSUBSAT: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(expandAddSubSat(N, DAG));
    return;
  }
  case ISD::ABS: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");

    if (Subtarget.hasStdExtZbb()) {
      // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
      // This allows us to remember that the result is sign extended. Expanding
      // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
      SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
                                N->getOperand(0));
      SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
      return;
    }

    // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
    SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));

    // Freeze the source so we can increase its use count.
    Src = DAG.getFreeze(Src);

    // Copy sign bit to all bits using the sraiw pattern.
    SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
                                   DAG.getValueType(MVT::i32));
    SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
                           DAG.getConstant(31, DL, MVT::i64));

    SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
    NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);

    // NOTE: The result is only required to be anyextended, but sext is
    // consistent with type legalization of sub.
    NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
                         DAG.getValueType(MVT::i32));
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    return;
  }
  case ISD::BITCAST: {
    EVT VT = N->getValueType(0);
    assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
    SDValue Op0 = N->getOperand(0);
    EVT Op0VT = Op0.getValueType();
    MVT XLenVT = Subtarget.getXLenVT();
    // Move [b]f16/f32/f64 bit patterns into the integer domain using the
    // target FP-move nodes appropriate for the subtarget and XLEN.
    if (VT == MVT::i16 &&
        ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
         (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
    } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtFOrZfinx()) {
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
               Subtarget.hasStdExtDOrZdinx()) {
      SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
                                   DAG.getVTList(MVT::i32, MVT::i32), Op0);
      SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
                                   NewReg.getValue(0), NewReg.getValue(1));
      Results.push_back(RetReg);
    } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
               isTypeLegal(Op0VT)) {
      // Custom-legalize bitcasts from fixed-length vector types to illegal
      // scalar types in order to improve codegen. Bitcast the vector to a
      // one-element vector type whose element type is the same as the result
      // type, and extract the first element.
      EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
      if (isTypeLegal(BVT)) {
        SDValue BVec = DAG.getBitcast(BVT, Op0);
        Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
                                      DAG.getVectorIdxConstant(0, DL)));
      }
    }
    break;
  }
  case RISCVISD::BREV8:
  case RISCVISD::ORC_B: {
    MVT VT = N->getSimpleValueType(0);
    MVT XLenVT = Subtarget.getXLenVT();
    assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
           "Unexpected custom legalisation");
    assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
            (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
           "Unexpected extension");
    SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
    SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
    break;
  }
    // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
    // type is illegal (currently only vXi64 RV32).
    // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
    // transferred to the destination register. We issue two of these from the
    // upper- and lower- halves of the SEW-bit vector element, slid down to the
    // first element.
    SDValue Vec = N->getOperand(0);
    SDValue Idx = N->getOperand(1);

    // The vector type hasn't been legalized yet so we can't issue target
    // specific nodes if it needs legalization.
    // FIXME: We would manually legalize if it's important.
    if (!isTypeLegal(Vec.getValueType()))
      return;

    MVT VecVT = Vec.getSimpleValueType();

    assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
           VecVT.getVectorElementType() == MVT::i64 &&
           "Unexpected EXTRACT_VECTOR_ELT legalization");

    // If this is a fixed vector, we need to convert it to a scalable vector.
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    MVT XLenVT = Subtarget.getXLenVT();

    // Use a VL of 1 to avoid processing more elements than we need.
    auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);

    // Unless the index is known to be 0, we must slide the vector down to get
    // the desired element into index 0.
    if (!isNullConstant(Idx)) {
      Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
    }

    // Extract the lower XLEN bits of the correct vector element.
    SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);

    // To extract the upper XLEN bits of the vector element, shift the first
    // element right by 32 bits and re-extract the lower XLEN bits.
    SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                     DAG.getUNDEF(ContainerVT),
                                     DAG.getConstant(32, DL, XLenVT), VL);
    SDValue LShr32 =
        DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
                    DAG.getUNDEF(ContainerVT), Mask, VL);

    SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
    break;
  }
    unsigned IntNo = N->getConstantOperandVal(0);
    switch (IntNo) {
    default:
          "Don't know how to custom type legalize this intrinsic!");
    case Intrinsic::experimental_get_vector_length: {
      SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::experimental_cttz_elts: {
      SDValue Res = lowerCttzElts(N, DAG, Subtarget);
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
      return;
    }
    case Intrinsic::riscv_orc_b:
    case Intrinsic::riscv_brev8:
    case Intrinsic::riscv_sha256sig0:
    case Intrinsic::riscv_sha256sig1:
    case Intrinsic::riscv_sha256sum0:
    case Intrinsic::riscv_sha256sum1:
    case Intrinsic::riscv_sm3p0:
    case Intrinsic::riscv_sm3p1: {
      if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
        return;
      unsigned Opc;
      switch (IntNo) {
      case Intrinsic::riscv_orc_b:      Opc = RISCVISD::ORC_B;      break;
      case Intrinsic::riscv_brev8:      Opc = RISCVISD::BREV8;      break;
      case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
      case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
      case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
      case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
      case Intrinsic::riscv_sm3p0:      Opc = RISCVISD::SM3P0;      break;
      case Intrinsic::riscv_sm3p1:      Opc = RISCVISD::SM3P1;      break;
      }

      SDValue NewOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_sm4ks:
    case Intrinsic::riscv_sm4ed: {
      unsigned Opc =
          IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      SDValue Res =
          DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_mopr: {
      if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
        return;
      SDValue NewOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue Res = DAG.getNode(
          RISCVISD::MOPR, DL, MVT::i64, NewOp,
          DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_moprr: {
      if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
        return;
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      SDValue Res = DAG.getNode(
          RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
          DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_clmul: {
      if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
        return;

      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_clmulh:
    case Intrinsic::riscv_clmulr: {
      if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
        return;

      // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
      // to the full 128-bit clmul result of multiplying two xlen values.
      // Perform clmulr or clmulh on the shifted values. Finally, extract the
      // upper 32 bits.
      //
      // The alternative is to mask the inputs to 32 bits and use clmul, but
      // that requires two shifts to mask each input without zext.w.
      // FIXME: If the inputs are known zero extended or could be freely
      // zero extended, the mask form would be better.
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
                           DAG.getConstant(32, DL, MVT::i64));
      NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
                           DAG.getConstant(32, DL, MVT::i64));
      unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
      Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
                        DAG.getConstant(32, DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_vmv_x_s: {
      EVT VT = N->getValueType(0);
      MVT XLenVT = Subtarget.getXLenVT();
      if (VT.bitsLT(XLenVT)) {
        // Simple case just extract using vmv.x.s and truncate.
        SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
                                      Subtarget.getXLenVT(), N->getOperand(1));
        Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
        return;
      }

      assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
             "Unexpected custom legalization");

      // We need to do the move in two steps.
      SDValue Vec = N->getOperand(1);
      MVT VecVT = Vec.getSimpleValueType();

      // First extract the lower XLEN bits of the element.
      SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);

      // To extract the upper XLEN bits of the vector element, shift the first
      // element right by 32 bits and re-extract the lower XLEN bits.
      auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);

      SDValue ThirtyTwoV =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
                      DAG.getConstant(32, DL, XLenVT), VL);
      SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
                                   DAG.getUNDEF(VecVT), Mask, VL);
      SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

      Results.push_back(
          DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
      break;
    }
    }
    break;
  }
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
    // Only push a result if the custom lowering succeeded; otherwise fall
    // back to default legalization.
    if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
      Results.push_back(V);
    break;
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_UMIN:
    // Likewise, only replace the node if lowerVPREDUCE produced a value.
    if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
      Results.push_back(V);
    break;
  case ISD::GET_ROUNDING: {
    // Re-emit GET_ROUNDING with an XLenVT result type.
    SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
    SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
    Results.push_back(Res.getValue(0));
    Results.push_back(Res.getValue(1));
    break;
  }
  }
}
13931
13932/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
13933/// which corresponds to it.
13934static unsigned getVecReduceOpcode(unsigned Opc) {
13935 switch (Opc) {
13936 default:
13937 llvm_unreachable("Unhandled binary to transform reduction");
13938 case ISD::ADD:
13939 return ISD::VECREDUCE_ADD;
13940 case ISD::UMAX:
13941 return ISD::VECREDUCE_UMAX;
13942 case ISD::SMAX:
13943 return ISD::VECREDUCE_SMAX;
13944 case ISD::UMIN:
13945 return ISD::VECREDUCE_UMIN;
13946 case ISD::SMIN:
13947 return ISD::VECREDUCE_SMIN;
13948 case ISD::AND:
13949 return ISD::VECREDUCE_AND;
13950 case ISD::OR:
13951 return ISD::VECREDUCE_OR;
13952 case ISD::XOR:
13953 return ISD::VECREDUCE_XOR;
13954 case ISD::FADD:
13955 // Note: This is the associative form of the generic reduction opcode.
13956 return ISD::VECREDUCE_FADD;
13957 }
13958}
13959
/// Perform two related transforms whose purpose is to incrementally recognize
/// an explode_vector followed by scalar reduction as a vector reduction node.
/// This exists to recover from a deficiency in SLP which can't handle
/// forests with multiple roots sharing common nodes. In some cases, one
/// of the trees will be vectorized, and the other will remain (unprofitably)
/// scalarized.
static SDValue
combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {

  // This transform needs to run before all integer types have been legalized
  // to i64 (so that the vector element type matches the add type), and while
  // it's safe to introduce odd sized vector types.
  if (DAG.NewNodesMustHaveLegalTypes)
    return SDValue();

  // Without V, this transform isn't useful. We could form the (illegal)
  // operations and let them be scalarized again, but there's really no point.
  if (!Subtarget.hasVInstructions())
    return SDValue();

  const SDLoc DL(N);
  const EVT VT = N->getValueType(0);
  const unsigned Opc = N->getOpcode();

  // For FADD, we only handle the case with reassociation allowed. We
  // could handle strict reduction order, but at the moment, there's no
  // known reason to, and the complexity isn't worth it.
  // TODO: Handle fminnum and fmaxnum here
  if (!VT.isInteger() &&
      (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
    return SDValue();

  const unsigned ReduceOpc = getVecReduceOpcode(Opc);
  assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
         "Inconsistent mappings");
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Both operands must feed only this binop, otherwise we'd duplicate work.
  if (!LHS.hasOneUse() || !RHS.hasOneUse())
    return SDValue();

  // Canonicalize the extract_vector_elt (if any) onto RHS.
  if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    std::swap(LHS, RHS);

  if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(RHS.getOperand(1)))
    return SDValue();

  uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
  SDValue SrcVec = RHS.getOperand(0);
  EVT SrcVecVT = SrcVec.getValueType();
  assert(SrcVecVT.getVectorElementType() == VT);
  if (SrcVecVT.isScalableVector())
    return SDValue();

  if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
    return SDValue();

  // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
  // reduce_op (extract_subvector [2 x VT] from V).  This will form the
  // root of our reduction tree. TODO: We could extend this to any two
  // adjacent aligned constant indices if desired.
  if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t LHSIdx =
        cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
    if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
      EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
      SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
                                DAG.getVectorIdxConstant(0, DL));
      return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
    }
  }

  // Match (binop (reduce (extract_subvector V, 0),
  //                      (extract_vector_elt V, sizeof(SubVec))))
  // into a reduction of one more element from the original vector V.
  if (LHS.getOpcode() != ReduceOpc)
    return SDValue();

  SDValue ReduceVec = LHS.getOperand(0);
  if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
      isNullConstant(ReduceVec.getOperand(1)) &&
      ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
    // For illegal types (e.g. 3xi32), most will be combined again into a
    // wider (hopefully legal) type.  If this is a terminal state, we are
    // relying on type legalization here to produce something reasonable
    // and this lowering quality could probably be improved. (TODO)
    EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
    SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
                              DAG.getVectorIdxConstant(0, DL));
    // Intersect the fast-math flags so the wider reduction is only as
    // permissive as both of the original nodes.
    return DAG.getNode(ReduceOpc, DL, VT, Vec,
                       ReduceVec->getFlags() & N->getFlags());
  }

  return SDValue();
}
14059
14060
// Try to fold (<bop> x, (reduction.<bop> vec, start))
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  // Map a scalar binop opcode to the corresponding VL-predicated RVV
  // reduction opcode.
  auto BinOpToRVVReduce = [](unsigned Opc) {
    switch (Opc) {
    default:
      llvm_unreachable("Unhandled binary to transform reduction");
    case ISD::ADD:
      return RISCVISD::VECREDUCE_ADD_VL;
    case ISD::UMAX:
      return RISCVISD::VECREDUCE_UMAX_VL;
    case ISD::SMAX:
      return RISCVISD::VECREDUCE_SMAX_VL;
    case ISD::UMIN:
      return RISCVISD::VECREDUCE_UMIN_VL;
    case ISD::SMIN:
      return RISCVISD::VECREDUCE_SMIN_VL;
    case ISD::AND:
      return RISCVISD::VECREDUCE_AND_VL;
    case ISD::OR:
      return RISCVISD::VECREDUCE_OR_VL;
    case ISD::XOR:
      return RISCVISD::VECREDUCE_XOR_VL;
    case ISD::FADD:
      return RISCVISD::VECREDUCE_FADD_VL;
    case ISD::FMAXNUM:
      return RISCVISD::VECREDUCE_FMAX_VL;
    case ISD::FMINNUM:
      return RISCVISD::VECREDUCE_FMIN_VL;
    }
  };

  // Is V an extract of element 0 of the RVV reduction matching Opc?
  auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
    return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
           isNullConstant(V.getOperand(1)) &&
           V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
  };

  unsigned Opc = N->getOpcode();
  unsigned ReduceIdx;
  if (IsReduction(N->getOperand(0), Opc))
    ReduceIdx = 0;
  else if (IsReduction(N->getOperand(1), Opc))
    ReduceIdx = 1;
  else
    return SDValue();

  // Skip if this is an FADD that disallows reassociation; the fold below
  // reassociates the addition into the reduction's start value.
  if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
    return SDValue();

  SDValue Extract = N->getOperand(ReduceIdx);
  SDValue Reduce = Extract.getOperand(0);
  if (!Extract.hasOneUse() || !Reduce.hasOneUse())
    return SDValue();

  SDValue ScalarV = Reduce.getOperand(2);
  EVT ScalarVT = ScalarV.getValueType();
  // Look through an INSERT_SUBVECTOR of the start value into undef at
  // index 0; we restore it after rewriting the start value below.
  if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
      ScalarV.getOperand(0)->isUndef() &&
      isNullConstant(ScalarV.getOperand(2)))
    ScalarV = ScalarV.getOperand(1);

  // Make sure that ScalarV is a splat with VL=1.
  if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
    return SDValue();

  if (!isNonZeroAVL(ScalarV.getOperand(2)))
    return SDValue();

  // Check the scalar of ScalarV is neutral element
  // TODO: Deal with value other than neutral element.
  if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
                         0))
    return SDValue();

  // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
  // FIXME: We might be able to improve this if operand 0 is undef.
  if (!isNonZeroAVL(Reduce.getOperand(5)))
    return SDValue();

  // The binop's other operand becomes the new start value of the reduction.
  SDValue NewStart = N->getOperand(1 - ReduceIdx);

  SDLoc DL(N);
  SDValue NewScalarV =
      lowerScalarInsert(NewStart, ScalarV.getOperand(2),
                        ScalarV.getSimpleValueType(), DL, DAG, Subtarget);

  // If we looked through an INSERT_SUBVECTOR we need to restore it.
  if (ScalarVT != ScalarV.getValueType())
    NewScalarV =
        DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
                    NewScalarV, DAG.getVectorIdxConstant(0, DL));

  // Rebuild the reduction with the new start vector (operand 2); all other
  // operands (source vector, mask, VL, ...) are unchanged.
  SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
                   NewScalarV,           Reduce.getOperand(3),
                   Reduce.getOperand(4), Reduce.getOperand(5)};
  SDValue NewReduce =
      DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
  return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
                     Extract.getOperand(1));
}
14165
// Optimize (add (shl x, c0), (shl y, c1)) ->
//          (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  // Perform this optimization only in the zba extension.
  if (!Subtarget.hasStdExtZba())
    return SDValue();

  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
    return SDValue();

  // The two operand nodes must be SHL and have no other use.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
      !N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();

  // Check c0 and c1.
  auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
  if (!N0C || !N1C)
    return SDValue();
  int64_t C0 = N0C->getSExtValue();
  int64_t C1 = N1C->getSExtValue();
  if (C0 <= 0 || C1 <= 0)
    return SDValue();

  // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable: the difference of the
  // two shift amounts must be exactly 1, 2 or 3.
  int64_t Bits = std::min(C0, C1);
  int64_t Diff = std::abs(C0 - C1);
  if (Diff != 1 && Diff != 2 && Diff != 3)
    return SDValue();

  // Build nodes: shift-add the larger-shift operand (NL) onto the
  // smaller-shift operand (NS), then shift left by the common amount.
  SDLoc DL(N);
  SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
  SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
  SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
                              DAG.getConstant(Diff, DL, VT), NS);
  return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
}
14210
// Combine a constant select operand into its use:
//
// (and (select cond, -1, c), x)
//   -> (select cond, x, (and x, c))  [AllOnes=1]
// (or  (select cond, 0, c), x)
//   -> (select cond, x, (or x, c))  [AllOnes=0]
// (xor (select cond, 0, c), x)
//   -> (select cond, x, (xor x, c))  [AllOnes=0]
// (add (select cond, 0, c), x)
//   -> (select cond, x, (add x, c))  [AllOnes=0]
// (sub x, (select cond, 0, c))
//   -> (select cond, x, (sub x, c))  [AllOnes=0]
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                                   SelectionDAG &DAG, bool AllOnes,
                                   const RISCVSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  // Skip vectors.
  if (VT.isVector())
    return SDValue();

  if (!Subtarget.hasConditionalMoveFusion()) {
    // (select cond, x, (and x, c)) has custom lowering with Zicond.
    if ((!Subtarget.hasStdExtZicond() &&
         !Subtarget.hasVendorXVentanaCondOps()) ||
        N->getOpcode() != ISD::AND)
      return SDValue();

    // Maybe harmful when condition code has multiple use.
    if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
      return SDValue();

    // Maybe harmful when VT is wider than XLen.
    if (VT.getSizeInBits() > Subtarget.getXLen())
      return SDValue();
  }

  if ((Slct.getOpcode() != ISD::SELECT &&
       Slct.getOpcode() != RISCVISD::SELECT_CC) ||
      !Slct.hasOneUse())
    return SDValue();

  // The identity constant we look for: -1 for AND, 0 for OR/XOR/ADD/SUB.
  auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
    return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
  };

  bool SwapSelectOps;
  // RISCVISD::SELECT_CC carries (lhs, rhs, cc) before the two values.
  unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
  SDValue TrueVal = Slct.getOperand(1 + OpOffset);
  SDValue FalseVal = Slct.getOperand(2 + OpOffset);
  SDValue NonConstantVal;
  if (isZeroOrAllOnes(TrueVal, AllOnes)) {
    SwapSelectOps = false;
    NonConstantVal = FalseVal;
  } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
    SwapSelectOps = true;
    NonConstantVal = TrueVal;
  } else
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  TrueVal = OtherOp;
  FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
  // Unless SwapSelectOps says the condition should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  if (Slct.getOpcode() == RISCVISD::SELECT_CC)
    return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
                       {Slct.getOperand(0), Slct.getOperand(1),
                        Slct.getOperand(2), TrueVal, FalseVal});

  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
                     {Slct.getOperand(0), TrueVal, FalseVal});
}
14286
14287// Attempt combineSelectAndUse on each operand of a commutative operator N.
14289 bool AllOnes,
14290 const RISCVSubtarget &Subtarget) {
14291 SDValue N0 = N->getOperand(0);
14292 SDValue N1 = N->getOperand(1);
14293 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
14294 return Result;
14295 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
14296 return Result;
14297 return SDValue();
14298}
14299
// Transform (add (mul x, c0), c1) ->
//           (add (mul (add x, c1/c0), c0), c1%c0).
// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
// that should be excluded is when c0*(c1/c0) is simm12, which will lead
// to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (mul (add x, c1/c0), c0).
// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
    return SDValue();
  // The first operand node must be a MUL and has no other use.
  SDValue N0 = N->getOperand(0);
  if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
    return SDValue();
  // Check if c0 and c1 match above conditions.
  auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!N0C || !N1C)
    return SDValue();
  // If N0C has multiple uses it's possible one of the cases in
  // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
  // in an infinite loop.
  if (!N0C->hasOneUse())
    return SDValue();
  int64_t C0 = N0C->getSExtValue();
  int64_t C1 = N1C->getSExtValue();
  int64_t CA, CB;
  // Nothing to gain for trivial multipliers or when c1 already fits simm12.
  if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
    return SDValue();
  // Search for proper CA (non-zero) and CB that both are simm12.
  if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
      !isInt<12>(C0 * (C1 / C0))) {
    CA = C1 / C0;
    CB = C1 % C0;
  } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
             isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
    CA = C1 / C0 + 1;
    CB = C1 % C0 - C0;
  } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
             isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
    CA = C1 / C0 - 1;
    CB = C1 % C0 + C0;
  } else
    return SDValue();
  // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
  SDLoc DL(N);
  SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
                             DAG.getSignedConstant(CA, DL, VT));
  SDValue New1 =
      DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
  return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
}
14366
// add (zext, zext) -> zext (add (zext, zext))
// sub (zext, zext) -> sext (sub (zext, zext))
// mul (zext, zext) -> zext (mul (zext, zext))
// sdiv (zext, zext) -> zext (sdiv (zext, zext))
// udiv (zext, zext) -> zext (udiv (zext, zext))
// srem (zext, zext) -> zext (srem (zext, zext))
// urem (zext, zext) -> zext (urem (zext, zext))
//
// where the sum of the extend widths match, and the range of the bin op
// fits inside the width of the narrower bin op. (For profitability on rvv, we
// use a power of two for both inner and outer extend.)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {

  EVT VT = N->getValueType(0);
  if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();
  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  SDValue Src0 = N0.getOperand(0);
  SDValue Src1 = N1.getOperand(0);
  EVT SrcVT = Src0.getValueType();
  // Both sources must have the same legal type, at least i8 elements, and
  // be at most half the width of the result (so the half-width intermediate
  // type below cannot overflow).
  if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
      SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
      SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
    return SDValue();

  LLVMContext &C = *DAG.getContext();
  EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
  EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());

  Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
  Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);

  // Src0 and Src1 are zero extended, so they're always positive if signed.
  //
  // sub can produce a negative from two positive operands, so it needs sign
  // extended. Other nodes produce a positive from two positive operands, so
  // zero extend instead.
  unsigned OuterExtend =
      N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;

  return DAG.getNode(
      OuterExtend, SDLoc(N), VT,
      DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
}
14418
// Try to turn (add (xor bool, 1) -1) into (neg bool).
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // RHS should be -1.
  if (!isAllOnesConstant(N1))
    return SDValue();

  // Look for (xor X, 1).
  if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
    return SDValue();

  // First xor input should be 0 or 1: all bits above bit 0 must be known
  // zero for the fold to be valid.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
    return SDValue();

  // Emit a negate of the setcc.
  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                     N0.getOperand(0));
}
14443
14446 const RISCVSubtarget &Subtarget) {
14447 SelectionDAG &DAG = DCI.DAG;
14448 if (SDValue V = combineAddOfBooleanXor(N, DAG))
14449 return V;
14450 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
14451 return V;
14452 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
14453 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
14454 return V;
14455 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14456 return V;
14457 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14458 return V;
14459 if (SDValue V = combineBinOpOfZExt(N, DAG))
14460 return V;
14461
14462 // fold (add (select lhs, rhs, cc, 0, y), x) ->
14463 // (select lhs, rhs, cc, x, (add x, y))
14464 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14465}
14466
// Try to turn a sub boolean RHS and constant LHS into an addi.
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Require a constant LHS.
  auto *N0C = dyn_cast<ConstantSDNode>(N0);
  if (!N0C)
    return SDValue();

  // All our optimizations involve subtracting 1 from the immediate and forming
  // an ADDI. Make sure the new immediate is valid for an ADDI.
  APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
  if (!ImmValMinus1.isSignedIntN(12))
    return SDValue();

  SDValue NewLHS;
  if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
    // (sub constant, (setcc x, y, eq/neq)) ->
    // (add (setcc x, y, neq/eq), constant - 1)
    ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
    EVT SetCCOpVT = N1.getOperand(0).getValueType();
    if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
      return SDValue();
    CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
    NewLHS =
        DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
  } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
             N1.getOperand(0).getOpcode() == ISD::SETCC) {
    // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
    // Since setcc returns a bool the xor is equivalent to 1-setcc.
    NewLHS = N1.getOperand(0);
  } else
    return SDValue();

  SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
  return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
}
14507
// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
// valid with Y=3, while 0b0000_1000_0000_0100 is not.
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  // orc.b is provided by Zbb.
  if (!Subtarget.hasStdExtZbb())
    return SDValue();

  EVT VT = N->getValueType(0);

  if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  if (N0->getOpcode() != ISD::SHL)
    return SDValue();

  auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (!ShAmtCLeft)
    return SDValue();
  // Y in the pattern above; the left shift amount is 8-Y.
  unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();

  if (ShiftedAmount >= 8)
    return SDValue();

  SDValue LeftShiftOperand = N0->getOperand(0);
  SDValue RightShiftOperand = N1;

  if (ShiftedAmount != 0) { // Right operand must be a right shift.
    if (N1->getOpcode() != ISD::SRL)
      return SDValue();
    auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
      return SDValue();
    RightShiftOperand = N1.getOperand(0);
  }

  // At least one shift should have a single use.
  if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
    return SDValue();

  // Both shifts must be of the same value X.
  if (LeftShiftOperand != RightShiftOperand)
    return SDValue();

  APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
  Mask <<= ShiftedAmount;
  // Check that X has indeed the right shape (only the Y-th bit can be set in
  // every byte).
  if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
    return SDValue();

  return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
}
14564
14566 const RISCVSubtarget &Subtarget) {
14567 if (SDValue V = combineSubOfBoolean(N, DAG))
14568 return V;
14569
14570 EVT VT = N->getValueType(0);
14571 SDValue N0 = N->getOperand(0);
14572 SDValue N1 = N->getOperand(1);
14573 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
14574 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
14575 isNullConstant(N1.getOperand(1))) {
14576 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14577 if (CCVal == ISD::SETLT) {
14578 SDLoc DL(N);
14579 unsigned ShAmt = N0.getValueSizeInBits() - 1;
14580 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
14581 DAG.getConstant(ShAmt, DL, VT));
14582 }
14583 }
14584
14585 if (SDValue V = combineBinOpOfZExt(N, DAG))
14586 return V;
14587 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
14588 return V;
14589
14590 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
14591 // (select lhs, rhs, cc, x, (sub x, y))
14592 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
14593}
14594
// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
// Legalizing setcc can introduce xors like this. Doing this transform reduces
// the number of xors and may allow the xor to fold into a branch condition.
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool IsAnd = N->getOpcode() == ISD::AND;

  if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  SDValue N01 = N0.getOperand(1);
  SDValue N11 = N1.getOperand(1);

  // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
  // (xor X, -1) based on the upper bits of the other operand being 0. If the
  // operation is And, allow one of the Xors to use -1.
  if (isOneConstant(N01)) {
    if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
      return SDValue();
  } else if (isOneConstant(N11)) {
    // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
    if (!(IsAnd && isAllOnesConstant(N01)))
      return SDValue();
  } else
    return SDValue();

  EVT VT = N->getValueType(0);

  SDValue N00 = N0.getOperand(0);
  SDValue N10 = N1.getOperand(0);

  // The LHS of the xors needs to be 0/1: all bits above bit 0 must be known
  // zero on both.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
    return SDValue();

  // Invert the opcode and insert a new xor.
  SDLoc DL(N);
  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
  SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
  return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
}
14641
// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
// value to an unsigned value. This will be lowered to vmax and series of
// vnclipu instructions later. This can be extended to other truncated types
// other than i8 by replacing 256 and 255 with the equivalent constants for the
// type.
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  EVT SrcVT = N0.getValueType();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
    return SDValue();

  if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
    return SDValue();

  SDValue Cond = N0.getOperand(0);
  SDValue True = N0.getOperand(1);
  SDValue False = N0.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC)
    return SDValue();

  // FIXME: Support the version of this pattern with the select operands
  // swapped.
  ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
  if (CCVal != ISD::SETULT)
    return SDValue();

  SDValue CondLHS = Cond.getOperand(0);
  SDValue CondRHS = Cond.getOperand(1);

  // The compared value must be the value being selected when in range.
  if (CondLHS != True)
    return SDValue();

  unsigned ScalarBits = VT.getScalarSizeInBits();

  // FIXME: Support other constants.
  // The setult bound must be exactly one past the unsigned max of the
  // truncated type (e.g. 256 for i8).
  ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
  if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
    return SDValue();

  // The out-of-range value must be (sext (setgt X, 0)): 0 for negative X,
  // all-ones (== unsigned max after truncation) for positive X.
  if (False.getOpcode() != ISD::SIGN_EXTEND)
    return SDValue();

  False = False.getOperand(0);

  if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
    return SDValue();

  ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
  if (!FalseRHSC || !FalseRHSC->isZero())
    return SDValue();

  ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
  if (CCVal2 != ISD::SETGT)
    return SDValue();

  // Emit the signed to unsigned saturation pattern.
  SDLoc DL(N);
  SDValue Max =
      DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
  SDValue Min =
      DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
                  DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
  return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
}
14711
14713 const RISCVSubtarget &Subtarget) {
14714 SDValue N0 = N->getOperand(0);
14715 EVT VT = N->getValueType(0);
14716
14717 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
14718 // extending X. This is safe since we only need the LSB after the shift and
14719 // shift amounts larger than 31 would produce poison. If we wait until
14720 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14721 // to use a BEXT instruction.
14722 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
14723 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
14724 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14725 SDLoc DL(N0);
14726 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14727 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14728 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14729 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
14730 }
14731
14732 return combineTruncSelectToSMaxUSat(N, DAG);
14733}
14734
// Combines two comparison operations and a logic operation into one selection
// operation (min, max) and logic operation. Returns the new constructed node
// if conditions for optimization are satisfied.
static SDValue performANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  SDValue N0 = N->getOperand(0);
  // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
  // extending X. This is safe since we only need the LSB after the shift and
  // shift amounts larger than 31 would produce poison. If we wait until
  // type legalization, we'll create RISCVISD::SRLW and we can't recover it
  // to use a BEXT instruction.
  if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
      N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
      N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.hasOneUse()) {
    SDLoc DL(N);
    SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
    SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
    SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
                              DAG.getConstant(1, DL, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
  }

  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
    return V;

  if (DCI.isAfterLegalizeDAG())
    if (SDValue V = combineDeMorganOfBoolean(N, DAG))
      return V;

  // fold (and (select lhs, rhs, cc, -1, y), x) ->
  //      (select lhs, rhs, cc, x, (and x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
}
14775
// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
// FIXME: Generalize to other binary operators with same operand.
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
                                SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::OR && "Unexpected opcode");

  // The select idiom is (or (czero_eqz TrueV, Cond), (czero_nez FalseV, Cond)).
  if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
      N1.getOpcode() != RISCVISD::CZERO_NEZ ||
      !N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // Should have the same condition.
  SDValue Cond = N0.getOperand(1);
  if (Cond != N1.getOperand(1))
    return SDValue();

  SDValue TrueV = N0.getOperand(0);
  SDValue FalseV = N1.getOperand(0);

  // Both arms must be (xor X, 1) so the xor can be hoisted past the select.
  if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
      TrueV.getOperand(1) != FalseV.getOperand(1) ||
      !isOneConstant(TrueV.getOperand(1)) ||
      !TrueV.hasOneUse() || !FalseV.hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Rebuild the select on the xor's inputs, then apply the xor once on the
  // result.
  SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
                              Cond);
  SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
                              Cond);
  SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
  return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
}
14811
14813 const RISCVSubtarget &Subtarget) {
14814 SelectionDAG &DAG = DCI.DAG;
14815
14816 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14817 return V;
14818 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14819 return V;
14820
14821 if (DCI.isAfterLegalizeDAG())
14822 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14823 return V;
14824
14825 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
14826 // We may be able to pull a common operation out of the true and false value.
14827 SDValue N0 = N->getOperand(0);
14828 SDValue N1 = N->getOperand(1);
14829 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
14830 return V;
14831 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
14832 return V;
14833
14834 // fold (or (select cond, 0, y), x) ->
14835 // (select cond, x, (or x, y))
14836 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14837}
14838
// DAG combine for ISD::XOR on RISC-V: BSET pre-promotion (Zbs), rolw
// formation, setcc constant adjustment, reduction folds, and the
// select-and-use fold. Returns SDValue() when nothing matched.
14840 const RISCVSubtarget &Subtarget) {
14841 SDValue N0 = N->getOperand(0);
14842 SDValue N1 = N->getOperand(1);
14843
14844 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
14845 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
14846 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
14847 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14848 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
14849 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
14850 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14851 SDLoc DL(N);
14852 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14853 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14854 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
14855 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
14856 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14857 }
14858
14859 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
14860 // NOTE: Assumes ROL being legal means ROLW is legal.
14861 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14862 if (N0.getOpcode() == RISCVISD::SLLW &&
14864 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
14865 SDLoc DL(N);
14866 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
14867 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
14868 }
14869
14870 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
14871 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
14872 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
14873 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14874 if (ConstN00 && CC == ISD::SETLT) {
14875 EVT VT = N0.getValueType();
14876 SDLoc DL(N0);
14877 const APInt &Imm = ConstN00->getAPIntValue();
// Only profitable when the adjusted constant still fits in a simm12.
14878 if ((Imm + 1).isSignedIntN(12))
14879 return DAG.getSetCC(DL, VT, N0.getOperand(1),
14880 DAG.getConstant(Imm + 1, DL, VT), CC);
14881 }
14882 }
14883
14884 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14885 return V;
14886 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14887 return V;
14888
14889 // fold (xor (select cond, 0, y), x) ->
14890 // (select cond, x, (xor x, y))
14891 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14892}
14893
14894// Try to expand a scalar multiply to a faster sequence.
// Replaces (mul X, C) with shift/add sequences, using Zba/XTHeadBa
// sh1add/sh2add/sh3add (SHL_ADD) where available. Returns SDValue() when
// no profitable expansion was found.
14897 const RISCVSubtarget &Subtarget) {
14898
14899 EVT VT = N->getValueType(0);
14900
14901 // LI + MUL is usually smaller than the alternative sequence.
14903 return SDValue();
14904
14905 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14906 return SDValue();
14907
// Only handle multiplies of the native register width.
14908 if (VT != Subtarget.getXLenVT())
14909 return SDValue();
14910
14911 const bool HasShlAdd =
14912 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14913
14914 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
14915 if (!CNode)
14916 return SDValue();
14917 uint64_t MulAmt = CNode->getZExtValue();
14918
14919 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
14920 // We're adding additional uses of X here, and in principle, we should be freezing
14921 // X before doing so. However, adding freeze here causes real regressions, and no
14922 // other target properly freezes X in these cases either.
14923 SDValue X = N->getOperand(0);
14924
14925 if (HasShlAdd) {
14926 for (uint64_t Divisor : {3, 5, 9}) {
14927 if (MulAmt % Divisor != 0)
14928 continue;
14929 uint64_t MulAmt2 = MulAmt / Divisor;
14930 // 3/5/9 * 2^N -> shl (shXadd X, X), N
14931 if (isPowerOf2_64(MulAmt2)) {
14932 SDLoc DL(N);
// NOTE(review): this declaration shadows the outer X above; it is
// redundant but harmless.
14933 SDValue X = N->getOperand(0);
14934 // Put the shift first if we can fold a zext into the
14935 // shift forming a slli.uw.
14936 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
14937 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
14938 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
14939 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14940 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
14941 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
14942 Shl);
14943 }
14944 // Otherwise, put the shl second so that it can fold with following
14945 // instructions (e.g. sext or add).
14946 SDValue Mul359 =
14947 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14948 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14949 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
14950 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14951 }
14952
14953 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
14954 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
14955 SDLoc DL(N);
14956 SDValue Mul359 =
14957 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14958 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14959 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14960 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
14961 Mul359);
14962 }
14963 }
14964
14965 // If this is a power 2 + 2/4/8, we can use a shift followed by a single
14966 // shXadd. First check if this a sum of two power of 2s because that's
14967 // easy. Then count how many zeros are up to the first bit.
14968 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
14969 unsigned ScaleShift = llvm::countr_zero(MulAmt);
14970 if (ScaleShift >= 1 && ScaleShift < 4) {
14971 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
14972 SDLoc DL(N);
14973 SDValue Shift1 =
14974 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14975 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14976 DAG.getConstant(ScaleShift, DL, VT), Shift1);
14977 }
14978 }
14979
14980 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
14981 // This is the two instruction form, there are also three instruction
14982 // variants we could implement. e.g.
14983 // (2^(1,2,3) * 3,5,9 + 1) << C2
14984 // 2^(C1>3) * 3,5,9 +/- 1
14985 for (uint64_t Divisor : {3, 5, 9}) {
14986 uint64_t C = MulAmt - 1;
14987 if (C <= Divisor)
14988 continue;
14989 unsigned TZ = llvm::countr_zero(C);
14990 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14991 SDLoc DL(N);
14992 SDValue Mul359 =
14993 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14994 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14995 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14996 DAG.getConstant(TZ, DL, VT), X);
14997 }
14998 }
14999
15000 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
15001 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
15002 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
15003 if (ScaleShift >= 1 && ScaleShift < 4) {
15004 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
15005 SDLoc DL(N);
15006 SDValue Shift1 =
15007 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
15008 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
15009 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
15010 DAG.getConstant(ScaleShift, DL, VT), X));
15011 }
15012 }
15013
15014 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
15015 for (uint64_t Offset : {3, 5, 9}) {
15016 if (isPowerOf2_64(MulAmt + Offset)) {
15017 SDLoc DL(N);
15018 SDValue Shift1 =
15019 DAG.getNode(ISD::SHL, DL, VT, X,
15020 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
15021 SDValue Mul359 =
15022 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
15023 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
15024 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
15025 }
15026 }
15027 }
15028
15029 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
15030 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
15031 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
15032 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
15033 SDLoc DL(N);
15034 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15035 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
15036 SDValue Shift2 =
15037 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15038 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
15039 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
15040 }
15041
15042 if (HasShlAdd) {
15043 for (uint64_t Divisor : {3, 5, 9}) {
15044 if (MulAmt % Divisor != 0)
15045 continue;
15046 uint64_t MulAmt2 = MulAmt / Divisor;
15047 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
15048 // of 25 which happen to be quite common.
15049 for (uint64_t Divisor2 : {3, 5, 9}) {
15050 if (MulAmt2 % Divisor2 != 0)
15051 continue;
15052 uint64_t MulAmt3 = MulAmt2 / Divisor2;
15053 if (isPowerOf2_64(MulAmt3)) {
15054 SDLoc DL(N);
15055 SDValue Mul359A =
15056 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
15057 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
15058 SDValue Mul359B = DAG.getNode(
15059 RISCVISD::SHL_ADD, DL, VT, Mul359A,
15060 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
15061 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
15062 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
15063 }
15064 }
15065 }
15066 }
15067
15068 return SDValue();
15069}
15070
15071// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
15072// (bitcast (sra (v2Xi16 (bitcast X)), 15))
15073// Same for other equivalent types with other equivalent constants.
15075 EVT VT = N->getValueType(0);
15076 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15077
15078 // Do this for legal vectors unless they are i1 or i8 vectors.
15079 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
15080 return SDValue();
15081
15082 if (N->getOperand(0).getOpcode() != ISD::AND ||
15083 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
15084 return SDValue();
15085
15086 SDValue And = N->getOperand(0);
15087 SDValue Srl = And.getOperand(0);
15088
// V1 = mul constant, V2 = and-mask, V3 = shift amount; all must be splats.
15089 APInt V1, V2, V3;
15090 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
15091 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
15093 return SDValue();
15094
// Required shape: mul by low-half mask, and-mask selecting bit 0 of each
// half, shift by half-width minus one.
15095 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
15096 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
15097 V3 != (HalfSize - 1))
15098 return SDValue();
15099
15100 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
15101 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
15102 VT.getVectorElementCount() * 2);
15103 SDLoc DL(N);
15104 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
15105 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
15106 DAG.getConstant(HalfSize - 1, DL, HalfVT));
15107 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
15108}
15109
// DAG combine for ISD::MUL. Scalar multiplies are dispatched to expandMul;
// vector multiplies look for the vmadd/vnmsub idioms below, then generic
// zext folds.
15112 const RISCVSubtarget &Subtarget) {
15113 EVT VT = N->getValueType(0);
15114 if (!VT.isVector())
15115 return expandMul(N, DAG, DCI, Subtarget);
15116
15117 SDLoc DL(N);
15118 SDValue N0 = N->getOperand(0);
15119 SDValue N1 = N->getOperand(1);
15120 SDValue MulOper;
15121 unsigned AddSubOpc;
15122
15123 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
15124 // (mul x, add (y, 1)) -> (add x, (mul x, y))
15125 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
15126 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
// Matches (add z, 1) or (sub 1, z); on success sets AddSubOpc and
// MulOper (= z) as side effects.
15127 auto IsAddSubWith1 = [&](SDValue V) -> bool {
15128 AddSubOpc = V->getOpcode();
15129 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
15130 SDValue Opnd = V->getOperand(1);
15131 MulOper = V->getOperand(0);
15132 if (AddSubOpc == ISD::SUB)
15133 std::swap(Opnd, MulOper);
15134 if (isOneOrOneSplat(Opnd))
15135 return true;
15136 }
15137 return false;
15138 };
15139
15140 if (IsAddSubWith1(N0)) {
15141 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
15142 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
15143 }
15144
15145 if (IsAddSubWith1(N1)) {
15146 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
15147 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
15148 }
15149
15150 if (SDValue V = combineBinOpOfZExt(N, DAG))
15151 return V;
15152
15154 return V;
15155
15156 return SDValue();
15157}
15158
15159/// According to the property that indexed load/store instructions zero-extend
15160/// their indices, try to narrow the type of index operand.
/// \param N [in,out] index vector; replaced with a narrower equivalent on
///        success.
/// \param IndexType must be an unsigned (zero-extended) index type.
/// \returns true if \p N was replaced.
15161static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
15162 if (isIndexTypeSigned(IndexType))
15163 return false;
15164
15165 if (!N->hasOneUse())
15166 return false;
15167
15168 EVT VT = N.getValueType();
15169 SDLoc DL(N);
15170
15171 // In general, what we're doing here is seeing if we can sink a truncate to
15172 // a smaller element type into the expression tree building our index.
15173 // TODO: We can generalize this and handle a bunch more cases if useful.
15174
15175 // Narrow a buildvector to the narrowest element type. This requires less
15176 // work and less register pressure at high LMUL, and creates smaller constants
15177 // which may be cheaper to materialize.
15178 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
15179 KnownBits Known = DAG.computeKnownBits(N);
15180 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
15181 LLVMContext &C = *DAG.getContext();
15182 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
15183 if (ResultVT.bitsLT(VT.getVectorElementType())) {
15184 N = DAG.getNode(ISD::TRUNCATE, DL,
15185 VT.changeVectorElementType(ResultVT), N);
15186 return true;
15187 }
15188 }
15189
15190 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
15191 if (N.getOpcode() != ISD::SHL)
15192 return false;
15193
15194 SDValue N0 = N.getOperand(0);
15195 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
15197 return false;
15198 if (!N0->hasOneUse())
15199 return false;
15200
// The shift amount must be a constant splat.
15201 APInt ShAmt;
15202 SDValue N1 = N.getOperand(1);
15203 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
15204 return false;
15205
15206 SDValue Src = N0.getOperand(0);
15207 EVT SrcVT = Src.getValueType();
15208 unsigned SrcElen = SrcVT.getScalarSizeInBits();
15209 unsigned ShAmtV = ShAmt.getZExtValue();
15210 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
// Vector elements must be at least 8 bits wide.
15211 NewElen = std::max(NewElen, 8U);
15212
15213 // Skip if NewElen is not narrower than the original extended type.
15214 if (NewElen >= N0.getValueType().getScalarSizeInBits())
15215 return false;
15216
15217 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
15218 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
15219
15220 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
15221 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
15222 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
15223 return true;
15224}
15225
15226// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
15227// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
15228// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
15229// can become a sext.w instead of a shift pair.
15231 const RISCVSubtarget &Subtarget) {
15232 SDValue N0 = N->getOperand(0);
15233 SDValue N1 = N->getOperand(1);
15234 EVT VT = N->getValueType(0);
15235 EVT OpVT = N0.getValueType();
15236
// Only applies to 64-bit compares on RV64.
15237 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
15238 return SDValue();
15239
15240 // RHS needs to be a constant.
15241 auto *N1C = dyn_cast<ConstantSDNode>(N1);
15242 if (!N1C)
15243 return SDValue();
15244
15245 // LHS needs to be (and X, 0xffffffff).
15246 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
15247 !isa<ConstantSDNode>(N0.getOperand(1)) ||
15248 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
15249 return SDValue();
15250
15251 // Looking for an equality compare.
15252 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
15253 if (!isIntEqualitySetCC(Cond))
15254 return SDValue();
15255
15256 // Don't do this if the sign bit is provably zero, it will be turned back into
15257 // an AND.
15258 APInt SignMask = APInt::getOneBitSet(64, 31);
15259 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
15260 return SDValue();
15261
15262 const APInt &C1 = N1C->getAPIntValue();
15263
15264 SDLoc dl(N);
15265 // If the constant is larger than 2^32 - 1 it is impossible for both sides
15266 // to be equal.
15267 if (C1.getActiveBits() > 32)
15268 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
15269
// Note: N implicitly converts to an SDLoc here.
15270 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
15271 N0.getOperand(0), DAG.getValueType(MVT::i32));
15272 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
15273 dl, OpVT), Cond);
15274}
15275
15276static SDValue
// Combine for ISD::SIGN_EXTEND_INREG: fold fmv_x_anyexth and 32-bit shifts
// into dedicated RISC-V nodes when profitable.
15278 const RISCVSubtarget &Subtarget) {
15279 SDValue Src = N->getOperand(0);
15280 EVT VT = N->getValueType(0);
15281 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15282 unsigned Opc = Src.getOpcode();
15283
15284 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
15285 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
15286 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
15287 Subtarget.hasStdExtZfhmin())
15288 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
15289 Src.getOperand(0));
15290
15291 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
15292 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
15293 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
15294 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
15295 return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0),
15296 Src.getOperand(1));
15297
15298 return SDValue();
15299}
15300
15301namespace {
15302// Forward declaration of the structure holding the necessary information to
15303// apply a combine.
15304struct CombineResult;
15305
// Bitmask of extension kinds an operand supports; the values are disjoint
// bits so multiple kinds can be OR'd together.
15306enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
15307/// Helper class for folding sign/zero extensions.
15308/// In particular, this class is used for the following combines:
15309/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
15310/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
15311/// mul | mul_vl -> vwmul(u) | vwmul_su
15312/// shl | shl_vl -> vwsll
15313/// fadd -> vfwadd | vfwadd_w
15314/// fsub -> vfwsub | vfwsub_w
15315/// fmul -> vfwmul
15316/// An object of this class represents an operand of the operation we want to
15317/// combine.
15318/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
15319/// NodeExtensionHelper for `a` and one for `b`.
15320///
15321/// This class abstracts away how the extension is materialized and
15322/// how its number of users affect the combines.
15323///
15324/// In particular:
15325/// - VWADD_W is conceptually == add(op0, sext(op1))
15326/// - VWADDU_W == add(op0, zext(op1))
15327/// - VWSUB_W == sub(op0, sext(op1))
15328/// - VWSUBU_W == sub(op0, zext(op1))
15329/// - VFWADD_W == fadd(op0, fpext(op1))
15330/// - VFWSUB_W == fsub(op0, fpext(op1))
15331/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
15332/// zext|sext(smaller_value).
15333struct NodeExtensionHelper {
15334 /// Records if this operand is like being zero extended.
15335 bool SupportsZExt;
15336 /// Records if this operand is like being sign extended.
15337 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
15338 /// instance, a splat constant (e.g., 3), would support being both sign and
15339 /// zero extended.
15340 bool SupportsSExt;
15341 /// Records if this operand is like being floating-Point extended.
15342 bool SupportsFPExt;
15343 /// This boolean captures whether we care if this operand would still be
15344 /// around after the folding happens.
15345 bool EnforceOneUse;
15346 /// Original value that this NodeExtensionHelper represents.
15347 SDValue OrigOperand;
15348
15349 /// Get the value feeding the extension or the value itself.
15350 /// E.g., for zext(a), this would return a.
15351 SDValue getSource() const {
15352 switch (OrigOperand.getOpcode()) {
15353 case ISD::ZERO_EXTEND:
15354 case ISD::SIGN_EXTEND:
15355 case RISCVISD::VSEXT_VL:
15356 case RISCVISD::VZEXT_VL:
// For the extension opcodes above, operand 0 is the narrow source value.
15358 return OrigOperand.getOperand(0);
15359 default:
// Not a recognized extension: the operand itself is the source.
15360 return OrigOperand;
15361 }
15362 }
15363
15364 /// Check if this instance represents a splat.
15365 bool isSplat() const {
15366 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
15367 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
15368 }
15369
15370 /// Get the extended opcode.
// Maps an ExtKind to the corresponding RISCVISD VL extension opcode.
15371 unsigned getExtOpc(ExtKind SupportsExt) const {
15372 switch (SupportsExt) {
15373 case ExtKind::SExt:
15374 return RISCVISD::VSEXT_VL;
15375 case ExtKind::ZExt:
15376 return RISCVISD::VZEXT_VL;
15377 case ExtKind::FPExt:
15379 }
15380 llvm_unreachable("Unknown ExtKind enum");
15381 }
15382
15383 /// Get or create a value that can feed \p Root with the given extension \p
15384 /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
15385 /// operand. \see ::getSource().
15386 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
15387 const RISCVSubtarget &Subtarget,
15388 std::optional<ExtKind> SupportsExt) const {
15389 if (!SupportsExt.has_value())
15390 return OrigOperand;
15391
15392 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
15393
15394 SDValue Source = getSource();
15395 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
// Already narrow enough: no extension node is required.
15396 if (Source.getValueType() == NarrowVT)
15397 return Source;
15398
15399 // vfmadd_vl -> vfwmadd_vl can take bf16 operands
15400 if (Source.getValueType().getVectorElementType() == MVT::bf16) {
15401 assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 &&
15402 Root->getOpcode() == RISCVISD::VFMADD_VL);
15403 return Source;
15404 }
15405
15406 unsigned ExtOpc = getExtOpc(*SupportsExt);
15407
15408 // If we need an extension, we should be changing the type.
15409 SDLoc DL(OrigOperand);
15410 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
15411 switch (OrigOperand.getOpcode()) {
15412 case ISD::ZERO_EXTEND:
15413 case ISD::SIGN_EXTEND:
15414 case RISCVISD::VSEXT_VL:
15415 case RISCVISD::VZEXT_VL:
// Re-extend the source to the narrow type with the requested extension.
15417 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
15418 case ISD::SPLAT_VECTOR:
15419 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
15421 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
15422 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
15424 Source = Source.getOperand(1);
15425 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
15426 Source = Source.getOperand(0);
15427 assert(Source.getValueType() == NarrowVT.getVectorElementType());
15428 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
15429 DAG.getUNDEF(NarrowVT), Source, VL);
15430 default:
15431 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
15432 // and that operand should already have the right NarrowVT so no
15433 // extension should be required at this point.
15434 llvm_unreachable("Unsupported opcode");
15435 }
15436 }
15437
15438 /// Helper function to get the narrow type for \p Root.
15439 /// The narrow type is the type of \p Root where we divided the size of each
15440 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
15441 /// \pre Both the narrow type and the original type should be legal.
15442 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
15443 MVT VT = Root->getSimpleValueType(0);
15444
15445 // Determine the narrow size.
15446 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15447
15448 MVT EltVT = SupportsExt == ExtKind::FPExt
15449 ? MVT::getFloatingPointVT(NarrowSize)
15450 : MVT::getIntegerVT(NarrowSize);
15451
15452 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
15453 "Trying to extend something we can't represent");
15454 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
15455 return NarrowVT;
15456 }
15457
15458 /// Get the opcode to materialize:
15459 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
15460 static unsigned getSExtOpcode(unsigned Opcode) {
15461 switch (Opcode) {
15462 case ISD::ADD:
15463 case RISCVISD::ADD_VL:
// OR is treated like ADD: it is only a supported root when the disjoint
// flag is set (see isSupportedRoot).
15466 case ISD::OR:
15467 return RISCVISD::VWADD_VL;
15468 case ISD::SUB:
15469 case RISCVISD::SUB_VL:
15472 return RISCVISD::VWSUB_VL;
15473 case ISD::MUL:
15474 case RISCVISD::MUL_VL:
15475 return RISCVISD::VWMUL_VL;
15476 default:
15477 llvm_unreachable("Unexpected opcode");
15478 }
15479 }
15480
15481 /// Get the opcode to materialize:
15482 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
15483 static unsigned getZExtOpcode(unsigned Opcode) {
15484 switch (Opcode) {
15485 case ISD::ADD:
15486 case RISCVISD::ADD_VL:
// OR is treated like ADD; only a supported root when disjoint.
15489 case ISD::OR:
15490 return RISCVISD::VWADDU_VL;
15491 case ISD::SUB:
15492 case RISCVISD::SUB_VL:
15495 return RISCVISD::VWSUBU_VL;
15496 case ISD::MUL:
15497 case RISCVISD::MUL_VL:
15498 return RISCVISD::VWMULU_VL;
15499 case ISD::SHL:
15500 case RISCVISD::SHL_VL:
// Widening shift (vwsll) only exists for the zero-extended form.
15501 return RISCVISD::VWSLL_VL;
15502 default:
15503 llvm_unreachable("Unexpected opcode");
15504 }
15505 }
15506
15507 /// Get the opcode to materialize:
15508 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
15509 static unsigned getFPExtOpcode(unsigned Opcode) {
15510 switch (Opcode) {
15511 case RISCVISD::FADD_VL:
15513 return RISCVISD::VFWADD_VL;
15514 case RISCVISD::FSUB_VL:
15516 return RISCVISD::VFWSUB_VL;
15517 case RISCVISD::FMUL_VL:
15518 return RISCVISD::VFWMUL_VL;
15520 return RISCVISD::VFWMADD_VL;
15522 return RISCVISD::VFWMSUB_VL;
15524 return RISCVISD::VFWNMADD_VL;
15526 return RISCVISD::VFWNMSUB_VL;
15527 default:
15528 llvm_unreachable("Unexpected opcode");
15529 }
15530 }
15531
15532 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
15533 /// newOpcode(a, b).
15534 static unsigned getSUOpcode(unsigned Opcode) {
15535 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
15536 "SU is only supported for MUL");
15537 return RISCVISD::VWMULSU_VL;
15538 }
15539
15540 /// Get the opcode to materialize
15541 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
// "_W" forms extend only the second operand; the first is already wide.
15542 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
15543 switch (Opcode) {
15544 case ISD::ADD:
15545 case RISCVISD::ADD_VL:
15546 case ISD::OR:
15547 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
15549 case ISD::SUB:
15550 case RISCVISD::SUB_VL:
15551 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
15553 case RISCVISD::FADD_VL:
15554 return RISCVISD::VFWADD_W_VL;
15555 case RISCVISD::FSUB_VL:
15556 return RISCVISD::VFWSUB_W_VL;
15557 default:
15558 llvm_unreachable("Unexpected opcode");
15559 }
15560 }
15561
// Signature of one combine attempt: given the root and its two wrapped
// operands, produce the folding recipe to apply, or std::nullopt.
15562 using CombineToTry = std::function<std::optional<CombineResult>(
15563 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
15564 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
15565 const RISCVSubtarget &)>;
15566
15567 /// Check if this node needs to be fully folded or extended for all users.
15568 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
15569
// Set SupportsSExt/SupportsZExt/EnforceOneUse for a splat operand
// (ISD::SPLAT_VECTOR or RISCVISD::VMV_V_X_VL) based on the known bits of
// the splatted scalar.
15570 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
15571 const RISCVSubtarget &Subtarget) {
15572 unsigned Opc = OrigOperand.getOpcode();
15573 MVT VT = OrigOperand.getSimpleValueType();
15574
15575 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
15576 "Unexpected Opcode");
15577
15578 // The passthru must be undef for tail agnostic.
15579 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
15580 return;
15581
15582 // Get the scalar value.
15583 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
15584 : OrigOperand.getOperand(1);
15585
15586 // See if we have enough sign bits or zero bits in the scalar to use a
15587 // widening opcode by splatting to smaller element size.
15588 unsigned EltBits = VT.getScalarSizeInBits();
15589 unsigned ScalarBits = Op.getValueSizeInBits();
15590 // If we're not getting all bits from the element, we need special handling.
15591 if (ScalarBits < EltBits) {
15592 // This should only occur on RV32.
15593 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
15594 !Subtarget.is64Bit() && "Unexpected splat");
15595 // vmv.v.x sign extends narrow inputs.
15596 SupportsSExt = true;
15597
15598 // If the input is positive, then sign extend is also zero extend.
15599 if (DAG.SignBitIsZero(Op))
15600 SupportsZExt = true;
15601
15602 EnforceOneUse = false;
15603 return;
15604 }
15605
15606 unsigned NarrowSize = EltBits / 2;
15607 // If the narrow type cannot be expressed with a legal VMV,
15608 // this is not a valid candidate.
15609 if (NarrowSize < 8)
15610 return;
15611
15612 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
15613 SupportsSExt = true;
15614
15615 if (DAG.MaskedValueIsZero(Op,
15616 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
15617 SupportsZExt = true;
15618
// A splat has no extension node to remove, so multiple uses are fine.
15619 EnforceOneUse = false;
15620 }
15621
15622 bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
15623 const RISCVSubtarget &Subtarget) {
15624 // Any f16 extension will need zvfh
15625 if (NarrowEltVT == MVT::f16 && !Subtarget.hasVInstructionsF16())
15626 return false;
15627 // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
15628 // zvfbfwma
15629 if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||
15630 Root->getOpcode() != RISCVISD::VFMADD_VL))
15631 return false;
15632 return true;
15633 }
15634
15635 /// Helper method to set the various fields of this struct based on the
15636 /// type of \p Root.
// Resets all Supports* flags, then sets the ones the operand's opcode and
// value properties justify.
15637 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
15638 const RISCVSubtarget &Subtarget) {
15639 SupportsZExt = false;
15640 SupportsSExt = false;
15641 SupportsFPExt = false;
15642 EnforceOneUse = true;
15643 unsigned Opc = OrigOperand.getOpcode();
15644 // For the nodes we handle below, we end up using their inputs directly: see
15645 // getSource(). However since they either don't have a passthru or we check
15646 // that their passthru is undef, we can safely ignore their mask and VL.
15647 switch (Opc) {
15648 case ISD::ZERO_EXTEND:
15649 case ISD::SIGN_EXTEND: {
15650 MVT VT = OrigOperand.getSimpleValueType();
15651 if (!VT.isVector())
15652 break;
15653
15654 SDValue NarrowElt = OrigOperand.getOperand(0);
15655 MVT NarrowVT = NarrowElt.getSimpleValueType();
15656 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
15657 if (NarrowVT.getVectorElementType() == MVT::i1)
15658 break;
15659
15660 SupportsZExt = Opc == ISD::ZERO_EXTEND;
15661 SupportsSExt = Opc == ISD::SIGN_EXTEND;
15662 break;
15663 }
15664 case RISCVISD::VZEXT_VL:
15665 SupportsZExt = true;
15666 break;
15667 case RISCVISD::VSEXT_VL:
15668 SupportsSExt = true;
15669 break;
15671 MVT NarrowEltVT =
15673 if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
15674 break;
15675 SupportsFPExt = true;
15676 break;
15677 }
15678 case ISD::SPLAT_VECTOR:
// Splats need knowledge of the scalar's bits; handled separately.
15680 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
15681 break;
15682 case RISCVISD::VFMV_V_F_VL: {
15683 MVT VT = OrigOperand.getSimpleValueType();
15684
// Passthru must be undef for tail agnostic.
15685 if (!OrigOperand.getOperand(0).isUndef())
15686 break;
15687
15688 SDValue Op = OrigOperand.getOperand(1);
15689 if (Op.getOpcode() != ISD::FP_EXTEND)
15690 break;
15691
15692 if (!isSupportedFPExtend(Root, Op.getOperand(0).getSimpleValueType(),
15693 Subtarget))
15694 break;
15695
// The scalar must be exactly half the element width for a single widening.
15696 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15697 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
15698 if (NarrowSize != ScalarBits)
15699 break;
15700
15701 SupportsFPExt = true;
15702 break;
15703 }
15704 default:
15705 break;
15706 }
15707 }
15708
15709 /// Check if \p Root supports any extension folding combines.
15710 static bool isSupportedRoot(const SDNode *Root,
15711 const RISCVSubtarget &Subtarget) {
15712 switch (Root->getOpcode()) {
15713 case ISD::ADD:
15714 case ISD::SUB:
15715 case ISD::MUL: {
15716 return Root->getValueType(0).isScalableVector();
15717 }
15718 case ISD::OR: {
// OR only behaves like ADD when its operands share no set bits.
15719 return Root->getValueType(0).isScalableVector() &&
15720 Root->getFlags().hasDisjoint();
15721 }
15722 // Vector Widening Integer Add/Sub/Mul Instructions
15723 case RISCVISD::ADD_VL:
15724 case RISCVISD::MUL_VL:
15727 case RISCVISD::SUB_VL:
15730 // Vector Widening Floating-Point Add/Sub/Mul Instructions
15731 case RISCVISD::FADD_VL:
15732 case RISCVISD::FSUB_VL:
15733 case RISCVISD::FMUL_VL:
15736 return true;
15737 case ISD::SHL:
// Widening shift (vwsll) requires the Zvbb extension.
15738 return Root->getValueType(0).isScalableVector() &&
15739 Subtarget.hasStdExtZvbb();
15740 case RISCVISD::SHL_VL:
15741 return Subtarget.hasStdExtZvbb();
15746 return true;
15747 default:
15748 return false;
15749 }
15750 }
15751
  /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
  /// Populates the extension-support flags (Supports{Z,S,FP}Ext) describing
  /// which extension folds this operand can take part in.
  NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
                      const RISCVSubtarget &Subtarget) {
    assert(isSupportedRoot(Root, Subtarget) &&
           "Trying to build an helper with an "
           "unsupported root");
    assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
    OrigOperand = Root->getOperand(OperandIdx);

    unsigned Opc = Root->getOpcode();
    switch (Opc) {
    // We consider
    // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
    // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
    // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
      if (OperandIdx == 1) {
        SupportsZExt =
        SupportsSExt =
        SupportsFPExt =
          // There's no existing extension here, so we don't have to worry about
          // making sure it gets removed.
          EnforceOneUse = false;
          break;
      }
      [[fallthrough]];
    default:
      // Everything else: inspect the operand itself for foldable extends.
      fillUpExtensionSupport(Root, DAG, Subtarget);
      break;
    }
  }
15792
15793 /// Helper function to get the Mask and VL from \p Root.
15794 static std::pair<SDValue, SDValue>
15795 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
15796 const RISCVSubtarget &Subtarget) {
15797 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
15798 switch (Root->getOpcode()) {
15799 case ISD::ADD:
15800 case ISD::SUB:
15801 case ISD::MUL:
15802 case ISD::OR:
15803 case ISD::SHL: {
15804 SDLoc DL(Root);
15805 MVT VT = Root->getSimpleValueType(0);
15806 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
15807 }
15808 default:
15809 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
15810 }
15811 }
15812
  /// Helper function to check if \p N is commutative with respect to the
  /// foldings that are supported by this class.
  static bool isCommutative(const SDNode *N) {
    switch (N->getOpcode()) {
    // Add/mul (and disjoint or, which behaves like add) may have their
    // operands swapped while searching for foldable extends.
    case ISD::ADD:
    case ISD::MUL:
    case ISD::OR:
    case RISCVISD::ADD_VL:
    case RISCVISD::MUL_VL:
    case RISCVISD::FADD_VL:
    case RISCVISD::FMUL_VL:
      return true;
    // Subtraction and shifts are order-sensitive.
    case ISD::SUB:
    case RISCVISD::SUB_VL:
    case RISCVISD::FSUB_VL:
    case ISD::SHL:
    case RISCVISD::SHL_VL:
      return false;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }
15845
15846 /// Get a list of combine to try for folding extensions in \p Root.
15847 /// Note that each returned CombineToTry function doesn't actually modify
15848 /// anything. Instead they produce an optional CombineResult that if not None,
15849 /// need to be materialized for the combine to be applied.
15850 /// \see CombineResult::materialize.
15851 /// If the related CombineToTry function returns std::nullopt, that means the
15852 /// combine didn't match.
15853 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
15854};
15855
15856/// Helper structure that holds all the necessary information to materialize a
15857/// combine that does some extension folding.
15858struct CombineResult {
15859 /// Opcode to be generated when materializing the combine.
15860 unsigned TargetOpcode;
15861 // No value means no extension is needed.
15862 std::optional<ExtKind> LHSExt;
15863 std::optional<ExtKind> RHSExt;
15864 /// Root of the combine.
15865 SDNode *Root;
15866 /// LHS of the TargetOpcode.
15867 NodeExtensionHelper LHS;
15868 /// RHS of the TargetOpcode.
15869 NodeExtensionHelper RHS;
15870
15871 CombineResult(unsigned TargetOpcode, SDNode *Root,
15872 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
15873 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
15874 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15875 LHS(LHS), RHS(RHS) {}
15876
15877 /// Return a value that uses TargetOpcode and that can be used to replace
15878 /// Root.
15879 /// The actual replacement is *not* done in that method.
15880 SDValue materialize(SelectionDAG &DAG,
15881 const RISCVSubtarget &Subtarget) const {
15882 SDValue Mask, VL, Passthru;
15883 std::tie(Mask, VL) =
15884 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15885 switch (Root->getOpcode()) {
15886 default:
15887 Passthru = Root->getOperand(2);
15888 break;
15889 case ISD::ADD:
15890 case ISD::SUB:
15891 case ISD::MUL:
15892 case ISD::OR:
15893 case ISD::SHL:
15894 Passthru = DAG.getUNDEF(Root->getValueType(0));
15895 break;
15896 }
15897 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
15898 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15899 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15900 Passthru, Mask, VL);
15901 }
15902};
15903
15904/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15905/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15906/// are zext) and LHS and RHS can be folded into Root.
15907/// AllowExtMask define which form `ext` can take in this pattern.
15908///
15909/// \note If the pattern can match with both zext and sext, the returned
15910/// CombineResult will feature the zext result.
15911///
15912/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15913/// can be used to apply the pattern.
15914static std::optional<CombineResult>
15915canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
15916 const NodeExtensionHelper &RHS,
15917 uint8_t AllowExtMask, SelectionDAG &DAG,
15918 const RISCVSubtarget &Subtarget) {
15919 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
15920 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
15921 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
15922 /*RHSExt=*/{ExtKind::ZExt});
15923 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
15924 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
15925 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15926 /*RHSExt=*/{ExtKind::SExt});
15927 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
15928 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
15929 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
15930 /*RHSExt=*/{ExtKind::FPExt});
15931 return std::nullopt;
15932}
15933
15934/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15935/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15936/// are zext) and LHS and RHS can be folded into Root.
15937///
15938/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15939/// can be used to apply the pattern.
15940static std::optional<CombineResult>
15941canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
15942 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15943 const RISCVSubtarget &Subtarget) {
15944 return canFoldToVWWithSameExtensionImpl(
15945 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15946 Subtarget);
15947}
15948
15949/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
15950///
15951/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15952/// can be used to apply the pattern.
15953static std::optional<CombineResult>
15954canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
15955 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15956 const RISCVSubtarget &Subtarget) {
15957 if (RHS.SupportsFPExt)
15958 return CombineResult(
15959 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
15960 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
15961
15962 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
15963 // sext/zext?
15964 // Control this behavior behind an option (AllowSplatInVW_W) for testing
15965 // purposes.
15966 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
15967 return CombineResult(
15968 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
15969 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
15970 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
15971 return CombineResult(
15972 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
15973 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
15974 return std::nullopt;
15975}
15976
15977/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
15978///
15979/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15980/// can be used to apply the pattern.
15981static std::optional<CombineResult>
15982canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15983 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15984 const RISCVSubtarget &Subtarget) {
15985 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15986 Subtarget);
15987}
15988
15989/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
15990///
15991/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15992/// can be used to apply the pattern.
15993static std::optional<CombineResult>
15994canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15995 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15996 const RISCVSubtarget &Subtarget) {
15997 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15998 Subtarget);
15999}
16000
16001/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
16002///
16003/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
16004/// can be used to apply the pattern.
16005static std::optional<CombineResult>
16006canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
16007 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
16008 const RISCVSubtarget &Subtarget) {
16009 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
16010 Subtarget);
16011}
16012
16013/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
16014///
16015/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
16016/// can be used to apply the pattern.
16017static std::optional<CombineResult>
16018canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
16019 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
16020 const RISCVSubtarget &Subtarget) {
16021
16022 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
16023 return std::nullopt;
16024 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
16025 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
16026 /*RHSExt=*/{ExtKind::ZExt});
16027}
16028
/// Populate the ordered list of CombineToTry callbacks that apply to
/// \p Root's opcode. Each callback is attempted in turn by the caller.
NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
  SmallVector<CombineToTry> Strategies;
  switch (Root->getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case ISD::OR:
  case RISCVISD::ADD_VL:
  case RISCVISD::SUB_VL:
  case RISCVISD::FADD_VL:
  case RISCVISD::FSUB_VL:
    // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
    Strategies.push_back(canFoldToVWWithSameExtension);
    // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
    Strategies.push_back(canFoldToVW_W);
    break;
  case RISCVISD::FMUL_VL:
    Strategies.push_back(canFoldToVWWithSameExtension);
    break;
  case ISD::MUL:
  case RISCVISD::MUL_VL:
    // mul -> vwmul(u)
    Strategies.push_back(canFoldToVWWithSameExtension);
    // mul -> vwmulsu
    Strategies.push_back(canFoldToVW_SU);
    break;
  case ISD::SHL:
  case RISCVISD::SHL_VL:
    // shl -> vwsll
    Strategies.push_back(canFoldToVWWithZEXT);
    break;
    // vwadd_w|vwsub_w -> vwadd|vwsub
    Strategies.push_back(canFoldToVWWithSEXT);
    break;
    // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
    Strategies.push_back(canFoldToVWWithZEXT);
    break;
    // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
    Strategies.push_back(canFoldToVWWithFPEXT);
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }
  return Strategies;
}
16084} // End anonymous namespace.
16085
/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
/// The supported combines are:
/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
/// mul | mul_vl -> vwmul(u) | vwmul_su
/// shl | shl_vl -> vwsll
/// fadd_vl -> vfwadd | vfwadd_w
/// fsub_vl -> vfwsub | vfwsub_w
/// fmul_vl -> vfwmul
/// vwadd_w(u) -> vwadd(u)
/// vwsub_w(u) -> vwsub(u)
/// vfwadd_w -> vfwadd
/// vfwsub_w -> vfwsub
                                   const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  if (DCI.isBeforeLegalize())
    return SDValue();

  if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
    return SDValue();

  // Worklist of candidate roots. The combine is all-or-nothing over this
  // web of nodes: if any root fails to match, nothing is changed.
  SmallVector<SDNode *> Worklist;
  SmallSet<SDNode *, 8> Inserted;
  Worklist.push_back(N);
  Inserted.insert(N);
  SmallVector<CombineResult> CombinesToApply;

  while (!Worklist.empty()) {
    SDNode *Root = Worklist.pop_back_val();

    NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
    NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
    // Queue every user of Op's origin that must also be combined; fail if
    // some user cannot participate in the fold.
    auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
                                &Inserted](const NodeExtensionHelper &Op) {
      if (Op.needToPromoteOtherUsers()) {
        for (SDUse &Use : Op.OrigOperand->uses()) {
          SDNode *TheUser = Use.getUser();
          if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
            return false;
          // We only support the first 2 operands of FMA.
          if (Use.getOperandNo() >= 2)
            return false;
          if (Inserted.insert(TheUser).second)
            Worklist.push_back(TheUser);
        }
      }
      return true;
    };

    // Control the compile time by limiting the number of node we look at in
    // total.
    if (Inserted.size() > ExtensionMaxWebSize)
      return SDValue();

        NodeExtensionHelper::getSupportedFoldings(Root);

    assert(!FoldingStrategies.empty() && "Nothing to be folded");
    bool Matched = false;
    // For commutative roots, retry with the operands swapped.
    for (int Attempt = 0;
         (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
         ++Attempt) {

      for (NodeExtensionHelper::CombineToTry FoldingStrategy :
           FoldingStrategies) {
        std::optional<CombineResult> Res =
            FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
        if (Res) {
          Matched = true;
          CombinesToApply.push_back(*Res);
          // All the inputs that are extended need to be folded, otherwise
          // we would be leaving the old input (since it is may still be used),
          // and the new one.
          if (Res->LHSExt.has_value())
            if (!AppendUsersIfNeeded(LHS))
              return SDValue();
          if (Res->RHSExt.has_value())
            if (!AppendUsersIfNeeded(RHS))
              return SDValue();
          break;
        }
      }
      std::swap(LHS, RHS);
    }
    // Right now we do an all or nothing approach.
    if (!Matched)
      return SDValue();
  }
  // Store the value for the replacement of the input node separately.
  SDValue InputRootReplacement;
  // We do the RAUW after we materialize all the combines, because some replaced
  // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
  // some of these nodes may appear in the NodeExtensionHelpers of some of the
  // yet-to-be-visited CombinesToApply roots.
  ValuesToReplace.reserve(CombinesToApply.size());
  for (CombineResult Res : CombinesToApply) {
    SDValue NewValue = Res.materialize(DAG, Subtarget);
    if (!InputRootReplacement) {
      assert(Res.Root == N &&
             "First element is expected to be the current node");
      InputRootReplacement = NewValue;
    } else {
      ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
    }
  }
  for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
    DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
    DCI.AddToWorklist(OldNewValues.second.getNode());
  }
  return InputRootReplacement;
}
16200
// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
// y will be the Passthru and cond will be the Mask.
  unsigned Opc = N->getOpcode();

  SDValue Y = N->getOperand(0);
  SDValue MergeOp = N->getOperand(1);
  unsigned MergeOpc = MergeOp.getOpcode();

  // The wide operand's RHS must be a select-like node (vmerge or vselect).
  if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
    return SDValue();

  SDValue X = MergeOp->getOperand(1);

  // The merge must disappear after the fold, so it may have no other users.
  if (!MergeOp.hasOneUse())
    return SDValue();

  // Passthru should be undef
  SDValue Passthru = N->getOperand(2);
  if (!Passthru.isUndef())
    return SDValue();

  // Mask should be all ones
  SDValue Mask = N->getOperand(3);
  if (Mask.getOpcode() != RISCVISD::VMSET_VL)
    return SDValue();

  // False value of MergeOp should be all zeros
  SDValue Z = MergeOp->getOperand(2);

  // Look through an insert_subvector into zero/undef.
  if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
      (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
    Z = Z.getOperand(1);

  if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
    return SDValue();

  // Rebuild the widening op with X as the RHS, Y as the passthru and the
  // select condition as the mask.
  return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
                     {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
                     N->getFlags());
}
16245
                                           const RISCVSubtarget &Subtarget) {
  [[maybe_unused]] unsigned Opc = N->getOpcode();

  // First try to fold the wide op into a fully widening one (e.g.
  // vwadd_w(u) -> vwadd(u), see combineOp_VLToVWOp_VL); otherwise try to
  // fold a zero-select on the RHS into the mask.
  if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
    return V;

  return combineVWADDSUBWSelect(N, DCI.DAG);
}
16258
// Helper function for performMemPairCombine.
// Try to combine the memory loads/stores LSNode1 and LSNode2
// into a single memory pair operation.
                                 LSBaseSDNode *LSNode2, SDValue BasePtr,
                                 uint64_t Imm) {
  SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};

  // Bail out if one node is a predecessor of the other; merging them would
  // create a cycle in the DAG.
  if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
      SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
    return SDValue();

  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();

  // The new operation has twice the width.
  MVT XLenVT = Subtarget.getXLenVT();
  EVT MemVT = LSNode1->getMemoryVT();
  EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
  MachineMemOperand *MMO = LSNode1->getMemOperand();
      MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);

  if (LSNode1->getOpcode() == ISD::LOAD) {
    auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
    unsigned Opcode;
    // Zero-extending i32 pair loads use TH_LWUD; other i32 pair loads use
    // TH_LWD. 64-bit pairs always use TH_LDD.
    if (MemVT == MVT::i32)
      Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
    else
      Opcode = RISCVISD::TH_LDD;

    SDValue Res = DAG.getMemIntrinsicNode(
        Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
        {LSNode1->getChain(), BasePtr,
         DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
        NewMemVT, NewMMO);

    // Split the two loaded values (each paired with the chain) back out so
    // both original loads can be replaced.
    SDValue Node1 =
        DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
    SDValue Node2 =
        DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));

    DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
    return Node1;
  } else {
    unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;

    SDValue Res = DAG.getMemIntrinsicNode(
        Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
        {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
         BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
        NewMemVT, NewMMO);

    DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
    return Res;
  }
}
16317
// Try to combine two adjacent loads/stores to a single pair instruction from
// the XTHeadMemPair vendor extension.
  SelectionDAG &DAG = DCI.DAG;
  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();

  // Target does not support load/store pair.
  if (!Subtarget.hasVendorXTHeadMemPair())
    return SDValue();

  LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
  EVT MemVT = LSNode1->getMemoryVT();
  // The pointer operand index: 1 for loads, 2 for stores.
  unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;

  // No volatile, indexed or atomic loads/stores.
  if (!LSNode1->isSimple() || LSNode1->isIndexed())
    return SDValue();

  // Function to get a base + constant representation from a memory value.
  auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
    if (Ptr->getOpcode() == ISD::ADD)
      if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
        return {Ptr->getOperand(0), C1->getZExtValue()};
    return {Ptr, 0};
  };

  auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));

  // Scan other users of the chain for a load/store of the same kind that
  // could form the second half of the pair.
  SDValue Chain = N->getOperand(0);
  for (SDUse &Use : Chain->uses()) {
    if (Use.getUser() != N && Use.getResNo() == 0 &&
        Use.getUser()->getOpcode() == N->getOpcode()) {
      LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());

      // No volatile, indexed or atomic loads/stores.
      if (!LSNode2->isSimple() || LSNode2->isIndexed())
        continue;

      // Check if LSNode1 and LSNode2 have the same type and extension.
      if (LSNode1->getOpcode() == ISD::LOAD)
        if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
            cast<LoadSDNode>(LSNode1)->getExtensionType())
          continue;

      if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
        continue;

      auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));

      // Check if the base pointer is the same for both instruction.
      if (Base1 != Base2)
        continue;

      // Check if the offsets match the XTHeadMemPair encoding constraints.
      bool Valid = false;
      if (MemVT == MVT::i32) {
        // Check for adjacent i32 values and a 2-bit index.
        if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
          Valid = true;
      } else if (MemVT == MVT::i64) {
        // Check for adjacent i64 values and a 2-bit index.
        if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
          Valid = true;
      }

      if (!Valid)
        continue;

      // Try to combine.
      if (SDValue Res =
              tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
        return Res;
    }
  }

  return SDValue();
}
16397
// Fold
// (fp_to_int (froundeven X)) -> fcvt X, rne
// (fp_to_int (ftrunc X)) -> fcvt X, rtz
// (fp_to_int (ffloor X)) -> fcvt X, rdn
// (fp_to_int (fceil X)) -> fcvt X, rup
// (fp_to_int (fround X)) -> fcvt X, rmm
// (fp_to_int (frint X)) -> fcvt X
                                       const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Src = N->getOperand(0);

  // Don't do this for strict-fp Src.
  if (Src->isStrictFPOpcode())
    return SDValue();

  // Ensure the FP type is legal.
  if (!TLI.isTypeLegal(Src.getValueType()))
    return SDValue();

  // Don't do this for f16 with Zfhmin and not Zfh.
  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
    return SDValue();

  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
  // If the result is invalid, we didn't find a foldable instruction.
  if (FRM == RISCVFPRndMode::Invalid)
    return SDValue();

  SDLoc DL(N);
  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
  EVT VT = N->getValueType(0);

  // Vector case: fold into a VFCVT_..._VL node directly.
  if (VT.isVector() && TLI.isTypeLegal(VT)) {
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcContainerVT = SrcVT;
    MVT ContainerVT = VT.getSimpleVT();
    SDValue XVal = Src.getOperand(0);

    // For widening and narrowing conversions we just combine it into a
    // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
    // end up getting lowered to their appropriate pseudo instructions based on
    // their operand types
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
        VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
      return SDValue();

    // Make fixed-length vectors scalable first
    if (SrcVT.isFixedLengthVector()) {
      SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
      XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
      ContainerVT =
          getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
    }

    auto [Mask, VL] =
        getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);

    SDValue FpToInt;
    if (FRM == RISCVFPRndMode::RTZ) {
      // Use the dedicated trunc static rounding mode if we're truncating so we
      // don't need to generate calls to fsrmi/fsrm
      unsigned Opc =
      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
    } else {
      unsigned Opc =
      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
                            DAG.getTargetConstant(FRM, DL, XLenVT), VL);
    }

    // If converted from fixed-length to scalable, convert back
    if (VT.isFixedLengthVector())
      FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);

    return FpToInt;
  }

  // Only handle XLen or i32 types. Other types narrower than XLen will
  // eventually be legalized to XLenVT.
  if (VT != MVT::i32 && VT != XLenVT)
    return SDValue();

  unsigned Opc;
  if (VT == XLenVT)
    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
  else

  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
                                DAG.getTargetConstant(FRM, DL, XLenVT));
  return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
}
16496
// Fold
// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
                                          const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT XLenVT = Subtarget.getXLenVT();

  // Only handle XLen types. Other types narrower than XLen will eventually be
  // legalized to XLenVT.
  EVT DstVT = N->getValueType(0);
  if (DstVT != XLenVT)
    return SDValue();

  SDValue Src = N->getOperand(0);

  // Don't do this for strict-fp Src.
  if (Src->isStrictFPOpcode())
    return SDValue();

  // Ensure the FP type is also legal.
  if (!TLI.isTypeLegal(Src.getValueType()))
    return SDValue();

  // Don't do this for f16 with Zfhmin and not Zfh.
  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
    return SDValue();

  EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();

  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
  if (FRM == RISCVFPRndMode::Invalid)
    return SDValue();

  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;

  unsigned Opc;
  if (SatVT == DstVT)
    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
  else
    return SDValue();
  // FIXME: Support other SatVTs by clamping before or after the conversion.

  Src = Src.getOperand(0);

  SDLoc DL(N);
  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
                                DAG.getTargetConstant(FRM, DL, XLenVT));

  // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
  // extend.
  if (Opc == RISCVISD::FCVT_WU_RV64)
    FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);

  // RISC-V FP-to-int conversions saturate to the destination register size, but
  // don't produce 0 for nan.
  // Comparing Src with itself under SETUO is true exactly when Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
16564
// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
// smaller than XLenVT.
                                        const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");

  SDValue Src = N->getOperand(0);
  if (Src.getOpcode() != ISD::BSWAP)
    return SDValue();

  EVT VT = N->getValueType(0);
  // bitreverse(bswap X) reverses the bits within each byte, i.e. BREV8, but
  // only for power-of-2 scalar sizes narrower than XLen.
  if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
      !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
}
16583
                                         const RISCVSubtarget &Subtarget) {
  // Fold:
  // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)

  // Check if its first operand is a vp.load.
  auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
  if (!VPLoad)
    return SDValue();

  EVT LoadVT = VPLoad->getValueType(0);
  // We do not have a strided_load version for masks, and the evl of vp.reverse
  // and vp.load should always be the same.
  if (!LoadVT.getVectorElementType().isByteSized() ||
      N->getOperand(2) != VPLoad->getVectorLength() ||
      !N->getOperand(0).hasOneUse())
    return SDValue();

  // Check if the mask of outer vp.reverse are all 1's.
  if (!isOneOrOneSplat(N->getOperand(1)))
    return SDValue();

  SDValue LoadMask = VPLoad->getMask();
  // If Mask is all ones, then load is unmasked and can be reversed.
  if (!isOneOrOneSplat(LoadMask)) {
    // If the mask is not all ones, we can reverse the load if the mask was also
    // reversed by an unmasked vp.reverse with the same EVL.
    if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
        !isOneOrOneSplat(LoadMask.getOperand(1)) ||
        LoadMask.getOperand(2) != VPLoad->getVectorLength())
      return SDValue();
    LoadMask = LoadMask.getOperand(0);
  }

  // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
  SDLoc DL(N);
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue NumElem = VPLoad->getVectorLength();
  uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;

  SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
                              DAG.getConstant(1, DL, XLenVT));
  SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
                              DAG.getConstant(ElemWidthByte, DL, XLenVT));
  SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
  // A negative stride walks the elements backwards from Base.
  SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);

  MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
      PtrInfo, VPLoad->getMemOperand()->getFlags(),
      LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());

  SDValue Ret = DAG.getStridedLoadVP(
      LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
      VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());

  // Transfer the original load's chain users to the new strided load.
  DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));

  return Ret;
}
16645
16647 const RISCVSubtarget &Subtarget) {
16648 // Fold:
16649 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
16650 // -1, MASK)
16651 auto *VPStore = cast<VPStoreSDNode>(N);
16652
16653 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
16654 return SDValue();
16655
16656 SDValue VPReverse = VPStore->getValue();
16657 EVT ReverseVT = VPReverse->getValueType(0);
16658
16659 // We do not have a strided_store version for masks, and the evl of vp.reverse
16660 // and vp.store should always be the same.
16661 if (!ReverseVT.getVectorElementType().isByteSized() ||
16662 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
16663 !VPReverse.hasOneUse())
16664 return SDValue();
16665
16666 SDValue StoreMask = VPStore->getMask();
16667 // If Mask is all ones, then load is unmasked and can be reversed.
16668 if (!isOneOrOneSplat(StoreMask)) {
16669 // If the mask is not all ones, we can reverse the store if the mask was
16670 // also reversed by an unmasked vp.reverse with the same EVL.
16671 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16672 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
16673 StoreMask.getOperand(2) != VPStore->getVectorLength())
16674 return SDValue();
16675 StoreMask = StoreMask.getOperand(0);
16676 }
16677
16678 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
16679 SDLoc DL(N);
16680 MVT XLenVT = Subtarget.getXLenVT();
16681 SDValue NumElem = VPStore->getVectorLength();
16682 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
16683
16684 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
16685 DAG.getConstant(1, DL, XLenVT));
16686 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
16687 DAG.getConstant(ElemWidthByte, DL, XLenVT));
16688 SDValue Base =
16689 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
16690 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
16691
16693 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
16695 PtrInfo, VPStore->getMemOperand()->getFlags(),
16696 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
16697
16698 return DAG.getStridedStoreVP(
16699 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
16700 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
16701 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
16702 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
16703}
16704
16705// Peephole avgceil pattern.
16706// %1 = zext <N x i8> %a to <N x i32>
16707// %2 = zext <N x i8> %b to <N x i32>
16708// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
16709// %4 = add nuw nsw <N x i32> %3, %2
16710// %5 = lshr <N x i32> %4, splat (i32 1)
16711// %6 = trunc <N x i32> %5 to <N x i8>
16713 const RISCVSubtarget &Subtarget) {
16714 EVT VT = N->getValueType(0);
16715
16716 // Ignore fixed vectors.
16717 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16718 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
16719 return SDValue();
16720
16721 SDValue In = N->getOperand(0);
16722 SDValue Mask = N->getOperand(1);
16723 SDValue VL = N->getOperand(2);
16724
16725 // Input should be a vp_srl with same mask and VL.
16726 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
16727 In.getOperand(3) != VL)
16728 return SDValue();
16729
16730 // Shift amount should be 1.
16731 if (!isOneOrOneSplat(In.getOperand(1)))
16732 return SDValue();
16733
16734 // Shifted value should be a vp_add with same mask and VL.
16735 SDValue LHS = In.getOperand(0);
16736 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
16737 LHS.getOperand(3) != VL)
16738 return SDValue();
16739
16740 SDValue Operands[3];
16741
16742 // Matches another VP_ADD with same VL and Mask.
16743 auto FindAdd = [&](SDValue V, SDValue Other) {
16744 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
16745 V.getOperand(3) != VL)
16746 return false;
16747
16748 Operands[0] = Other;
16749 Operands[1] = V.getOperand(1);
16750 Operands[2] = V.getOperand(0);
16751 return true;
16752 };
16753
16754 // We need to find another VP_ADD in one of the operands.
16755 SDValue LHS0 = LHS.getOperand(0);
16756 SDValue LHS1 = LHS.getOperand(1);
16757 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
16758 return SDValue();
16759
16760 // Now we have three operands of two additions. Check that one of them is a
16761 // constant vector with ones.
16762 auto I = llvm::find_if(Operands,
16763 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
16764 if (I == std::end(Operands))
16765 return SDValue();
16766 // We found a vector with ones, move if it to the end of the Operands array.
16767 std::swap(*I, Operands[2]);
16768
16769 // Make sure the other 2 operands can be promoted from the result type.
16770 for (SDValue Op : drop_end(Operands)) {
16771 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
16772 Op.getOperand(2) != VL)
16773 return SDValue();
16774 // Input must be the same size or smaller than our result.
16775 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
16776 return SDValue();
16777 }
16778
16779 // Pattern is detected.
16780 // Rebuild the zero extends in case the inputs are smaller than our result.
16781 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
16782 Operands[0].getOperand(0), Mask, VL);
16783 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
16784 Operands[1].getOperand(0), Mask, VL);
16785 // Build a AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
16786 // mode.
16787 SDLoc DL(N);
16788 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
16789 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
16790}
16791
16792// Convert from one FMA opcode to another based on whether we are negating the
16793// multiply result and/or the accumulator.
16794// NOTE: Only supports RVV operations with VL.
16795static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
16796 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
16797 if (NegMul) {
16798 // clang-format off
16799 switch (Opcode) {
16800 default: llvm_unreachable("Unexpected opcode");
16801 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16802 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16803 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16804 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16809 }
16810 // clang-format on
16811 }
16812
16813 // Negating the accumulator changes ADD<->SUB.
16814 if (NegAcc) {
16815 // clang-format off
16816 switch (Opcode) {
16817 default: llvm_unreachable("Unexpected opcode");
16818 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16819 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16820 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16821 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16826 }
16827 // clang-format on
16828 }
16829
16830 return Opcode;
16831}
16832
16834 // Fold FNEG_VL into FMA opcodes.
16835 // The first operand of strict-fp is chain.
16836 bool IsStrict =
16837 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
16838 unsigned Offset = IsStrict ? 1 : 0;
16839 SDValue A = N->getOperand(0 + Offset);
16840 SDValue B = N->getOperand(1 + Offset);
16841 SDValue C = N->getOperand(2 + Offset);
16842 SDValue Mask = N->getOperand(3 + Offset);
16843 SDValue VL = N->getOperand(4 + Offset);
16844
16845 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
16846 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
16847 V.getOperand(2) == VL) {
16848 // Return the negated input.
16849 V = V.getOperand(0);
16850 return true;
16851 }
16852
16853 return false;
16854 };
16855
16856 bool NegA = invertIfNegative(A);
16857 bool NegB = invertIfNegative(B);
16858 bool NegC = invertIfNegative(C);
16859
16860 // If no operands are negated, we're done.
16861 if (!NegA && !NegB && !NegC)
16862 return SDValue();
16863
16864 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
16865 if (IsStrict)
16866 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
16867 {N->getOperand(0), A, B, C, Mask, VL});
16868 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
16869 VL);
16870}
16871
16874 const RISCVSubtarget &Subtarget) {
16875 SelectionDAG &DAG = DCI.DAG;
16876
16878 return V;
16879
16880 // FIXME: Ignore strict opcodes for now.
16881 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
16882 return SDValue();
16883
16884 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16885}
16886
16888 const RISCVSubtarget &Subtarget) {
16889 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
16890
16891 EVT VT = N->getValueType(0);
16892
16893 if (VT != Subtarget.getXLenVT())
16894 return SDValue();
16895
16896 if (!isa<ConstantSDNode>(N->getOperand(1)))
16897 return SDValue();
16898 uint64_t ShAmt = N->getConstantOperandVal(1);
16899
16900 SDValue N0 = N->getOperand(0);
16901
16902 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
16903 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
16904 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
16905 unsigned ExtSize =
16906 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
16907 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
16908 N0.getOperand(0).hasOneUse() &&
16909 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
16910 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
16911 if (LShAmt < ExtSize) {
16912 unsigned Size = VT.getSizeInBits();
16913 SDLoc ShlDL(N0.getOperand(0));
16914 SDValue Shl =
16915 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
16916 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
16917 SDLoc DL(N);
16918 return DAG.getNode(ISD::SRA, DL, VT, Shl,
16919 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
16920 }
16921 }
16922 }
16923
16924 if (ShAmt > 32 || VT != MVT::i64)
16925 return SDValue();
16926
16927 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
16928 // FIXME: Should this be a generic combine? There's a similar combine on X86.
16929 //
16930 // Also try these folds where an add or sub is in the middle.
16931 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
16932 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
16933 SDValue Shl;
16934 ConstantSDNode *AddC = nullptr;
16935
16936 // We might have an ADD or SUB between the SRA and SHL.
16937 bool IsAdd = N0.getOpcode() == ISD::ADD;
16938 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
16939 // Other operand needs to be a constant we can modify.
16940 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
16941 if (!AddC)
16942 return SDValue();
16943
16944 // AddC needs to have at least 32 trailing zeros.
16945 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
16946 return SDValue();
16947
16948 // All users should be a shift by constant less than or equal to 32. This
16949 // ensures we'll do this optimization for each of them to produce an
16950 // add/sub+sext_inreg they can all share.
16951 for (SDNode *U : N0->users()) {
16952 if (U->getOpcode() != ISD::SRA ||
16953 !isa<ConstantSDNode>(U->getOperand(1)) ||
16954 U->getConstantOperandVal(1) > 32)
16955 return SDValue();
16956 }
16957
16958 Shl = N0.getOperand(IsAdd ? 0 : 1);
16959 } else {
16960 // Not an ADD or SUB.
16961 Shl = N0;
16962 }
16963
16964 // Look for a shift left by 32.
16965 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
16966 Shl.getConstantOperandVal(1) != 32)
16967 return SDValue();
16968
16969 // We if we didn't look through an add/sub, then the shl should have one use.
16970 // If we did look through an add/sub, the sext_inreg we create is free so
16971 // we're only creating 2 new instructions. It's enough to only remove the
16972 // original sra+add/sub.
16973 if (!AddC && !Shl.hasOneUse())
16974 return SDValue();
16975
16976 SDLoc DL(N);
16977 SDValue In = Shl.getOperand(0);
16978
16979 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
16980 // constant.
16981 if (AddC) {
16982 SDValue ShiftedAddC =
16983 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
16984 if (IsAdd)
16985 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
16986 else
16987 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
16988 }
16989
16990 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
16991 DAG.getValueType(MVT::i32));
16992 if (ShAmt == 32)
16993 return SExt;
16994
16995 return DAG.getNode(
16996 ISD::SHL, DL, MVT::i64, SExt,
16997 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
16998}
16999
17000// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
17001// the result is used as the condition of a br_cc or select_cc we can invert,
17002// inverting the setcc is free, and Z is 0/1. Caller will invert the
17003// br_cc/select_cc.
17005 bool IsAnd = Cond.getOpcode() == ISD::AND;
17006 if (!IsAnd && Cond.getOpcode() != ISD::OR)
17007 return SDValue();
17008
17009 if (!Cond.hasOneUse())
17010 return SDValue();
17011
17012 SDValue Setcc = Cond.getOperand(0);
17013 SDValue Xor = Cond.getOperand(1);
17014 // Canonicalize setcc to LHS.
17015 if (Setcc.getOpcode() != ISD::SETCC)
17016 std::swap(Setcc, Xor);
17017 // LHS should be a setcc and RHS should be an xor.
17018 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
17019 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
17020 return SDValue();
17021
17022 // If the condition is an And, SimplifyDemandedBits may have changed
17023 // (xor Z, 1) to (not Z).
17024 SDValue Xor1 = Xor.getOperand(1);
17025 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
17026 return SDValue();
17027
17028 EVT VT = Cond.getValueType();
17029 SDValue Xor0 = Xor.getOperand(0);
17030
17031 // The LHS of the xor needs to be 0/1.
17033 if (!DAG.MaskedValueIsZero(Xor0, Mask))
17034 return SDValue();
17035
17036 // We can only invert integer setccs.
17037 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
17038 if (!SetCCOpVT.isScalarInteger())
17039 return SDValue();
17040
17041 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
17042 if (ISD::isIntEqualitySetCC(CCVal)) {
17043 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
17044 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
17045 Setcc.getOperand(1), CCVal);
17046 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
17047 // Invert (setlt 0, X) by converting to (setlt X, 1).
17048 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
17049 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
17050 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
17051 // (setlt X, 1) by converting to (setlt 0, X).
17052 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
17053 DAG.getConstant(0, SDLoc(Setcc), VT),
17054 Setcc.getOperand(0), CCVal);
17055 } else
17056 return SDValue();
17057
17058 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
17059 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
17060}
17061
// Perform common combines for BR_CC and SELECT_CC conditions.
// Updates LHS/RHS/CC in place and returns true if any simplification was
// applied; returns false (operands untouched) otherwise.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // As far as arithmetic right shift always saves the sign,
  // shift can be omitted.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
      LHS.getOpcode() == ISD::SRA) {
    LHS = LHS.getOperand(0);
    return true;
  }

  // The remaining folds are only valid for equality comparisons.
  if (!ISD::isIntEqualitySetCC(CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
      LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
    bool Invert = CCVal == ISD::SETEQ;
    CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
    if (Invert)
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    // Canonicalize the operands/condition for RISC-V branch selection.
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    CC = DAG.getCondCode(CCVal);
    return true;
  }

  // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
  if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    return true;
  }

  // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
  if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue LHS0 = LHS.getOperand(0);
    if (LHS0.getOpcode() == ISD::AND &&
        LHS0.getOperand(1).getOpcode() == ISD::Constant) {
      uint64_t Mask = LHS0.getConstantOperandVal(1);
      uint64_t ShAmt = LHS.getConstantOperandVal(1);
      // Only a single-bit mask at exactly the shifted position qualifies.
      if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
        // Test the bit via the sign after shifting it to the MSB.
        CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        CC = DAG.getCondCode(CCVal);

        ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
        LHS = LHS0.getOperand(0);
        if (ShAmt != 0)
          LHS =
              DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
        return true;
      }
    }
  }

  // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1.
  // This can occur when legalizing some floating point comparisons.
  APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
  if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
    CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
    CC = DAG.getCondCode(CCVal);
    RHS = DAG.getConstant(0, DL, LHS.getValueType());
    return true;
  }

  if (isNullConstant(RHS)) {
    // De Morgan on the boolean condition; the inverted CC compensates.
    if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      CC = DAG.getCondCode(CCVal);
      LHS = NewCond;
      return true;
    }
  }

  return false;
}
17150
17151// Fold
17152// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
17153// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
17154// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
17155// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
17157 SDValue TrueVal, SDValue FalseVal,
17158 bool Swapped) {
17159 bool Commutative = true;
17160 unsigned Opc = TrueVal.getOpcode();
17161 switch (Opc) {
17162 default:
17163 return SDValue();
17164 case ISD::SHL:
17165 case ISD::SRA:
17166 case ISD::SRL:
17167 case ISD::SUB:
17168 Commutative = false;
17169 break;
17170 case ISD::ADD:
17171 case ISD::OR:
17172 case ISD::XOR:
17173 break;
17174 }
17175
17176 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
17177 return SDValue();
17178
17179 unsigned OpToFold;
17180 if (FalseVal == TrueVal.getOperand(0))
17181 OpToFold = 0;
17182 else if (Commutative && FalseVal == TrueVal.getOperand(1))
17183 OpToFold = 1;
17184 else
17185 return SDValue();
17186
17187 EVT VT = N->getValueType(0);
17188 SDLoc DL(N);
17189 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
17190 EVT OtherOpVT = OtherOp.getValueType();
17191 SDValue IdentityOperand =
17192 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
17193 if (!Commutative)
17194 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
17195 assert(IdentityOperand && "No identity operand!");
17196
17197 if (Swapped)
17198 std::swap(OtherOp, IdentityOperand);
17199 SDValue NewSel =
17200 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
17201 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
17202}
17203
17204// This tries to get rid of `select` and `icmp` that are being used to handle
17205// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
17207 SDValue Cond = N->getOperand(0);
17208
17209 // This represents either CTTZ or CTLZ instruction.
17210 SDValue CountZeroes;
17211
17212 SDValue ValOnZero;
17213
17214 if (Cond.getOpcode() != ISD::SETCC)
17215 return SDValue();
17216
17217 if (!isNullConstant(Cond->getOperand(1)))
17218 return SDValue();
17219
17220 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
17221 if (CCVal == ISD::CondCode::SETEQ) {
17222 CountZeroes = N->getOperand(2);
17223 ValOnZero = N->getOperand(1);
17224 } else if (CCVal == ISD::CondCode::SETNE) {
17225 CountZeroes = N->getOperand(1);
17226 ValOnZero = N->getOperand(2);
17227 } else {
17228 return SDValue();
17229 }
17230
17231 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
17232 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
17233 CountZeroes = CountZeroes.getOperand(0);
17234
17235 if (CountZeroes.getOpcode() != ISD::CTTZ &&
17236 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
17237 CountZeroes.getOpcode() != ISD::CTLZ &&
17238 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
17239 return SDValue();
17240
17241 if (!isNullConstant(ValOnZero))
17242 return SDValue();
17243
17244 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
17245 if (Cond->getOperand(0) != CountZeroesArgument)
17246 return SDValue();
17247
17248 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
17249 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
17250 CountZeroes.getValueType(), CountZeroesArgument);
17251 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
17252 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
17253 CountZeroes.getValueType(), CountZeroesArgument);
17254 }
17255
17256 unsigned BitWidth = CountZeroes.getValueSizeInBits();
17257 SDValue BitWidthMinusOne =
17258 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
17259
17260 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
17261 CountZeroes, BitWidthMinusOne);
17262 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
17263}
17264
17266 const RISCVSubtarget &Subtarget) {
17267 SDValue Cond = N->getOperand(0);
17268 SDValue True = N->getOperand(1);
17269 SDValue False = N->getOperand(2);
17270 SDLoc DL(N);
17271 EVT VT = N->getValueType(0);
17272 EVT CondVT = Cond.getValueType();
17273
17274 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
17275 return SDValue();
17276
17277 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
17278 // BEXTI, where C is power of 2.
17279 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
17280 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
17281 SDValue LHS = Cond.getOperand(0);
17282 SDValue RHS = Cond.getOperand(1);
17283 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17284 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
17285 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
17286 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
17287 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
17288 return DAG.getSelect(DL, VT,
17289 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
17290 False, True);
17291 }
17292 }
17293 return SDValue();
17294}
17295
17296static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
17297 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
17298 return false;
17299
17300 SwapCC = false;
17301 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
17302 std::swap(TrueVal, FalseVal);
17303 SwapCC = true;
17304 }
17305
17306 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
17307 return false;
17308
17309 SDValue A = FalseVal.getOperand(0);
17310 SDValue B = FalseVal.getOperand(1);
17311 // Add is commutative, so check both orders
17312 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
17313 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
17314}
17315
17316/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
17317/// This allows us match a vadd.vv fed by a masked vrsub, which reduces
17318/// register pressure over the add followed by masked vsub sequence.
17320 SDLoc DL(N);
17321 EVT VT = N->getValueType(0);
17322 SDValue CC = N->getOperand(0);
17323 SDValue TrueVal = N->getOperand(1);
17324 SDValue FalseVal = N->getOperand(2);
17325
17326 bool SwapCC;
17327 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
17328 return SDValue();
17329
17330 SDValue Sub = SwapCC ? TrueVal : FalseVal;
17331 SDValue A = Sub.getOperand(0);
17332 SDValue B = Sub.getOperand(1);
17333
17334 // Arrange the select such that we can match a masked
17335 // vrsub.vi to perform the conditional negate
17336 SDValue NegB = DAG.getNegative(B, DL, VT);
17337 if (!SwapCC)
17338 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
17339 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
17340 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
17341}
17342
17344 const RISCVSubtarget &Subtarget) {
17345 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
17346 return Folded;
17347
17348 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
17349 return V;
17350
17351 if (Subtarget.hasConditionalMoveFusion())
17352 return SDValue();
17353
17354 SDValue TrueVal = N->getOperand(1);
17355 SDValue FalseVal = N->getOperand(2);
17356 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
17357 return V;
17358 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
17359}
17360
17361/// If we have a build_vector where each lane is binop X, C, where C
17362/// is a constant (but not necessarily the same constant on all lanes),
17363/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
17364/// We assume that materializing a constant build vector will be no more
17365/// expensive that performing O(n) binops.
17367 const RISCVSubtarget &Subtarget,
17368 const RISCVTargetLowering &TLI) {
17369 SDLoc DL(N);
17370 EVT VT = N->getValueType(0);
17371
17372 assert(!VT.isScalableVector() && "unexpected build vector");
17373
17374 if (VT.getVectorNumElements() == 1)
17375 return SDValue();
17376
17377 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
17378 if (!TLI.isBinOp(Opcode))
17379 return SDValue();
17380
17381 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
17382 return SDValue();
17383
17384 // This BUILD_VECTOR involves an implicit truncation, and sinking
17385 // truncates through binops is non-trivial.
17386 if (N->op_begin()->getValueType() != VT.getVectorElementType())
17387 return SDValue();
17388
17389 SmallVector<SDValue> LHSOps;
17390 SmallVector<SDValue> RHSOps;
17391 for (SDValue Op : N->ops()) {
17392 if (Op.isUndef()) {
17393 // We can't form a divide or remainder from undef.
17394 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
17395 return SDValue();
17396
17397 LHSOps.push_back(Op);
17398 RHSOps.push_back(Op);
17399 continue;
17400 }
17401
17402 // TODO: We can handle operations which have an neutral rhs value
17403 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
17404 // of profit in a more explicit manner.
17405 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
17406 return SDValue();
17407
17408 LHSOps.push_back(Op.getOperand(0));
17409 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
17410 !isa<ConstantFPSDNode>(Op.getOperand(1)))
17411 return SDValue();
17412 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
17413 // have different LHS and RHS types.
17414 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
17415 return SDValue();
17416
17417 RHSOps.push_back(Op.getOperand(1));
17418 }
17419
17420 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
17421 DAG.getBuildVector(VT, DL, RHSOps));
17422}
17423
17425 const RISCVSubtarget &Subtarget,
17426 const RISCVTargetLowering &TLI) {
17427 SDValue InVec = N->getOperand(0);
17428 SDValue InVal = N->getOperand(1);
17429 SDValue EltNo = N->getOperand(2);
17430 SDLoc DL(N);
17431
17432 EVT VT = InVec.getValueType();
17433 if (VT.isScalableVector())
17434 return SDValue();
17435
17436 if (!InVec.hasOneUse())
17437 return SDValue();
17438
17439 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
17440 // move the insert_vector_elts into the arms of the binop. Note that
17441 // the new RHS must be a constant.
17442 const unsigned InVecOpcode = InVec->getOpcode();
17443 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
17444 InVal.hasOneUse()) {
17445 SDValue InVecLHS = InVec->getOperand(0);
17446 SDValue InVecRHS = InVec->getOperand(1);
17447 SDValue InValLHS = InVal->getOperand(0);
17448 SDValue InValRHS = InVal->getOperand(1);
17449
17451 return SDValue();
17452 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
17453 return SDValue();
17454 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
17455 // have different LHS and RHS types.
17456 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
17457 return SDValue();
17459 InVecLHS, InValLHS, EltNo);
17461 InVecRHS, InValRHS, EltNo);
17462 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
17463 }
17464
17465 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
17466 // move the insert_vector_elt to the source operand of the concat_vector.
17467 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
17468 return SDValue();
17469
17470 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
17471 if (!IndexC)
17472 return SDValue();
17473 unsigned Elt = IndexC->getZExtValue();
17474
17475 EVT ConcatVT = InVec.getOperand(0).getValueType();
17476 if (ConcatVT.getVectorElementType() != InVal.getValueType())
17477 return SDValue();
17478 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
17479 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
17480
17481 unsigned ConcatOpIdx = Elt / ConcatNumElts;
17482 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
17483 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
17484 ConcatOp, InVal, NewIdx);
17485
17486 SmallVector<SDValue> ConcatOps;
17487 ConcatOps.append(InVec->op_begin(), InVec->op_end());
17488 ConcatOps[ConcatOpIdx] = ConcatOp;
17489 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17490}
17491
17492 // If we're concatenating a series of vector loads like
17493 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
17494 // Then we can turn this into a strided load by widening the vector elements
17495 // vlse32 p, stride=n
// NOTE(review): the function's first signature line is not visible in this
// excerpt; from the parameter lines below it takes the CONCAT_VECTORS node N,
// the DAG, the subtarget and the target lowering, returning SDValue() when
// the combine does not apply.
17497 const RISCVSubtarget &Subtarget,
17498 const RISCVTargetLowering &TLI) {
17499 SDLoc DL(N);
17500 EVT VT = N->getValueType(0);
17501
17502 // Only perform this combine on legal MVTs.
17503 if (!TLI.isTypeLegal(VT))
17504 return SDValue();
17505
17506 // TODO: Potentially extend this to scalable vectors
17507 if (VT.isScalableVector())
17508 return SDValue();
// The first operand anchors the transform: it must be a simple (non-atomic,
// non-volatile) normal load whose loaded value is only used by this concat.
17509
17510 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
17511 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
17512 !SDValue(BaseLd, 0).hasOneUse())
17513 return SDValue();
17514
17515 EVT BaseLdVT = BaseLd->getValueType(0);
17516
17517 // Go through the loads and check that they're strided
// NOTE(review): the declaration of the load list (presumably a
// SmallVector<LoadSDNode *> named Lds) is on a line not visible here.
17519 Lds.push_back(BaseLd);
17520 Align Align = BaseLd->getAlign();
// All remaining operands must be matching simple loads: same chain, same
// value type, single use each.
17521 for (SDValue Op : N->ops().drop_front()) {
17522 auto *Ld = dyn_cast<LoadSDNode>(Op);
17523 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
17524 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
17525 Ld->getValueType(0) != BaseLdVT)
17526 return SDValue();
17527
17528 Lds.push_back(Ld);
17529
17530 // The common alignment is the most restrictive (smallest) of all the loads
17531 Align = std::min(Align, Ld->getAlign());
17532 }
17533
// PtrDiff is either a known constant byte distance or a symbolic SDValue
// stride; the bool is true when the stride must be negated before use.
17534 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
17535 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
17536 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
17537 // If the load ptrs can be decomposed into a common (Base + Index) with a
17538 // common constant stride, then return the constant stride.
17539 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
17540 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
17541 if (BIO1.equalBaseIndex(BIO2, DAG))
17542 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
17543
17544 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
17545 SDValue P1 = Ld1->getBasePtr();
17546 SDValue P2 = Ld2->getBasePtr();
17547 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
17548 return {{P2.getOperand(1), false}};
17549 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
17550 return {{P1.getOperand(1), true}};
17551
17552 return std::nullopt;
17553 };
17554
17555 // Get the distance between the first and second loads
17556 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
17557 if (!BaseDiff)
17558 return SDValue();
17559
17560 // Check all the loads are the same distance apart
17561 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
17562 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
17563 return SDValue();
17564
17565 // TODO: At this point, we've successfully matched a generalized gather
17566 // load. Maybe we should emit that, and then move the specialized
17567 // matchers above and below into a DAG combine?
17568
17569 // Get the widened scalar type, e.g. v4i8 -> i64
17570 unsigned WideScalarBitWidth =
17571 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements()
17572 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
17573
17574 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
17575 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
17576 if (!TLI.isTypeLegal(WideVecVT))
17577 return SDValue();
17578
17579 // Check that the operation is legal
17580 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
17581 return SDValue();
17582
17583 auto [StrideVariant, MustNegateStride] = *BaseDiff;
// Materialize the stride as an SDValue: pass a symbolic stride through
// unchanged, otherwise emit the constant in the offset's value type.
17584 SDValue Stride =
17585 std::holds_alternative<SDValue>(StrideVariant)
17586 ? std::get<SDValue>(StrideVariant)
17587 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
17588 Lds[0]->getOffset().getValueType());
17589 if (MustNegateStride)
17590 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
17591
17592 SDValue AllOneMask =
17593 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
17594 DAG.getConstant(1, DL, MVT::i1));
17595
17596 uint64_t MemSize;
17597 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
17598 ConstStride && ConstStride->getSExtValue() >= 0)
17599 // total size = (elsize * n) + (stride - elsize) * (n-1)
17600 // = elsize + stride * (n-1)
17601 MemSize = WideScalarVT.getSizeInBits() +
17602 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
17603 else
17604 // If Stride isn't constant, then we can't know how much it will load
// NOTE(review): the fallback MemSize assignment (presumably an "unknown
// size" sentinel) and the MachineMemOperand creation call are on lines not
// visible in this excerpt; the arguments below belong to that MMO call.
17606
17608 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
17609 Align);
17610
17611 SDValue StridedLoad = DAG.getStridedLoadVP(
17612 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
17613 AllOneMask,
17614 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
17615
// Keep chain users of each original load ordered after the new strided load.
17616 for (SDValue Ld : N->ops())
17617 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
17618
17619 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
17620 }
17621
// Shuffle combine with two independent matchers: (1) a select-like shuffle
// over (add x, y) / (sub x, y) becomes add + conditionally-negated operand so
// it can lower to a masked vrsub; (2) shuffles whose element type is wider
// than ELEN are narrowed to a legal element type before type legalization.
// NOTE(review): the function's first signature line is not visible in this
// excerpt; N is the VECTOR_SHUFFLE node being combined.
17623 const RISCVSubtarget &Subtarget,
17624 const RISCVTargetLowering &TLI) {
17625 SDLoc DL(N);
17626 EVT VT = N->getValueType(0);
17627 const unsigned ElementSize = VT.getScalarSizeInBits();
17628 const unsigned NumElts = VT.getVectorNumElements();
17629 SDValue V1 = N->getOperand(0);
17630 SDValue V2 = N->getOperand(1);
17631 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
17632 MVT XLenVT = Subtarget.getXLenVT();
17633
17634 // Recognize a disguised select of add/sub.
17635 bool SwapCC;
17636 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
17637 matchSelectAddSub(V1, V2, SwapCC)) {
// SwapCC tells us which shuffle input is the subtract; A +/- B per lane.
17638 SDValue Sub = SwapCC ? V1 : V2;
17639 SDValue A = Sub.getOperand(0);
17640 SDValue B = Sub.getOperand(1);
17641
// Build an i1 condition vector: true lanes select from the first input.
17642 SmallVector<SDValue> MaskVals;
17643 for (int MaskIndex : Mask) {
17644 bool SelectMaskVal = (MaskIndex < (int)NumElts);
17645 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
17646 }
17647 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
17648 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
17649 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
17650
17651 // Arrange the select such that we can match a masked
17652 // vrsub.vi to perform the conditional negate
17653 SDValue NegB = DAG.getNegative(B, DL, VT);
17654 if (!SwapCC)
17655 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
17656 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
17657 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
17658 }
17659
17660 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
17661 // during the combine phase before type legalization, and relies on
17662 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
17663 // for the source mask.
17664 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
17665 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
17666 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
17667 return SDValue();
17668
// Each wide element becomes two half-width elements; widen the mask to match.
17669 SmallVector<int, 8> NewMask;
17670 narrowShuffleMaskElts(2, Mask, NewMask);
17671
17672 LLVMContext &C = *DAG.getContext();
17673 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
17674 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
17675 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
17676 DAG.getBitcast(NewVT, V2), NewMask);
17677 return DAG.getBitcast(VT, Res);
17678 }
17679
// Fold (add (vwmul* x, y), z) into the corresponding widening multiply-add
// vwmacc* z, x, y, for both plain ISD::ADD and the VL form ADD_VL.
// NOTE(review): the function's first signature line is not visible in this
// excerpt; N is the ADD/ADD_VL node being combined.
17681 const RISCVSubtarget &Subtarget) {
17682
17683 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
17684
17685 if (N->getValueType(0).isFixedLengthVector())
17686 return SDValue();
17687
17688 SDValue Addend = N->getOperand(0);
17689 SDValue MulOp = N->getOperand(1);
17690
// A merged-in passthru would be clobbered by the macc form; require undef.
17691 if (N->getOpcode() == RISCVISD::ADD_VL) {
17692 SDValue AddPassthruOp = N->getOperand(2);
17693 if (!AddPassthruOp.isUndef())
17694 return SDValue();
17695 }
17696
17697 auto IsVWMulOpc = [](unsigned Opc) {
17698 switch (Opc) {
17699 case RISCVISD::VWMUL_VL:
// NOTE(review): the case labels for the unsigned/mixed-sign variants
// (presumably VWMULU_VL and VWMULSU_VL) are on lines not visible in this
// excerpt; the static_asserts below imply they belong here.
17702 return true;
17703 default:
17704 return false;
17705 }
17706 };
17707
// ADD is commutative: if the second operand isn't the multiply, try the first.
17708 if (!IsVWMulOpc(MulOp.getOpcode()))
17709 std::swap(Addend, MulOp);
17710
17711 if (!IsVWMulOpc(MulOp.getOpcode()))
17712 return SDValue();
17713
17714 SDValue MulPassthruOp = MulOp.getOperand(2);
17715
17716 if (!MulPassthruOp.isUndef())
17717 return SDValue();
17718
// For a plain ADD synthesize the default VLMAX mask/VL; for ADD_VL reuse its
// explicit mask (operand 3) and VL (operand 4).
17719 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
17720 const RISCVSubtarget &Subtarget) {
17721 if (N->getOpcode() == ISD::ADD) {
17722 SDLoc DL(N);
17723 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
17724 Subtarget);
17725 }
17726 return std::make_pair(N->getOperand(3), N->getOperand(4));
17727 }(N, DAG, Subtarget);
17728
17729 SDValue MulMask = MulOp.getOperand(3);
17730 SDValue MulVL = MulOp.getOperand(4);
17731
// Only fold when both ops execute under identical predication.
17732 if (AddMask != MulMask || AddVL != MulVL)
17733 return SDValue();
17734
// Map VWMUL_VL/VWMULU_VL/VWMULSU_VL onto VWMACC_VL/VWMACCU_VL/VWMACCSU_VL by
// exploiting their fixed enum layout, verified by the static_asserts.
17735 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
17736 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
17737 "Unexpected opcode after VWMACC_VL");
17738 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
17739 "Unexpected opcode after VWMACC_VL!");
17740 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
17741 "Unexpected opcode after VWMUL_VL!");
17742 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
17743 "Unexpected opcode after VWMUL_VL!");
17744
17745 SDLoc DL(N);
17746 EVT VT = N->getValueType(0);
17747 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
17748 AddVL};
17749 return DAG.getNode(Opc, DL, VT, Ops);
17750 }
17751
// Legalize the index operand of a gather/scatter so it matches the only
// addressing mode RVV supports. Returns true if Index/IndexType were changed
// (the caller must then rebuild the memory node with the new operands).
// NOTE(review): the function's name and first parameter lines are not visible
// in this excerpt; visible parameters include the index value, its
// MemIndexType (in/out), and the combiner info.
17753 ISD::MemIndexType &IndexType,
17755 if (!DCI.isBeforeLegalize())
17756 return false;
17757
17758 SelectionDAG &DAG = DCI.DAG;
17759 const MVT XLenVT =
17760 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
17761
17762 const EVT IndexVT = Index.getValueType();
17763
17764 // RISC-V indexed loads only support the "unsigned unscaled" addressing
17765 // mode, so anything else must be manually legalized.
17766 if (!isIndexTypeSigned(IndexType))
17767 return false;
17768
17769 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
17770 // Any index legalization should first promote to XLenVT, so we don't lose
17771 // bits when scaling. This may create an illegal index type so we let
17772 // LLVM's legalization take care of the splitting.
17773 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
17774 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
17775 IndexVT.changeVectorElementType(XLenVT), Index);
17776 }
// After sign-extension to XLenVT the index bits are interpretable as
// unsigned; flip the index type accordingly.
17777 IndexType = ISD::UNSIGNED_SCALED;
17778 return true;
17779 }
17780
17781/// Match the index vector of a scatter or gather node as the shuffle mask
17782/// which performs the rearrangement if possible. Will only match if
17783/// all lanes are touched, and thus replacing the scatter or gather with
17784/// a unit strided access and shuffle is legal.
17785static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
17786 SmallVector<int> &ShuffleMask) {
17787 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17788 return false;
17789 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17790 return false;
17791
17792 const unsigned ElementSize = VT.getScalarStoreSize();
17793 const unsigned NumElems = VT.getVectorNumElements();
17794
17795 // Create the shuffle mask and check all bits active
17796 assert(ShuffleMask.empty());
17797 BitVector ActiveLanes(NumElems);
17798 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17799 // TODO: We've found an active bit of UB, and could be
17800 // more aggressive here if desired.
17801 if (Index->getOperand(i)->isUndef())
17802 return false;
17803 uint64_t C = Index->getConstantOperandVal(i);
17804 if (C % ElementSize != 0)
17805 return false;
17806 C = C / ElementSize;
17807 if (C >= NumElems)
17808 return false;
17809 ShuffleMask.push_back(C);
17810 ActiveLanes.set(C);
17811 }
17812 return ActiveLanes.all();
17813}
17814
17815/// Match the index of a gather or scatter operation as an operation
17816/// with twice the element width and half the number of elements. This is
17817/// generally profitable (if legal) because these operations are linear
17818/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
17819/// come out ahead.
17820static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
17821 Align BaseAlign, const RISCVSubtarget &ST) {
17822 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17823 return false;
17824 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17825 return false;
17826
17827 // Attempt a doubling. If we can use a element type 4x or 8x in
17828 // size, this will happen via multiply iterations of the transform.
17829 const unsigned NumElems = VT.getVectorNumElements();
17830 if (NumElems % 2 != 0)
17831 return false;
17832
17833 const unsigned ElementSize = VT.getScalarStoreSize();
17834 const unsigned WiderElementSize = ElementSize * 2;
17835 if (WiderElementSize > ST.getELen()/8)
17836 return false;
17837
17838 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17839 return false;
17840
17841 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17842 // TODO: We've found an active bit of UB, and could be
17843 // more aggressive here if desired.
17844 if (Index->getOperand(i)->isUndef())
17845 return false;
17846 // TODO: This offset check is too strict if we support fully
17847 // misaligned memory operations.
17848 uint64_t C = Index->getConstantOperandVal(i);
17849 if (i % 2 == 0) {
17850 if (C % WiderElementSize != 0)
17851 return false;
17852 continue;
17853 }
17854 uint64_t Last = Index->getConstantOperandVal(i-1);
17855 if (C != Last + ElementSize)
17856 return false;
17857 }
17858 return true;
17859}
17860
17861 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
// This is beneficial when X and Y are both the same low-precision vector
// type. Since the truncate would be lowered into n levels of
// TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such
// a pattern would otherwise be expanded into a series of "vsetvli" and
// "vnsrl" instructions later to reach this point.
// NOTE(review): the function's signature line is not visible in this excerpt;
// N is the outermost TRUNCATE_VECTOR_VL node.
17868 SDValue Mask = N->getOperand(1);
17869 SDValue VL = N->getOperand(2);
17870
// Only handle the whole-register form: VL is VLMAX (all-ones or X0) and the
// mask is a VMSET of the same VL.
17871 bool IsVLMAX = isAllOnesConstant(VL) ||
17872 (isa<RegisterSDNode>(VL) &&
17873 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17874 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
17875 Mask.getOperand(0) != VL)
17876 return SDValue();
17877
17878 auto IsTruncNode = [&](SDValue V) {
17879 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17880 V.getOperand(1) == Mask && V.getOperand(2) == VL;
17881 };
17882
17883 SDValue Op = N->getOperand(0);
17884
17885 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
17886 // to distinguish such pattern.
17887 while (IsTruncNode(Op)) {
17888 if (!Op.hasOneUse())
17889 return SDValue();
17890 Op = Op.getOperand(0);
17891 }
17892
17893 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
17894 return SDValue();
17895
// Require sra(sext X, zext Y) with single-use extends.
17896 SDValue N0 = Op.getOperand(0);
17897 SDValue N1 = Op.getOperand(1);
17898 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
17899 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
17900 return SDValue();
17901
// X and Y must share the narrow vector type, which must also be the type the
// whole truncate chain produces.
17902 SDValue N00 = N0.getOperand(0);
17903 SDValue N10 = N1.getOperand(0);
17904 if (!N00.getValueType().isVector() ||
17905 N00.getValueType() != N10.getValueType() ||
17906 N->getValueType(0) != N10.getValueType())
17907 return SDValue();
17908
// Clamp the shift amount to the narrow element width - 1 so the narrow sra
// has the same result as the wide sra + truncate.
17909 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
17910 SDValue SMin =
17911 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
17912 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
17913 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
17914 }
17915
17916 // Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
17917 // maximum value for the truncated type.
17918 // Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
17919 // is the signed maximum value for the truncated type and C2 is the signed
17920 // minimum value.
// NOTE(review): the function's signature line is not visible in this excerpt;
// N is the TRUNCATE_VECTOR_VL node being combined.
17922 const RISCVSubtarget &Subtarget) {
17923 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
17924
17925 MVT VT = N->getSimpleValueType(0);
17926
17927 SDValue Mask = N->getOperand(1);
17928 SDValue VL = N->getOperand(2);
17929
// Match either the generic min/max opcode or its VL form (with undef
// passthru and our mask/VL) against a splat-constant RHS; on success returns
// the non-constant operand and writes the splat into SplatVal.
17930 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
17931 APInt &SplatVal) {
17932 if (V.getOpcode() != Opc &&
17933 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17934 V.getOperand(3) == Mask && V.getOperand(4) == VL))
17935 return SDValue();
17936
17937 SDValue Op = V.getOperand(1);
17938
17939 // Peek through conversion between fixed and scalable vectors.
17940 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
17941 isNullConstant(Op.getOperand(2)) &&
17942 Op.getOperand(1).getValueType().isFixedLengthVector() &&
17943 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17944 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
17945 isNullConstant(Op.getOperand(1).getOperand(1)))
17946 Op = Op.getOperand(1).getOperand(0);
17947
17948 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
17949 return V.getOperand(0);
17950
// Also accept a VL scalar-splat (vmv.v.x) of a constant with matching VL.
17951 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
17952 Op.getOperand(2) == VL) {
17953 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
17954 SplatVal =
17955 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
17956 return V.getOperand(0);
17957 }
17958 }
17959
17960 return SDValue();
17961 };
17962
17963 SDLoc DL(N);
17964
// Unsigned saturation: value is clamped to [0, 2^destbits - 1].
17965 auto DetectUSatPattern = [&](SDValue V) {
17966 APInt LoC, HiC;
17967
17968 // Simple case, V is a UMIN.
17969 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
17970 if (HiC.isMask(VT.getScalarSizeInBits()))
17971 return UMinOp;
17972
17973 // If we have an SMAX that removes negative numbers first, then we can match
17974 // SMIN instead of UMIN.
17975 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17976 if (SDValue SMaxOp =
17977 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17978 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
17979 return SMinOp;
17980
17981 // If we have an SMIN before an SMAX and the SMAX constant is less than or
17982 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
17983 // first.
17984 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17985 if (SDValue SMinOp =
17986 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17987 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
17988 HiC.uge(LoC))
17989 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
17990 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
17991 Mask, VL);
17992
17993 return SDValue();
17994 };
17995
// Signed saturation: value is clamped to the full signed range of the
// destination type, in either smin(smax(..)) or smax(smin(..)) order.
17996 auto DetectSSatPattern = [&](SDValue V) {
17997 unsigned NumDstBits = VT.getScalarSizeInBits();
17998 unsigned NumSrcBits = V.getScalarValueSizeInBits();
17999 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
18000 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
18001
18002 APInt HiC, LoC;
18003 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
18004 if (SDValue SMaxOp =
18005 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
18006 if (HiC == SignedMax && LoC == SignedMin)
18007 return SMaxOp;
18008
18009 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
18010 if (SDValue SMinOp =
18011 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
18012 if (HiC == SignedMax && LoC == SignedMin)
18013 return SMinOp;
18014
18015 return SDValue();
18016 };
18017
18018 SDValue Src = N->getOperand(0);
18019
18020 // Look through multiple layers of truncates.
18021 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
18022 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
18023 Src.hasOneUse())
18024 Src = Src.getOperand(0);
18025
18026 SDValue Val;
18027 unsigned ClipOpc;
18028 if ((Val = DetectUSatPattern(Src)))
// NOTE(review): the assignments to ClipOpc (presumably the unsigned and
// signed saturating truncate opcodes, respectively) are on lines not visible
// in this excerpt.
18030 else if ((Val = DetectSSatPattern(Src)))
18032 else
18033 return SDValue();
18034
18035 MVT ValVT = Val.getSimpleValueType();
18036
// Narrow in halves: each clip step performs an SEW*2 -> SEW truncate, so
// repeat until we reach the destination element width.
18037 do {
18038 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
18039 ValVT = ValVT.changeVectorElementType(ValEltVT);
18040 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
18041 } while (ValVT != VT);
18042
18043 return Val;
18044 }
18045
18046 // Convert
18047 // (iX ctpop (bitcast (vXi1 A)))
18048 // ->
18049 // (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
18050 // FIXME: It's complicated to match all the variations of this after type
18051 // legalization so we only handle the pre-type legalization pattern, but that
18052 // requires the fixed vector type to be legal.
// NOTE(review): the function's first signature line is not visible in this
// excerpt; N is the CTPOP node being combined.
18054 const RISCVSubtarget &Subtarget) {
18055 EVT VT = N->getValueType(0);
18056 if (!VT.isScalarInteger())
18057 return SDValue();
18058
18059 SDValue Src = N->getOperand(0);
18060
18061 // Peek through zero_extend. It doesn't change the count.
18062 if (Src.getOpcode() == ISD::ZERO_EXTEND)
18063 Src = Src.getOperand(0);
18064
18065 if (Src.getOpcode() != ISD::BITCAST)
18066 return SDValue();
18067
18068 Src = Src.getOperand(0);
18069 EVT SrcEVT = Src.getValueType();
18070 if (!SrcEVT.isSimple())
18071 return SDValue();
18072
18073 MVT SrcMVT = SrcEVT.getSimpleVT();
18074 // Make sure the input is an i1 vector.
18075 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
18076 return SDValue();
18077
// The fixed-length mask vector must be one we'd lower via RVV registers.
18078 if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
18079 return SDValue();
18080
18081 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
18082 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
18083
18084 SDLoc DL(N);
18085 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
18086
// vcpop.m yields the count in XLenVT; adjust to the original scalar type.
18087 MVT XLenVT = Subtarget.getXLenVT();
18088 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
18089 return DAG.getZExtOrTrunc(Pop, DL, VT);
18090 }
18091
// Combine a vector SHL/SHL_VL whose LHS is an extend: first try the generic
// widening-op combine (vwsll), then rewrite a shift-by-constant of an
// extended value as a widening multiply by (1 << C).
// NOTE(review): the function's leading signature lines are not visible in
// this excerpt; N is the SHL/SHL_VL node and DCI the combiner info.
18094 const RISCVSubtarget &Subtarget) {
18095 // (shl (zext x), y) -> (vwsll x, y)
18096 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18097 return V;
18098
18099 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
18100 // (shl (zext x), C) -> (vwmulu x, 1u << C)
18101
18102 if (!DCI.isAfterLegalizeDAG())
18103 return SDValue();
18104
18105 SDValue LHS = N->getOperand(0);
18106 if (!LHS.hasOneUse())
18107 return SDValue();
// Choose mixed-sign multiply for a sign-extended LHS (the power-of-two
// multiplicand below is unsigned), unsigned multiply for zero-extension.
18108 unsigned Opcode;
18109 switch (LHS.getOpcode()) {
18110 case ISD::SIGN_EXTEND:
18111 case RISCVISD::VSEXT_VL:
18112 Opcode = RISCVISD::VWMULSU_VL;
18113 break;
18114 case ISD::ZERO_EXTEND:
18115 case RISCVISD::VZEXT_VL:
18116 Opcode = RISCVISD::VWMULU_VL;
18117 break;
18118 default:
18119 return SDValue();
18120 }
18121
// The shift amount must be a known constant, either a splat build_vector or
// a vmv.v.x of a scalar constant.
18122 SDValue RHS = N->getOperand(1);
18123 APInt ShAmt;
18124 uint64_t ShAmtInt;
18125 if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
18126 ShAmtInt = ShAmt.getZExtValue();
18127 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
18128 RHS.getOperand(1).getOpcode() == ISD::Constant)
18129 ShAmtInt = RHS.getConstantOperandVal(1);
18130 else
18131 return SDValue();
18132
18133 // Better foldings:
18134 // (shl (sext x), 1) -> (vwadd x, x)
18135 // (shl (zext x), 1) -> (vwaddu x, x)
18136 if (ShAmtInt <= 1)
18137 return SDValue();
18138
// The (1 << C) multiplicand is built in the narrow type, so C must fit, and
// the widening multiply requires exactly a 2x element-width relationship.
18139 SDValue NarrowOp = LHS.getOperand(0);
18140 MVT NarrowVT = NarrowOp.getSimpleValueType();
18141 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
18142 if (ShAmtInt >= NarrowBits)
18143 return SDValue();
18144 MVT VT = N->getSimpleValueType(0);
18145 if (NarrowBits * 2 != VT.getScalarSizeInBits())
18146 return SDValue();
18147
18148 SelectionDAG &DAG = DCI.DAG;
18149 SDLoc DL(N);
// Plain SHL gets default VLMAX predication; SHL_VL reuses its own
// passthru/mask/VL operands.
18150 SDValue Passthru, Mask, VL;
18151 switch (N->getOpcode()) {
18152 case ISD::SHL:
18153 Passthru = DAG.getUNDEF(VT);
18154 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
18155 break;
18156 case RISCVISD::SHL_VL:
18157 Passthru = N->getOperand(2);
18158 Mask = N->getOperand(3);
18159 VL = N->getOperand(4);
18160 break;
18161 default:
18162 llvm_unreachable("Expected SHL");
18163 }
18164 return DAG.getNode(Opcode, DL, VT, NarrowOp,
18165 DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
18166 Passthru, Mask, VL);
18167 }
18168
18170 DAGCombinerInfo &DCI) const {
18171 SelectionDAG &DAG = DCI.DAG;
18172 const MVT XLenVT = Subtarget.getXLenVT();
18173 SDLoc DL(N);
18174
18175 // Helper to call SimplifyDemandedBits on an operand of N where only some low
18176 // bits are demanded. N will be added to the Worklist if it was not deleted.
18177 // Caller should return SDValue(N, 0) if this returns true.
18178 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
18179 SDValue Op = N->getOperand(OpNo);
18180 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
18181 if (!SimplifyDemandedBits(Op, Mask, DCI))
18182 return false;
18183
18184 if (N->getOpcode() != ISD::DELETED_NODE)
18185 DCI.AddToWorklist(N);
18186 return true;
18187 };
18188
18189 switch (N->getOpcode()) {
18190 default:
18191 break;
18192 case RISCVISD::SplitF64: {
18193 SDValue Op0 = N->getOperand(0);
18194 // If the input to SplitF64 is just BuildPairF64 then the operation is
18195 // redundant. Instead, use BuildPairF64's operands directly.
18196 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
18197 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
18198
18199 if (Op0->isUndef()) {
18200 SDValue Lo = DAG.getUNDEF(MVT::i32);
18201 SDValue Hi = DAG.getUNDEF(MVT::i32);
18202 return DCI.CombineTo(N, Lo, Hi);
18203 }
18204
18205 // It's cheaper to materialise two 32-bit integers than to load a double
18206 // from the constant pool and transfer it to integer registers through the
18207 // stack.
18208 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
18209 APInt V = C->getValueAPF().bitcastToAPInt();
18210 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
18211 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
18212 return DCI.CombineTo(N, Lo, Hi);
18213 }
18214
18215 // This is a target-specific version of a DAGCombine performed in
18216 // DAGCombiner::visitBITCAST. It performs the equivalent of:
18217 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
18218 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
18219 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
18220 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
18221 break;
18222 SDValue NewSplitF64 =
18223 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
18224 Op0.getOperand(0));
18225 SDValue Lo = NewSplitF64.getValue(0);
18226 SDValue Hi = NewSplitF64.getValue(1);
18227 APInt SignBit = APInt::getSignMask(32);
18228 if (Op0.getOpcode() == ISD::FNEG) {
18229 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
18230 DAG.getConstant(SignBit, DL, MVT::i32));
18231 return DCI.CombineTo(N, Lo, NewHi);
18232 }
18233 assert(Op0.getOpcode() == ISD::FABS);
18234 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
18235 DAG.getConstant(~SignBit, DL, MVT::i32));
18236 return DCI.CombineTo(N, Lo, NewHi);
18237 }
18238 case RISCVISD::SLLW:
18239 case RISCVISD::SRAW:
18240 case RISCVISD::SRLW:
18241 case RISCVISD::RORW:
18242 case RISCVISD::ROLW: {
18243 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
18244 if (SimplifyDemandedLowBitsHelper(0, 32) ||
18245 SimplifyDemandedLowBitsHelper(1, 5))
18246 return SDValue(N, 0);
18247
18248 break;
18249 }
18250 case RISCVISD::CLZW:
18251 case RISCVISD::CTZW: {
18252 // Only the lower 32 bits of the first operand are read
18253 if (SimplifyDemandedLowBitsHelper(0, 32))
18254 return SDValue(N, 0);
18255 break;
18256 }
18258 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 the the
18259 // conversion is unnecessary and can be replaced with the
18260 // FMV_X_ANYEXTW_RV64 operand.
18261 SDValue Op0 = N->getOperand(0);
18263 return Op0.getOperand(0);
18264 break;
18265 }
18268 SDLoc DL(N);
18269 SDValue Op0 = N->getOperand(0);
18270 MVT VT = N->getSimpleValueType(0);
18271
18272 // Constant fold.
18273 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
18274 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
18275 return DAG.getConstant(Val, DL, VT);
18276 }
18277
18278 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
18279 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
18280 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
18281 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
18282 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
18283 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
18284 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
18285 assert(Op0.getOperand(0).getValueType() == VT &&
18286 "Unexpected value type!");
18287 return Op0.getOperand(0);
18288 }
18289
18290 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
18291 cast<LoadSDNode>(Op0)->isSimple()) {
18293 auto *LN0 = cast<LoadSDNode>(Op0);
18294 SDValue Load =
18295 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
18296 LN0->getBasePtr(), IVT, LN0->getMemOperand());
18297 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
18298 return Load;
18299 }
18300
18301 // This is a target-specific version of a DAGCombine performed in
18302 // DAGCombiner::visitBITCAST. It performs the equivalent of:
18303 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
18304 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
18305 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
18306 !Op0.getNode()->hasOneUse())
18307 break;
18308 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
18309 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
18310 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
18311 if (Op0.getOpcode() == ISD::FNEG)
18312 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
18313 DAG.getConstant(SignBit, DL, VT));
18314
18315 assert(Op0.getOpcode() == ISD::FABS);
18316 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
18317 DAG.getConstant(~SignBit, DL, VT));
18318 }
18319 case ISD::ABS: {
18320 EVT VT = N->getValueType(0);
18321 SDValue N0 = N->getOperand(0);
18322 // abs (sext) -> zext (abs)
18323 // abs (zext) -> zext (handled elsewhere)
18324 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
18325 SDValue Src = N0.getOperand(0);
18326 SDLoc DL(N);
18327 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
18328 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
18329 }
18330 break;
18331 }
18332 case ISD::ADD: {
18333 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18334 return V;
18335 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
18336 return V;
18337 return performADDCombine(N, DCI, Subtarget);
18338 }
18339 case ISD::SUB: {
18340 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18341 return V;
18342 return performSUBCombine(N, DAG, Subtarget);
18343 }
18344 case ISD::AND:
18345 return performANDCombine(N, DCI, Subtarget);
18346 case ISD::OR: {
18347 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18348 return V;
18349 return performORCombine(N, DCI, Subtarget);
18350 }
18351 case ISD::XOR:
18352 return performXORCombine(N, DAG, Subtarget);
18353 case ISD::MUL:
18354 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18355 return V;
18356 return performMULCombine(N, DAG, DCI, Subtarget);
18357 case ISD::SDIV:
18358 case ISD::UDIV:
18359 case ISD::SREM:
18360 case ISD::UREM:
18361 if (SDValue V = combineBinOpOfZExt(N, DAG))
18362 return V;
18363 break;
18364 case ISD::FMUL: {
18365 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
18366 SDValue N0 = N->getOperand(0);
18367 SDValue N1 = N->getOperand(1);
18368 if (N0->getOpcode() != ISD::FCOPYSIGN)
18369 std::swap(N0, N1);
18370 if (N0->getOpcode() != ISD::FCOPYSIGN)
18371 return SDValue();
18372 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
18373 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
18374 return SDValue();
18375 EVT VT = N->getValueType(0);
18376 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
18377 return SDValue();
18378 SDValue Sign = N0->getOperand(1);
18379 if (Sign.getValueType() != VT)
18380 return SDValue();
18381 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
18382 }
18383 case ISD::FADD:
18384 case ISD::UMAX:
18385 case ISD::UMIN:
18386 case ISD::SMAX:
18387 case ISD::SMIN:
18388 case ISD::FMAXNUM:
18389 case ISD::FMINNUM: {
18390 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
18391 return V;
18392 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
18393 return V;
18394 return SDValue();
18395 }
18396 case ISD::SETCC:
18397 return performSETCCCombine(N, DAG, Subtarget);
18399 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
18400 case ISD::ZERO_EXTEND:
18401 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
18402 // type legalization. This is safe because fp_to_uint produces poison if
18403 // it overflows.
18404 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
18405 SDValue Src = N->getOperand(0);
18406 if (Src.getOpcode() == ISD::FP_TO_UINT &&
18407 isTypeLegal(Src.getOperand(0).getValueType()))
18408 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
18409 Src.getOperand(0));
18410 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
18411 isTypeLegal(Src.getOperand(1).getValueType())) {
18412 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
18413 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
18414 Src.getOperand(0), Src.getOperand(1));
18415 DCI.CombineTo(N, Res);
18416 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
18417 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
18418 return SDValue(N, 0); // Return N so it doesn't get rechecked.
18419 }
18420 }
18421 return SDValue();
18423 if (SDValue V = combineTruncOfSraSext(N, DAG))
18424 return V;
18425 return combineTruncToVnclip(N, DAG, Subtarget);
18426 case ISD::VP_TRUNCATE:
18427 return performVP_TRUNCATECombine(N, DAG, Subtarget);
18428 case ISD::TRUNCATE:
18429 return performTRUNCATECombine(N, DAG, Subtarget);
18430 case ISD::SELECT:
18431 return performSELECTCombine(N, DAG, Subtarget);
18432 case ISD::VSELECT:
18433 return performVSELECTCombine(N, DAG);
18435 case RISCVISD::CZERO_NEZ: {
18436 SDValue Val = N->getOperand(0);
18437 SDValue Cond = N->getOperand(1);
18438
18439 unsigned Opc = N->getOpcode();
18440
18441 // czero_eqz x, x -> x
18442 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
18443 return Val;
18444
18445 unsigned InvOpc =
18447
18448 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
18449 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
18450 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
18451 SDValue NewCond = Cond.getOperand(0);
18452 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
18453 if (DAG.MaskedValueIsZero(NewCond, Mask))
18454 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
18455 }
18456 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
18457 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
18458 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
18459 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
18460 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
18461 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18462 if (ISD::isIntEqualitySetCC(CCVal))
18463 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
18464 N->getValueType(0), Val, Cond.getOperand(0));
18465 }
18466 return SDValue();
18467 }
18468 case RISCVISD::SELECT_CC: {
18469 // Transform
18470 SDValue LHS = N->getOperand(0);
18471 SDValue RHS = N->getOperand(1);
18472 SDValue CC = N->getOperand(2);
18473 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
18474 SDValue TrueV = N->getOperand(3);
18475 SDValue FalseV = N->getOperand(4);
18476 SDLoc DL(N);
18477 EVT VT = N->getValueType(0);
18478
18479 // If the True and False values are the same, we don't need a select_cc.
18480 if (TrueV == FalseV)
18481 return TrueV;
18482
18483 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
18484 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
18485 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
18486 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
18487 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
18488 if (CCVal == ISD::CondCode::SETGE)
18489 std::swap(TrueV, FalseV);
18490
18491 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
18492 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
18493 // Only handle simm12, if it is not in this range, it can be considered as
18494 // register.
18495 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
18496 isInt<12>(TrueSImm - FalseSImm)) {
18497 SDValue SRA =
18498 DAG.getNode(ISD::SRA, DL, VT, LHS,
18499 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
18500 SDValue AND =
18501 DAG.getNode(ISD::AND, DL, VT, SRA,
18502 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
18503 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
18504 }
18505
18506 if (CCVal == ISD::CondCode::SETGE)
18507 std::swap(TrueV, FalseV);
18508 }
18509
18510 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
18511 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
18512 {LHS, RHS, CC, TrueV, FalseV});
18513
18514 if (!Subtarget.hasConditionalMoveFusion()) {
18515 // (select c, -1, y) -> -c | y
18516 if (isAllOnesConstant(TrueV)) {
18517 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
18518 SDValue Neg = DAG.getNegative(C, DL, VT);
18519 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
18520 }
18521 // (select c, y, -1) -> -!c | y
18522 if (isAllOnesConstant(FalseV)) {
18523 SDValue C =
18524 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
18525 SDValue Neg = DAG.getNegative(C, DL, VT);
18526 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
18527 }
18528
18529 // (select c, 0, y) -> -!c & y
18530 if (isNullConstant(TrueV)) {
18531 SDValue C =
18532 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
18533 SDValue Neg = DAG.getNegative(C, DL, VT);
18534 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
18535 }
18536 // (select c, y, 0) -> -c & y
18537 if (isNullConstant(FalseV)) {
18538 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
18539 SDValue Neg = DAG.getNegative(C, DL, VT);
18540 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
18541 }
18542 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
18543 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
18544 if (((isOneConstant(FalseV) && LHS == TrueV &&
18545 CCVal == ISD::CondCode::SETNE) ||
18546 (isOneConstant(TrueV) && LHS == FalseV &&
18547 CCVal == ISD::CondCode::SETEQ)) &&
18549 // freeze it to be safe.
18550 LHS = DAG.getFreeze(LHS);
18552 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
18553 }
18554 }
18555
18556 // If both true/false are an xor with 1, pull through the select.
18557 // This can occur after op legalization if both operands are setccs that
18558 // require an xor to invert.
18559 // FIXME: Generalize to other binary ops with identical operand?
18560 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
18561 TrueV.getOperand(1) == FalseV.getOperand(1) &&
18562 isOneConstant(TrueV.getOperand(1)) &&
18563 TrueV.hasOneUse() && FalseV.hasOneUse()) {
18564 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
18565 TrueV.getOperand(0), FalseV.getOperand(0));
18566 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
18567 }
18568
18569 return SDValue();
18570 }
18571 case RISCVISD::BR_CC: {
18572 SDValue LHS = N->getOperand(1);
18573 SDValue RHS = N->getOperand(2);
18574 SDValue CC = N->getOperand(3);
18575 SDLoc DL(N);
18576
18577 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
18578 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
18579 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
18580
18581 return SDValue();
18582 }
18583 case ISD::BITREVERSE:
18584 return performBITREVERSECombine(N, DAG, Subtarget);
18585 case ISD::FP_TO_SINT:
18586 case ISD::FP_TO_UINT:
18587 return performFP_TO_INTCombine(N, DCI, Subtarget);
18590 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
18591 case ISD::FCOPYSIGN: {
18592 EVT VT = N->getValueType(0);
18593 if (!VT.isVector())
18594 break;
18595 // There is a form of VFSGNJ which injects the negated sign of its second
18596 // operand. Try and bubble any FNEG up after the extend/round to produce
18597 // this optimized pattern. Avoid modifying cases where FP_ROUND and
18598 // TRUNC=1.
18599 SDValue In2 = N->getOperand(1);
18600 // Avoid cases where the extend/round has multiple uses, as duplicating
18601 // those is typically more expensive than removing a fneg.
18602 if (!In2.hasOneUse())
18603 break;
18604 if (In2.getOpcode() != ISD::FP_EXTEND &&
18605 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
18606 break;
18607 In2 = In2.getOperand(0);
18608 if (In2.getOpcode() != ISD::FNEG)
18609 break;
18610 SDLoc DL(N);
18611 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
18612 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
18613 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
18614 }
18615 case ISD::MGATHER: {
18616 const auto *MGN = cast<MaskedGatherSDNode>(N);
18617 const EVT VT = N->getValueType(0);
18618 SDValue Index = MGN->getIndex();
18619 SDValue ScaleOp = MGN->getScale();
18620 ISD::MemIndexType IndexType = MGN->getIndexType();
18621 assert(!MGN->isIndexScaled() &&
18622 "Scaled gather/scatter should not be formed");
18623
18624 SDLoc DL(N);
18625 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18626 return DAG.getMaskedGather(
18627 N->getVTList(), MGN->getMemoryVT(), DL,
18628 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18629 MGN->getBasePtr(), Index, ScaleOp},
18630 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18631
18632 if (narrowIndex(Index, IndexType, DAG))
18633 return DAG.getMaskedGather(
18634 N->getVTList(), MGN->getMemoryVT(), DL,
18635 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18636 MGN->getBasePtr(), Index, ScaleOp},
18637 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18638
18639 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
18640 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
18641 // The sequence will be XLenVT, not the type of Index. Tell
18642 // isSimpleVIDSequence this so we avoid overflow.
18643 if (std::optional<VIDSequence> SimpleVID =
18644 isSimpleVIDSequence(Index, Subtarget.getXLen());
18645 SimpleVID && SimpleVID->StepDenominator == 1) {
18646 const int64_t StepNumerator = SimpleVID->StepNumerator;
18647 const int64_t Addend = SimpleVID->Addend;
18648
18649 // Note: We don't need to check alignment here since (by assumption
18650 // from the existence of the gather), our offsets must be sufficiently
18651 // aligned.
18652
18653 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
18654 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
18655 assert(IndexType == ISD::UNSIGNED_SCALED);
18656 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
18657 DAG.getSignedConstant(Addend, DL, PtrVT));
18658
18659 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
18661 SDValue StridedLoad = DAG.getStridedLoadVP(
18662 VT, DL, MGN->getChain(), BasePtr,
18663 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
18664 EVL, MGN->getMemOperand());
18665 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
18666 StridedLoad, MGN->getPassThru(), EVL);
18667 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
18668 DL);
18669 }
18670 }
18671
18672 SmallVector<int> ShuffleMask;
18673 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
18674 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
18675 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
18676 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
18677 MGN->getMask(), DAG.getUNDEF(VT),
18678 MGN->getMemoryVT(), MGN->getMemOperand(),
18680 SDValue Shuffle =
18681 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
18682 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
18683 }
18684
18685 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
18686 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
18687 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
18688 SmallVector<SDValue> NewIndices;
18689 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
18690 NewIndices.push_back(Index.getOperand(i));
18691 EVT IndexVT = Index.getValueType()
18692 .getHalfNumVectorElementsVT(*DAG.getContext());
18693 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
18694
18695 unsigned ElementSize = VT.getScalarStoreSize();
18696 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
18697 auto EltCnt = VT.getVectorElementCount();
18698 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
18699 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
18700 EltCnt.divideCoefficientBy(2));
18701 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
18702 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
18703 EltCnt.divideCoefficientBy(2));
18704 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
18705
18706 SDValue Gather =
18707 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
18708 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
18709 Index, ScaleOp},
18710 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
18711 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
18712 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
18713 }
18714 break;
18715 }
18716 case ISD::MSCATTER:{
18717 const auto *MSN = cast<MaskedScatterSDNode>(N);
18718 SDValue Index = MSN->getIndex();
18719 SDValue ScaleOp = MSN->getScale();
18720 ISD::MemIndexType IndexType = MSN->getIndexType();
18721 assert(!MSN->isIndexScaled() &&
18722 "Scaled gather/scatter should not be formed");
18723
18724 SDLoc DL(N);
18725 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18726 return DAG.getMaskedScatter(
18727 N->getVTList(), MSN->getMemoryVT(), DL,
18728 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18729 Index, ScaleOp},
18730 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18731
18732 if (narrowIndex(Index, IndexType, DAG))
18733 return DAG.getMaskedScatter(
18734 N->getVTList(), MSN->getMemoryVT(), DL,
18735 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18736 Index, ScaleOp},
18737 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18738
18739 EVT VT = MSN->getValue()->getValueType(0);
18740 SmallVector<int> ShuffleMask;
18741 if (!MSN->isTruncatingStore() &&
18742 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
18743 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
18744 DAG.getUNDEF(VT), ShuffleMask);
18745 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
18746 DAG.getUNDEF(XLenVT), MSN->getMask(),
18747 MSN->getMemoryVT(), MSN->getMemOperand(),
18748 ISD::UNINDEXED, false);
18749 }
18750 break;
18751 }
18752 case ISD::VP_GATHER: {
18753 const auto *VPGN = cast<VPGatherSDNode>(N);
18754 SDValue Index = VPGN->getIndex();
18755 SDValue ScaleOp = VPGN->getScale();
18756 ISD::MemIndexType IndexType = VPGN->getIndexType();
18757 assert(!VPGN->isIndexScaled() &&
18758 "Scaled gather/scatter should not be formed");
18759
18760 SDLoc DL(N);
18761 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18762 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18763 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18764 ScaleOp, VPGN->getMask(),
18765 VPGN->getVectorLength()},
18766 VPGN->getMemOperand(), IndexType);
18767
18768 if (narrowIndex(Index, IndexType, DAG))
18769 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18770 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18771 ScaleOp, VPGN->getMask(),
18772 VPGN->getVectorLength()},
18773 VPGN->getMemOperand(), IndexType);
18774
18775 break;
18776 }
18777 case ISD::VP_SCATTER: {
18778 const auto *VPSN = cast<VPScatterSDNode>(N);
18779 SDValue Index = VPSN->getIndex();
18780 SDValue ScaleOp = VPSN->getScale();
18781 ISD::MemIndexType IndexType = VPSN->getIndexType();
18782 assert(!VPSN->isIndexScaled() &&
18783 "Scaled gather/scatter should not be formed");
18784
18785 SDLoc DL(N);
18786 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18787 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18788 {VPSN->getChain(), VPSN->getValue(),
18789 VPSN->getBasePtr(), Index, ScaleOp,
18790 VPSN->getMask(), VPSN->getVectorLength()},
18791 VPSN->getMemOperand(), IndexType);
18792
18793 if (narrowIndex(Index, IndexType, DAG))
18794 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18795 {VPSN->getChain(), VPSN->getValue(),
18796 VPSN->getBasePtr(), Index, ScaleOp,
18797 VPSN->getMask(), VPSN->getVectorLength()},
18798 VPSN->getMemOperand(), IndexType);
18799 break;
18800 }
18801 case RISCVISD::SHL_VL:
18802 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
18803 return V;
18804 [[fallthrough]];
18805 case RISCVISD::SRA_VL:
18806 case RISCVISD::SRL_VL: {
18807 SDValue ShAmt = N->getOperand(1);
18809 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18810 SDLoc DL(N);
18811 SDValue VL = N->getOperand(4);
18812 EVT VT = N->getValueType(0);
18813 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18814 ShAmt.getOperand(1), VL);
18815 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
18816 N->getOperand(2), N->getOperand(3), N->getOperand(4));
18817 }
18818 break;
18819 }
18820 case ISD::SRA:
18821 if (SDValue V = performSRACombine(N, DAG, Subtarget))
18822 return V;
18823 [[fallthrough]];
18824 case ISD::SRL:
18825 case ISD::SHL: {
18826 if (N->getOpcode() == ISD::SHL) {
18827 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
18828 return V;
18829 }
18830 SDValue ShAmt = N->getOperand(1);
18832 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18833 SDLoc DL(N);
18834 EVT VT = N->getValueType(0);
18835 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18836 ShAmt.getOperand(1),
18837 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
18838 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
18839 }
18840 break;
18841 }
18842 case RISCVISD::ADD_VL:
18843 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18844 return V;
18845 return combineToVWMACC(N, DAG, Subtarget);
18850 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
18851 case RISCVISD::SUB_VL:
18852 case RISCVISD::MUL_VL:
18853 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18862 return performVFMADD_VLCombine(N, DCI, Subtarget);
18863 case RISCVISD::FADD_VL:
18864 case RISCVISD::FSUB_VL:
18865 case RISCVISD::FMUL_VL:
18868 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18869 case ISD::LOAD:
18870 case ISD::STORE: {
18871 if (DCI.isAfterLegalizeDAG())
18872 if (SDValue V = performMemPairCombine(N, DCI))
18873 return V;
18874
18875 if (N->getOpcode() != ISD::STORE)
18876 break;
18877
18878 auto *Store = cast<StoreSDNode>(N);
18879 SDValue Chain = Store->getChain();
18880 EVT MemVT = Store->getMemoryVT();
18881 SDValue Val = Store->getValue();
18882 SDLoc DL(N);
18883
18884 bool IsScalarizable =
18885 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
18886 Store->isSimple() &&
18887 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
18888 isPowerOf2_64(MemVT.getSizeInBits()) &&
18889 MemVT.getSizeInBits() <= Subtarget.getXLen();
18890
18891 // If sufficiently aligned we can scalarize stores of constant vectors of
18892 // any power-of-two size up to XLen bits, provided that they aren't too
18893 // expensive to materialize.
18894 // vsetivli zero, 2, e8, m1, ta, ma
18895 // vmv.v.i v8, 4
18896 // vse64.v v8, (a0)
18897 // ->
18898 // li a1, 1028
18899 // sh a1, 0(a0)
18900 if (DCI.isBeforeLegalize() && IsScalarizable &&
18902 // Get the constant vector bits
18903 APInt NewC(Val.getValueSizeInBits(), 0);
18904 uint64_t EltSize = Val.getScalarValueSizeInBits();
18905 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
18906 if (Val.getOperand(i).isUndef())
18907 continue;
18908 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
18909 i * EltSize);
18910 }
18911 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18912
18913 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
18914 true) <= 2 &&
18916 NewVT, *Store->getMemOperand())) {
18917 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
18918 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
18919 Store->getPointerInfo(), Store->getOriginalAlign(),
18920 Store->getMemOperand()->getFlags());
18921 }
18922 }
18923
18924 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
18925 // vsetivli zero, 2, e16, m1, ta, ma
18926 // vle16.v v8, (a0)
18927 // vse16.v v8, (a1)
18928 if (auto *L = dyn_cast<LoadSDNode>(Val);
18929 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
18930 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18931 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
18932 L->getMemoryVT() == MemVT) {
18933 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18935 NewVT, *Store->getMemOperand()) &&
18937 NewVT, *L->getMemOperand())) {
18938 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
18939 L->getPointerInfo(), L->getOriginalAlign(),
18940 L->getMemOperand()->getFlags());
18941 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
18942 Store->getPointerInfo(), Store->getOriginalAlign(),
18943 Store->getMemOperand()->getFlags());
18944 }
18945 }
18946
18947 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
18948 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
18949 // any illegal types.
18950 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
18951 (DCI.isAfterLegalizeDAG() &&
18953 isNullConstant(Val.getOperand(1)))) {
18954 SDValue Src = Val.getOperand(0);
18955 MVT VecVT = Src.getSimpleValueType();
18956 // VecVT should be scalable and memory VT should match the element type.
18957 if (!Store->isIndexed() && VecVT.isScalableVector() &&
18958 MemVT == VecVT.getVectorElementType()) {
18959 SDLoc DL(N);
18960 MVT MaskVT = getMaskTypeFor(VecVT);
18961 return DAG.getStoreVP(
18962 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
18963 DAG.getConstant(1, DL, MaskVT),
18964 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
18965 Store->getMemOperand(), Store->getAddressingMode(),
18966 Store->isTruncatingStore(), /*IsCompress*/ false);
18967 }
18968 }
18969
18970 break;
18971 }
18972 case ISD::SPLAT_VECTOR: {
18973 EVT VT = N->getValueType(0);
18974 // Only perform this combine on legal MVT types.
18975 if (!isTypeLegal(VT))
18976 break;
18977 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
18978 DAG, Subtarget))
18979 return Gather;
18980 break;
18981 }
18982 case ISD::BUILD_VECTOR:
18983 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
18984 return V;
18985 break;
18987 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
18988 return V;
18989 break;
18991 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
18992 return V;
18993 break;
18995 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
18996 return V;
18997 break;
18998 case RISCVISD::VFMV_V_F_VL: {
18999 const MVT VT = N->getSimpleValueType(0);
19000 SDValue Passthru = N->getOperand(0);
19001 SDValue Scalar = N->getOperand(1);
19002 SDValue VL = N->getOperand(2);
19003
19004 // If VL is 1, we can use vfmv.s.f.
19005 if (isOneConstant(VL))
19006 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
19007 break;
19008 }
19009 case RISCVISD::VMV_V_X_VL: {
19010 const MVT VT = N->getSimpleValueType(0);
19011 SDValue Passthru = N->getOperand(0);
19012 SDValue Scalar = N->getOperand(1);
19013 SDValue VL = N->getOperand(2);
19014
19015 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
19016 // scalar input.
19017 unsigned ScalarSize = Scalar.getValueSizeInBits();
19018 unsigned EltWidth = VT.getScalarSizeInBits();
19019 if (ScalarSize > EltWidth && Passthru.isUndef())
19020 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
19021 return SDValue(N, 0);
19022
19023 // If VL is 1 and the scalar value won't benefit from immediate, we can
19024 // use vmv.s.x.
19025 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
19026 if (isOneConstant(VL) &&
19027 (!Const || Const->isZero() ||
19028 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
19029 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
19030
19031 break;
19032 }
19033 case RISCVISD::VFMV_S_F_VL: {
19034 SDValue Src = N->getOperand(1);
19035 // Try to remove vector->scalar->vector if the scalar->vector is inserting
19036 // into an undef vector.
19037 // TODO: Could use a vslide or vmv.v.v for non-undef.
19038 if (N->getOperand(0).isUndef() &&
19039 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19040 isNullConstant(Src.getOperand(1)) &&
19041 Src.getOperand(0).getValueType().isScalableVector()) {
19042 EVT VT = N->getValueType(0);
19043 EVT SrcVT = Src.getOperand(0).getValueType();
19045 // Widths match, just return the original vector.
19046 if (SrcVT == VT)
19047 return Src.getOperand(0);
19048 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
19049 }
19050 [[fallthrough]];
19051 }
19052 case RISCVISD::VMV_S_X_VL: {
19053 const MVT VT = N->getSimpleValueType(0);
19054 SDValue Passthru = N->getOperand(0);
19055 SDValue Scalar = N->getOperand(1);
19056 SDValue VL = N->getOperand(2);
19057
19058 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
19059 Scalar.getOperand(0).getValueType() == N->getValueType(0))
19060 return Scalar.getOperand(0);
19061
19062 // Use M1 or smaller to avoid over constraining register allocation
19063 const MVT M1VT = getLMUL1VT(VT);
19064 if (M1VT.bitsLT(VT)) {
19065 SDValue M1Passthru =
19066 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
19067 DAG.getVectorIdxConstant(0, DL));
19068 SDValue Result =
19069 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
19070 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
19071 DAG.getVectorIdxConstant(0, DL));
19072 return Result;
19073 }
19074
19075 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
19076 // higher would involve overly constraining the register allocator for
19077 // no purpose.
19078 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
19079 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
19080 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
19081 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
19082
19083 break;
19084 }
19085 case RISCVISD::VMV_X_S: {
19086 SDValue Vec = N->getOperand(0);
19087 MVT VecVT = N->getOperand(0).getSimpleValueType();
19088 const MVT M1VT = getLMUL1VT(VecVT);
19089 if (M1VT.bitsLT(VecVT)) {
19090 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
19091 DAG.getVectorIdxConstant(0, DL));
19092 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
19093 }
19094 break;
19095 }
19099 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
19100 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
19101 switch (IntNo) {
19102 // By default we do not combine any intrinsic.
19103 default:
19104 return SDValue();
19105 case Intrinsic::riscv_vcpop:
19106 case Intrinsic::riscv_vcpop_mask:
19107 case Intrinsic::riscv_vfirst:
19108 case Intrinsic::riscv_vfirst_mask: {
19109 SDValue VL = N->getOperand(2);
19110 if (IntNo == Intrinsic::riscv_vcpop_mask ||
19111 IntNo == Intrinsic::riscv_vfirst_mask)
19112 VL = N->getOperand(3);
19113 if (!isNullConstant(VL))
19114 return SDValue();
19115 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
19116 SDLoc DL(N);
19117 EVT VT = N->getValueType(0);
19118 if (IntNo == Intrinsic::riscv_vfirst ||
19119 IntNo == Intrinsic::riscv_vfirst_mask)
19120 return DAG.getAllOnesConstant(DL, VT);
19121 return DAG.getConstant(0, DL, VT);
19122 }
19123 }
19124 }
19125 case ISD::EXPERIMENTAL_VP_REVERSE:
19126 return performVP_REVERSECombine(N, DAG, Subtarget);
19127 case ISD::VP_STORE:
19128 return performVP_STORECombine(N, DAG, Subtarget);
19129 case ISD::BITCAST: {
19131 SDValue N0 = N->getOperand(0);
19132 EVT VT = N->getValueType(0);
19133 EVT SrcVT = N0.getValueType();
19134 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
19135 unsigned NF = VT.getRISCVVectorTupleNumFields();
19136 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
19137 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
19138 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
19139
19140 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
19141
19142 SDValue Result = DAG.getUNDEF(VT);
19143 for (unsigned i = 0; i < NF; ++i)
19144 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
19145 DAG.getVectorIdxConstant(i, DL));
19146 return Result;
19147 }
19148 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
19149 // type, widen both sides to avoid a trip through memory.
19150 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
19151 VT.isScalarInteger()) {
19152 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
19153 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
19154 Ops[0] = N0;
19155 SDLoc DL(N);
19156 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
19157 N0 = DAG.getBitcast(MVT::i8, N0);
19158 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
19159 }
19160
19161 return SDValue();
19162 }
19163 case ISD::CTPOP:
19164 if (SDValue V = combineScalarCTPOPToVCPOP(N, DAG, Subtarget))
19165 return V;
19166 break;
19167 }
19168
19169 return SDValue();
19170}
19171
19173 EVT XVT, unsigned KeptBits) const {
19174 // For vectors, we don't have a preference..
19175 if (XVT.isVector())
19176 return false;
19177
19178 if (XVT != MVT::i32 && XVT != MVT::i64)
19179 return false;
19180
19181 // We can use sext.w for RV64 or an srai 31 on RV32.
19182 if (KeptBits == 32 || KeptBits == 64)
19183 return true;
19184
19185 // With Zbb we can use sext.h/sext.b.
19186 return Subtarget.hasStdExtZbb() &&
19187 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
19188 KeptBits == 16);
19189}
19190
// NOTE(review): the signature line (original 19191) was lost in extraction;
// from the assert and the folds documented below this is
// RISCVTargetLowering::isDesirableToCommuteWithShift. It decides whether the
// generic combine (shl (add/or x, c1), c2) -> (add/or (shl x, c2), c1 << c2)
// is profitable, based on the materialisation cost of c1 vs. c1 << c2 and on
// patterns (sh{1,2,3}add, LD/ST offset folding) the combine could break.
19192 const SDNode *N, CombineLevel Level) const {
19193 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
19194 N->getOpcode() == ISD::SRL) &&
19195 "Expected shift op");
19196
19197 // The following folds are only desirable if `(OP _, c1 << c2)` can be
19198 // materialised in fewer instructions than `(OP _, c1)`:
19199 //
19200 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
19201 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
19202 SDValue N0 = N->getOperand(0);
19203 EVT Ty = N0.getValueType();
19204
19205 // LD/ST will optimize constant Offset extraction, so when AddNode is used by
19206 // LD/ST, it can still complete the folding optimization operation performed
19207 // above.
// Returns true only when every user of X (other than `User` itself and
// SELECT users, which are skipped) is a plain load or store.
19208 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
19209 for (SDNode *Use : X->users()) {
19210 // This use is the one we're on right now. Skip it
19211 if (Use == User || Use->getOpcode() == ISD::SELECT)
19212 continue;
19213 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
19214 return false;
19215 }
19216 return true;
19217 };
19218
19219 if (Ty.isScalarInteger() &&
19220 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
// A multi-use ADD only tolerates the commute if its other users are
// loads/stores that can fold the constant offset themselves.
19221 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
19222 return isUsedByLdSt(N0.getNode(), N);
19223
19224 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
19225 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19226
19227 // Bail if we might break a sh{1,2,3}add pattern.
19228 if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
19229 C2->getZExtValue() <= 3 && N->hasOneUse() &&
19230 N->user_begin()->getOpcode() == ISD::ADD &&
19231 !isUsedByLdSt(*N->user_begin(), nullptr) &&
19232 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
19233 return false;
19234
19235 if (C1 && C2) {
19236 const APInt &C1Int = C1->getAPIntValue();
19237 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
19238
19239 // We can materialise `c1 << c2` into an add immediate, so it's "free",
19240 // and the combine should happen, to potentially allow further combines
19241 // later.
19242 if (ShiftedC1Int.getSignificantBits() <= 64 &&
19243 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
19244 return true;
19245
19246 // We can materialise `c1` in an add immediate, so it's "free", and the
19247 // combine should be prevented.
// NOTE(review): original line 19249 (the second half of this condition,
// upstream `isLegalAddImmediate(C1Int.getSExtValue()))`) was lost in
// extraction — confirm against the upstream file before relying on this.
19248 if (C1Int.getSignificantBits() <= 64 &&
19250 return false;
19251
19252 // Neither constant will fit into an immediate, so find materialisation
19253 // costs.
19254 int C1Cost =
19255 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
19256 /*CompressionCost*/ true);
19257 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
19258 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
19259 /*CompressionCost*/ true);
19260
19261 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
19262 // combine should be prevented.
19263 if (C1Cost < ShiftedC1Cost)
19264 return false;
19265 }
19266 }
19267
19268 if (!N0->hasOneUse())
19269 return false;
19270
// Look through a sign_extend of a multi-use ADD: commuting is still fine if
// the ADD's other users are loads/stores (same offset-folding argument).
19271 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
19272 N0->getOperand(0)->getOpcode() == ISD::ADD &&
19273 !N0->getOperand(0)->hasOneUse())
19274 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
19275
19276 return true;
19277}
19278
// NOTE(review): the signature line (original 19279) was lost in extraction;
// from the parameters and TLO usage this is
// RISCVTargetLowering::targetShrinkDemandedConstant. It tries to replace the
// constant RHS of an AND/OR/XOR with a cheaper-to-materialise constant that
// agrees on all demanded bits (exploiting simm12 immediates, zext.h/zext.w
// patterns, and sign-extended 32-bit constants).
19280 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
19281 TargetLoweringOpt &TLO) const {
19282 // Delay this optimization as late as possible.
19283 if (!TLO.LegalOps)
19284 return false;
19285
19286 EVT VT = Op.getValueType();
19287 if (VT.isVector())
19288 return false;
19289
19290 unsigned Opcode = Op.getOpcode();
19291 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
19292 return false;
19293
19294 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
19295 if (!C)
19296 return false;
19297
19298 const APInt &Mask = C->getAPIntValue();
19299
19300 // Clear all non-demanded bits initially.
19301 APInt ShrunkMask = Mask & DemandedBits;
19302
19303 // Try to make a smaller immediate by setting undemanded bits.
19304
19305 APInt ExpandedMask = Mask | ~DemandedBits;
19306
// A candidate mask is legal iff it lies between ShrunkMask (all demanded set
// bits) and ExpandedMask (demanded bits plus every undemanded bit).
19307 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
19308 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
19309 };
// Rewrites Op's RHS to NewMask (no-op when unchanged) via TLO.CombineTo.
19310 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
19311 if (NewMask == Mask)
19312 return true;
19313 SDLoc DL(Op);
19314 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
19315 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
19316 Op.getOperand(0), NewC);
19317 return TLO.CombineTo(Op, NewOp);
19318 };
19319
19320 // If the shrunk mask fits in sign extended 12 bits, let the target
19321 // independent code apply it.
19322 if (ShrunkMask.isSignedIntN(12))
19323 return false;
19324
19325 // And has a few special cases for zext.
19326 if (Opcode == ISD::AND) {
19327 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
19328 // otherwise use SLLI + SRLI.
19329 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
19330 if (IsLegalMask(NewMask))
19331 return UseMask(NewMask);
19332
19333 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
19334 if (VT == MVT::i64) {
19335 APInt NewMask = APInt(64, 0xffffffff);
19336 if (IsLegalMask(NewMask))
19337 return UseMask(NewMask);
19338 }
19339 }
19340
19341 // For the remaining optimizations, we need to be able to make a negative
19342 // number through a combination of mask and undemanded bits.
19343 if (!ExpandedMask.isNegative())
19344 return false;
19345
19346 // What is the fewest number of bits we need to represent the negative number.
19347 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
19348
19349 // Try to make a 12 bit negative immediate. If that fails try to make a 32
19350 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
19351 // If we can't create a simm12, we shouldn't change opaque constants.
19352 APInt NewMask = ShrunkMask;
19353 if (MinSignedBits <= 12)
19354 NewMask.setBitsFrom(11);
19355 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
19356 NewMask.setBitsFrom(31);
19357 else
19358 return false;
19359
19360 // Check that our new mask is a subset of the demanded mask.
19361 assert(IsLegalMask(NewMask));
19362 return UseMask(NewMask);
19363}
19364
// Evaluate the generalized bit-reverse (GREV) / generalized OR-combine (GORC)
// butterfly network on a 64-bit value. Each bit of ShAmt enables one stage of
// the network: stage k swaps adjacent (1 << k)-bit groups. For GORC the
// swapped value is OR'd with the original instead of replacing it. A control
// value of 7 corresponds to brev8 (GREV) and orc.b (GORC).
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
  // Per-stage masks selecting the lower element of each 2/4/8/16/32/64-bit
  // group.
  static const uint64_t GREVMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};

  unsigned Stage = 0;
  for (uint64_t Mask : GREVMasks) {
    unsigned Shift = 1u << Stage++;
    // Stage disabled by the control value.
    if ((ShAmt & Shift) == 0)
      continue;
    uint64_t Swapped = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
    x = IsGORC ? (x | Swapped) : Swapped;
  }

  return x;
}
19383
// NOTE(review): the signature line (original 19384) was lost in extraction;
// from the parameters and assert this is
// RISCVTargetLowering::computeKnownBitsForTargetNode, the hook that reports
// known-zero/known-one bits for RISC-V-specific SDNodes.
19385 KnownBits &Known,
19386 const APInt &DemandedElts,
19387 const SelectionDAG &DAG,
19388 unsigned Depth) const {
19389 unsigned BitWidth = Known.getBitWidth();
19390 unsigned Opc = Op.getOpcode();
19391 assert((Opc >= ISD::BUILTIN_OP_END ||
19392 Opc == ISD::INTRINSIC_WO_CHAIN ||
19393 Opc == ISD::INTRINSIC_W_CHAIN ||
19394 Opc == ISD::INTRINSIC_VOID) &&
19395 "Should use MaskedValueIsZero if you don't know whether Op"
19396 " is a target node!");
19397
19398 Known.resetAll();
19399 switch (Opc) {
19400 default: break;
19401 case RISCVISD::SELECT_CC: {
19402 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
19403 // If we don't know any bits, early out.
19404 if (Known.isUnknown())
19405 break;
19406 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
19407
19408 // Only known if known in both the LHS and RHS.
19409 Known = Known.intersectWith(Known2);
19410 break;
19411 }
// NOTE(review): the case labels for this arm (original lines 19412-19413)
// were lost in extraction; the "all zero or operand 0" comment suggests they
// are the conditional-zero nodes (upstream: RISCVISD::CZERO_EQZ/CZERO_NEZ) —
// confirm against the upstream file.
19414 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
19415 // Result is either all zero or operand 0. We can propagate zeros, but not
19416 // ones.
19417 Known.One.clearAllBits();
19418 break;
19419 case RISCVISD::REMUW: {
19420 KnownBits Known2;
19421 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
19422 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
19423 // We only care about the lower 32 bits.
19424 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
19425 // Restore the original width by sign extending.
19426 Known = Known.sext(BitWidth);
19427 break;
19428 }
19429 case RISCVISD::DIVUW: {
19430 KnownBits Known2;
19431 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
19432 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
19433 // We only care about the lower 32 bits.
19434 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
19435 // Restore the original width by sign extending.
19436 Known = Known.sext(BitWidth);
19437 break;
19438 }
19439 case RISCVISD::SLLW: {
19440 KnownBits Known2;
19441 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
19442 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// Only the low 5 bits of the shift amount are used by the 32-bit shift.
19443 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
19444 // Restore the original width by sign extending.
19445 Known = Known.sext(BitWidth);
19446 break;
19447 }
19448 case RISCVISD::CTZW: {
19449 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
19450 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
19451 unsigned LowBits = llvm::bit_width(PossibleTZ);
19452 Known.Zero.setBitsFrom(LowBits);
19453 break;
19454 }
19455 case RISCVISD::CLZW: {
19456 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
19457 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
19458 unsigned LowBits = llvm::bit_width(PossibleLZ);
19459 Known.Zero.setBitsFrom(LowBits);
19460 break;
19461 }
19462 case RISCVISD::BREV8:
19463 case RISCVISD::ORC_B: {
19464 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
19465 // control value of 7 is equivalent to brev8 and orc.b.
19466 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
19467 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
19468 // To compute zeros, we need to invert the value and invert it back after.
19469 Known.Zero =
19470 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
19471 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
19472 break;
19473 }
19474 case RISCVISD::READ_VLENB: {
19475 // We can use the minimum and maximum VLEN values to bound VLENB. We
19476 // know VLEN must be a power of two.
19477 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
19478 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
19479 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
19480 Known.Zero.setLowBits(Log2_32(MinVLenB));
19481 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
// When min and max agree, VLENB is an exact power of two: pin that one bit.
19482 if (MaxVLenB == MinVLenB)
19483 Known.One.setBit(Log2_32(MinVLenB));
19484 break;
19485 }
19486 case RISCVISD::FCLASS: {
19487 // fclass will only set one of the low 10 bits.
19488 Known.Zero.setBitsFrom(10);
19489 break;
19490 }
// NOTE(review): the case labels for this arm (original lines 19491-19492)
// were lost in extraction; the operand indexing below implies they are
// ISD::INTRINSIC_WO_CHAIN and ISD::INTRINSIC_W_CHAIN — confirm upstream.
19493 unsigned IntNo =
19494 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
19495 switch (IntNo) {
19496 default:
19497 // We can't do anything for most intrinsics.
19498 break;
19499 case Intrinsic::riscv_vsetvli:
19500 case Intrinsic::riscv_vsetvlimax: {
19501 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
19502 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
19503 RISCVII::VLMUL VLMUL =
19504 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
19505 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
19506 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
19507 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
19508 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
19509
19510 // Result of vsetvli must be not larger than AVL.
19511 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
19512 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
19513
19514 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
19515 if (BitWidth > KnownZeroFirstBit)
19516 Known.Zero.setBitsFrom(KnownZeroFirstBit);
19517 break;
19518 }
19519 }
19520 break;
19521 }
19522 }
19523}
19524
// NOTE(review): the signature line (original 19525) was lost in extraction;
// from the parameters this is
// RISCVTargetLowering::ComputeNumSignBitsForTargetNode, the hook that reports
// a lower bound on the number of sign bits for RISC-V-specific SDNodes.
19526 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
19527 unsigned Depth) const {
19528 switch (Op.getOpcode()) {
19529 default:
19530 break;
19531 case RISCVISD::SELECT_CC: {
19532 unsigned Tmp =
19533 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
19534 if (Tmp == 1) return 1; // Early out.
19535 unsigned Tmp2 =
19536 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
// The select produces one of the two operands, so only the minimum of
// their sign-bit counts is guaranteed.
19537 return std::min(Tmp, Tmp2);
19538 }
// NOTE(review): the case labels for this arm (original lines 19539-19540)
// were lost in extraction; the comment below matches the conditional-zero
// nodes (upstream: RISCVISD::CZERO_EQZ/CZERO_NEZ) — confirm upstream.
19541 // Output is either all zero or operand 0. We can propagate sign bit count
19542 // from operand 0.
19543 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
19544 case RISCVISD::ABSW: {
19545 // We expand this at isel to negw+max. The result will have 33 sign bits
19546 // if the input has at least 33 sign bits.
19547 unsigned Tmp =
19548 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
19549 if (Tmp < 33) return 1;
19550 return 33;
19551 }
19552 case RISCVISD::SLLW:
19553 case RISCVISD::SRAW:
19554 case RISCVISD::SRLW:
19555 case RISCVISD::DIVW:
19556 case RISCVISD::DIVUW:
19557 case RISCVISD::REMUW:
19558 case RISCVISD::ROLW:
19559 case RISCVISD::RORW:
// NOTE(review): four additional case labels (original lines 19560-19563)
// were lost in extraction — confirm the full label list upstream.
19564 // TODO: As the result is sign-extended, this is conservatively correct. A
19565 // more precise answer could be calculated for SRAW depending on known
19566 // bits in the shift amount.
19567 return 33;
19568 case RISCVISD::VMV_X_S: {
19569 // The number of sign bits of the scalar result is computed by obtaining the
19570 // element type of the input vector operand, subtracting its width from the
19571 // XLEN, and then adding one (sign bit within the element type). If the
19572 // element type is wider than XLen, the least-significant XLEN bits are
19573 // taken.
19574 unsigned XLen = Subtarget.getXLen();
19575 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
19576 if (EltBits <= XLen)
19577 return XLen - EltBits + 1;
19578 break;
19579 }
// NOTE(review): this arm's case label (original line 19580, presumably
// ISD::INTRINSIC_W_CHAIN given the operand-1 intrinsic ID) was lost in
// extraction — confirm upstream.
19581 unsigned IntNo = Op.getConstantOperandVal(1);
19582 switch (IntNo) {
19583 default:
19584 break;
19585 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
19586 case Intrinsic::riscv_masked_atomicrmw_add_i64:
19587 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
19588 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
19589 case Intrinsic::riscv_masked_atomicrmw_max_i64:
19590 case Intrinsic::riscv_masked_atomicrmw_min_i64:
19591 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
19592 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
19593 case Intrinsic::riscv_masked_cmpxchg_i64:
19594 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
19595 // narrow atomic operation. These are implemented using atomic
19596 // operations at the minimum supported atomicrmw/cmpxchg width whose
19597 // result is then sign extended to XLEN. With +A, the minimum width is
19598 // 32 for both 64 and 32.
19599 assert(Subtarget.getXLen() == 64);
// NOTE(review): original line 19600 was lost in extraction.
19601 assert(Subtarget.hasStdExtA());
19602 return 33;
19603 }
19604 break;
19605 }
19606 }
19607
// Conservative default: at least the sign bit itself is known.
19608 return 1;
19609}
19610
// NOTE(review): the signature line (original 19611) was lost in extraction;
// from the parameters and fall-through call this is
// RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode, which reports
// whether a target node can introduce undef/poison not present in its inputs.
19612 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
19613 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
19614
19615 // TODO: Add more target nodes.
19616 switch (Op.getOpcode()) {
// NOTE(review): this arm's case label (original line 19617, presumably
// RISCVISD::SELECT_CC given the comment) was lost in extraction — confirm
// upstream.
19618 // Integer select_cc cannot create poison.
19619 // TODO: What are the FP poison semantics?
19620 // TODO: This instruction blocks poison from the unselected operand, can
19621 // we do anything with that?
19622 return !Op.getValueType().isInteger();
19623 }
// NOTE(review): original line 19624 (the start of the fall-through call,
// upstream `return TargetLowering::canCreateUndefOrPoisonForTargetNode(`)
// was lost in extraction.
19625 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
19626}
19627
19628const Constant *
// NOTE(review): the rest of the signature (original line 19629, the
// RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) name line)
// was lost in extraction. The function recovers the IR Constant behind a
// load from a constant pool, handling both the LLA and HI/ADD_LO addressing
// forms; returns nullptr when the load is not a simple constant-pool access.
19630 assert(Ld && "Unexpected null LoadSDNode");
19631 if (!ISD::isNormalLoad(Ld))
19632 return nullptr;
19633
19634 SDValue Ptr = Ld->getBasePtr();
19635
19636 // Only constant pools with no offset are supported.
19637 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
19638 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
19639 if (!CNode || CNode->isMachineConstantPoolEntry() ||
19640 CNode->getOffset() != 0)
19641 return nullptr;
19642
19643 return CNode;
19644 };
19645
19646 // Simple case, LLA.
19647 if (Ptr.getOpcode() == RISCVISD::LLA) {
19648 auto *CNode = GetSupportedConstantPool(Ptr);
19649 if (!CNode || CNode->getTargetFlags() != 0)
19650 return nullptr;
19651
19652 return CNode->getConstVal();
19653 }
19654
19655 // Look for a HI and ADD_LO pair.
19656 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
19657 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
19658 return nullptr;
19659
19660 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
19661 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
19662
// Both halves must carry the matching relocation flags for a %lo/%hi pair.
19663 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
19664 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
19665 return nullptr;
19666
// The %hi and %lo must refer to the same constant-pool entry.
19667 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
19668 return nullptr;
19669
19670 return CNodeLo->getConstVal();
19671}
19672
// NOTE(review): the first signature line (original 19673, upstream
// `static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,`)
// was lost in extraction. Expands ReadCounterWide on RV32 into the classic
// hi/lo/hi re-read loop so a carry between the two 32-bit CSR reads is
// detected and retried.
19674 MachineBasicBlock *BB) {
19675 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
19676
19677 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
19678 // Should the count have wrapped while it was being read, we need to try
19679 // again.
19680 // For example:
19681 // ```
19682 // read:
19683 // csrrs x3, counterh # load high word of counter
19684 // csrrs x2, counter # load low word of counter
19685 // csrrs x4, counterh # load high word of counter
19686 // bne x3, x4, read # check if high word reads match, otherwise try again
19687 // ```
19688
19689 MachineFunction &MF = *BB->getParent();
19690 const BasicBlock *LLVMBB = BB->getBasicBlock();
// NOTE(review): original line 19691 (the insertion-iterator declaration,
// upstream `MachineFunction::iterator It = ++BB->getIterator();`) was lost
// in extraction.
19692
19693 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
19694 MF.insert(It, LoopMBB);
19695
19696 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
19697 MF.insert(It, DoneMBB);
19698
19699 // Transfer the remainder of BB and its successor edges to DoneMBB.
19700 DoneMBB->splice(DoneMBB->begin(), BB,
19701 std::next(MachineBasicBlock::iterator(MI)), BB->end());
// NOTE(review): original line 19702 (successor transfer, upstream
// `DoneMBB->transferSuccessorsAndUpdatePHIs(BB);`) was lost in extraction.
19703
19704 BB->addSuccessor(LoopMBB);
19705
// NOTE(review): original line 19706 (the MachineRegisterInfo declaration)
// was lost in extraction.
19707 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19708 Register LoReg = MI.getOperand(0).getReg();
19709 Register HiReg = MI.getOperand(1).getReg();
19710 int64_t LoCounter = MI.getOperand(2).getImm();
19711 int64_t HiCounter = MI.getOperand(3).getImm();
19712 DebugLoc DL = MI.getDebugLoc();
19713
// NOTE(review): original line 19714 (the TargetInstrInfo declaration) was
// lost in extraction.
// Emit: hi read, lo read, hi re-read — order matters for wrap detection.
19715 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
19716 .addImm(HiCounter)
19717 .addReg(RISCV::X0);
19718 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
19719 .addImm(LoCounter)
19720 .addReg(RISCV::X0);
19721 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
19722 .addImm(HiCounter)
19723 .addReg(RISCV::X0);
19724
// Loop back if the two high-word reads disagree (the counter wrapped).
19725 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
19726 .addReg(HiReg)
19727 .addReg(ReadAgainReg)
19728 .addMBB(LoopMBB);
19729
19730 LoopMBB->addSuccessor(LoopMBB);
19731 LoopMBB->addSuccessor(DoneMBB);
19732
19733 MI.eraseFromParent();
19734
19735 return DoneMBB;
19736}
19737
// NOTE(review): the first signature lines (original 19738-19739, upstream
// `static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
// MachineBasicBlock *BB,`) were lost in extraction. Expands SplitF64Pseudo
// by spilling the FPR64 source to a stack slot and reloading its two 32-bit
// halves into GPRs with LW at offsets 0 and 4.
19740 const RISCVSubtarget &Subtarget) {
19741 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
19742
19743 MachineFunction &MF = *BB->getParent();
19744 DebugLoc DL = MI.getDebugLoc();
// NOTE(review): original lines 19745-19746 (the TargetInstrInfo and
// RegisterInfo declarations) were lost in extraction.
19747 Register LoReg = MI.getOperand(0).getReg();
19748 Register HiReg = MI.getOperand(1).getReg();
19749 Register SrcReg = MI.getOperand(2).getReg();
19750
19751 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
// A dedicated frame index shared by all F64 move pseudos in this function.
19752 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19753
19754 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
19755 RI, Register());
// NOTE(review): original lines 19756-19760 (the MachinePointerInfo and the
// MMOLo/MMOHi getMachineMemOperand initialisers) were partly lost in
// extraction; only the MMOLo declaration fragment below survived.
19757 MachineMemOperand *MMOLo =
// Reload the low half (offset 0) and high half (offset 4); little-endian
// layout puts the low 32 bits first.
19761 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
19762 .addFrameIndex(FI)
19763 .addImm(0)
19764 .addMemOperand(MMOLo);
19765 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
19766 .addFrameIndex(FI)
19767 .addImm(4)
19768 .addMemOperand(MMOHi);
19769 MI.eraseFromParent(); // The pseudo instruction is gone now.
19770 return BB;
19771}
19772
// NOTE(review): the first signature lines (original 19773-19774, upstream
// `static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
// MachineBasicBlock *BB,`) were lost in extraction. Inverse of
// emitSplitF64Pseudo: stores two 32-bit GPR halves to a stack slot with SW
// and reloads them as a single FPR64 value.
19775 const RISCVSubtarget &Subtarget) {
19776 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
19777 "Unexpected instruction");
19778
19779 MachineFunction &MF = *BB->getParent();
19780 DebugLoc DL = MI.getDebugLoc();
// NOTE(review): original lines 19781-19782 (the TargetInstrInfo and
// RegisterInfo declarations) were lost in extraction.
19783 Register DstReg = MI.getOperand(0).getReg();
19784 Register LoReg = MI.getOperand(1).getReg();
19785 Register HiReg = MI.getOperand(2).getReg();
19786
19787 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
// Shared frame index for F64 moves (same slot emitSplitF64Pseudo uses).
19788 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19789
// NOTE(review): original lines 19790-19794 (the MachinePointerInfo and the
// MMOLo/MMOHi getMachineMemOperand initialisers) were partly lost in
// extraction; only the MMOLo declaration fragment below survived.
19791 MachineMemOperand *MMOLo =
// Store the low half at offset 0 and the high half at offset 4.
19795 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19796 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
19797 .addFrameIndex(FI)
19798 .addImm(0)
19799 .addMemOperand(MMOLo);
19800 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19801 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
19802 .addFrameIndex(FI)
19803 .addImm(4)
19804 .addMemOperand(MMOHi);
19805 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
19806 MI.eraseFromParent(); // The pseudo instruction is gone now.
19807 return BB;
19808}
19809
// NOTE(review): the signature line (original 19810, upstream
// `static bool isSelectPseudo(MachineInstr &MI) {`) was lost in extraction.
// Returns true when MI is one of the RISC-V Select_* pseudos that
// emitSelectPseudo below expands into branch-and-phi control flow.
19811 switch (MI.getOpcode()) {
19812 default:
19813 return false;
19814 case RISCV::Select_GPR_Using_CC_GPR:
19815 case RISCV::Select_GPR_Using_CC_Imm:
19816 case RISCV::Select_FPR16_Using_CC_GPR:
19817 case RISCV::Select_FPR16INX_Using_CC_GPR:
19818 case RISCV::Select_FPR32_Using_CC_GPR:
19819 case RISCV::Select_FPR32INX_Using_CC_GPR:
19820 case RISCV::Select_FPR64_Using_CC_GPR:
19821 case RISCV::Select_FPR64INX_Using_CC_GPR:
19822 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19823 return true;
19824 }
19825}
19826
// NOTE(review): the first signature line (original 19827, upstream
// `static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI,
// MachineBasicBlock *BB,`) was lost in extraction. Expands a quiet FP
// compare pseudo: performs the relational compare with FFLAGS
// saved/restored around it (so a quiet-NaN comparison raises no flags),
// then issues a dummy FEQ to raise Invalid for signaling NaNs only.
19828 unsigned RelOpcode, unsigned EqOpcode,
19829 const RISCVSubtarget &Subtarget) {
19830 DebugLoc DL = MI.getDebugLoc();
19831 Register DstReg = MI.getOperand(0).getReg();
19832 Register Src1Reg = MI.getOperand(1).getReg();
19833 Register Src2Reg = MI.getOperand(2).getReg();
// NOTE(review): original line 19834 (the MachineRegisterInfo declaration)
// was lost in extraction.
19835 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
// NOTE(review): original line 19836 (the TargetInstrInfo declaration) was
// lost in extraction.
19837
19838 // Save the current FFLAGS.
19839 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
19840
19841 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
19842 .addReg(Src1Reg)
19843 .addReg(Src2Reg);
// NOTE(review): original lines 19844-19845 (MIFlag propagation onto MIB)
// were lost in extraction.
19846
19847 // Restore the FFLAGS.
19848 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19849 .addReg(SavedFFlags, RegState::Kill);
19850
19851 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
// Destination X0 discards the result; only the flag side effect matters.
19852 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
19853 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
19854 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
// NOTE(review): original lines 19855-19856 (MIFlag propagation onto MIB2)
// were lost in extraction.
19857
19858 // Erase the pseudoinstruction.
19859 MI.eraseFromParent();
19860 return BB;
19861}
19862
19863static MachineBasicBlock *
// NOTE(review): original line 19864 (the function-name line, upstream
// `EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,`)
// was lost in extraction. Lowers two nested Select_FPRX_ pseudos in one
// step, producing the flattened CFG with a single 3-way PHI described below.
19865 MachineBasicBlock *ThisMBB,
19866 const RISCVSubtarget &Subtarget) {
19867 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
19868 // Without this, custom-inserter would have generated:
19869 //
19870 // A
19871 // | \
19872 // | B
19873 // | /
19874 // C
19875 // | \
19876 // | D
19877 // | /
19878 // E
19879 //
19880 // A: X = ...; Y = ...
19881 // B: empty
19882 // C: Z = PHI [X, A], [Y, B]
19883 // D: empty
19884 // E: PHI [X, C], [Z, D]
19885 //
19886 // If we lower both Select_FPRX_ in a single step, we can instead generate:
19887 //
19888 // A
19889 // | \
19890 // | C
19891 // | /|
19892 // |/ |
19893 // | |
19894 // | D
19895 // | /
19896 // E
19897 //
19898 // A: X = ...; Y = ...
19899 // D: empty
19900 // E: PHI [X, A], [X, C], [Y, D]
19901
19902 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19903 const DebugLoc &DL = First.getDebugLoc();
19904 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
19905 MachineFunction *F = ThisMBB->getParent();
19906 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
19907 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
19908 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
19909 MachineFunction::iterator It = ++ThisMBB->getIterator();
19910 F->insert(It, FirstMBB);
19911 F->insert(It, SecondMBB);
19912 F->insert(It, SinkMBB);
19913
19914 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
// NOTE(review): original line 19916 (the second splice argument, upstream
// `std::next(MachineBasicBlock::iterator(Second)),`) was lost in extraction.
19915 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
19917 ThisMBB->end());
19918 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
19919
19920 // Fallthrough block for ThisMBB.
19921 ThisMBB->addSuccessor(FirstMBB);
19922 // Fallthrough block for FirstMBB.
19923 FirstMBB->addSuccessor(SecondMBB);
19924 ThisMBB->addSuccessor(SinkMBB);
19925 FirstMBB->addSuccessor(SinkMBB);
19926 // This is fallthrough.
19927 SecondMBB->addSuccessor(SinkMBB);
19928
19929 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
19930 Register FLHS = First.getOperand(1).getReg();
19931 Register FRHS = First.getOperand(2).getReg();
19932 // Insert appropriate branch.
19933 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
19934 .addReg(FLHS)
19935 .addReg(FRHS)
19936 .addMBB(SinkMBB);
19937
19938 Register SLHS = Second.getOperand(1).getReg();
19939 Register SRHS = Second.getOperand(2).getReg();
19940 Register Op1Reg4 = First.getOperand(4).getReg();
19941 Register Op1Reg5 = First.getOperand(5).getReg();
19942
19943 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
19944 // Insert appropriate branch.
19945 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
19946 .addReg(SLHS)
19947 .addReg(SRHS)
19948 .addMBB(SinkMBB);
19949
19950 Register DestReg = Second.getOperand(0).getReg();
19951 Register Op2Reg4 = Second.getOperand(4).getReg();
// Single 3-way PHI merging the outer true value (from ThisMBB and FirstMBB)
// with the inner false value (from SecondMBB) — the flattened CFG above.
19952 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
19953 .addReg(Op2Reg4)
19954 .addMBB(ThisMBB)
19955 .addReg(Op1Reg4)
19956 .addMBB(FirstMBB)
19957 .addReg(Op1Reg5)
19958 .addMBB(SecondMBB);
19959
19960 // Now remove the Select_FPRX_s.
19961 First.eraseFromParent();
19962 Second.eraseFromParent();
19963 return SinkMBB;
19964}
19965
// NOTE(review): the first signature lines (original 19966-19967, upstream
// `static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
// MachineBasicBlock *BB,`) were lost in extraction. Custom inserter for the
// Select_* pseudos: builds the triangle CFG and PHIs described below, batching
// a run of selects that share the same condition into one branch.
19968 const RISCVSubtarget &Subtarget) {
19969 // To "insert" Select_* instructions, we actually have to insert the triangle
19970 // control-flow pattern. The incoming instructions know the destination vreg
19971 // to set, the condition code register to branch on, the true/false values to
19972 // select between, and the condcode to use to select the appropriate branch.
19973 //
19974 // We produce the following control flow:
19975 // HeadMBB
19976 // | \
19977 // | IfFalseMBB
19978 // | /
19979 // TailMBB
19980 //
19981 // When we find a sequence of selects we attempt to optimize their emission
19982 // by sharing the control flow. Currently we only handle cases where we have
19983 // multiple selects with the exact same condition (same LHS, RHS and CC).
19984 // The selects may be interleaved with other instructions if the other
19985 // instructions meet some requirements we deem safe:
19986 // - They are not pseudo instructions.
19987 // - They are debug instructions. Otherwise,
19988 // - They do not have side-effects, do not access memory and their inputs do
19989 // not depend on the results of the select pseudo-instructions.
19990 // The TrueV/FalseV operands of the selects cannot depend on the result of
19991 // previous selects in the sequence.
19992 // These conditions could be further relaxed. See the X86 target for a
19993 // related approach and more information.
19994 //
19995 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19996 // is checked here and handled by a separate function -
19997 // EmitLoweredCascadedSelect.
19998
// Detect the nested-FP-select pattern: the next (non-debug) instruction is
// the same select opcode consuming this select's result as its false value.
19999 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
20000 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
20001 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
20002 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
20003 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
20004 Next->getOperand(5).isKill())
20005 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
20006
20007 Register LHS = MI.getOperand(1).getReg();
20008 Register RHS;
// RHS stays invalid for the Select_GPR_Using_CC_Imm form (operand 2 is an
// immediate); the branch emission below checks isImm() accordingly.
20009 if (MI.getOperand(2).isReg())
20010 RHS = MI.getOperand(2).getReg();
20011 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
20012
20013 SmallVector<MachineInstr *, 4> SelectDebugValues;
20014 SmallSet<Register, 4> SelectDests;
20015 SelectDests.insert(MI.getOperand(0).getReg());
20016
// Scan forward, extending the batch while selects share the exact same
// condition and no interleaved instruction is unsafe to hoist past.
20017 MachineInstr *LastSelectPseudo = &MI;
20018 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
20019 SequenceMBBI != E; ++SequenceMBBI) {
20020 if (SequenceMBBI->isDebugInstr())
20021 continue;
20022 if (isSelectPseudo(*SequenceMBBI)) {
20023 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
20024 !SequenceMBBI->getOperand(2).isReg() ||
20025 SequenceMBBI->getOperand(2).getReg() != RHS ||
20026 SequenceMBBI->getOperand(3).getImm() != CC ||
20027 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
20028 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
20029 break;
20030 LastSelectPseudo = &*SequenceMBBI;
20031 SequenceMBBI->collectDebugValues(SelectDebugValues);
20032 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
20033 continue;
20034 }
20035 if (SequenceMBBI->hasUnmodeledSideEffects() ||
20036 SequenceMBBI->mayLoadOrStore() ||
20037 SequenceMBBI->usesCustomInsertionHook())
20038 break;
20039 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
20040 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
20041 }))
20042 break;
20043 }
20044
20045 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
20046 const BasicBlock *LLVM_BB = BB->getBasicBlock();
20047 DebugLoc DL = MI.getDebugLoc();
// NOTE(review): original line 20048 (the insertion-iterator declaration,
// upstream `MachineFunction::iterator I = ++BB->getIterator();`) was lost in
// extraction.
20049
20050 MachineBasicBlock *HeadMBB = BB;
20051 MachineFunction *F = BB->getParent();
20052 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
20053 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
20054
20055 F->insert(I, IfFalseMBB);
20056 F->insert(I, TailMBB);
20057
20058 // Set the call frame size on entry to the new basic blocks.
20059 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
20060 IfFalseMBB->setCallFrameSize(CallFrameSize);
20061 TailMBB->setCallFrameSize(CallFrameSize);
20062
20063 // Transfer debug instructions associated with the selects to TailMBB.
20064 for (MachineInstr *DebugInstr : SelectDebugValues) {
20065 TailMBB->push_back(DebugInstr->removeFromParent());
20066 }
20067
20068 // Move all instructions after the sequence to TailMBB.
20069 TailMBB->splice(TailMBB->end(), HeadMBB,
20070 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
20071 // Update machine-CFG edges by transferring all successors of the current
20072 // block to the new block which will contain the Phi nodes for the selects.
20073 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
20074 // Set the successors for HeadMBB.
20075 HeadMBB->addSuccessor(IfFalseMBB);
20076 HeadMBB->addSuccessor(TailMBB);
20077
20078 // Insert appropriate branch.
20079 if (MI.getOperand(2).isImm())
20080 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
20081 .addReg(LHS)
20082 .addImm(MI.getOperand(2).getImm())
20083 .addMBB(TailMBB);
20084 else
20085 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
20086 .addReg(LHS)
20087 .addReg(RHS)
20088 .addMBB(TailMBB);
20089
20090 // IfFalseMBB just falls through to TailMBB.
20091 IfFalseMBB->addSuccessor(TailMBB);
20092
20093 // Create PHIs for all of the select pseudo-instructions.
20094 auto SelectMBBI = MI.getIterator();
20095 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
20096 auto InsertionPoint = TailMBB->begin();
20097 while (SelectMBBI != SelectEnd) {
20098 auto Next = std::next(SelectMBBI);
20099 if (isSelectPseudo(*SelectMBBI)) {
20100 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
20101 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
20102 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
20103 .addReg(SelectMBBI->getOperand(4).getReg())
20104 .addMBB(HeadMBB)
20105 .addReg(SelectMBBI->getOperand(5).getReg())
20106 .addMBB(IfFalseMBB)
20107 SelectMBBI->eraseFromParent();
20108 }
20109 SelectMBBI = Next;
20110 }
20111
// We introduced PHIs, so the function no longer satisfies NoPHIs.
20112 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
20113 return TailMBB;
20114}
20115
20116// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
// Helper to find the Masked Pseudo instruction corresponding to an MC opcode
// at a given LMUL and SEW: first map the MC opcode back to its unmasked
// pseudo via the inverse-pseudos table, then look up that pseudo's masked
// variant. Both lookups are asserted to succeed.
20117 static const RISCV::RISCVMaskedPseudoInfo *
20118 lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
// NOTE(review): original line 20119 (the declaration receiving this call,
// upstream `const RISCVVInversePseudosTable::PseudoInfo *Inverse =`) was
// lost in extraction.
20120 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
20121 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
// NOTE(review): original line 20122 (the declaration receiving this call,
// upstream `const RISCV::RISCVMaskedPseudoInfo *Masked =`) was lost in
// extraction.
20123 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
20124 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
20125 return Masked;
20126}
20127
// NOTE(review): the signature lines (original 20128-20129, upstream
// `static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
// MachineBasicBlock *BB,`) were lost in extraction — confirm the exact name
// upstream. Expands a vector fround-without-exceptions pseudo into
// VFCVT_X_F followed by VFCVT_F_X (round-trip through integer), with FFLAGS
// saved before and restored after so no FP exception flags leak out.
20130 unsigned CVTXOpc) {
20131 DebugLoc DL = MI.getDebugLoc();
20132
// NOTE(review): original line 20133 (the TargetInstrInfo declaration) was
// lost in extraction.
20134
// NOTE(review): original line 20135 (the MachineRegisterInfo declaration)
// was lost in extraction.
20136 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
20137
20138 // Save the old value of FFLAGS.
20139 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
20140
20141 assert(MI.getNumOperands() == 7);
20142
20143 // Emit a VFCVT_X_F
20144 const TargetRegisterInfo *TRI =
// NOTE(review): original line 20145 (the initializer, upstream
// `MF.getSubtarget().getRegisterInfo();`-style expression) was lost in
// extraction.
20146 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
20147 Register Tmp = MRI.createVirtualRegister(RC);
20148 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
20149 .add(MI.getOperand(1))
20150 .add(MI.getOperand(2))
20151 .add(MI.getOperand(3))
20152 .add(MachineOperand::CreateImm(7)) // frm = DYN
20153 .add(MI.getOperand(4))
20154 .add(MI.getOperand(5))
20155 .add(MI.getOperand(6))
20156 .add(MachineOperand::CreateReg(RISCV::FRM,
20157 /*IsDef*/ false,
20158 /*IsImp*/ true));
20159
20160 // Emit a VFCVT_F_X
20161 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
20162 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
20163 // There is no E8 variant for VFCVT_F_X.
20164 assert(Log2SEW >= 4);
20165 unsigned CVTFOpc =
20166 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
20167 ->MaskedPseudo;
20168
20169 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
20170 .add(MI.getOperand(0))
20171 .add(MI.getOperand(1))
20172 .addReg(Tmp)
20173 .add(MI.getOperand(3))
20174 .add(MachineOperand::CreateImm(7)) // frm = DYN
20175 .add(MI.getOperand(4))
20176 .add(MI.getOperand(5))
20177 .add(MI.getOperand(6))
20178 .add(MachineOperand::CreateReg(RISCV::FRM,
20179 /*IsDef*/ false,
20180 /*IsImp*/ true));
20181
20182 // Restore FFLAGS.
20183 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
20184 .addReg(SavedFFLAGS, RegState::Kill);
20185
20186 // Erase the pseudoinstruction.
20187 MI.eraseFromParent();
20188 return BB;
20189}
20190
20192 const RISCVSubtarget &Subtarget) {
20193 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
20194 const TargetRegisterClass *RC;
20195 switch (MI.getOpcode()) {
20196 default:
20197 llvm_unreachable("Unexpected opcode");
20198 case RISCV::PseudoFROUND_H:
20199 CmpOpc = RISCV::FLT_H;
20200 F2IOpc = RISCV::FCVT_W_H;
20201 I2FOpc = RISCV::FCVT_H_W;
20202 FSGNJOpc = RISCV::FSGNJ_H;
20203 FSGNJXOpc = RISCV::FSGNJX_H;
20204 RC = &RISCV::FPR16RegClass;
20205 break;
20206 case RISCV::PseudoFROUND_H_INX:
20207 CmpOpc = RISCV::FLT_H_INX;
20208 F2IOpc = RISCV::FCVT_W_H_INX;
20209 I2FOpc = RISCV::FCVT_H_W_INX;
20210 FSGNJOpc = RISCV::FSGNJ_H_INX;
20211 FSGNJXOpc = RISCV::FSGNJX_H_INX;
20212 RC = &RISCV::GPRF16RegClass;
20213 break;
20214 case RISCV::PseudoFROUND_S:
20215 CmpOpc = RISCV::FLT_S;
20216 F2IOpc = RISCV::FCVT_W_S;
20217 I2FOpc = RISCV::FCVT_S_W;
20218 FSGNJOpc = RISCV::FSGNJ_S;
20219 FSGNJXOpc = RISCV::FSGNJX_S;
20220 RC = &RISCV::FPR32RegClass;
20221 break;
20222 case RISCV::PseudoFROUND_S_INX:
20223 CmpOpc = RISCV::FLT_S_INX;
20224 F2IOpc = RISCV::FCVT_W_S_INX;
20225 I2FOpc = RISCV::FCVT_S_W_INX;
20226 FSGNJOpc = RISCV::FSGNJ_S_INX;
20227 FSGNJXOpc = RISCV::FSGNJX_S_INX;
20228 RC = &RISCV::GPRF32RegClass;
20229 break;
20230 case RISCV::PseudoFROUND_D:
20231 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
20232 CmpOpc = RISCV::FLT_D;
20233 F2IOpc = RISCV::FCVT_L_D;
20234 I2FOpc = RISCV::FCVT_D_L;
20235 FSGNJOpc = RISCV::FSGNJ_D;
20236 FSGNJXOpc = RISCV::FSGNJX_D;
20237 RC = &RISCV::FPR64RegClass;
20238 break;
20239 case RISCV::PseudoFROUND_D_INX:
20240 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
20241 CmpOpc = RISCV::FLT_D_INX;
20242 F2IOpc = RISCV::FCVT_L_D_INX;
20243 I2FOpc = RISCV::FCVT_D_L_INX;
20244 FSGNJOpc = RISCV::FSGNJ_D_INX;
20245 FSGNJXOpc = RISCV::FSGNJX_D_INX;
20246 RC = &RISCV::GPRRegClass;
20247 break;
20248 }
20249
20250 const BasicBlock *BB = MBB->getBasicBlock();
20251 DebugLoc DL = MI.getDebugLoc();
20253
20255 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
20256 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
20257
20258 F->insert(I, CvtMBB);
20259 F->insert(I, DoneMBB);
20260 // Move all instructions after the sequence to DoneMBB.
20261 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
20262 MBB->end());
20263 // Update machine-CFG edges by transferring all successors of the current
20264 // block to the new block which will contain the Phi nodes for the selects.
20266 // Set the successors for MBB.
20267 MBB->addSuccessor(CvtMBB);
20268 MBB->addSuccessor(DoneMBB);
20269
20270 Register DstReg = MI.getOperand(0).getReg();
20271 Register SrcReg = MI.getOperand(1).getReg();
20272 Register MaxReg = MI.getOperand(2).getReg();
20273 int64_t FRM = MI.getOperand(3).getImm();
20274
20275 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
20277
20278 Register FabsReg = MRI.createVirtualRegister(RC);
20279 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
20280
20281 // Compare the FP value to the max value.
20282 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
20283 auto MIB =
20284 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
20287
20288 // Insert branch.
20289 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
20290 .addReg(CmpReg)
20291 .addReg(RISCV::X0)
20292 .addMBB(DoneMBB);
20293
20294 CvtMBB->addSuccessor(DoneMBB);
20295
20296 // Convert to integer.
20297 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
20298 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
20301
20302 // Convert back to FP.
20303 Register I2FReg = MRI.createVirtualRegister(RC);
20304 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
20307
20308 // Restore the sign bit.
20309 Register CvtReg = MRI.createVirtualRegister(RC);
20310 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
20311
20312 // Merge the results.
20313 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
20314 .addReg(SrcReg)
20315 .addMBB(MBB)
20316 .addReg(CvtReg)
20317 .addMBB(CvtMBB);
20318
20319 MI.eraseFromParent();
20320 return DoneMBB;
20321}
20322
20325 MachineBasicBlock *BB) const {
20326 switch (MI.getOpcode()) {
20327 default:
20328 llvm_unreachable("Unexpected instr type to insert");
20329 case RISCV::ReadCounterWide:
20330 assert(!Subtarget.is64Bit() &&
20331 "ReadCounterWide is only to be used on riscv32");
20332 return emitReadCounterWidePseudo(MI, BB);
20333 case RISCV::Select_GPR_Using_CC_GPR:
20334 case RISCV::Select_GPR_Using_CC_Imm:
20335 case RISCV::Select_FPR16_Using_CC_GPR:
20336 case RISCV::Select_FPR16INX_Using_CC_GPR:
20337 case RISCV::Select_FPR32_Using_CC_GPR:
20338 case RISCV::Select_FPR32INX_Using_CC_GPR:
20339 case RISCV::Select_FPR64_Using_CC_GPR:
20340 case RISCV::Select_FPR64INX_Using_CC_GPR:
20341 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
20342 return emitSelectPseudo(MI, BB, Subtarget);
20343 case RISCV::BuildPairF64Pseudo:
20344 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
20345 case RISCV::SplitF64Pseudo:
20346 return emitSplitF64Pseudo(MI, BB, Subtarget);
20347 case RISCV::PseudoQuietFLE_H:
20348 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
20349 case RISCV::PseudoQuietFLE_H_INX:
20350 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
20351 case RISCV::PseudoQuietFLT_H:
20352 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
20353 case RISCV::PseudoQuietFLT_H_INX:
20354 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
20355 case RISCV::PseudoQuietFLE_S:
20356 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
20357 case RISCV::PseudoQuietFLE_S_INX:
20358 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
20359 case RISCV::PseudoQuietFLT_S:
20360 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
20361 case RISCV::PseudoQuietFLT_S_INX:
20362 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
20363 case RISCV::PseudoQuietFLE_D:
20364 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
20365 case RISCV::PseudoQuietFLE_D_INX:
20366 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
20367 case RISCV::PseudoQuietFLE_D_IN32X:
20368 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
20369 Subtarget);
20370 case RISCV::PseudoQuietFLT_D:
20371 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
20372 case RISCV::PseudoQuietFLT_D_INX:
20373 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
20374 case RISCV::PseudoQuietFLT_D_IN32X:
20375 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
20376 Subtarget);
20377
20378 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
20379 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
20380 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
20381 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
20382 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
20383 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
20384 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
20385 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
20386 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
20387 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
20388 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
20389 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
20390 case RISCV::PseudoFROUND_H:
20391 case RISCV::PseudoFROUND_H_INX:
20392 case RISCV::PseudoFROUND_S:
20393 case RISCV::PseudoFROUND_S_INX:
20394 case RISCV::PseudoFROUND_D:
20395 case RISCV::PseudoFROUND_D_INX:
20396 case RISCV::PseudoFROUND_D_IN32X:
20397 return emitFROUND(MI, BB, Subtarget);
20398 case RISCV::PROBED_STACKALLOC_DYN:
20399 return emitDynamicProbedAlloc(MI, BB);
20400 case TargetOpcode::STATEPOINT:
20401 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
20402 // while jal call instruction (where statepoint will be lowered at the end)
20403 // has implicit def. This def is early-clobber as it will be set at
20404 // the moment of the call and earlier than any use is read.
20405 // Add this implicit dead def here as a workaround.
20406 MI.addOperand(*MI.getMF(),
20408 RISCV::X1, /*isDef*/ true,
20409 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
20410 /*isUndef*/ false, /*isEarlyClobber*/ true));
20411 [[fallthrough]];
20412 case TargetOpcode::STACKMAP:
20413 case TargetOpcode::PATCHPOINT:
20414 if (!Subtarget.is64Bit())
20415 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
20416 "supported on 64-bit targets");
20417 return emitPatchPoint(MI, BB);
20418 }
20419}
20420
20422 SDNode *Node) const {
20423 // Add FRM dependency to any instructions with dynamic rounding mode.
20424 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
20425 if (Idx < 0) {
20426 // Vector pseudos have FRM index indicated by TSFlags.
20427 Idx = RISCVII::getFRMOpNum(MI.getDesc());
20428 if (Idx < 0)
20429 return;
20430 }
20431 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
20432 return;
20433 // If the instruction already reads FRM, don't add another read.
20434 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
20435 return;
20436 MI.addOperand(
20437 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
20438}
20439
20440void RISCVTargetLowering::analyzeInputArgs(
20441 MachineFunction &MF, CCState &CCInfo,
20442 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
20443 RISCVCCAssignFn Fn) const {
20444 unsigned NumArgs = Ins.size();
20446
20447 for (unsigned i = 0; i != NumArgs; ++i) {
20448 MVT ArgVT = Ins[i].VT;
20449 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
20450
20451 Type *ArgTy = nullptr;
20452 if (IsRet)
20453 ArgTy = FType->getReturnType();
20454 else if (Ins[i].isOrigArg())
20455 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
20456
20457 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
20458 /*IsFixed=*/true, IsRet, ArgTy)) {
20459 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
20460 << ArgVT << '\n');
20461 llvm_unreachable(nullptr);
20462 }
20463 }
20464}
20465
20466void RISCVTargetLowering::analyzeOutputArgs(
20467 MachineFunction &MF, CCState &CCInfo,
20468 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
20469 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
20470 unsigned NumArgs = Outs.size();
20471
20472 for (unsigned i = 0; i != NumArgs; i++) {
20473 MVT ArgVT = Outs[i].VT;
20474 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20475 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
20476
20477 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
20478 Outs[i].IsFixed, IsRet, OrigTy)) {
20479 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
20480 << ArgVT << "\n");
20481 llvm_unreachable(nullptr);
20482 }
20483 }
20484}
20485
20486// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
20487// values.
20489 const CCValAssign &VA, const SDLoc &DL,
20490 const RISCVSubtarget &Subtarget) {
20491 if (VA.needsCustom()) {
20492 if (VA.getLocVT().isInteger() &&
20493 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
20494 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
20495 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
20496 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
20498 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
20499 llvm_unreachable("Unexpected Custom handling.");
20500 }
20501
20502 switch (VA.getLocInfo()) {
20503 default:
20504 llvm_unreachable("Unexpected CCValAssign::LocInfo");
20505 case CCValAssign::Full:
20506 break;
20507 case CCValAssign::BCvt:
20508 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
20509 break;
20510 }
20511 return Val;
20512}
20513
20514// The caller is responsible for loading the full value if the argument is
20515// passed with CCValAssign::Indirect.
20517 const CCValAssign &VA, const SDLoc &DL,
20518 const ISD::InputArg &In,
20519 const RISCVTargetLowering &TLI) {
20522 EVT LocVT = VA.getLocVT();
20523 SDValue Val;
20524 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
20525 Register VReg = RegInfo.createVirtualRegister(RC);
20526 RegInfo.addLiveIn(VA.getLocReg(), VReg);
20527 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
20528
20529 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
20530 if (In.isOrigArg()) {
20531 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
20532 if (OrigArg->getType()->isIntegerTy()) {
20533 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
20534 // An input zero extended from i31 can also be considered sign extended.
20535 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
20536 (BitWidth < 32 && In.Flags.isZExt())) {
20538 RVFI->addSExt32Register(VReg);
20539 }
20540 }
20541 }
20542
20544 return Val;
20545
20546 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
20547}
20548
20550 const CCValAssign &VA, const SDLoc &DL,
20551 const RISCVSubtarget &Subtarget) {
20552 EVT LocVT = VA.getLocVT();
20553
20554 if (VA.needsCustom()) {
20555 if (LocVT.isInteger() &&
20556 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
20557 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
20558 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
20559 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
20560 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
20561 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
20562 llvm_unreachable("Unexpected Custom handling.");
20563 }
20564
20565 switch (VA.getLocInfo()) {
20566 default:
20567 llvm_unreachable("Unexpected CCValAssign::LocInfo");
20568 case CCValAssign::Full:
20569 break;
20570 case CCValAssign::BCvt:
20571 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
20572 break;
20573 }
20574 return Val;
20575}
20576
20577// The caller is responsible for loading the full value if the argument is
20578// passed with CCValAssign::Indirect.
20580 const CCValAssign &VA, const SDLoc &DL) {
20582 MachineFrameInfo &MFI = MF.getFrameInfo();
20583 EVT LocVT = VA.getLocVT();
20584 EVT ValVT = VA.getValVT();
20586 if (VA.getLocInfo() == CCValAssign::Indirect) {
20587 // When the value is a scalable vector, we save the pointer which points to
20588 // the scalable vector value in the stack. The ValVT will be the pointer
20589 // type, instead of the scalable vector type.
20590 ValVT = LocVT;
20591 }
20592 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
20593 /*IsImmutable=*/true);
20594 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20595 SDValue Val;
20596
20598 switch (VA.getLocInfo()) {
20599 default:
20600 llvm_unreachable("Unexpected CCValAssign::LocInfo");
20601 case CCValAssign::Full:
20603 case CCValAssign::BCvt:
20604 break;
20605 }
20606 Val = DAG.getExtLoad(
20607 ExtType, DL, LocVT, Chain, FIN,
20609 return Val;
20610}
20611
20613 const CCValAssign &VA,
20614 const CCValAssign &HiVA,
20615 const SDLoc &DL) {
20616 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
20617 "Unexpected VA");
20619 MachineFrameInfo &MFI = MF.getFrameInfo();
20621
20622 assert(VA.isRegLoc() && "Expected register VA assignment");
20623
20624 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20625 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
20626 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
20627 SDValue Hi;
20628 if (HiVA.isMemLoc()) {
20629 // Second half of f64 is passed on the stack.
20630 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
20631 /*IsImmutable=*/true);
20632 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
20633 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
20635 } else {
20636 // Second half of f64 is passed in another GPR.
20637 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20638 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
20639 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
20640 }
20641 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
20642}
20643
20644// Transform physical registers into virtual registers.
20646 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
20647 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
20648 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
20649
20651
20652 switch (CallConv) {
20653 default:
20654 report_fatal_error("Unsupported calling convention");
20655 case CallingConv::C:
20656 case CallingConv::Fast:
20658 case CallingConv::GRAAL:
20660 break;
20661 case CallingConv::GHC:
20662 if (Subtarget.hasStdExtE())
20663 report_fatal_error("GHC calling convention is not supported on RVE!");
20664 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
20665 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
20666 "(Zdinx/D) instruction set extensions");
20667 }
20668
20669 const Function &Func = MF.getFunction();
20670 if (Func.hasFnAttribute("interrupt")) {
20671 if (!Func.arg_empty())
20673 "Functions with the interrupt attribute cannot have arguments!");
20674
20675 StringRef Kind =
20676 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20677
20678 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
20680 "Function interrupt attribute argument not supported!");
20681 }
20682
20683 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20684 MVT XLenVT = Subtarget.getXLenVT();
20685 unsigned XLenInBytes = Subtarget.getXLen() / 8;
20686 // Used with vargs to accumulate store chains.
20687 std::vector<SDValue> OutChains;
20688
20689 // Assign locations to all of the incoming arguments.
20691 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20692
20693 if (CallConv == CallingConv::GHC)
20695 else
20696 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
20698 : CC_RISCV);
20699
20700 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
20701 CCValAssign &VA = ArgLocs[i];
20702 SDValue ArgValue;
20703 // Passing f64 on RV32D with a soft float ABI must be handled as a special
20704 // case.
20705 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20706 assert(VA.needsCustom());
20707 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
20708 } else if (VA.isRegLoc())
20709 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
20710 else
20711 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
20712
20713 if (VA.getLocInfo() == CCValAssign::Indirect) {
20714 // If the original argument was split and passed by reference (e.g. i128
20715 // on RV32), we need to load all parts of it here (using the same
20716 // address). Vectors may be partly split to registers and partly to the
20717 // stack, in which case the base address is partly offset and subsequent
20718 // stores are relative to that.
20719 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
20721 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
20722 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
20723 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20724 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
20725 CCValAssign &PartVA = ArgLocs[i + 1];
20726 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
20727 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20728 if (PartVA.getValVT().isScalableVector())
20729 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20730 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
20731 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
20733 ++i;
20734 ++InsIdx;
20735 }
20736 continue;
20737 }
20738 InVals.push_back(ArgValue);
20739 }
20740
20741 if (any_of(ArgLocs,
20742 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20743 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20744
20745 if (IsVarArg) {
20746 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
20747 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
20748 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
20749 MachineFrameInfo &MFI = MF.getFrameInfo();
20750 MachineRegisterInfo &RegInfo = MF.getRegInfo();
20752
20753 // Size of the vararg save area. For now, the varargs save area is either
20754 // zero or large enough to hold a0-a7.
20755 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
20756 int FI;
20757
20758 // If all registers are allocated, then all varargs must be passed on the
20759 // stack and we don't need to save any argregs.
20760 if (VarArgsSaveSize == 0) {
20761 int VaArgOffset = CCInfo.getStackSize();
20762 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
20763 } else {
20764 int VaArgOffset = -VarArgsSaveSize;
20765 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
20766
20767 // If saving an odd number of registers then create an extra stack slot to
20768 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
20769 // offsets to even-numbered registered remain 2*XLEN-aligned.
20770 if (Idx % 2) {
20772 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
20773 VarArgsSaveSize += XLenInBytes;
20774 }
20775
20776 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20777
20778 // Copy the integer registers that may have been used for passing varargs
20779 // to the vararg save area.
20780 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
20781 const Register Reg = RegInfo.createVirtualRegister(RC);
20782 RegInfo.addLiveIn(ArgRegs[I], Reg);
20783 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
20784 SDValue Store = DAG.getStore(
20785 Chain, DL, ArgValue, FIN,
20786 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
20787 OutChains.push_back(Store);
20788 FIN =
20789 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
20790 }
20791 }
20792
20793 // Record the frame index of the first variable argument
20794 // which is a value necessary to VASTART.
20795 RVFI->setVarArgsFrameIndex(FI);
20796 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
20797 }
20798
20799 // All stores are grouped in one node to allow the matching between
20800 // the size of Ins and InVals. This only happens for vararg functions.
20801 if (!OutChains.empty()) {
20802 OutChains.push_back(Chain);
20803 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
20804 }
20805
20806 return Chain;
20807}
20808
20809/// isEligibleForTailCallOptimization - Check whether the call is eligible
20810/// for tail call optimization.
20811/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
20812bool RISCVTargetLowering::isEligibleForTailCallOptimization(
20813 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
20814 const SmallVector<CCValAssign, 16> &ArgLocs) const {
20815
20816 auto CalleeCC = CLI.CallConv;
20817 auto &Outs = CLI.Outs;
20818 auto &Caller = MF.getFunction();
20819 auto CallerCC = Caller.getCallingConv();
20820
20821 // Exception-handling functions need a special set of instructions to
20822 // indicate a return to the hardware. Tail-calling another function would
20823 // probably break this.
20824 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
20825 // should be expanded as new function attributes are introduced.
20826 if (Caller.hasFnAttribute("interrupt"))
20827 return false;
20828
20829 // Do not tail call opt if the stack is used to pass parameters.
20830 if (CCInfo.getStackSize() != 0)
20831 return false;
20832
20833 // Do not tail call opt if any parameters need to be passed indirectly.
20834 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
20835 // passed indirectly. So the address of the value will be passed in a
20836 // register, or if not available, then the address is put on the stack. In
20837 // order to pass indirectly, space on the stack often needs to be allocated
20838 // in order to store the value. In this case the CCInfo.getNextStackOffset()
20839 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
20840 // are passed CCValAssign::Indirect.
20841 for (auto &VA : ArgLocs)
20842 if (VA.getLocInfo() == CCValAssign::Indirect)
20843 return false;
20844
20845 // Do not tail call opt if either caller or callee uses struct return
20846 // semantics.
20847 auto IsCallerStructRet = Caller.hasStructRetAttr();
20848 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
20849 if (IsCallerStructRet || IsCalleeStructRet)
20850 return false;
20851
20852 // The callee has to preserve all registers the caller needs to preserve.
20853 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
20854 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
20855 if (CalleeCC != CallerCC) {
20856 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
20857 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20858 return false;
20859 }
20860
20861 // Byval parameters hand the function a pointer directly into the stack area
20862 // we want to reuse during a tail call. Working around this *is* possible
20863 // but less efficient and uglier in LowerCall.
20864 for (auto &Arg : Outs)
20865 if (Arg.Flags.isByVal())
20866 return false;
20867
20868 return true;
20869}
20870
20872 return DAG.getDataLayout().getPrefTypeAlign(
20873 VT.getTypeForEVT(*DAG.getContext()));
20874}
20875
20876// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
20877// and output parameter nodes.
20879 SmallVectorImpl<SDValue> &InVals) const {
20880 SelectionDAG &DAG = CLI.DAG;
20881 SDLoc &DL = CLI.DL;
20883 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
20885 SDValue Chain = CLI.Chain;
20886 SDValue Callee = CLI.Callee;
20887 bool &IsTailCall = CLI.IsTailCall;
20888 CallingConv::ID CallConv = CLI.CallConv;
20889 bool IsVarArg = CLI.IsVarArg;
20890 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20891 MVT XLenVT = Subtarget.getXLenVT();
20892
20894
20895 // Analyze the operands of the call, assigning locations to each operand.
20897 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20898
20899 if (CallConv == CallingConv::GHC) {
20900 if (Subtarget.hasStdExtE())
20901 report_fatal_error("GHC calling convention is not supported on RVE!");
20902 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
20903 } else
20904 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
20906 : CC_RISCV);
20907
20908 // Check if it's really possible to do a tail call.
20909 if (IsTailCall)
20910 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
20911
20912 if (IsTailCall)
20913 ++NumTailCalls;
20914 else if (CLI.CB && CLI.CB->isMustTailCall())
20915 report_fatal_error("failed to perform tail call elimination on a call "
20916 "site marked musttail");
20917
20918 // Get a count of how many bytes are to be pushed on the stack.
20919 unsigned NumBytes = ArgCCInfo.getStackSize();
20920
20921 // Create local copies for byval args
20922 SmallVector<SDValue, 8> ByValArgs;
20923 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20924 ISD::ArgFlagsTy Flags = Outs[i].Flags;
20925 if (!Flags.isByVal())
20926 continue;
20927
20928 SDValue Arg = OutVals[i];
20929 unsigned Size = Flags.getByValSize();
20930 Align Alignment = Flags.getNonZeroByValAlign();
20931
20932 int FI =
20933 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
20934 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
20935 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
20936
20937 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
20938 /*IsVolatile=*/false,
20939 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
20941 ByValArgs.push_back(FIPtr);
20942 }
20943
20944 if (!IsTailCall)
20945 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
20946
20947 // Copy argument values to their designated locations.
20949 SmallVector<SDValue, 8> MemOpChains;
20950 SDValue StackPtr;
20951 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
20952 ++i, ++OutIdx) {
20953 CCValAssign &VA = ArgLocs[i];
20954 SDValue ArgValue = OutVals[OutIdx];
20955 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
20956
20957 // Handle passing f64 on RV32D with a soft float ABI as a special case.
20958 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20959 assert(VA.isRegLoc() && "Expected register VA assignment");
20960 assert(VA.needsCustom());
20961 SDValue SplitF64 = DAG.getNode(
20962 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
20963 SDValue Lo = SplitF64.getValue(0);
20964 SDValue Hi = SplitF64.getValue(1);
20965
20966 Register RegLo = VA.getLocReg();
20967 RegsToPass.push_back(std::make_pair(RegLo, Lo));
20968
20969 // Get the CCValAssign for the Hi part.
20970 CCValAssign &HiVA = ArgLocs[++i];
20971
20972 if (HiVA.isMemLoc()) {
20973 // Second half of f64 is passed on the stack.
20974 if (!StackPtr.getNode())
20975 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20977 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20978 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
20979 // Emit the store.
20980 MemOpChains.push_back(DAG.getStore(
20981 Chain, DL, Hi, Address,
20983 } else {
20984 // Second half of f64 is passed in another GPR.
20985 Register RegHigh = HiVA.getLocReg();
20986 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
20987 }
20988 continue;
20989 }
20990
20991 // Promote the value if needed.
20992 // For now, only handle fully promoted and indirect arguments.
20993 if (VA.getLocInfo() == CCValAssign::Indirect) {
20994 // Store the argument in a stack slot and pass its address.
20995 Align StackAlign =
20996 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
20997 getPrefTypeAlign(ArgValue.getValueType(), DAG));
20998 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
20999 // If the original argument was split (e.g. i128), we need
21000 // to store the required parts of it here (and pass just one address).
21001 // Vectors may be partly split to registers and partly to the stack, in
21002 // which case the base address is partly offset and subsequent stores are
21003 // relative to that.
21004 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
21005 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
21006 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
21007 // Calculate the total size to store. We don't have access to what we're
21008 // actually storing other than performing the loop and collecting the
21009 // info.
21011 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
21012 SDValue PartValue = OutVals[OutIdx + 1];
21013 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
21014 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
21015 EVT PartVT = PartValue.getValueType();
21016 if (PartVT.isScalableVector())
21017 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
21018 StoredSize += PartVT.getStoreSize();
21019 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
21020 Parts.push_back(std::make_pair(PartValue, Offset));
21021 ++i;
21022 ++OutIdx;
21023 }
21024 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
21025 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
21026 MemOpChains.push_back(
21027 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
21029 for (const auto &Part : Parts) {
21030 SDValue PartValue = Part.first;
21031 SDValue PartOffset = Part.second;
21033 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
21034 MemOpChains.push_back(
21035 DAG.getStore(Chain, DL, PartValue, Address,
21037 }
21038 ArgValue = SpillSlot;
21039 } else {
21040 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
21041 }
21042
21043 // Use local copy if it is a byval arg.
21044 if (Flags.isByVal())
21045 ArgValue = ByValArgs[j++];
21046
21047 if (VA.isRegLoc()) {
21048 // Queue up the argument copies and emit them at the end.
21049 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
21050 } else {
21051 assert(VA.isMemLoc() && "Argument not register or memory");
21052 assert(!IsTailCall && "Tail call not allowed if stack is used "
21053 "for passing parameters");
21054
21055 // Work out the address of the stack slot.
21056 if (!StackPtr.getNode())
21057 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
21059 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
21061
21062 // Emit the store.
21063 MemOpChains.push_back(
21064 DAG.getStore(Chain, DL, ArgValue, Address,
21066 }
21067 }
21068
21069 // Join the stores, which are independent of one another.
21070 if (!MemOpChains.empty())
21071 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
21072
21073 SDValue Glue;
21074
21075 // Build a sequence of copy-to-reg nodes, chained and glued together.
21076 for (auto &Reg : RegsToPass) {
21077 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
21078 Glue = Chain.getValue(1);
21079 }
21080
21081 // Validate that none of the argument registers have been marked as
21082 // reserved, if so report an error. Do the same for the return address if this
21083 // is not a tailcall.
21084 validateCCReservedRegs(RegsToPass, MF);
21085 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
21087 MF.getFunction(),
21088 "Return address register required, but has been reserved."});
21089
21090 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
21091 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
21092 // split it and then direct call can be matched by PseudoCALL.
21093 bool CalleeIsLargeExternalSymbol = false;
21095 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
21096 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
21097 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
21098 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
21099 CalleeIsLargeExternalSymbol = true;
21100 }
21101 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
21102 const GlobalValue *GV = S->getGlobal();
21103 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
21104 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
21105 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
21106 }
21107
21108 // The first call operand is the chain and the second is the target address.
21110 Ops.push_back(Chain);
21111 Ops.push_back(Callee);
21112
21113 // Add argument registers to the end of the list so that they are
21114 // known live into the call.
21115 for (auto &Reg : RegsToPass)
21116 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
21117
21118 // Add a register mask operand representing the call-preserved registers.
21119 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
21120 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
21121 assert(Mask && "Missing call preserved mask for calling convention");
21122 Ops.push_back(DAG.getRegisterMask(Mask));
21123
21124 // Glue the call to the argument copies, if any.
21125 if (Glue.getNode())
21126 Ops.push_back(Glue);
21127
21128 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
21129 "Unexpected CFI type for a direct call");
21130
21131 // Emit the call.
21132 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
21133
21134 // Use software guarded branch for large code model non-indirect calls
21135 // Tail call to external symbol will have a null CLI.CB and we need another
21136 // way to determine the callsite type
21137 bool NeedSWGuarded = false;
21139 Subtarget.hasStdExtZicfilp() &&
21140 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
21141 NeedSWGuarded = true;
21142
21143 if (IsTailCall) {
21145 unsigned CallOpc =
21146 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
21147 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
21148 if (CLI.CFIType)
21149 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
21150 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
21151 return Ret;
21152 }
21153
21154 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
21155 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
21156 if (CLI.CFIType)
21157 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
21158 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
21159 Glue = Chain.getValue(1);
21160
21161 // Mark the end of the call, which is glued to the call itself.
21162 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
21163 Glue = Chain.getValue(1);
21164
21165 // Assign locations to each value returned by this call.
21167 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
21168 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
21169
21170 // Copy all of the result registers out of their specified physreg.
21171 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
21172 auto &VA = RVLocs[i];
21173 // Copy the value out
21174 SDValue RetValue =
21175 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
21176 // Glue the RetValue to the end of the call sequence
21177 Chain = RetValue.getValue(1);
21178 Glue = RetValue.getValue(2);
21179
21180 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
21181 assert(VA.needsCustom());
21182 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
21183 MVT::i32, Glue);
21184 Chain = RetValue2.getValue(1);
21185 Glue = RetValue2.getValue(2);
21186 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
21187 RetValue2);
21188 } else
21189 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
21190
21191 InVals.push_back(RetValue);
21192 }
21193
21194 return Chain;
21195}
21196
// Pre-check, without emitting any code, whether every outgoing return value
// in Outs can be assigned a location by the RISC-V return-value convention
// (CC_RISCV called with IsRet=true); returns false as soon as any assignment
// fails.
// NOTE(review): the opening signature line and the declaration of RVLocs
// (presumably a SmallVector<CCValAssign>) appear to be elided from this
// extract of the file.
21198 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
21199 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
21200 const Type *RetTy) const {
21202 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
21203 
21204 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
21205 MVT VT = Outs[i].VT;
21206 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
// CC_RISCV returns true when it cannot assign a location; any single
// failure means the whole return cannot be lowered this way.
21207 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
21208 /*IsFixed=*/true, /*IsRet=*/true, nullptr))
21209 return false;
21210 }
21211 return true;
21212 }
21213
// Lower the outgoing return values of a function: each value is converted to
// its assigned location VT and copied into its return register, with all
// copies glued together and collected into a RISCVISD::RET_GLUE node (or
// SRET_GLUE/MRET_GLUE for "interrupt" functions).
// NOTE(review): several lines appear to be elided from this extract — the
// signature line naming this function, the declaration of MF, the RVLocs
// vector declaration, and the heads of the diagnose()/report_fatal_error()
// calls whose argument lists are still visible below.
21214 SDValue
21216 bool IsVarArg,
21218 const SmallVectorImpl<SDValue> &OutVals,
21219 const SDLoc &DL, SelectionDAG &DAG) const {
21221 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
21222 
21223 // Stores the assignment of the return value to a location.
21225 
21226 // Info about the registers and stack slot.
21227 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
21228 *DAG.getContext());
21229 
21230 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
21231 nullptr, CC_RISCV);
21232 
// The GHC calling convention does not allow any return values at all.
21233 if (CallConv == CallingConv::GHC && !RVLocs.empty())
21234 report_fatal_error("GHC functions return void only");
21235 
21236 SDValue Glue;
// RetOps[0] is a placeholder for the chain, patched up after the loop.
21237 SmallVector<SDValue, 4> RetOps(1, Chain);
21238 
21239 // Copy the result values into the output registers.
21240 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
21241 SDValue Val = OutVals[OutIdx];
21242 CCValAssign &VA = RVLocs[i];
21243 assert(VA.isRegLoc() && "Can only return in registers!");
21244 
21245 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
21246 // Handle returning f64 on RV32D with a soft float ABI.
// The f64 is split into two i32 halves; the high half consumes the next
// entry in RVLocs, hence the ++i below.
21247 assert(VA.isRegLoc() && "Expected return via registers");
21248 assert(VA.needsCustom());
21249 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
21250 DAG.getVTList(MVT::i32, MVT::i32), Val);
21251 SDValue Lo = SplitF64.getValue(0);
21252 SDValue Hi = SplitF64.getValue(1);
21253 Register RegLo = VA.getLocReg();
21254 Register RegHi = RVLocs[++i].getLocReg();
21255 
// Diagnose if either half's register has been reserved by the user.
21256 if (STI.isRegisterReservedByUser(RegLo) ||
21257 STI.isRegisterReservedByUser(RegHi))
21259 MF.getFunction(),
21260 "Return value register required, but has been reserved."});
21261 
21262 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
21263 Glue = Chain.getValue(1);
21264 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
21265 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
21266 Glue = Chain.getValue(1);
21267 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
21268 } else {
21269 // Handle a 'normal' return.
21270 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
21271 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
21272 
21273 if (STI.isRegisterReservedByUser(VA.getLocReg()))
21275 MF.getFunction(),
21276 "Return value register required, but has been reserved."});
21277 
21278 // Guarantee that all emitted copies are stuck together.
21279 Glue = Chain.getValue(1);
21280 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
21281 }
21282 }
21283 
21284 RetOps[0] = Chain; // Update chain.
21285 
21286 // Add the glue node if we have it.
21287 if (Glue.getNode()) {
21288 RetOps.push_back(Glue);
21289 }
21290 
// Record on the machine-function info that this function returns a
// scalable-vector value.
21291 if (any_of(RVLocs,
21292 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
21293 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
21294 
21295 unsigned RetOpc = RISCVISD::RET_GLUE;
21296 // Interrupt service routines use different return instructions.
21297 const Function &Func = DAG.getMachineFunction().getFunction();
21298 if (Func.hasFnAttribute("interrupt")) {
21299 if (!Func.getReturnType()->isVoidTy())
21301 "Functions with the interrupt attribute must have void return type!");
21302 
// "supervisor" interrupts return with SRET; anything else uses MRET.
21304 StringRef Kind =
21305 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
21306 
21307 if (Kind == "supervisor")
21308 RetOpc = RISCVISD::SRET_GLUE;
21309 else
21310 RetOpc = RISCVISD::MRET_GLUE;
21311 }
21312 
21313 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
21314 }
21315
21316void RISCVTargetLowering::validateCCReservedRegs(
21317 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
21318 MachineFunction &MF) const {
21319 const Function &F = MF.getFunction();
21320 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
21321
21322 if (llvm::any_of(Regs, [&STI](auto Reg) {
21323 return STI.isRegisterReservedByUser(Reg.first);
21324 }))
21325 F.getContext().diagnose(DiagnosticInfoUnsupported{
21326 F, "Argument register required, but has been reserved."});
21327}
21328
21329 // Check if the result of the node is only used as a return value, as
21330 // otherwise we can't perform a tail-call.
// On success, Chain is updated to the chain operand of the CopyToReg that
// feeds the return, so the caller can splice the tail call in its place.
// NOTE(review): the signature line (per the recursive call below, something
// like bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue
// &Chain) const) appears to be elided from this extract of the file.
21332 if (N->getNumValues() != 1)
21333 return false;
// The single result value must have exactly one use.
21334 if (!N->hasNUsesOfValue(1, 0))
21335 return false;
21336 
21337 SDNode *Copy = *N->user_begin();
21338 
// Look through a bitcast of the value.
21339 if (Copy->getOpcode() == ISD::BITCAST) {
21340 return isUsedByReturnOnly(Copy, Chain);
21341 }
21342 
21343 // TODO: Handle additional opcodes in order to support tail-calling libcalls
21344 // with soft float ABIs.
21345 if (Copy->getOpcode() != ISD::CopyToReg) {
21346 return false;
21347 }
21348 
21349 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
21350 // isn't safe to perform a tail call.
21351 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
21352 return false;
21353 
21354 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
21355 bool HasRet = false;
21356 for (SDNode *Node : Copy->users()) {
21357 if (Node->getOpcode() != RISCVISD::RET_GLUE)
21358 return false;
21359 HasRet = true;
21360 }
21361 if (!HasRet)
21362 return false;
21363 
// Success: hand the CopyToReg's incoming chain back to the caller.
21364 Chain = Copy->getOperand(0);
21365 return true;
21366 }
21367
// Defer entirely to the IR-level 'tail' marking on the call instruction.
// NOTE(review): the function signature line appears to be elided from this
// extract of the file.
21369 return CI->isTailCall();
21370 }
21371
// Map a RISCVISD target-specific opcode to its textual name, used when
// printing/dumping SelectionDAG nodes. Returns nullptr for opcodes that
// have no entry in the table below.
// NOTE(review): the case label preceding the bare 'break;' (presumably
// RISCVISD::FIRST_NUMBER) appears to be elided from this extract of the
// file.
21372 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
// Each table entry expands to: case RISCVISD::NODE: return "RISCVISD::NODE";
21373 #define NODE_NAME_CASE(NODE) \
21374 case RISCVISD::NODE: \
21375 return "RISCVISD::" #NODE;
21376 // clang-format off
21377 switch ((RISCVISD::NodeType)Opcode) {
21379 break;
21380 NODE_NAME_CASE(RET_GLUE)
21381 NODE_NAME_CASE(SRET_GLUE)
21382 NODE_NAME_CASE(MRET_GLUE)
21383 NODE_NAME_CASE(CALL)
21384 NODE_NAME_CASE(TAIL)
21385 NODE_NAME_CASE(SELECT_CC)
21386 NODE_NAME_CASE(BR_CC)
21387 NODE_NAME_CASE(BuildGPRPair)
21388 NODE_NAME_CASE(SplitGPRPair)
21389 NODE_NAME_CASE(BuildPairF64)
21390 NODE_NAME_CASE(SplitF64)
21391 NODE_NAME_CASE(ADD_LO)
21392 NODE_NAME_CASE(HI)
21393 NODE_NAME_CASE(LLA)
21394 NODE_NAME_CASE(ADD_TPREL)
21395 NODE_NAME_CASE(MULHSU)
21396 NODE_NAME_CASE(SHL_ADD)
21397 NODE_NAME_CASE(SLLW)
21398 NODE_NAME_CASE(SRAW)
21399 NODE_NAME_CASE(SRLW)
21400 NODE_NAME_CASE(DIVW)
21401 NODE_NAME_CASE(DIVUW)
21402 NODE_NAME_CASE(REMUW)
21403 NODE_NAME_CASE(ROLW)
21404 NODE_NAME_CASE(RORW)
21405 NODE_NAME_CASE(CLZW)
21406 NODE_NAME_CASE(CTZW)
21407 NODE_NAME_CASE(ABSW)
21408 NODE_NAME_CASE(FMV_H_X)
21409 NODE_NAME_CASE(FMV_X_ANYEXTH)
21410 NODE_NAME_CASE(FMV_X_SIGNEXTH)
21411 NODE_NAME_CASE(FMV_W_X_RV64)
21412 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
21413 NODE_NAME_CASE(FCVT_X)
21414 NODE_NAME_CASE(FCVT_XU)
21415 NODE_NAME_CASE(FCVT_W_RV64)
21416 NODE_NAME_CASE(FCVT_WU_RV64)
21417 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
21418 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
21419 NODE_NAME_CASE(FROUND)
21420 NODE_NAME_CASE(FCLASS)
21421 NODE_NAME_CASE(FSGNJX)
21422 NODE_NAME_CASE(FMAX)
21423 NODE_NAME_CASE(FMIN)
21424 NODE_NAME_CASE(FLI)
21425 NODE_NAME_CASE(READ_COUNTER_WIDE)
21426 NODE_NAME_CASE(BREV8)
21427 NODE_NAME_CASE(ORC_B)
21428 NODE_NAME_CASE(ZIP)
21429 NODE_NAME_CASE(UNZIP)
21430 NODE_NAME_CASE(CLMUL)
21431 NODE_NAME_CASE(CLMULH)
21432 NODE_NAME_CASE(CLMULR)
21433 NODE_NAME_CASE(MOPR)
21434 NODE_NAME_CASE(MOPRR)
21435 NODE_NAME_CASE(SHA256SIG0)
21436 NODE_NAME_CASE(SHA256SIG1)
21437 NODE_NAME_CASE(SHA256SUM0)
21438 NODE_NAME_CASE(SHA256SUM1)
21439 NODE_NAME_CASE(SM4KS)
21440 NODE_NAME_CASE(SM4ED)
21441 NODE_NAME_CASE(SM3P0)
21442 NODE_NAME_CASE(SM3P1)
21443 NODE_NAME_CASE(TH_LWD)
21444 NODE_NAME_CASE(TH_LWUD)
21445 NODE_NAME_CASE(TH_LDD)
21446 NODE_NAME_CASE(TH_SWD)
21447 NODE_NAME_CASE(TH_SDD)
21448 NODE_NAME_CASE(VMV_V_V_VL)
21449 NODE_NAME_CASE(VMV_V_X_VL)
21450 NODE_NAME_CASE(VFMV_V_F_VL)
21451 NODE_NAME_CASE(VMV_X_S)
21452 NODE_NAME_CASE(VMV_S_X_VL)
21453 NODE_NAME_CASE(VFMV_S_F_VL)
21454 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
21455 NODE_NAME_CASE(READ_VLENB)
21456 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
21457 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
21458 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
21459 NODE_NAME_CASE(VSLIDEUP_VL)
21460 NODE_NAME_CASE(VSLIDE1UP_VL)
21461 NODE_NAME_CASE(VSLIDEDOWN_VL)
21462 NODE_NAME_CASE(VSLIDE1DOWN_VL)
21463 NODE_NAME_CASE(VFSLIDE1UP_VL)
21464 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
21465 NODE_NAME_CASE(VID_VL)
21466 NODE_NAME_CASE(VFNCVT_ROD_VL)
21467 NODE_NAME_CASE(VECREDUCE_ADD_VL)
21468 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
21469 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
21470 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
21471 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
21472 NODE_NAME_CASE(VECREDUCE_AND_VL)
21473 NODE_NAME_CASE(VECREDUCE_OR_VL)
21474 NODE_NAME_CASE(VECREDUCE_XOR_VL)
21475 NODE_NAME_CASE(VECREDUCE_FADD_VL)
21476 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
21477 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
21478 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
21479 NODE_NAME_CASE(ADD_VL)
21480 NODE_NAME_CASE(AND_VL)
21481 NODE_NAME_CASE(MUL_VL)
21482 NODE_NAME_CASE(OR_VL)
21483 NODE_NAME_CASE(SDIV_VL)
21484 NODE_NAME_CASE(SHL_VL)
21485 NODE_NAME_CASE(SREM_VL)
21486 NODE_NAME_CASE(SRA_VL)
21487 NODE_NAME_CASE(SRL_VL)
21488 NODE_NAME_CASE(ROTL_VL)
21489 NODE_NAME_CASE(ROTR_VL)
21490 NODE_NAME_CASE(SUB_VL)
21491 NODE_NAME_CASE(UDIV_VL)
21492 NODE_NAME_CASE(UREM_VL)
21493 NODE_NAME_CASE(XOR_VL)
21494 NODE_NAME_CASE(AVGFLOORS_VL)
21495 NODE_NAME_CASE(AVGFLOORU_VL)
21496 NODE_NAME_CASE(AVGCEILS_VL)
21497 NODE_NAME_CASE(AVGCEILU_VL)
21498 NODE_NAME_CASE(SADDSAT_VL)
21499 NODE_NAME_CASE(UADDSAT_VL)
21500 NODE_NAME_CASE(SSUBSAT_VL)
21501 NODE_NAME_CASE(USUBSAT_VL)
21502 NODE_NAME_CASE(FADD_VL)
21503 NODE_NAME_CASE(FSUB_VL)
21504 NODE_NAME_CASE(FMUL_VL)
21505 NODE_NAME_CASE(FDIV_VL)
21506 NODE_NAME_CASE(FNEG_VL)
21507 NODE_NAME_CASE(FABS_VL)
21508 NODE_NAME_CASE(FSQRT_VL)
21509 NODE_NAME_CASE(FCLASS_VL)
21510 NODE_NAME_CASE(VFMADD_VL)
21511 NODE_NAME_CASE(VFNMADD_VL)
21512 NODE_NAME_CASE(VFMSUB_VL)
21513 NODE_NAME_CASE(VFNMSUB_VL)
21514 NODE_NAME_CASE(VFWMADD_VL)
21515 NODE_NAME_CASE(VFWNMADD_VL)
21516 NODE_NAME_CASE(VFWMSUB_VL)
21517 NODE_NAME_CASE(VFWNMSUB_VL)
21518 NODE_NAME_CASE(FCOPYSIGN_VL)
21519 NODE_NAME_CASE(SMIN_VL)
21520 NODE_NAME_CASE(SMAX_VL)
21521 NODE_NAME_CASE(UMIN_VL)
21522 NODE_NAME_CASE(UMAX_VL)
21523 NODE_NAME_CASE(BITREVERSE_VL)
21524 NODE_NAME_CASE(BSWAP_VL)
21525 NODE_NAME_CASE(CTLZ_VL)
21526 NODE_NAME_CASE(CTTZ_VL)
21527 NODE_NAME_CASE(CTPOP_VL)
21528 NODE_NAME_CASE(VFMIN_VL)
21529 NODE_NAME_CASE(VFMAX_VL)
21530 NODE_NAME_CASE(MULHS_VL)
21531 NODE_NAME_CASE(MULHU_VL)
21532 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
21533 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
21534 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
21535 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
21536 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
21537 NODE_NAME_CASE(SINT_TO_FP_VL)
21538 NODE_NAME_CASE(UINT_TO_FP_VL)
21539 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
21540 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
21541 NODE_NAME_CASE(FP_EXTEND_VL)
21542 NODE_NAME_CASE(FP_ROUND_VL)
21543 NODE_NAME_CASE(STRICT_FADD_VL)
21544 NODE_NAME_CASE(STRICT_FSUB_VL)
21545 NODE_NAME_CASE(STRICT_FMUL_VL)
21546 NODE_NAME_CASE(STRICT_FDIV_VL)
21547 NODE_NAME_CASE(STRICT_FSQRT_VL)
21548 NODE_NAME_CASE(STRICT_VFMADD_VL)
21549 NODE_NAME_CASE(STRICT_VFNMADD_VL)
21550 NODE_NAME_CASE(STRICT_VFMSUB_VL)
21551 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
21552 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
21553 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
21554 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
21555 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
21556 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
21557 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
21558 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
21559 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
21560 NODE_NAME_CASE(STRICT_FSETCC_VL)
21561 NODE_NAME_CASE(STRICT_FSETCCS_VL)
21562 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
21563 NODE_NAME_CASE(VWMUL_VL)
21564 NODE_NAME_CASE(VWMULU_VL)
21565 NODE_NAME_CASE(VWMULSU_VL)
21566 NODE_NAME_CASE(VWADD_VL)
21567 NODE_NAME_CASE(VWADDU_VL)
21568 NODE_NAME_CASE(VWSUB_VL)
21569 NODE_NAME_CASE(VWSUBU_VL)
21570 NODE_NAME_CASE(VWADD_W_VL)
21571 NODE_NAME_CASE(VWADDU_W_VL)
21572 NODE_NAME_CASE(VWSUB_W_VL)
21573 NODE_NAME_CASE(VWSUBU_W_VL)
21574 NODE_NAME_CASE(VWSLL_VL)
21575 NODE_NAME_CASE(VFWMUL_VL)
21576 NODE_NAME_CASE(VFWADD_VL)
21577 NODE_NAME_CASE(VFWSUB_VL)
21578 NODE_NAME_CASE(VFWADD_W_VL)
21579 NODE_NAME_CASE(VFWSUB_W_VL)
21580 NODE_NAME_CASE(VWMACC_VL)
21581 NODE_NAME_CASE(VWMACCU_VL)
21582 NODE_NAME_CASE(VWMACCSU_VL)
21583 NODE_NAME_CASE(SETCC_VL)
21584 NODE_NAME_CASE(VMERGE_VL)
21585 NODE_NAME_CASE(VMAND_VL)
21586 NODE_NAME_CASE(VMOR_VL)
21587 NODE_NAME_CASE(VMXOR_VL)
21588 NODE_NAME_CASE(VMCLR_VL)
21589 NODE_NAME_CASE(VMSET_VL)
21590 NODE_NAME_CASE(VRGATHER_VX_VL)
21591 NODE_NAME_CASE(VRGATHER_VV_VL)
21592 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
21593 NODE_NAME_CASE(VSEXT_VL)
21594 NODE_NAME_CASE(VZEXT_VL)
21595 NODE_NAME_CASE(VCPOP_VL)
21596 NODE_NAME_CASE(VFIRST_VL)
21597 NODE_NAME_CASE(READ_CSR)
21598 NODE_NAME_CASE(WRITE_CSR)
21599 NODE_NAME_CASE(SWAP_CSR)
21600 NODE_NAME_CASE(CZERO_EQZ)
21601 NODE_NAME_CASE(CZERO_NEZ)
21602 NODE_NAME_CASE(SW_GUARDED_BRIND)
21603 NODE_NAME_CASE(SW_GUARDED_CALL)
21604 NODE_NAME_CASE(SW_GUARDED_TAIL)
21605 NODE_NAME_CASE(TUPLE_INSERT)
21606 NODE_NAME_CASE(TUPLE_EXTRACT)
21607 NODE_NAME_CASE(SF_VC_XV_SE)
21608 NODE_NAME_CASE(SF_VC_IV_SE)
21609 NODE_NAME_CASE(SF_VC_VV_SE)
21610 NODE_NAME_CASE(SF_VC_FV_SE)
21611 NODE_NAME_CASE(SF_VC_XVV_SE)
21612 NODE_NAME_CASE(SF_VC_IVV_SE)
21613 NODE_NAME_CASE(SF_VC_VVV_SE)
21614 NODE_NAME_CASE(SF_VC_FVV_SE)
21615 NODE_NAME_CASE(SF_VC_XVW_SE)
21616 NODE_NAME_CASE(SF_VC_IVW_SE)
21617 NODE_NAME_CASE(SF_VC_VVW_SE)
21618 NODE_NAME_CASE(SF_VC_FVW_SE)
21619 NODE_NAME_CASE(SF_VC_V_X_SE)
21620 NODE_NAME_CASE(SF_VC_V_I_SE)
21621 NODE_NAME_CASE(SF_VC_V_XV_SE)
21622 NODE_NAME_CASE(SF_VC_V_IV_SE)
21623 NODE_NAME_CASE(SF_VC_V_VV_SE)
21624 NODE_NAME_CASE(SF_VC_V_FV_SE)
21625 NODE_NAME_CASE(SF_VC_V_XVV_SE)
21626 NODE_NAME_CASE(SF_VC_V_IVV_SE)
21627 NODE_NAME_CASE(SF_VC_V_VVV_SE)
21628 NODE_NAME_CASE(SF_VC_V_FVV_SE)
21629 NODE_NAME_CASE(SF_VC_V_XVW_SE)
21630 NODE_NAME_CASE(SF_VC_V_IVW_SE)
21631 NODE_NAME_CASE(SF_VC_V_VVW_SE)
21632 NODE_NAME_CASE(SF_VC_V_FVW_SE)
21633 NODE_NAME_CASE(PROBED_ALLOCA)
21634 }
21635 // clang-format on
// Unknown/unnamed opcode: no name available.
21636 return nullptr;
21637 #undef NODE_NAME_CASE
21638 }
21639
21640 /// getConstraintType - Given a constraint letter, return the type of
21641 /// constraint it is for this target.
// NOTE(review): the signature line (taking a StringRef Constraint, per the
// uses below) appears to be elided from this extract of the file.
21644 if (Constraint.size() == 1) {
21645 switch (Constraint[0]) {
21646 default:
21647 break;
// Register-class constraints: 'f' and 'R'.
21648 case 'f':
21649 case 'R':
21650 return C_RegisterClass;
// Immediate-operand constraints.
21651 case 'I':
21652 case 'J':
21653 case 'K':
21654 return C_Immediate;
// Memory-operand constraint.
21655 case 'A':
21656 return C_Memory;
21657 case 's':
21658 case 'S': // A symbolic address
21659 return C_Other;
21660 }
21661 } else {
// Multi-letter vector ('v*') and 'c*' constraints all name register
// classes; their concrete classes are resolved in
// getRegForInlineAsmConstraint.
21662 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
21663 return C_RegisterClass;
21664 if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
21665 return C_RegisterClass;
21666 }
// Anything unrecognized falls back to the target-independent handling.
21667 return TargetLowering::getConstraintType(Constraint);
21668 }
21669
21670std::pair<unsigned, const TargetRegisterClass *>
21672 StringRef Constraint,
21673 MVT VT) const {
21674 // First, see if this is a constraint that directly corresponds to a RISC-V
21675 // register class.
21676 if (Constraint.size() == 1) {
21677 switch (Constraint[0]) {
21678 case 'r':
21679 // TODO: Support fixed vectors up to XLen for P extension?
21680 if (VT.isVector())
21681 break;
21682 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
21683 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
21684 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
21685 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
21686 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21687 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21688 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
21689 case 'f':
21690 if (VT == MVT::f16) {
21691 if (Subtarget.hasStdExtZfhmin())
21692 return std::make_pair(0U, &RISCV::FPR16RegClass);
21693 if (Subtarget.hasStdExtZhinxmin())
21694 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
21695 } else if (VT == MVT::f32) {
21696 if (Subtarget.hasStdExtF())
21697 return std::make_pair(0U, &RISCV::FPR32RegClass);
21698 if (Subtarget.hasStdExtZfinx())
21699 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
21700 } else if (VT == MVT::f64) {
21701 if (Subtarget.hasStdExtD())
21702 return std::make_pair(0U, &RISCV::FPR64RegClass);
21703 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21704 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21705 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
21706 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
21707 }
21708 break;
21709 case 'R':
21710 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21711 default:
21712 break;
21713 }
21714 } else if (Constraint == "vr") {
21715 for (const auto *RC :
21716 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
21717 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
21718 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
21719 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
21720 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
21721 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
21722 &RISCV::VRN2M4RegClass}) {
21723 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
21724 return std::make_pair(0U, RC);
21725 }
21726 } else if (Constraint == "vd") {
21727 for (const auto *RC :
21728 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
21729 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
21730 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
21731 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
21732 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
21733 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
21734 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
21735 &RISCV::VRN2M4NoV0RegClass}) {
21736 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
21737 return std::make_pair(0U, RC);
21738 }
21739 } else if (Constraint == "vm") {
21740 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
21741 return std::make_pair(0U, &RISCV::VMV0RegClass);
21742 } else if (Constraint == "cr") {
21743 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
21744 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
21745 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
21746 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
21747 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21748 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21749 if (!VT.isVector())
21750 return std::make_pair(0U, &RISCV::GPRCRegClass);
21751 } else if (Constraint == "cR") {
21752 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21753 } else if (Constraint == "cf") {
21754 if (VT == MVT::f16) {
21755 if (Subtarget.hasStdExtZfhmin())
21756 return std::make_pair(0U, &RISCV::FPR16CRegClass);
21757 if (Subtarget.hasStdExtZhinxmin())
21758 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
21759 } else if (VT == MVT::f32) {
21760 if (Subtarget.hasStdExtF())
21761 return std::make_pair(0U, &RISCV::FPR32CRegClass);
21762 if (Subtarget.hasStdExtZfinx())
21763 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
21764 } else if (VT == MVT::f64) {
21765 if (Subtarget.hasStdExtD())
21766 return std::make_pair(0U, &RISCV::FPR64CRegClass);
21767 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21768 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21769 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
21770 return std::make_pair(0U, &RISCV::GPRCRegClass);
21771 }
21772 }
21773
21774 // Clang will correctly decode the usage of register name aliases into their
21775 // official names. However, other frontends like `rustc` do not. This allows
21776 // users of these frontends to use the ABI names for registers in LLVM-style
21777 // register constraints.
21778 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
21779 .Case("{zero}", RISCV::X0)
21780 .Case("{ra}", RISCV::X1)
21781 .Case("{sp}", RISCV::X2)
21782 .Case("{gp}", RISCV::X3)
21783 .Case("{tp}", RISCV::X4)
21784 .Case("{t0}", RISCV::X5)
21785 .Case("{t1}", RISCV::X6)
21786 .Case("{t2}", RISCV::X7)
21787 .Cases("{s0}", "{fp}", RISCV::X8)
21788 .Case("{s1}", RISCV::X9)
21789 .Case("{a0}", RISCV::X10)
21790 .Case("{a1}", RISCV::X11)
21791 .Case("{a2}", RISCV::X12)
21792 .Case("{a3}", RISCV::X13)
21793 .Case("{a4}", RISCV::X14)
21794 .Case("{a5}", RISCV::X15)
21795 .Case("{a6}", RISCV::X16)
21796 .Case("{a7}", RISCV::X17)
21797 .Case("{s2}", RISCV::X18)
21798 .Case("{s3}", RISCV::X19)
21799 .Case("{s4}", RISCV::X20)
21800 .Case("{s5}", RISCV::X21)
21801 .Case("{s6}", RISCV::X22)
21802 .Case("{s7}", RISCV::X23)
21803 .Case("{s8}", RISCV::X24)
21804 .Case("{s9}", RISCV::X25)
21805 .Case("{s10}", RISCV::X26)
21806 .Case("{s11}", RISCV::X27)
21807 .Case("{t3}", RISCV::X28)
21808 .Case("{t4}", RISCV::X29)
21809 .Case("{t5}", RISCV::X30)
21810 .Case("{t6}", RISCV::X31)
21811 .Default(RISCV::NoRegister);
21812 if (XRegFromAlias != RISCV::NoRegister)
21813 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
21814
21815 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
21816 // TableGen record rather than the AsmName to choose registers for InlineAsm
21817 // constraints, plus we want to match those names to the widest floating point
21818 // register type available, manually select floating point registers here.
21819 //
21820 // The second case is the ABI name of the register, so that frontends can also
21821 // use the ABI names in register constraint lists.
21822 if (Subtarget.hasStdExtF()) {
21823 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
21824 .Cases("{f0}", "{ft0}", RISCV::F0_F)
21825 .Cases("{f1}", "{ft1}", RISCV::F1_F)
21826 .Cases("{f2}", "{ft2}", RISCV::F2_F)
21827 .Cases("{f3}", "{ft3}", RISCV::F3_F)
21828 .Cases("{f4}", "{ft4}", RISCV::F4_F)
21829 .Cases("{f5}", "{ft5}", RISCV::F5_F)
21830 .Cases("{f6}", "{ft6}", RISCV::F6_F)
21831 .Cases("{f7}", "{ft7}", RISCV::F7_F)
21832 .Cases("{f8}", "{fs0}", RISCV::F8_F)
21833 .Cases("{f9}", "{fs1}", RISCV::F9_F)
21834 .Cases("{f10}", "{fa0}", RISCV::F10_F)
21835 .Cases("{f11}", "{fa1}", RISCV::F11_F)
21836 .Cases("{f12}", "{fa2}", RISCV::F12_F)
21837 .Cases("{f13}", "{fa3}", RISCV::F13_F)
21838 .Cases("{f14}", "{fa4}", RISCV::F14_F)
21839 .Cases("{f15}", "{fa5}", RISCV::F15_F)
21840 .Cases("{f16}", "{fa6}", RISCV::F16_F)
21841 .Cases("{f17}", "{fa7}", RISCV::F17_F)
21842 .Cases("{f18}", "{fs2}", RISCV::F18_F)
21843 .Cases("{f19}", "{fs3}", RISCV::F19_F)
21844 .Cases("{f20}", "{fs4}", RISCV::F20_F)
21845 .Cases("{f21}", "{fs5}", RISCV::F21_F)
21846 .Cases("{f22}", "{fs6}", RISCV::F22_F)
21847 .Cases("{f23}", "{fs7}", RISCV::F23_F)
21848 .Cases("{f24}", "{fs8}", RISCV::F24_F)
21849 .Cases("{f25}", "{fs9}", RISCV::F25_F)
21850 .Cases("{f26}", "{fs10}", RISCV::F26_F)
21851 .Cases("{f27}", "{fs11}", RISCV::F27_F)
21852 .Cases("{f28}", "{ft8}", RISCV::F28_F)
21853 .Cases("{f29}", "{ft9}", RISCV::F29_F)
21854 .Cases("{f30}", "{ft10}", RISCV::F30_F)
21855 .Cases("{f31}", "{ft11}", RISCV::F31_F)
21856 .Default(RISCV::NoRegister);
21857 if (FReg != RISCV::NoRegister) {
21858 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
21859 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
21860 unsigned RegNo = FReg - RISCV::F0_F;
21861 unsigned DReg = RISCV::F0_D + RegNo;
21862 return std::make_pair(DReg, &RISCV::FPR64RegClass);
21863 }
21864 if (VT == MVT::f32 || VT == MVT::Other)
21865 return std::make_pair(FReg, &RISCV::FPR32RegClass);
21866 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
21867 unsigned RegNo = FReg - RISCV::F0_F;
21868 unsigned HReg = RISCV::F0_H + RegNo;
21869 return std::make_pair(HReg, &RISCV::FPR16RegClass);
21870 }
21871 }
21872 }
21873
21874 if (Subtarget.hasVInstructions()) {
21875 Register VReg = StringSwitch<Register>(Constraint.lower())
21876 .Case("{v0}", RISCV::V0)
21877 .Case("{v1}", RISCV::V1)
21878 .Case("{v2}", RISCV::V2)
21879 .Case("{v3}", RISCV::V3)
21880 .Case("{v4}", RISCV::V4)
21881 .Case("{v5}", RISCV::V5)
21882 .Case("{v6}", RISCV::V6)
21883 .Case("{v7}", RISCV::V7)
21884 .Case("{v8}", RISCV::V8)
21885 .Case("{v9}", RISCV::V9)
21886 .Case("{v10}", RISCV::V10)
21887 .Case("{v11}", RISCV::V11)
21888 .Case("{v12}", RISCV::V12)
21889 .Case("{v13}", RISCV::V13)
21890 .Case("{v14}", RISCV::V14)
21891 .Case("{v15}", RISCV::V15)
21892 .Case("{v16}", RISCV::V16)
21893 .Case("{v17}", RISCV::V17)
21894 .Case("{v18}", RISCV::V18)
21895 .Case("{v19}", RISCV::V19)
21896 .Case("{v20}", RISCV::V20)
21897 .Case("{v21}", RISCV::V21)
21898 .Case("{v22}", RISCV::V22)
21899 .Case("{v23}", RISCV::V23)
21900 .Case("{v24}", RISCV::V24)
21901 .Case("{v25}", RISCV::V25)
21902 .Case("{v26}", RISCV::V26)
21903 .Case("{v27}", RISCV::V27)
21904 .Case("{v28}", RISCV::V28)
21905 .Case("{v29}", RISCV::V29)
21906 .Case("{v30}", RISCV::V30)
21907 .Case("{v31}", RISCV::V31)
21908 .Default(RISCV::NoRegister);
21909 if (VReg != RISCV::NoRegister) {
21910 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
21911 return std::make_pair(VReg, &RISCV::VMRegClass);
21912 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
21913 return std::make_pair(VReg, &RISCV::VRRegClass);
21914 for (const auto *RC :
21915 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
21916 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
21917 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
21918 return std::make_pair(VReg, RC);
21919 }
21920 }
21921 }
21922 }
21923
21924 std::pair<Register, const TargetRegisterClass *> Res =
21926
21927 // If we picked one of the Zfinx register classes, remap it to the GPR class.
21928 // FIXME: When Zfinx is supported in CodeGen this will need to take the
21929 // Subtarget into account.
21930 if (Res.second == &RISCV::GPRF16RegClass ||
21931 Res.second == &RISCV::GPRF32RegClass ||
21932 Res.second == &RISCV::GPRPairRegClass)
21933 return std::make_pair(Res.first, &RISCV::GPRRegClass);
21934
21935 return Res;
21936}
21937
21940 // Currently only support length 1 constraints.
21941 if (ConstraintCode.size() == 1) {
21942 switch (ConstraintCode[0]) {
21943 case 'A':
21945 default:
21946 break;
21947 }
21948 }
21949
21950 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
21951}
21952
21954 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
21955 SelectionDAG &DAG) const {
21956 // Currently only support length 1 constraints.
21957 if (Constraint.size() == 1) {
21958 switch (Constraint[0]) {
21959 case 'I':
21960 // Validate & create a 12-bit signed immediate operand.
21961 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21962 uint64_t CVal = C->getSExtValue();
21963 if (isInt<12>(CVal))
21964 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
21965 Subtarget.getXLenVT()));
21966 }
21967 return;
21968 case 'J':
21969 // Validate & create an integer zero operand.
21970 if (isNullConstant(Op))
21971 Ops.push_back(
21972 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
21973 return;
21974 case 'K':
21975 // Validate & create a 5-bit unsigned immediate operand.
21976 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21977 uint64_t CVal = C->getZExtValue();
21978 if (isUInt<5>(CVal))
21979 Ops.push_back(
21980 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
21981 }
21982 return;
21983 case 'S':
21985 return;
21986 default:
21987 break;
21988 }
21989 }
21990 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
21991}
21992
21994 Instruction *Inst,
21995 AtomicOrdering Ord) const {
21996 if (Subtarget.hasStdExtZtso()) {
21997 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21998 return Builder.CreateFence(Ord);
21999 return nullptr;
22000 }
22001
22002 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
22003 return Builder.CreateFence(Ord);
22004 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
22005 return Builder.CreateFence(AtomicOrdering::Release);
22006 return nullptr;
22007}
22008
22010 Instruction *Inst,
22011 AtomicOrdering Ord) const {
22012 if (Subtarget.hasStdExtZtso()) {
22013 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
22014 return Builder.CreateFence(Ord);
22015 return nullptr;
22016 }
22017
22018 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
22019 return Builder.CreateFence(AtomicOrdering::Acquire);
22020 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
22023 return nullptr;
22024}
22025
22028 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
22029 // point operations can't be used in an lr/sc sequence without breaking the
22030 // forward-progress guarantee.
22031 if (AI->isFloatingPointOperation() ||
22037
22038 // Don't expand forced atomics, we want to have __sync libcalls instead.
22039 if (Subtarget.hasForcedAtomics())
22041
22042 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
22043 if (AI->getOperation() == AtomicRMWInst::Nand) {
22044 if (Subtarget.hasStdExtZacas() &&
22045 (Size >= 32 || Subtarget.hasStdExtZabha()))
22047 if (Size < 32)
22049 }
22050
22051 if (Size < 32 && !Subtarget.hasStdExtZabha())
22053
22055}
22056
22057static Intrinsic::ID
22059 if (XLen == 32) {
22060 switch (BinOp) {
22061 default:
22062 llvm_unreachable("Unexpected AtomicRMW BinOp");
22064 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
22065 case AtomicRMWInst::Add:
22066 return Intrinsic::riscv_masked_atomicrmw_add_i32;
22067 case AtomicRMWInst::Sub:
22068 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
22070 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
22071 case AtomicRMWInst::Max:
22072 return Intrinsic::riscv_masked_atomicrmw_max_i32;
22073 case AtomicRMWInst::Min:
22074 return Intrinsic::riscv_masked_atomicrmw_min_i32;
22076 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
22078 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
22079 }
22080 }
22081
22082 if (XLen == 64) {
22083 switch (BinOp) {
22084 default:
22085 llvm_unreachable("Unexpected AtomicRMW BinOp");
22087 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
22088 case AtomicRMWInst::Add:
22089 return Intrinsic::riscv_masked_atomicrmw_add_i64;
22090 case AtomicRMWInst::Sub:
22091 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
22093 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
22094 case AtomicRMWInst::Max:
22095 return Intrinsic::riscv_masked_atomicrmw_max_i64;
22096 case AtomicRMWInst::Min:
22097 return Intrinsic::riscv_masked_atomicrmw_min_i64;
22099 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
22101 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
22102 }
22103 }
22104
22105 llvm_unreachable("Unexpected XLen\n");
22106}
22107
22109 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
22110 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
22111 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
22112 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
22113 // mask, as this produces better code than the LR/SC loop emitted by
22114 // int_riscv_masked_atomicrmw_xchg.
22115 if (AI->getOperation() == AtomicRMWInst::Xchg &&
22116 isa<ConstantInt>(AI->getValOperand())) {
22117 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
22118 if (CVal->isZero())
22119 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
22120 Builder.CreateNot(Mask, "Inv_Mask"),
22121 AI->getAlign(), Ord);
22122 if (CVal->isMinusOne())
22123 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
22124 AI->getAlign(), Ord);
22125 }
22126
22127 unsigned XLen = Subtarget.getXLen();
22128 Value *Ordering =
22129 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
22130 Type *Tys[] = {AlignedAddr->getType()};
22132 AI->getModule(),
22134
22135 if (XLen == 64) {
22136 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
22137 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
22138 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
22139 }
22140
22141 Value *Result;
22142
22143 // Must pass the shift amount needed to sign extend the loaded value prior
22144 // to performing a signed comparison for min/max. ShiftAmt is the number of
22145 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
22146 // is the number of bits to left+right shift the value in order to
22147 // sign-extend.
22148 if (AI->getOperation() == AtomicRMWInst::Min ||
22150 const DataLayout &DL = AI->getDataLayout();
22151 unsigned ValWidth =
22152 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
22153 Value *SextShamt =
22154 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
22155 Result = Builder.CreateCall(LrwOpScwLoop,
22156 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
22157 } else {
22158 Result =
22159 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
22160 }
22161
22162 if (XLen == 64)
22163 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
22164 return Result;
22165}
22166
22169 AtomicCmpXchgInst *CI) const {
22170 // Don't expand forced atomics, we want to have __sync libcalls instead.
22171 if (Subtarget.hasForcedAtomics())
22173
22175 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
22176 (Size == 8 || Size == 16))
22179}
22180
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  // Emit a call to the riscv.masked.cmpxchg.{i32,i64} intrinsic, which
  // performs a compare-and-swap on the word at AlignedAddr, with Mask
  // selecting the sub-word lanes of interest.
  unsigned XLen = Subtarget.getXLen();
  // The intrinsic takes the atomic ordering as an explicit XLen-wide
  // integer argument.
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    // On RV64 the intrinsic operates on i64, so sign-extend the i32 operands.
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Value *Result = Builder.CreateIntrinsic(
      CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  // Truncate the RV64 i64 result back to the i32 value the caller expects.
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
22200
    EVT DataVT) const {
  // We have indexed loads for all supported EEW types. Indices are always
  // zero extended.
  // Only strip a ZERO_EXTEND whose source and destination types are both
  // legal, and whose source element type is not i1.
  return Extend.getOpcode() == ISD::ZERO_EXTEND &&
         isTypeLegal(Extend.getValueType()) &&
         isTypeLegal(Extend.getOperand(0).getValueType()) &&
         Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
}
22210
    EVT VT) const {
  // Allow saturating FP-to-int conversion only when the integer operation is
  // already legal/custom and the FP type maps to a scalar FP extension the
  // subtarget actually has.
  if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
    return false;

  switch (FPVT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfhmin();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    // No hardware support for other FP types.
    return false;
  }
}
22227
22229 // If we are using the small code model, we can reduce size of jump table
22230 // entry to 4 bytes.
22231 if (Subtarget.is64Bit() && !isPositionIndependent() &&
22234 }
22236}
22237
22239 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
22240 unsigned uid, MCContext &Ctx) const {
22241 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
22243 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
22244}
22245
  // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
  // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
  // a power of two as well.
  // FIXME: This doesn't work for zve32, but that's already broken
  // elsewhere for the same reason.
  assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
  // Guard the reasoning above at compile time: if RVVBitsPerBlock ever
  // changes, this function needs to be re-audited.
  static_assert(RISCV::RVVBitsPerBlock == 64,
                "RVVBitsPerBlock changed, audit needed");
  return true;
}
22257
22259 SDValue &Offset,
    SelectionDAG &DAG) const {
  // Split an address computation (base +/- constant) into Base and Offset
  // for the XTHeadMemIdx indexed load/store forms.
  // Target does not support indexed loads.
  if (!Subtarget.hasVendorXTHeadMemIdx())
    return false;

  // Only plain ADD/SUB of a base and an operand can be matched.
  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
    return false;

  Base = Op->getOperand(0);
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    // Fold SUB into a negated offset so a single encoding check below
    // covers both opcodes.
    if (Op->getOpcode() == ISD::SUB)
      RHSC = -(uint64_t)RHSC;

    // The constants that can be encoded in the THeadMemIdx instructions
    // are of the form (sign_extend(imm5) << imm2).
    bool isLegalIndexedOffset = false;
    for (unsigned i = 0; i < 4; i++)
      if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
        isLegalIndexedOffset = true;
        break;
      }

    if (!isLegalIndexedOffset)
      return false;

    Offset = Op->getOperand(1);
    return true;
  }

  // Non-constant offsets are not supported here.
  return false;
}
22293
22295 SDValue &Offset,
    SelectionDAG &DAG) const {
  // Match a pre-indexed form: the load/store's pointer itself must be a
  // base +/- legal-immediate computation (see getIndexedAddressParts).
  EVT VT;
  SDValue Ptr;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
  } else
    return false;
  // NOTE(review): VT is assigned but not otherwise used in this function.

  if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
    return false;

  AM = ISD::PRE_INC;
  return true;
}
22315
22317 SDValue &Base,
22318 SDValue &Offset,
    SelectionDAG &DAG) const {
  // XCVmem (RV32 only): register post-increment addressing, so any ADD
  // involving the base pointer can be matched.
  if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
    if (Op->getOpcode() != ISD::ADD)
      return false;

    if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
      Base = LS->getBasePtr();
    else
      return false;

    // The increment may appear on either side of the ADD.
    if (Base == Op->getOperand(0))
      Offset = Op->getOperand(1);
    else if (Base == Op->getOperand(1))
      Offset = Op->getOperand(0);
    else
      return false;

    AM = ISD::POST_INC;
    return true;
  }

  // Otherwise try the XTHeadMemIdx immediate forms.
  EVT VT;
  SDValue Ptr;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
  } else
    return false;

  if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
    return false;
  // Post-indexing updates the base, so it's not a valid transform
  // if that's not the same as the load's pointer.
  if (Ptr != Base)
    return false;

  AM = ISD::POST_INC;
  return true;
}
22362
    EVT VT) const {
  // Report fused multiply-add as profitable whenever the subtarget has
  // hardware support for the scalar element type (scalar or vector form).
  EVT SVT = VT.getScalarType();

  if (!SVT.isSimple())
    return false;

  switch (SVT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    // f16 vectors need Zvfh; scalar f16 needs Zfh or Zhinx.
    return VT.isVector() ? Subtarget.hasVInstructionsF16()
                         : Subtarget.hasStdExtZfhOrZhinx();
  case MVT::f32:
    return Subtarget.hasStdExtFOrZfinx();
  case MVT::f64:
    return Subtarget.hasStdExtDOrZdinx();
  default:
    break;
  }

  return false;
}
22384
  // Zacas will use amocas.w which does not require extension.
  // Without Zacas, require sign extension of the comparison argument
  // (presumably because the LR/SC expansion compares sign-extended values —
  // confirm against the cmpxchg lowering).
  return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
}
22389
    const Constant *PersonalityFn) const {
  // X10 is the ABI register a0.
  return RISCV::X10;
}
22394
    const Constant *PersonalityFn) const {
  // X11 is the ABI register a1.
  return RISCV::X11;
}
22399
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is a float narrower than XLEN on a soft FP ABI.
  if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
                                  Type.getSizeInBits() < Subtarget.getXLen()))
    return false;

  // All other types follow the default behavior and get extended.
  return true;
}
22409
    bool IsSigned) const {
  // On RV64, i32 values are always passed sign-extended to 64 bits,
  // regardless of their own signedness.
  if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
    return true;

  // Otherwise follow the value's declared signedness.
  return IsSigned;
}
22417
    SDValue C) const {
  // Decide whether a MUL by constant C is better expanded into shifts plus
  // adds/subs rather than kept as a multiply.
  // Check integral scalar types.
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the sub target has the Zmmul extension (the
  // multiply subset of M) and the data size exceeds XLen.
  const bool HasZmmul = Subtarget.hasStdExtZmmul();
  if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
    return false;

  auto *ConstNode = cast<ConstantSDNode>(C);
  const APInt &Imm = ConstNode->getAPIntValue();

  // Break the MUL to a SLLI and an ADD/SUB (covers Imm = 2^k +/- 1 and
  // Imm = -(2^k) +/- 1).
  if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
      (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
    return true;

  // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
  if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
      ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
       (Imm - 8).isPowerOf2()))
    return true;

  // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
  // a pair of LUI/ADDI.
  if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
      ConstNode->hasOneUse()) {
    // Strip trailing zeros (handled by a final shift) and re-test the
    // power-of-two-neighbor patterns.
    APInt ImmS = Imm.ashr(Imm.countr_zero());
    if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
        (1 - ImmS).isPowerOf2())
      return true;
  }

  return false;
}
22456
    SDValue ConstNode) const {
  // DAGCombiner hook: is folding (mul (add x, c1), c2) into
  // (add (mul x, c2), c1*c2) profitable here?
  // Let the DAGCombiner decide for vectors.
  EVT VT = AddNode.getValueType();
  if (VT.isVector())
    return true;

  // Let the DAGCombiner decide for larger types.
  if (VT.getScalarSizeInBits() > Subtarget.getXLen())
    return true;

  // It is worse if c1 is simm12 while c1*c2 is not.
  ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
  ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
  const APInt &C1 = C1Node->getAPIntValue();
  const APInt &C2 = C2Node->getAPIntValue();
  if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
    return false;

  // Default to true and let the DAGCombiner decide.
  return true;
}
22479
22481 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  // Scalars: misaligned access is allowed (and considered fast) only when
  // the subtarget explicitly enables unaligned scalar memory.
  if (!VT.isVector()) {
    if (Fast)
      *Fast = Subtarget.enableUnalignedScalarMem();
    return Subtarget.enableUnalignedScalarMem();
  }

  // All vector implementations must support element alignment
  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = 1;
    return true;
  }

  // Note: We lower an unmasked unaligned vector access to an equally sized
  // e8 element type access. Given this, we effectively support all unmasked
  // misaligned accesses. TODO: Work through the codegen implications of
  // allowing such accesses to be formed, and considered fast.
  if (Fast)
    *Fast = Subtarget.enableUnalignedVectorMem();
  return Subtarget.enableUnalignedVectorMem();
}
22505
22506
    const AttributeList &FuncAttributes) const {
  // Choose a vector type for inline memcpy/memset expansion, or return
  // MVT::Other to fall back to the default scalar lowering.
  if (!Subtarget.hasVInstructions())
    return MVT::Other;

  // Honor the per-function request to avoid implicit FP/vector usage.
  if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
    return MVT::Other;

  // We use LMUL1 memory operations here for a non-obvious reason. Our caller
  // has an expansion threshold, and we want the number of hardware memory
  // operations to correspond roughly to that threshold. LMUL>1 operations
  // are typically expanded linearly internally, and thus correspond to more
  // than one actual memory operation. Note that store merging and load
  // combining will typically form larger LMUL operations from the LMUL1
  // operations emitted here, and that's okay because combining isn't
  // introducing new memory operations; it's just merging existing ones.
  const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
  if (Op.size() < MinVLenInBytes)
    // TODO: Figure out short memops. For the moment, do the default thing
    // which ends up using scalar sequences.
    return MVT::Other;

  // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
  // fixed vectors.
  if (MinVLenInBytes <= RISCV::RVVBitsPerBlock / 8)
    return MVT::Other;

  // Prefer i8 for non-zero memset as it allows us to avoid materializing
  // a large scalar constant and instead use vmv.v.x/i to do the
  // broadcast. For everything else, prefer ELenVT to minimize VL and thus
  // maximize the chance we can encode the size in the vsetvli.
  MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
  MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;

  // Do we have sufficient alignment for our preferred VT? If not, revert
  // to largest size allowed by our alignment criteria.
  if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
    Align RequiredAlign(PreferredVT.getStoreSize());
    if (Op.isFixedDstAlign())
      RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
    if (Op.isMemcpy())
      RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
    PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
  }
  // Element count = bytes-per-op / bytes-per-element.
  return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
}
22553
22555 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
22556 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
22557 bool IsABIRegCopy = CC.has_value();
22558 EVT ValueVT = Val.getValueType();
22559
22560 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
22561 if ((ValueVT == PairVT ||
22562 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
22563 ValueVT == MVT::f64)) &&
22564 NumParts == 1 && PartVT == MVT::Untyped) {
22565 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
22566 MVT XLenVT = Subtarget.getXLenVT();
22567 if (ValueVT == MVT::f64)
22568 Val = DAG.getBitcast(MVT::i64, Val);
22569 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
22570 // Always creating an MVT::Untyped part, so always use
22571 // RISCVISD::BuildGPRPair.
22572 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
22573 return true;
22574 }
22575
22576 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
22577 PartVT == MVT::f32) {
22578 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
22579 // nan, and cast to f32.
22580 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
22581 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
22582 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
22583 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
22584 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
22585 Parts[0] = Val;
22586 return true;
22587 }
22588
22589 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
22590#ifndef NDEBUG
22591 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
22592 [[maybe_unused]] unsigned ValLMUL =
22594 ValNF * RISCV::RVVBitsPerBlock);
22595 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
22596 [[maybe_unused]] unsigned PartLMUL =
22598 PartNF * RISCV::RVVBitsPerBlock);
22599 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
22600 "RISC-V vector tuple type only accepts same register class type "
22601 "TUPLE_INSERT");
22602#endif
22603
22604 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
22605 Val, DAG.getVectorIdxConstant(0, DL));
22606 Parts[0] = Val;
22607 return true;
22608 }
22609
22610 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
22611 LLVMContext &Context = *DAG.getContext();
22612 EVT ValueEltVT = ValueVT.getVectorElementType();
22613 EVT PartEltVT = PartVT.getVectorElementType();
22614 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
22615 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
22616 if (PartVTBitSize % ValueVTBitSize == 0) {
22617 assert(PartVTBitSize >= ValueVTBitSize);
22618 // If the element types are different, bitcast to the same element type of
22619 // PartVT first.
22620 // Give an example here, we want copy a <vscale x 1 x i8> value to
22621 // <vscale x 4 x i16>.
22622 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
22623 // subvector, then we can bitcast to <vscale x 4 x i16>.
22624 if (ValueEltVT != PartEltVT) {
22625 if (PartVTBitSize > ValueVTBitSize) {
22626 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
22627 assert(Count != 0 && "The number of element should not be zero.");
22628 EVT SameEltTypeVT =
22629 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
22630 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
22631 DAG.getUNDEF(SameEltTypeVT), Val,
22632 DAG.getVectorIdxConstant(0, DL));
22633 }
22634 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
22635 } else {
22636 Val =
22637 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
22638 Val, DAG.getVectorIdxConstant(0, DL));
22639 }
22640 Parts[0] = Val;
22641 return true;
22642 }
22643 }
22644
22645 return false;
22646}
22647
22649 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  // Inverse of splitValueIntoRegisterParts: reassemble a ValueVT value from
  // NumParts parts of type PartVT. CC is set when this is an ABI register
  // copy (argument/return lowering) rather than a generic copy.
  bool IsABIRegCopy = CC.has_value();

  MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
  if ((ValueVT == PairVT ||
       (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
        ValueVT == MVT::f64)) &&
      NumParts == 1 && PartVT == MVT::Untyped) {
    // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
    MVT XLenVT = Subtarget.getXLenVT();

    SDValue Val = Parts[0];
    // Always starting with an MVT::Untyped part, so always use
    // RISCVISD::SplitGPRPair
    Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
                      Val);
    // Recombine the two XLen halves into the pair-sized integer.
    Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
                      Val.getValue(1));
    if (ValueVT == MVT::f64)
      Val = DAG.getBitcast(ValueVT, Val);
    return Val;
  }

  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert it to the same element type
      // of PartVT.
      // Give an example here, we want copy a <vscale x 1 x i8> value from
      // <vscale x 4 x i16>.
      // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
      // then we can extract <vscale x 1 x i8>.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of element should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      // Extract the leading ValueVT-sized subvector.
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  // Unhandled combination: defer to the generic implementation.
  return SDValue();
}
22714
  // When aggressively optimizing for code size, we prefer to use a div
  // instruction, as it is usually smaller than the alternative sequence.
  // TODO: Add vector division?
  // i.e. treat scalar division as "cheap" only under the minsize attribute.
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}
22722
  // Scalarizing a zero_extend or sign_extend before a splat can prevent the
  // pattern from matching RVV widening instructions in some situations, so
  // keep those as vector operations.
  unsigned Opc = N->getOpcode();
  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
    return false;
  // Everything else may be scalarized.
  return true;
}
22731
22732static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
22733 Module *M = IRB.GetInsertBlock()->getModule();
22734 Function *ThreadPointerFunc =
22735 Intrinsic::getOrInsertDeclaration(M, Intrinsic::thread_pointer);
22736 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
22737 IRB.CreateCall(ThreadPointerFunc), Offset);
22738}
22739
22741 // Fuchsia provides a fixed TLS slot for the stack cookie.
22742 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
22743 if (Subtarget.isTargetFuchsia())
22744 return useTpOffset(IRB, -0x10);
22745
22746 // Android provides a fixed TLS slot for the stack cookie. See the definition
22747 // of TLS_SLOT_STACK_GUARD in
22748 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
22749 if (Subtarget.isTargetAndroid())
22750 return useTpOffset(IRB, -0x18);
22751
22752 Module *M = IRB.GetInsertBlock()->getModule();
22753
22754 if (M->getStackProtectorGuard() == "tls") {
22755 // Users must specify the offset explicitly
22756 int Offset = M->getStackProtectorGuardOffset();
22757 return useTpOffset(IRB, Offset);
22758 }
22759
22761}
22762
22764 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
22765 const DataLayout &DL) const {
22766 EVT VT = getValueType(DL, VTy);
22767 // Don't lower vlseg/vsseg for vector types that can't be split.
22768 if (!isTypeLegal(VT))
22769 return false;
22770
22772 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
22773 Alignment))
22774 return false;
22775
22776 MVT ContainerVT = VT.getSimpleVT();
22777
22778 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
22779 if (!Subtarget.useRVVForFixedLengthVectors())
22780 return false;
22781 // Sometimes the interleaved access pass picks up splats as interleaves of
22782 // one element. Don't lower these.
22783 if (FVTy->getNumElements() < 2)
22784 return false;
22785
22787 } else {
22788 // The intrinsics for scalable vectors are not overloaded on pointer type
22789 // and can only handle the default address space.
22790 if (AddrSpace)
22791 return false;
22792 }
22793
22794 // Need to make sure that EMUL * NFIELDS ≤ 8
22795 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
22796 if (Fractional)
22797 return true;
22798 return Factor * LMUL <= 8;
22799}
22800
    Align Alignment) const {
  // Strided loads/stores require the vector extension.
  if (!Subtarget.hasVInstructions())
    return false;

  // Only support fixed vectors if we know the minimum vector size.
  if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
    return false;

  // The element type must be one RVV can legally hold.
  EVT ScalarType = DataType.getScalarType();
  if (!isLegalElementTypeForRVV(ScalarType))
    return false;

  // Without unaligned vector memory support, each element access must be
  // naturally aligned.
  if (!Subtarget.enableUnalignedVectorMem() &&
      Alignment < ScalarType.getStoreSize())
    return false;

  return true;
}
22820
// Table of fixed-vector segment-load intrinsic IDs, indexed by (Factor - 2).
// NOTE(review): the declaration line (listing 22821, presumably
// `static const Intrinsic::ID FixedVlsegIntrIds[] = {`) was dropped by the
// scrape.
22822 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
22823 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
22824 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
22825 Intrinsic::riscv_seg8_load};
22826
22827/// Lower an interleaved load into a vlsegN intrinsic.
22828///
22829/// E.g. Lower an interleaved load (Factor = 2):
22830/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
22831/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
22832/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
22833///
22834/// Into:
22835/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
22836/// %ptr, i64 4)
22837/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
22838/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
// NOTE(review): the signature lines (listing 22839-22840, presumably
// `bool RISCVTargetLowering::lowerInterleavedLoad(LoadInst *LI, ...`) were
// dropped by the scrape.
22841 ArrayRef<unsigned> Indices, unsigned Factor) const {
22842 assert(Indices.size() == Shuffles.size());
22843
22844 IRBuilder<> Builder(LI);
22845
22846 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
22847 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
// NOTE(review): listing line 22848 (presumably the address-space argument)
// is missing here.
22849 LI->getDataLayout()))
22850 return false;
22851
22852 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22853
22854 // If the segment load is going to be performed segment at a time anyways
22855 // and there's only one element used, use a strided load instead. This
22856 // will be equally fast, and create less vector register pressure.
22857 if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
22858 unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
22859 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22860 Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
22861 Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
22862 Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
22863 Value *VL = Builder.getInt32(VTy->getNumElements());
22864
22865 CallInst *CI =
22866 Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
22867 {VTy, BasePtr->getType(), Stride->getType()},
22868 {BasePtr, Stride, Mask, VL});
22869 CI->addParamAttr(
// NOTE(review): listing line 22870 (the alignment-attribute argument) is
// missing here.
22871 Shuffles[0]->replaceAllUsesWith(CI);
22872 return true;
22873 };
22874
// Full segment load path: one vlsegN and an extractvalue per used index.
22875 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22876
22877 CallInst *VlsegN = Builder.CreateIntrinsic(
22878 FixedVlsegIntrIds[Factor - 2], {VTy, LI->getPointerOperandType(), XLenTy},
22879 {LI->getPointerOperand(), VL});
22880
22881 for (unsigned i = 0; i < Shuffles.size(); i++) {
22882 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
22883 Shuffles[i]->replaceAllUsesWith(SubVec);
22884 }
22885
22886 return true;
22887}
22888
// Table of fixed-vector segment-store intrinsic IDs, indexed by (Factor - 2).
// NOTE(review): the declaration line (listing 22889, presumably
// `static const Intrinsic::ID FixedVssegIntrIds[] = {`) was dropped by the
// scrape.
22890 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
22891 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
22892 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
22893 Intrinsic::riscv_seg8_store};
22894
22895/// Lower an interleaved store into a vssegN intrinsic.
22896///
22897/// E.g. Lower an interleaved store (Factor = 3):
22898/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
22899/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
22900/// store <12 x i32> %i.vec, <12 x i32>* %ptr
22901///
22902/// Into:
22903/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
22904/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
22905/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
22906/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
22907/// %ptr, i32 4)
22908///
22909/// Note that the new shufflevectors will be removed and we'll only generate one
22910/// vsseg3 instruction in CodeGen.
// NOTE(review): the first signature line (listing 22911, presumably
// `bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,`) was
// dropped by the scrape.
22912 ShuffleVectorInst *SVI,
22913 unsigned Factor) const {
22914 IRBuilder<> Builder(SI);
22915 auto Mask = SVI->getShuffleMask();
22916 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
22917 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
22918 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
22919 ShuffleVTy->getNumElements() / Factor);
22920 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
22921 SI->getPointerAddressSpace(),
22922 SI->getDataLayout()))
22923 return false;
22924
22925 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22926
22927 unsigned Index;
22928 // If the segment store only has one active lane (i.e. the interleave is
22929 // just a spread shuffle), we can use a strided store instead. This will
22930 // be equally fast, and create less vector register pressure.
22931 if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
22932 isSpreadMask(Mask, Factor, Index)) {
22933 unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
22934 Value *Data = SVI->getOperand(0);
22935 auto *DataVTy = cast<FixedVectorType>(Data->getType());
22936 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22937 Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
22938 Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
22939 Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
22940 Value *VL = Builder.getInt32(VTy->getNumElements());
22941
22942 CallInst *CI = Builder.CreateIntrinsic(
22943 Intrinsic::experimental_vp_strided_store,
22944 {Data->getType(), BasePtr->getType(), Stride->getType()},
22945 {Data, BasePtr, Stride, Mask, VL});
22946 CI->addParamAttr(
22947 1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
22948
22949 return true;
22950 }
22951
// NOTE(review): listing line 22952 (presumably the declaration of
// `VssegNFunc` via Intrinsic::getOrInsertDeclaration) is missing here.
22953 SI->getModule(), FixedVssegIntrIds[Factor - 2],
22954 {VTy, SI->getPointerOperandType(), XLenTy});
22955
// NOTE(review): listing line 22956 (presumably `SmallVector<Value *, 10> Ops;`)
// is missing here.
22957
// Build one de-interleaving shuffle per field; CodeGen folds these away.
22958 for (unsigned i = 0; i < Factor; i++) {
22959 Value *Shuffle = Builder.CreateShuffleVector(
22960 SVI->getOperand(0), SVI->getOperand(1),
22961 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
22962 Ops.push_back(Shuffle);
22963 }
22964 // This VL should be OK (should be executable in one vsseg instruction,
22965 // potentially under larger LMULs) because we checked that the fixed vector
22966 // type fits in isLegalInterleavedAccessType
22967 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22968 Ops.append({SI->getPointerOperand(), VL});
22969
22970 Builder.CreateCall(VssegNFunc, Ops);
22971
22972 return true;
22973}
22974
// RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad (name inferred —
// the signature line, listing 22975, was dropped by the scrape):
// Lower a load + vector.deinterleaveN into a vlsegN intrinsic. Fixed
// vectors use the riscv_segN_load intrinsics; scalable vectors go through
// the riscv_vlsegN tuple intrinsics plus riscv_tuple_extract.
22976 LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const {
22977 unsigned Factor = DeinterleaveValues.size();
// Segment loads only exist for NFIELDS up to 8.
22978 if (Factor > 8)
22979 return false;
22980
22981 assert(LI->isSimple());
22982 IRBuilder<> Builder(LI);
22983
22984 auto *ResVTy = cast<VectorType>(DeinterleaveValues[0]->getType());
22985
22986 const DataLayout &DL = LI->getDataLayout();
22987
22988 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
22989 LI->getPointerAddressSpace(), DL))
22990 return false;
22991
22992 Value *Return;
22993 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22994
22995 if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
22996 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22997 Return =
22998 Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
22999 {ResVTy, LI->getPointerOperandType(), XLenTy},
23000 {LI->getPointerOperand(), VL});
23001 } else {
23002 static const Intrinsic::ID IntrIds[] = {
23003 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
23004 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
23005 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
23006 Intrinsic::riscv_vlseg8};
23007
23008 unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType());
23009 unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
23010 Type *VecTupTy = TargetExtType::get(
23011 LI->getContext(), "riscv.vector.tuple",
// NOTE(review): listing line 23012 (presumably the
// `ScalableVectorType::get(Type::getInt8Ty(...),` argument, as in the
// store path below) is missing here.
23013 NumElts * SEW / 8),
23014 Factor);
23015
// All-ones VL == VLMAX for the whole-register segment load.
23016 Value *VL = Constant::getAllOnesValue(XLenTy);
23017
23018 Value *Vlseg = Builder.CreateIntrinsic(
23019 IntrIds[Factor - 2], {VecTupTy, XLenTy},
23020 {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL,
23021 ConstantInt::get(XLenTy, Log2_64(SEW))});
23022
// Repackage the tuple as a struct of Factor vectors so the results look
// like the deinterleave intrinsic's aggregate return.
23023 SmallVector<Type *, 2> AggrTypes{Factor, ResVTy};
23024 Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes));
23025 for (unsigned i = 0; i < Factor; ++i) {
23026 Value *VecExtract = Builder.CreateIntrinsic(
23027 Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy},
23028 {Vlseg, Builder.getInt32(i)});
23029 Return = Builder.CreateInsertValue(Return, VecExtract, i);
23030 }
23031 }
23032
23033 for (auto [Idx, DIV] : enumerate(DeinterleaveValues)) {
23034 // We have to create a brand new ExtractValue to replace each
23035 // of these old ExtractValue instructions.
23036 Value *NewEV =
23037 Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
23038 DIV->replaceAllUsesWith(NewEV);
23039 }
23040
23041 return true;
23042}
23043
// RISCVTargetLowering::lowerInterleaveIntrinsicToStore (name inferred —
// the signature line, listing 23044, was dropped by the scrape):
// Lower a vector.interleaveN + store into a vssegN intrinsic. Fixed
// vectors use riscv_segN_store; scalable vectors build a
// riscv.vector.tuple via riscv_tuple_insert and call riscv_vssegN.
23045 StoreInst *SI, ArrayRef<Value *> InterleaveValues) const {
23046 unsigned Factor = InterleaveValues.size();
// Segment stores only exist for NFIELDS up to 8.
23047 if (Factor > 8)
23048 return false;
23049
23050 assert(SI->isSimple());
23051 IRBuilder<> Builder(SI);
23052
23053 auto *InVTy = cast<VectorType>(InterleaveValues[0]->getType());
23054 const DataLayout &DL = SI->getDataLayout();
23055
23056 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
23057 SI->getPointerAddressSpace(), DL))
23058 return false;
23059
23060 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
23061
23062 if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
// NOTE(review): listing line 23063 (presumably the declaration of
// `VssegNFunc` via Intrinsic::getOrInsertDeclaration) is missing here.
23064 SI->getModule(), FixedVssegIntrIds[Factor - 2],
23065 {InVTy, SI->getPointerOperandType(), XLenTy});
23066
23067 SmallVector<Value *, 10> Ops(InterleaveValues.begin(),
23068 InterleaveValues.end());
23069 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
23070 Ops.append({SI->getPointerOperand(), VL});
23071
23072 Builder.CreateCall(VssegNFunc, Ops);
23073 } else {
23074 static const Intrinsic::ID IntrIds[] = {
23075 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
23076 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
23077 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
23078 Intrinsic::riscv_vsseg8};
23079
23080 unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
23081 unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
23082 Type *VecTupTy = TargetExtType::get(
23083 SI->getContext(), "riscv.vector.tuple",
23084 ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
23085 NumElts * SEW / 8),
23086 Factor);
23087
// NOTE(review): listing line 23088 (presumably the declaration of
// `VssegNFunc` for the scalable path) is missing here.
23089 SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy});
23090
// All-ones VL == VLMAX for the whole-register segment store.
23091 Value *VL = Constant::getAllOnesValue(XLenTy);
23092
23093 Value *StoredVal = PoisonValue::get(VecTupTy);
23094 for (unsigned i = 0; i < Factor; ++i)
23095 StoredVal = Builder.CreateIntrinsic(
23096 Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
23097 {StoredVal, InterleaveValues[i], Builder.getInt32(i)});
23098
23099 Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL,
23100 ConstantInt::get(XLenTy, Log2_64(SEW))});
23101 }
23102
23103 return true;
23104}
23105
23106static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
23107 assert(N);
23108 if (N == 1)
23109 return true;
23110
23111 if (isPowerOf2_32(N)) {
23113 return KB.countMinTrailingZeros() >= Log2_32(N);
23114 }
23115
23116 using namespace PatternMatch;
23117 // Right now we're only recognizing the simplest pattern.
23118 uint64_t C;
23119 return match(V, m_c_Mul(m_Value(), m_ConstantInt(C))) && C && C % N == 0;
23120}
23121
23122/// Lower an interleaved vp.load into a vlsegN intrinsic.
23123///
23124/// E.g. Lower an interleaved vp.load (Factor = 2):
23125/// %l = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr %ptr,
23126/// %mask,
23127/// i32 %wide.rvl)
23128/// %dl = tail call { <vscale x 32 x i8>, <vscale x 32 x i8> }
23129/// @llvm.vector.deinterleave2.nxv64i8(
23130/// <vscale x 64 x i8> %l)
23131/// %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 0
23132/// %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 1
23133///
23134/// Into:
23135/// %rvl = udiv %wide.rvl, 2
23136/// %sl = call { <vscale x 32 x i8>, <vscale x 32 x i8> }
23137/// @llvm.riscv.vlseg2.mask.nxv32i8.i64(<vscale x 32 x i8> undef,
23138/// <vscale x 32 x i8> undef,
23139/// ptr %ptr,
23140/// %mask,
23141/// i64 %rvl,
23142/// i64 1)
23143/// %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 0
23144/// %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 1
23145///
23146/// NOTE: the deinterleave2 intrinsic won't be touched and is expected to be
23147/// removed by the caller
23148/// TODO: We probably can loosen the dependency on matching extractvalue when
23149/// dealing with factor of 2 (extractvalue is still required for most of other
23150/// factors though).
// NOTE(review): the first signature line (listing 23151) was dropped by the
// scrape.
23152 VPIntrinsic *Load, Value *Mask,
23153 ArrayRef<Value *> DeinterleaveResults) const {
23154 assert(Mask && "Expect a valid mask");
23155 assert(Load->getIntrinsicID() == Intrinsic::vp_load &&
23156 "Unexpected intrinsic");
23157
23158 const unsigned Factor = DeinterleaveResults.size();
23159
23160 auto *WideVTy = dyn_cast<ScalableVectorType>(Load->getType());
23161 // TODO: Support fixed vectors.
23162 if (!WideVTy)
23163 return false;
23164
23165 unsigned WideNumElements = WideVTy->getElementCount().getKnownMinValue();
23166 assert(WideNumElements % Factor == 0 &&
23167 "ElementCount of a wide load must be divisible by interleave factor");
23168 auto *VTy =
23169 VectorType::get(WideVTy->getScalarType(), WideNumElements / Factor,
23170 WideVTy->isScalableTy());
23171 auto &DL = Load->getModule()->getDataLayout();
23172 Align Alignment = Load->getParamAlign(0).value_or(
23173 DL.getABITypeAlign(WideVTy->getElementType()));
// NOTE(review): listing line 23174 (presumably the start of an
// `if (!isLegalInterleavedAccessType(` check) is missing here.
23175 VTy, Factor, Alignment,
23176 Load->getArgOperand(0)->getType()->getPointerAddressSpace(), DL))
23177 return false;
23178
23179 IRBuilder<> Builder(Load);
23180 Value *WideEVL = Load->getArgOperand(2);
23181 // Conservatively check if EVL is a multiple of factor, otherwise some
23182 // (trailing) elements might be lost after the transformation.
23183 if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor))
23184 return false;
23185
23186 auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
// Per-segment EVL = wide EVL / Factor, widened to XLEN.
23187 Value *EVL = Builder.CreateZExt(
23188 Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)),
23189 XLenTy);
23190
23191 static const Intrinsic::ID IntrMaskIds[] = {
23192 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
23193 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
23194 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
23195 Intrinsic::riscv_vlseg8_mask,
23196 };
23197
23198 unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
23199 unsigned NumElts = VTy->getElementCount().getKnownMinValue();
23200 Type *VecTupTy = TargetExtType::get(
23201 Load->getContext(), "riscv.vector.tuple",
23202 ScalableVectorType::get(Type::getInt8Ty(Load->getContext()),
23203 NumElts * SEW / 8),
23204 Factor);
23205
23206 Value *PoisonVal = PoisonValue::get(VecTupTy);
23207
// NOTE(review): listing line 23208 (presumably the declaration of
// `VlsegNFunc` via Intrinsic::getOrInsertDeclaration) is missing here.
23209 Load->getModule(), IntrMaskIds[Factor - 2],
23210 {VecTupTy, Mask->getType(), EVL->getType()});
23211
23212 Value *Operands[] = {
23213 PoisonVal,
23214 Load->getArgOperand(0),
23215 Mask,
23216 EVL,
23217 ConstantInt::get(XLenTy, RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC),
23218 ConstantInt::get(XLenTy, Log2_64(SEW))};
23219
23220 CallInst *VlsegN = Builder.CreateCall(VlsegNFunc, Operands);
23221
// Repackage the tuple as a struct so the results mirror the deinterleave
// intrinsic's aggregate return.
23222 SmallVector<Type *, 8> AggrTypes{Factor, VTy};
23223 Value *Return =
23224 PoisonValue::get(StructType::get(Load->getContext(), AggrTypes));
// NOTE(review): listing line 23225 (presumably the declaration of
// `VecExtractFunc`) is missing here.
23226 Load->getModule(), Intrinsic::riscv_tuple_extract, {VTy, VecTupTy});
23227 for (unsigned i = 0; i < Factor; ++i) {
23228 Value *VecExtract =
23229 Builder.CreateCall(VecExtractFunc, {VlsegN, Builder.getInt32(i)});
23230 Return = Builder.CreateInsertValue(Return, VecExtract, i);
23231 }
23232
23233 for (auto [Idx, DIO] : enumerate(DeinterleaveResults)) {
23234 // We have to create a brand new ExtractValue to replace each
23235 // of these old ExtractValue instructions.
23236 Value *NewEV =
23237 Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
23238 DIO->replaceAllUsesWith(NewEV);
23239 }
23240
23241 return true;
23242}
23243
23244/// Lower an interleaved vp.store into a vssegN intrinsic.
23245///
23246/// E.g. Lower an interleaved vp.store (Factor = 2):
23247///
23248/// %is = tail call <vscale x 64 x i8>
23249/// @llvm.vector.interleave2.nxv64i8(
23250/// <vscale x 32 x i8> %load0,
23251/// <vscale x 32 x i8> %load1
23252/// %wide.rvl = shl nuw nsw i32 %rvl, 1
23253/// tail call void @llvm.vp.store.nxv64i8.p0(
23254/// <vscale x 64 x i8> %is, ptr %ptr,
23255/// %mask,
23256/// i32 %wide.rvl)
23257///
23258/// Into:
23259/// call void @llvm.riscv.vsseg2.mask.nxv32i8.i64(
23260/// <vscale x 32 x i8> %load1,
23261/// <vscale x 32 x i8> %load2, ptr %ptr,
23262/// %mask,
23263/// i64 %rvl)
// NOTE(review): the first signature line (listing 23264) was dropped by the
// scrape.
23265 VPIntrinsic *Store, Value *Mask,
23266 ArrayRef<Value *> InterleaveOperands) const {
23267 assert(Mask && "Expect a valid mask");
23268 assert(Store->getIntrinsicID() == Intrinsic::vp_store &&
23269 "Unexpected intrinsic");
23270
23271 const unsigned Factor = InterleaveOperands.size();
23272
23273 auto *VTy = dyn_cast<ScalableVectorType>(InterleaveOperands[0]->getType());
23274 // TODO: Support fixed vectors.
23275 if (!VTy)
23276 return false;
23277
23278 const DataLayout &DL = Store->getDataLayout();
23279 Align Alignment = Store->getParamAlign(1).value_or(
23280 DL.getABITypeAlign(VTy->getElementType()));
// NOTE(review): listing line 23281 (presumably the start of an
// `if (!isLegalInterleavedAccessType(` check) is missing here.
23282 VTy, Factor, Alignment,
23283 Store->getArgOperand(1)->getType()->getPointerAddressSpace(), DL))
23284 return false;
23285
23286 IRBuilder<> Builder(Store);
23287 Value *WideEVL = Store->getArgOperand(3);
23288 // Conservatively check if EVL is a multiple of factor, otherwise some
23289 // (trailing) elements might be lost after the transformation.
23290 if (!isMultipleOfN(WideEVL, Store->getDataLayout(), Factor))
23291 return false;
23292
23293 auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
// Per-segment EVL = wide EVL / Factor, widened to XLEN.
23294 Value *EVL = Builder.CreateZExt(
23295 Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)),
23296 XLenTy);
23297
23298 static const Intrinsic::ID IntrMaskIds[] = {
23299 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
23300 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
23301 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
23302 Intrinsic::riscv_vsseg8_mask,
23303 };
23304
23305 unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
23306 unsigned NumElts = VTy->getElementCount().getKnownMinValue();
23307 Type *VecTupTy = TargetExtType::get(
23308 Store->getContext(), "riscv.vector.tuple",
23309 ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
23310 NumElts * SEW / 8),
23311 Factor);
23312
// NOTE(review): listing line 23313 (presumably the declaration of
// `VecInsertFunc`) is missing here.
23314 Store->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, VTy});
23315 Value *StoredVal = PoisonValue::get(VecTupTy);
23316 for (unsigned i = 0; i < Factor; ++i)
23317 StoredVal = Builder.CreateCall(
23318 VecInsertFunc, {StoredVal, InterleaveOperands[i], Builder.getInt32(i)});
23319
// NOTE(review): listing line 23320 (presumably the declaration of
// `VssegNFunc`) is missing here.
23321 Store->getModule(), IntrMaskIds[Factor - 2],
23322 {VecTupTy, Mask->getType(), EVL->getType()});
23323
23324 Value *Operands[] = {StoredVal, Store->getArgOperand(1), Mask, EVL,
23325 ConstantInt::get(XLenTy, Log2_64(SEW))};
23326
23327 Builder.CreateCall(VssegNFunc, Operands);
23328 return true;
23329}
23330
// RISCVTargetLowering::EmitKCFICheck (name inferred — the signature lines,
// listing 23331-23333, were dropped by the scrape): materialize a
// KCFI_CHECK pseudo in front of an indirect call/tail-call whose expected
// type hash is recorded as the instruction's CFI type.
23334 const TargetInstrInfo *TII) const {
23335 assert(MBBI->isCall() && MBBI->getCFIType() &&
23336 "Invalid call instruction for a KCFI check");
23337 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
23338 MBBI->getOpcode()));
23339
// The check reads the raw target register; renaming it would break the
// pairing between the check and the call.
23340 MachineOperand &Target = MBBI->getOperand(0);
23341 Target.setIsRenamable(false);
23342
23343 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
23344 .addReg(Target.getReg())
23345 .addImm(MBBI->getCFIType())
23346 .getInstr();
23347}
23348
23349#define GET_REGISTER_MATCHER
23350#include "RISCVGenAsmMatcher.inc"
23351
// RISCVTargetLowering::getRegisterByName (name inferred — signature lines,
// listing 23352-23353, were dropped by the scrape): resolve a named
// register (for e.g. llvm.read_register) and fail hard unless it is
// reserved.
// NOTE(review): listing lines 23355, 23357 and 23359 are missing —
// presumably the MatchRegisterName/MatchRegisterAltName lookups and the
// report_fatal_error call these `if`s guard; confirm upstream.
23354 const MachineFunction &MF) const {
23356 if (Reg == RISCV::NoRegister)
23358 if (Reg == RISCV::NoRegister)
23360 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
23361 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
23362 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
23363 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
23364 StringRef(RegName) + "\"."));
23365 return Reg;
23366}
23367
// RISCVTargetLowering::getTargetMMOFlags(const Instruction &) (name
// inferred — signature lines, listing 23368-23369, were dropped by the
// scrape): encode the instruction's non-temporal-locality hint metadata
// into the two target-specific MachineMemOperand flag bits.
23370 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
23371
23372 if (NontemporalInfo == nullptr)
// NOTE(review): listing line 23373 (presumably `return MachineMemOperand::MONone;`)
// is missing here.
23374
23375 // 1 for default value work as __RISCV_NTLH_ALL
23376 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
23377 // 3 -> __RISCV_NTLH_ALL_PRIVATE
23378 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
23379 // 5 -> __RISCV_NTLH_ALL
23380 int NontemporalLevel = 5;
23381 const MDNode *RISCVNontemporalInfo =
23382 I.getMetadata("riscv-nontemporal-domain");
23383 if (RISCVNontemporalInfo != nullptr)
23384 NontemporalLevel =
23385 cast<ConstantInt>(
23386 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
23387 ->getValue())
23388 ->getZExtValue();
23389
23390 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
23391 "RISC-V target doesn't support this non-temporal domain.");
23392
// Map level 2..5 onto the two flag bits (level - 2 in binary).
23393 NontemporalLevel -= 2;
// NOTE(review): listing line 23394 (presumably the declaration of `Flags`)
// is missing here.
23395 if (NontemporalLevel & 0b1)
23396 Flags |= MONontemporalBit0;
23397 if (NontemporalLevel & 0b10)
23398 Flags |= MONontemporalBit1;
23399
23400 return Flags;
23401}
23402
// RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &) (name inferred
// — signature lines, listing 23403-23404, were dropped by the scrape):
// propagate the non-temporal flag bits from the node's memory operand.
23405
23406 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
// NOTE(review): listing line 23407 (presumably the declaration of
// `TargetFlags`) is missing here.
23408 TargetFlags |= (NodeFlags & MONontemporalBit0);
23409 TargetFlags |= (NodeFlags & MONontemporalBit1);
23410 return TargetFlags;
23411}
23412
// RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable (name inferred —
// the signature line, listing 23413, was dropped by the scrape): two memory
// nodes may merge only if their target MMO (non-temporal) flags agree.
23414 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
23415 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
23416}
23417
// RISCVTargetLowering::isCtpopFast (name inferred — the signature line,
// listing 23418, was dropped by the scrape): CTPOP is cheap with Zvbb for
// vectors, or Zbb (cpop/cpopw) for i32/i64 scalars.
23419 if (VT.isScalableVector())
23420 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
23421 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
23422 return true;
23423 return Subtarget.hasStdExtZbb() &&
23424 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
23426
// RISCVTargetLowering::getCustomCtpopCost (name inferred — the signature
// line, listing 23427, was dropped by the scrape): cost 0 when CTPOP is
// fast for VT, otherwise 1.
23428 ISD::CondCode Cond) const {
23429 return isCtpopFast(VT) ? 0 : 1;
23430}
23431
// RISCVTargetLowering::shouldInsertFencesForAtomic (name inferred — the
// signature line, listing 23432, was dropped by the scrape): decide
// whether atomic loads/stores get explicit fences, accounting for the
// Zalasr (load-acquire/store-release) and Ztso (total store order)
// extensions.
23433 const Instruction *I) const {
23434 if (Subtarget.hasStdExtZalasr()) {
23435 if (Subtarget.hasStdExtZtso()) {
23436 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
23437 // should be lowered to plain load/store. The easiest way to do this is
23438 // to say we should insert fences for them, and the fence insertion code
23439 // will just not insert any fences
23440 auto *LI = dyn_cast<LoadInst>(I);
23441 auto *SI = dyn_cast<StoreInst>(I);
23442 if ((LI &&
23443 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
23444 (SI &&
23445 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
23446 // Here, this is a load or store which is seq_cst, and needs a .aq or
23447 // .rl therefore we shouldn't try to insert fences
23448 return false;
23449 }
23450 // Here, we are a TSO inst that isn't a seq_cst load/store
23451 return isa<LoadInst>(I) || isa<StoreInst>(I);
23452 }
// With Zalasr but no Ztso, .aq/.rl forms handle ordering directly.
23453 return false;
23454 }
23455 // Note that one specific case requires fence insertion for an
23456 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
23457 // than this hook due to limitations in the interface here.
23458 return isa<LoadInst>(I) || isa<StoreInst>(I);
23459}
23460
// RISCVTargetLowering::fallBackToDAGISel (name inferred — the signature
// line, listing 23461, was dropped by the scrape): return true when
// GlobalISel should bail to SelectionDAG for this instruction — currently
// anything touching scalable-vector types outside a small supported set of
// opcodes.
23462
23463 // GISel support is in progress or complete for these opcodes.
23464 unsigned Op = Inst.getOpcode();
23465 if (Op == Instruction::Add || Op == Instruction::Sub ||
23466 Op == Instruction::And || Op == Instruction::Or ||
23467 Op == Instruction::Xor || Op == Instruction::InsertElement ||
23468 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
23469 Op == Instruction::Freeze || Op == Instruction::Store)
23470 return false;
23471
23472 if (Inst.getType()->isScalableTy())
23473 return true;
23474
23475 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
23476 if (Inst.getOperand(i)->getType()->isScalableTy() &&
23477 !isa<ReturnInst>(&Inst))
23478 return true;
23479
23480 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
23481 if (AI->getAllocatedType()->isScalableTy())
23482 return true;
23483 }
23484
23485 return false;
23486}
23487
23488SDValue
23489RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
23490 SelectionDAG &DAG,
23491 SmallVectorImpl<SDNode *> &Created) const {
23493 if (isIntDivCheap(N->getValueType(0), Attr))
23494 return SDValue(N, 0); // Lower SDIV as SDIV
23495
23496 // Only perform this transform if short forward branch opt is supported.
23497 if (!Subtarget.hasShortForwardBranchOpt())
23498 return SDValue();
23499 EVT VT = N->getValueType(0);
23500 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
23501 return SDValue();
23502
23503 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
23504 if (Divisor.sgt(2048) || Divisor.slt(-2048))
23505 return SDValue();
23506 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
23507}
23508
23509bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
23510 EVT VT, const APInt &AndMask) const {
23511 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
23512 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
23514}
23515
23516unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
23517 return Subtarget.getMinimumJumpTableEntries();
23518}
23519
// RISCVTargetLowering::expandIndirectJTBranch (name inferred — signature
// lines, listing 23520-23521, were dropped by the scrape): with Zicfilp,
// indirect jump-table branches must use the software-guarded BRIND form so
// landing-pad checks are not required at the targets.
23522 int JTI,
23523 SelectionDAG &DAG) const {
23524 if (Subtarget.hasStdExtZicfilp()) {
23525 // When Zicfilp enabled, we need to use software guarded branch for jump
23526 // table branch.
23527 SDValue Chain = Value;
23528 // Jump table debug info is only needed if CodeView is enabled.
// NOTE(review): listing line 23529 (presumably the CodeView condition
// guarding this call) is missing here.
23530 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
23531 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
23532 }
23533 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
23534}
23535
23536// If an output pattern produces multiple instructions tablegen may pick an
23537// arbitrary type from an instructions destination register class to use for the
23538// VT of that MachineSDNode. This VT may be used to look up the representative
23539// register class. If the type isn't legal, the default implementation will
23540// not find a register class.
23541//
23542// Some integer types smaller than XLen are listed in the GPR register class to
23543// support isel patterns for GISel, but are not legal in SelectionDAG. The
23544// arbitrary type tablegen picks may be one of these smaller types.
23545//
23546// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
23547// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
// Map possibly-illegal MVTs tablegen picked for a MachineSDNode to a
// representative register class: small integers share the GPR class,
// f16/bf16 share a 16-bit FP class; everything else defers to the base
// implementation (see the explanatory comment above this function in the
// file).
23548std::pair<const TargetRegisterClass *, uint8_t>
23549RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
23550 MVT VT) const {
23551 switch (VT.SimpleTy) {
23552 default:
23553 break;
23554 case MVT::i8:
23555 case MVT::i16:
23556 case MVT::i32:
// NOTE(review): listing line 23557 (presumably the GPR-class return for the
// integer cases) is missing here.
23558 case MVT::bf16:
23559 case MVT::f16:
// NOTE(review): listing line 23560 (presumably the 16-bit FP-class return)
// is missing here.
23561 }
23562
// NOTE(review): listing line 23563 (presumably
// `return TargetLowering::findRepresentativeClass(TRI, VT);`) is missing.
23564}
23565
// Instantiate the tablegen-generated RISC-V vector intrinsics lookup table.
// NOTE(review): the namespace-opening line (listing 23566, presumably
// `namespace llvm::RISCVVIntrinsicsTable {`) was dropped by the scrape.
23567
23568#define GET_RISCVVIntrinsicsTable_IMPL
23569#include "RISCVGenSearchableTables.inc"
23570
23571} // namespace llvm::RISCVVIntrinsicsTable
23572
// RISCVTargetLowering::hasInlineStackProbe (name inferred — the signature
// line, listing 23573, was dropped by the scrape): inline stack probing is
// opt-in via the "probe-stack"="inline-asm" function attribute.
23574
23575 // If the function specifically requests inline stack probes, emit them.
23576 if (MF.getFunction().hasFnAttribute("probe-stack"))
23577 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
23578 "inline-asm";
23579
23580 return false;
23581}
23582
// RISCVTargetLowering::getStackProbeSize (name inferred — the signature
// line, listing 23583, was dropped by the scrape): probe interval from the
// "stack-probe-size" attribute (default 4096), rounded down to the stack
// alignment and never zero.
23584 Align StackAlign) const {
23585 // The default stack probe size is 4096 if the function has no
23586 // stack-probe-size attribute.
23587 const Function &Fn = MF.getFunction();
23588 unsigned StackProbeSize =
23589 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
23590 // Round down to the stack alignment.
23591 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
23592 return StackProbeSize ? StackProbeSize : StackAlign.value();
23593}
23594
23595SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
23596 SelectionDAG &DAG) const {
23598 if (!hasInlineStackProbe(MF))
23599 return SDValue();
23600
23601 MVT XLenVT = Subtarget.getXLenVT();
23602 // Get the inputs.
23603 SDValue Chain = Op.getOperand(0);
23604 SDValue Size = Op.getOperand(1);
23605
23607 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
23608 SDLoc dl(Op);
23609 EVT VT = Op.getValueType();
23610
23611 // Construct the new SP value in a GPR.
23612 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
23613 Chain = SP.getValue(1);
23614 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
23615 if (Align)
23616 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
23617 DAG.getSignedConstant(-(uint64_t)Align->value(), dl, VT));
23618
23619 // Set the real SP to the new value with a probing loop.
23620 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
23621 return DAG.getMergeValues({SP, Chain}, dl);
23622}
23623
// RISCVTargetLowering::emitDynamicProbedAlloc (name inferred — signature
// lines, listing 23624 and earlier, were dropped by the scrape): expand the
// PROBED_ALLOCA pseudo into a loop that decrements SP by the probe size and
// stores zero to the new top-of-stack until SP reaches TargetReg.
23626 MachineBasicBlock *MBB) const {
23627 MachineFunction &MF = *MBB->getParent();
23628 MachineBasicBlock::iterator MBBI = MI.getIterator();
// NOTE(review): listing line 23629 (presumably the DebugLoc declaration) is
// missing here.
23630 Register TargetReg = MI.getOperand(1).getReg();
23631
23632 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
23633 bool IsRV64 = Subtarget.is64Bit();
23634 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
23635 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
23636 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
23637
23638 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
23639 MachineBasicBlock *LoopTestMBB =
// NOTE(review): listing lines 23640 and 23642 (presumably the
// CreateMachineBasicBlock calls for LoopTestMBB and ExitMBB) are missing.
23641 MF.insert(MBBInsertPoint, LoopTestMBB);
23643 MF.insert(MBBInsertPoint, ExitMBB);
23644 Register SPReg = RISCV::X2;
23645 Register ScratchReg =
23646 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
23647
23648 // ScratchReg = ProbeSize
23649 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
23650
23651 // LoopTest:
23652 // SUB SP, SP, ProbeSize
23653 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
23654 .addReg(SPReg)
23655 .addReg(ScratchReg);
23656
23657 // s[d|w] zero, 0(sp)
23658 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
23659 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
23660 .addReg(RISCV::X0)
23661 .addReg(SPReg)
23662 .addImm(0)
23663
23664 // BLT TargetReg, SP, LoopTest
23665 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
23666 .addReg(TargetReg)
23667 .addReg(SPReg)
23668 .addMBB(LoopTestMBB);
23669
23670 // Adjust with: MV SP, TargetReg.
23671 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
23672 .addReg(TargetReg)
23673 .addImm(0);
23674
23675 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
// NOTE(review): listing line 23676 (presumably transferSuccessorsAndUpdatePHIs)
// is missing here.
23677
23678 LoopTestMBB->addSuccessor(ExitMBB);
23679 LoopTestMBB->addSuccessor(LoopTestMBB);
23680 MBB->addSuccessor(LoopTestMBB);
23681
23682 MI.eraseFromParent();
23683 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
23684 return ExitMBB->begin()->getParent();
23685}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr MCPhysReg SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static bool isLowSourceShuffle(ArrayRef< int > Mask, int Span)
Is this mask only using elements from the first span of the input?
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static bool hasPassthruOp(unsigned Opcode)
Return true if a RISC-V target specified op has a passthru operand.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool isLocalRepeatingShuffle(ArrayRef< int > Mask, int Span)
Is this mask local (i.e.
static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
#define ROTR(x, n)
Definition: SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
static bool isCommutative(Instruction *I)
#define ROTL(x, b)
Definition: SipHash.cpp:32
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1334
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1326
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1111
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1618
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1397
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:370
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1710
bool isMask(unsigned numBits) const
Definition: APInt.h:488
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:334
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1389
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:455
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:63
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:827
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:734
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:732
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:738
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:736
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
bool isFloatingPointOperation() const
Definition: Instructions.h:882
BinOp getOperation() const
Definition: Instructions.h:805
Value * getValOperand()
Definition: Instructions.h:874
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:847
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:396
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1499
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:364
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:226
unsigned size() const
Definition: DenseMap.h:99
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:202
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:314
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
Tagged union holding either a T or a Error.
Definition: Error.h:481
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
Type * getReturnType() const
Definition: DerivedTypes.h:126
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:778
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:710
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:359
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Argument * getArg(unsigned i) const
Definition: Function.h:892
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:306
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:530
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1887
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2562
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1842
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2045
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1987
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
Value * CreateUDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1421
Value * getAllOnesMask(ElementCount NumElts)
Return an all true boolean vector (mask) with NumElts lanes.
Definition: IRBuilder.h:867
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:516
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2033
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2533
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1862
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:68
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:310
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
Value * getPointerOperand()
Definition: Instructions.h:255
Type * getPointerOperandType() const
Definition: Instructions.h:258
bool isSimple() const
Definition: Instructions.h:247
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
MCContext & getContext() const
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Metadata node.
Definition: Metadata.h:1073
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1434
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:237
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:307
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:71
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:406
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:587
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
bool hasStdExtDOrZdinx() const
unsigned getMaxLoadsPerMemcmp(bool OptSize) const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
unsigned getMaxStoresPerMemset(bool OptSize) const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
unsigned getMaxStoresPerMemmove(bool OptSize) const
bool hasVInstructionsF16Minimal() const
unsigned getMaxGluedStoresPerMemcpy() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
const RISCVFrameLowering * getFrameLowering() const override
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type SrcTy to type DstTy.
bool lowerInterleaveIntrinsicToStore(StoreInst *SI, ArrayRef< Value * > InterleaveValues) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this functions.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool lowerDeinterleavedIntrinsicToVPLoad(VPIntrinsic *Load, Value *Mask, ArrayRef< Value * > DeinterleaveRes) const override
Lower an interleaved vp.load into a vlsegN intrinsic.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
static RISCVII::VLMUL getLMUL(MVT VT)
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
bool lowerInterleavedIntrinsicToVPStore(VPIntrinsic *Store, Value *Mask, ArrayRef< Value * > InterleaveOps) const override
Lower an interleaved vp.store into a vssegN intrinsic.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI, ArrayRef< Value * > DeinterleaveValues) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the type of the node is undefined.
iterator_range< user_iterator > users()
op_iterator op_end() const
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition: Type.cpp:812
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:751
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:802
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:397
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:761
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:857
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:828
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
Definition: SelectionDAG.h:505
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:713
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:891
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:701
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:797
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:874
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:907
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
static bool isSplatMask(ArrayRef< int > Mask)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:286
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
std::string lower() const
Definition: StringRef.cpp:113
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:68
R Default(T Value)
Definition: StringSwitch.h:177
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:87
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
static TargetExtType * get(LLVMContext &Context, StringRef Name, ArrayRef< Type * > Types={}, ArrayRef< unsigned > Ints={})
Return a target extension type having the specified name and optional type and integer parameters.
Definition: Type.cpp:895
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:81
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:758
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:258
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition: Type.h:203
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:64
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1094
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1205
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1201
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1425
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1368
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1458
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1348
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1234
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1350
@ STRICT_FCEIL
Definition: ISDOpcodes.h:441
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1351
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1110
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1443
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1447
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1307
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1312
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1457
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1346
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1347
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1278
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1502
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:936
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1267
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1440
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1300
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1444
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ STRICT_LROUND
Definition: ISDOpcodes.h:446
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1156
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1349
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1135
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:601
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:661
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1459
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1230
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:445
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1452
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:931
@ STRICT_FP_TO_FP16
Definition: ISDOpcodes.h:967
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ STRICT_FP16_TO_FP
Definition: ISDOpcodes.h:966
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1344
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1290
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1415
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1327
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1352
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1120
@ STRICT_LRINT
Definition: ISDOpcodes.h:448
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:606
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ STRICT_FROUND
Definition: ISDOpcodes.h:443
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1380
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1460
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:442
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:444
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:973
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1342
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1343
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1261
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1287
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:651
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:449
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:627
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1341
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:669
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:421
@ STRICT_LLROUND
Definition: ISDOpcodes.h:447
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:438
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1448
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1225
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1149
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ STRICT_FRINT
Definition: ISDOpcodes.h:437
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:595
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1055
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:832
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1284
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:836
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1580
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1580
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1567
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1618
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1598
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1663
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:756
@ Bitcast
Perform the operation on a different, but equivalently sized type.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SplitF64
Turns a f64 into a pair of i32s.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
@ BuildPairF64
Turns a pair of i32s into an f64.
@ BuildGPRPair
Turn a pair of i<xlen>s into an even-odd register pair (untyped).
@ SplitGPRPair
Turn an even-odd register pair (untyped) into a pair of i<xlen>s.
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint32_t read32le(const void *P)
Definition: Endian.h:425
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:556
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:297
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1547
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:347
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:395
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:341
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:274
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:404
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:302
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:315
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:397
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:425
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition: ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:414
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:287
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:234
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1049
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:65
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:266
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:153
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:288
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1009
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:272
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)