LLVM 21.0.0git
RISCVISelLowering.cpp
Go to the documentation of this file.
//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
13
#include "RISCVISelLowering.h"
#include "RISCV.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <optional>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "riscv-lower"
54
55STATISTIC(NumTailCalls, "Number of tail calls");
56
58 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
59 cl::desc("Give the maximum size (in number of nodes) of the web of "
60 "instructions that we will consider for VW expansion"),
61 cl::init(18));
62
63static cl::opt<bool>
64 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
65 cl::desc("Allow the formation of VW_W operations (e.g., "
66 "VWADD_W) with splat constants"),
67 cl::init(false));
68
70 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
71 cl::desc("Set the minimum number of repetitions of a divisor to allow "
72 "transformation to multiplications by the reciprocal"),
73 cl::init(2));
74
75static cl::opt<int>
77 cl::desc("Give the maximum number of instructions that we will "
78 "use for creating a floating-point immediate value"),
79 cl::init(2));
80
82 const RISCVSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84
85 RISCVABI::ABI ABI = Subtarget.getTargetABI();
86 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
87
88 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
89 !Subtarget.hasStdExtF()) {
90 errs() << "Hard-float 'f' ABI can't be used for a target that "
91 "doesn't support the F instruction set extension (ignoring "
92 "target-abi)\n";
94 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
95 !Subtarget.hasStdExtD()) {
96 errs() << "Hard-float 'd' ABI can't be used for a target that "
97 "doesn't support the D instruction set extension (ignoring "
98 "target-abi)\n";
100 }
101
102 switch (ABI) {
103 default:
104 report_fatal_error("Don't know how to lower this ABI");
113 break;
114 }
115
116 MVT XLenVT = Subtarget.getXLenVT();
117
118 // Set up the register classes.
119 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
120
121 if (Subtarget.hasStdExtZfhmin())
122 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
123 if (Subtarget.hasStdExtZfbfmin())
124 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
125 if (Subtarget.hasStdExtF())
126 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
127 if (Subtarget.hasStdExtD())
128 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
129 if (Subtarget.hasStdExtZhinxmin())
130 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
131 if (Subtarget.hasStdExtZfinx())
132 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
133 if (Subtarget.hasStdExtZdinx()) {
134 if (Subtarget.is64Bit())
135 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
136 else
137 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
138 }
139
140 static const MVT::SimpleValueType BoolVecVTs[] = {
141 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
142 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
143 static const MVT::SimpleValueType IntVecVTs[] = {
144 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
145 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
146 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
147 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
148 MVT::nxv4i64, MVT::nxv8i64};
149 static const MVT::SimpleValueType F16VecVTs[] = {
150 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
151 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
152 static const MVT::SimpleValueType BF16VecVTs[] = {
153 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
154 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
155 static const MVT::SimpleValueType F32VecVTs[] = {
156 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
157 static const MVT::SimpleValueType F64VecVTs[] = {
158 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
159 static const MVT::SimpleValueType VecTupleVTs[] = {
160 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
161 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
162 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
163 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
164 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
165 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
166 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
167 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
168 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
169 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
170 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
171
172 if (Subtarget.hasVInstructions()) {
173 auto addRegClassForRVV = [this](MVT VT) {
174 // Disable the smallest fractional LMUL types if ELEN is less than
175 // RVVBitsPerBlock.
176 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
177 if (VT.getVectorMinNumElements() < MinElts)
178 return;
179
180 unsigned Size = VT.getSizeInBits().getKnownMinValue();
181 const TargetRegisterClass *RC;
183 RC = &RISCV::VRRegClass;
184 else if (Size == 2 * RISCV::RVVBitsPerBlock)
185 RC = &RISCV::VRM2RegClass;
186 else if (Size == 4 * RISCV::RVVBitsPerBlock)
187 RC = &RISCV::VRM4RegClass;
188 else if (Size == 8 * RISCV::RVVBitsPerBlock)
189 RC = &RISCV::VRM8RegClass;
190 else
191 llvm_unreachable("Unexpected size");
192
193 addRegisterClass(VT, RC);
194 };
195
196 for (MVT VT : BoolVecVTs)
197 addRegClassForRVV(VT);
198 for (MVT VT : IntVecVTs) {
199 if (VT.getVectorElementType() == MVT::i64 &&
200 !Subtarget.hasVInstructionsI64())
201 continue;
202 addRegClassForRVV(VT);
203 }
204
205 if (Subtarget.hasVInstructionsF16Minimal())
206 for (MVT VT : F16VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsBF16Minimal())
210 for (MVT VT : BF16VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.hasVInstructionsF32())
214 for (MVT VT : F32VecVTs)
215 addRegClassForRVV(VT);
216
217 if (Subtarget.hasVInstructionsF64())
218 for (MVT VT : F64VecVTs)
219 addRegClassForRVV(VT);
220
221 if (Subtarget.useRVVForFixedLengthVectors()) {
222 auto addRegClassForFixedVectors = [this](MVT VT) {
223 MVT ContainerVT = getContainerForFixedLengthVector(VT);
224 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
225 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
226 addRegisterClass(VT, TRI.getRegClass(RCID));
227 };
229 if (useRVVForFixedLengthVectorVT(VT))
230 addRegClassForFixedVectors(VT);
231
233 if (useRVVForFixedLengthVectorVT(VT))
234 addRegClassForFixedVectors(VT);
235 }
236
237 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
238 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
239 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
240 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
241 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
242 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
243 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
244 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
245 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
246 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
247 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
248 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
249 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
250 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
251 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
252 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
253 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
254 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
255 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
256 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
257 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
258 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
259 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
260 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
261 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
262 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
263 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
264 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
265 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
266 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
267 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
268 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
269 }
270
271 // Compute derived properties from the register classes.
273
275
277 MVT::i1, Promote);
278 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
280 MVT::i1, Promote);
281
282 // TODO: add all necessary setOperationAction calls.
284
289
294 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
297 }
298
300
303
304 if (!Subtarget.hasVendorXTHeadBb())
306
308
309 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
310 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
311 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
312
313 if (Subtarget.is64Bit()) {
315
318 MVT::i32, Custom);
320 if (!Subtarget.hasStdExtZbb())
323 Custom);
325 }
326 if (!Subtarget.hasStdExtZmmul()) {
328 } else if (Subtarget.is64Bit()) {
331 } else {
333 }
334
335 if (!Subtarget.hasStdExtM()) {
337 Expand);
338 } else if (Subtarget.is64Bit()) {
340 {MVT::i8, MVT::i16, MVT::i32}, Custom);
341 }
342
345 Expand);
346
348 Custom);
349
350 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
351 if (Subtarget.is64Bit())
353 } else if (Subtarget.hasVendorXTHeadBb()) {
354 if (Subtarget.is64Bit())
357 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
359 } else {
361 }
362
363 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
364 // pattern match it directly in isel.
366 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
367 Subtarget.hasVendorXTHeadBb())
368 ? Legal
369 : Expand);
370
371 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
373 } else {
374 // Zbkb can use rev8+brev8 to implement bitreverse.
376 Subtarget.hasStdExtZbkb() ? Custom : Expand);
377 }
378
379 if (Subtarget.hasStdExtZbb() ||
380 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
382 Legal);
383 }
384
385 if (Subtarget.hasStdExtZbb() ||
386 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
387 if (Subtarget.is64Bit())
389 } else {
391 }
392
393 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
394 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
395 // We need the custom lowering to make sure that the resulting sequence
396 // for the 32bit case is efficient on 64bit targets.
397 if (Subtarget.is64Bit())
399 } else {
401 }
402
403 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
405 } else if (Subtarget.hasShortForwardBranchOpt()) {
406 // We can use PseudoCCSUB to implement ABS.
408 } else if (Subtarget.is64Bit()) {
410 }
411
412 if (Subtarget.useCCMovInsn())
414 else if (!Subtarget.hasVendorXTHeadCondMov())
416
417 static const unsigned FPLegalNodeTypes[] = {
425
426 static const ISD::CondCode FPCCToExpand[] = {
430
431 static const unsigned FPOpToExpand[] = {
433 ISD::FREM};
434
435 static const unsigned FPRndMode[] = {
438
439 static const unsigned ZfhminZfbfminPromoteOps[] = {
449
450 if (Subtarget.hasStdExtZfbfmin()) {
456 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
463 }
464
465 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
466 if (Subtarget.hasStdExtZfhOrZhinx()) {
467 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
468 setOperationAction(FPRndMode, MVT::f16,
469 Subtarget.hasStdExtZfa() ? Legal : Custom);
472 Subtarget.hasStdExtZfa() ? Legal : Custom);
473 if (Subtarget.hasStdExtZfa())
475 } else {
476 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
481 setOperationAction(Op, MVT::f16, Custom);
487 }
488
490
493 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
497
499 ISD::FNEARBYINT, MVT::f16,
500 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
505 MVT::f16, Promote);
506
507 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
508 // complete support for all operations in LegalizeDAG.
513 MVT::f16, Promote);
514
515 // We need to custom promote this.
516 if (Subtarget.is64Bit())
518 }
519
520 if (Subtarget.hasStdExtFOrZfinx()) {
521 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
522 setOperationAction(FPRndMode, MVT::f32,
523 Subtarget.hasStdExtZfa() ? Legal : Custom);
524 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
528 setOperationAction(FPOpToExpand, MVT::f32, Expand);
529 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
530 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
531 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
532 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
536 Subtarget.isSoftFPABI() ? LibCall : Custom);
541
542 if (Subtarget.hasStdExtZfa()) {
546 } else {
548 }
549 }
550
551 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
553
554 if (Subtarget.hasStdExtDOrZdinx()) {
555 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
556
557 if (!Subtarget.is64Bit())
559
560 if (Subtarget.hasStdExtZfa()) {
562 setOperationAction(FPRndMode, MVT::f64, Legal);
565 } else {
566 if (Subtarget.is64Bit())
567 setOperationAction(FPRndMode, MVT::f64, Custom);
568
570 }
571
574 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
578 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
579 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
580 setOperationAction(FPOpToExpand, MVT::f64, Expand);
581 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
582 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
583 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
584 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
588 Subtarget.isSoftFPABI() ? LibCall : Custom);
593 }
594
595 if (Subtarget.is64Bit()) {
598 MVT::i32, Custom);
600 }
601
602 if (Subtarget.hasStdExtFOrZfinx()) {
604 Custom);
605
606 // f16/bf16 require custom handling.
608 Custom);
610 Custom);
611
614 }
615
618 XLenVT, Custom);
619
621
622 if (Subtarget.is64Bit())
624
625 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
626 // Unfortunately this can't be determined just from the ISA naming string.
628 Subtarget.is64Bit() ? Legal : Custom);
630 Subtarget.is64Bit() ? Legal : Custom);
631
632 if (Subtarget.is64Bit()) {
635 }
636
639 if (Subtarget.is64Bit())
641
642 if (Subtarget.hasStdExtZicbop()) {
644 }
645
646 if (Subtarget.hasStdExtA()) {
648 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
650 else
652 } else if (Subtarget.hasForcedAtomics()) {
654 } else {
656 }
657
659
661
662 if (getTargetMachine().getTargetTriple().isOSLinux()) {
663 // Custom lowering of llvm.clear_cache.
665 }
666
667 if (Subtarget.hasVInstructions()) {
669
671
672 // RVV intrinsics may have illegal operands.
673 // We also need to custom legalize vmv.x.s.
676 {MVT::i8, MVT::i16}, Custom);
677 if (Subtarget.is64Bit())
679 MVT::i32, Custom);
680 else
682 MVT::i64, Custom);
683
685 MVT::Other, Custom);
686
687 static const unsigned IntegerVPOps[] = {
688 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
689 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
690 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
691 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
692 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
693 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
694 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
695 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
696 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
697 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
698 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
699 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
700 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
701 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
702 ISD::EXPERIMENTAL_VP_SPLAT};
703
704 static const unsigned FloatingPointVPOps[] = {
705 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
706 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
707 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
708 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
709 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
710 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
711 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
712 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
713 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
714 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
715 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
716 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
717 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
718 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
719
720 static const unsigned IntegerVecReduceOps[] = {
724
725 static const unsigned FloatingPointVecReduceOps[] = {
728
729 static const unsigned FloatingPointLibCallOps[] = {
732
733 if (!Subtarget.is64Bit()) {
734 // We must custom-lower certain vXi64 operations on RV32 due to the vector
735 // element type being illegal.
737 MVT::i64, Custom);
738
739 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
740
741 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
742 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
743 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
744 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
745 MVT::i64, Custom);
746 }
747
748 for (MVT VT : BoolVecVTs) {
749 if (!isTypeLegal(VT))
750 continue;
751
753
754 // Mask VTs are custom-expanded into a series of standard nodes
758 VT, Custom);
759
761 Custom);
762
764 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
765 Expand);
766 setOperationAction(ISD::VP_MERGE, VT, Custom);
767
768 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
769 Custom);
770
771 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
772
775 Custom);
776
778 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
779 Custom);
780
781 // RVV has native int->float & float->int conversions where the
782 // element type sizes are within one power-of-two of each other. Any
783 // wider distances between type sizes have to be lowered as sequences
784 // which progressively narrow the gap in stages.
789 VT, Custom);
791 Custom);
792
793 // Expand all extending loads to types larger than this, and truncating
794 // stores from types larger than this.
796 setTruncStoreAction(VT, OtherVT, Expand);
798 OtherVT, Expand);
799 }
800
801 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
802 ISD::VP_TRUNCATE, ISD::VP_SETCC},
803 VT, Custom);
804
807
809
810 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
811 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
812
815 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
816 }
817
818 for (MVT VT : IntVecVTs) {
819 if (!isTypeLegal(VT))
820 continue;
821
824
825 // Vectors implement MULHS/MULHU.
827
828 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
829 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
831
833 Legal);
834
836
837 // Custom-lower extensions and truncations from/to mask types.
839 VT, Custom);
840
841 // RVV has native int->float & float->int conversions where the
842 // element type sizes are within one power-of-two of each other. Any
843 // wider distances between type sizes have to be lowered as sequences
844 // which progressively narrow the gap in stages.
849 VT, Custom);
851 Custom);
855 VT, Legal);
856
857 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
858 // nodes which truncate by one power of two at a time.
861 Custom);
862
863 // Custom-lower insert/extract operations to simplify patterns.
865 Custom);
866
867 // Custom-lower reduction operations to set up the corresponding custom
868 // nodes' operands.
869 setOperationAction(IntegerVecReduceOps, VT, Custom);
870
871 setOperationAction(IntegerVPOps, VT, Custom);
872
874
876 VT, Custom);
877
879 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
880 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
881 VT, Custom);
882
885 VT, Custom);
886
889
891
893 setTruncStoreAction(VT, OtherVT, Expand);
895 OtherVT, Expand);
896 }
897
900
901 // Splice
903
904 if (Subtarget.hasStdExtZvkb()) {
906 setOperationAction(ISD::VP_BSWAP, VT, Custom);
907 } else {
908 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
910 }
911
912 if (Subtarget.hasStdExtZvbb()) {
914 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
915 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
916 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
917 VT, Custom);
918 } else {
919 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
921 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
922 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
923 VT, Expand);
924
925 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
926 // range of f32.
927 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
928 if (isTypeLegal(FloatVT)) {
930 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
931 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
932 VT, Custom);
933 }
934 }
935
937 }
938
939 for (MVT VT : VecTupleVTs) {
940 if (!isTypeLegal(VT))
941 continue;
942
944 }
945
946 // Expand various CCs to best match the RVV ISA, which natively supports UNE
947 // but no other unordered comparisons, and supports all ordered comparisons
948 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
949 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
950 // and we pattern-match those back to the "original", swapping operands once
951 // more. This way we catch both operations and both "vf" and "fv" forms with
952 // fewer patterns.
953 static const ISD::CondCode VFPCCToExpand[] = {
957 };
958
959 // TODO: support more ops.
960 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
968
969 // TODO: support more vp ops.
970 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
971 ISD::VP_FADD,
972 ISD::VP_FSUB,
973 ISD::VP_FMUL,
974 ISD::VP_FDIV,
975 ISD::VP_FMA,
976 ISD::VP_REDUCE_FMIN,
977 ISD::VP_REDUCE_FMAX,
978 ISD::VP_SQRT,
979 ISD::VP_FMINNUM,
980 ISD::VP_FMAXNUM,
981 ISD::VP_FCEIL,
982 ISD::VP_FFLOOR,
983 ISD::VP_FROUND,
984 ISD::VP_FROUNDEVEN,
985 ISD::VP_FROUNDTOZERO,
986 ISD::VP_FRINT,
987 ISD::VP_FNEARBYINT,
988 ISD::VP_SETCC,
989 ISD::VP_FMINIMUM,
990 ISD::VP_FMAXIMUM,
991 ISD::VP_REDUCE_FMINIMUM,
992 ISD::VP_REDUCE_FMAXIMUM};
993
994 // Sets common operation actions on RVV floating-point vector types.
995 const auto SetCommonVFPActions = [&](MVT VT) {
997 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
998 // sizes are within one power-of-two of each other. Therefore conversions
999 // between vXf16 and vXf64 must be lowered as sequences which convert via
1000 // vXf32.
1003 // Custom-lower insert/extract operations to simplify patterns.
1005 Custom);
1006 // Expand various condition codes (explained above).
1007 setCondCodeAction(VFPCCToExpand, VT, Expand);
1008
1011
1015 VT, Custom);
1016
1017 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1018
1019 // Expand FP operations that need libcalls.
1020 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1021
1023
1025
1027 VT, Custom);
1028
1030 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1031 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1032 VT, Custom);
1033
1036
1039 VT, Custom);
1040
1043
1045
1046 setOperationAction(FloatingPointVPOps, VT, Custom);
1047
1049 Custom);
1052 VT, Legal);
1057 VT, Custom);
1058
1060 };
1061
1062 // Sets common extload/truncstore actions on RVV floating-point vector
1063 // types.
1064 const auto SetCommonVFPExtLoadTruncStoreActions =
1065 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1066 for (auto SmallVT : SmallerVTs) {
1067 setTruncStoreAction(VT, SmallVT, Expand);
1068 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1069 }
1070 };
1071
1072 // Sets common actions for f16 and bf16 for when there's only
1073 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1074 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1077 Custom);
1078 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1079 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1080 Custom);
1082 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1088 VT, Custom);
1089 MVT EltVT = VT.getVectorElementType();
1090 if (isTypeLegal(EltVT))
1091 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1093 VT, Custom);
1094 else
1095 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1096 EltVT, Custom);
1098 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1099 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1100 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1101 ISD::VP_SCATTER},
1102 VT, Custom);
1103
1107
1108 // Expand FP operations that need libcalls.
1109 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1110
1111 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1112 if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
1113 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1114 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1115 } else {
1116 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1117 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1118 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1119 }
1120 };
1121
1122 if (Subtarget.hasVInstructionsF16()) {
1123 for (MVT VT : F16VecVTs) {
1124 if (!isTypeLegal(VT))
1125 continue;
1126 SetCommonVFPActions(VT);
1127 }
1128 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1129 for (MVT VT : F16VecVTs) {
1130 if (!isTypeLegal(VT))
1131 continue;
1132 SetCommonPromoteToF32Actions(VT);
1133 }
1134 }
1135
1136 if (Subtarget.hasVInstructionsBF16Minimal()) {
1137 for (MVT VT : BF16VecVTs) {
1138 if (!isTypeLegal(VT))
1139 continue;
1140 SetCommonPromoteToF32Actions(VT);
1141 }
1142 }
1143
1144 if (Subtarget.hasVInstructionsF32()) {
1145 for (MVT VT : F32VecVTs) {
1146 if (!isTypeLegal(VT))
1147 continue;
1148 SetCommonVFPActions(VT);
1149 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1150 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1151 }
1152 }
1153
1154 if (Subtarget.hasVInstructionsF64()) {
1155 for (MVT VT : F64VecVTs) {
1156 if (!isTypeLegal(VT))
1157 continue;
1158 SetCommonVFPActions(VT);
1159 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1160 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1161 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1162 }
1163 }
1164
1165 if (Subtarget.useRVVForFixedLengthVectors()) {
1167 if (!useRVVForFixedLengthVectorVT(VT))
1168 continue;
1169
1170 // By default everything must be expanded.
1171 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1174 setTruncStoreAction(VT, OtherVT, Expand);
1176 OtherVT, Expand);
1177 }
1178
1179 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1180 // expansion to a build_vector of 0s.
1182
1183 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1185 Custom);
1186
1189 Custom);
1190
1192 VT, Custom);
1193
1195
1197
1199
1201
1204 Custom);
1205
1207
1210 Custom);
1211
1213 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1214 Custom);
1215
1217 {
1226 },
1227 VT, Custom);
1229 Custom);
1230
1232
1233 // Operations below are different for between masks and other vectors.
1234 if (VT.getVectorElementType() == MVT::i1) {
1235 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1236 ISD::OR, ISD::XOR},
1237 VT, Custom);
1238
1239 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1240 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1241 VT, Custom);
1242
1243 setOperationAction(ISD::VP_MERGE, VT, Custom);
1244
1245 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1246 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1247 continue;
1248 }
1249
1250 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1251 // it before type legalization for i64 vectors on RV32. It will then be
1252 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1253 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1254 // improvements first.
1255 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1258 }
1259
1262
1263 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1264 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1265 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1266 ISD::VP_SCATTER},
1267 VT, Custom);
1268
1272 VT, Custom);
1273
1276
1278
1279 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1280 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1282
1286 VT, Custom);
1287
1289
1292
1293 // Custom-lower reduction operations to set up the corresponding custom
1294 // nodes' operands.
1298 VT, Custom);
1299
1300 setOperationAction(IntegerVPOps, VT, Custom);
1301
1302 if (Subtarget.hasStdExtZvkb())
1304
1305 if (Subtarget.hasStdExtZvbb()) {
1308 VT, Custom);
1309 } else {
1310 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1311 // range of f32.
1312 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1313 if (isTypeLegal(FloatVT))
1316 Custom);
1317 }
1318
1320 }
1321
1323 // There are no extending loads or truncating stores.
1324 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1325 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1326 setTruncStoreAction(VT, InnerVT, Expand);
1327 }
1328
1329 if (!useRVVForFixedLengthVectorVT(VT))
1330 continue;
1331
1332 // By default everything must be expanded.
1333 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1335
1336 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1337 // expansion to a build_vector of 0s.
1339
1344 VT, Custom);
1345
1348 VT, Custom);
1349 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1350 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1351 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1352 VT, Custom);
1353
1356 Custom);
1357
1358 if (VT.getVectorElementType() == MVT::f16 &&
1359 !Subtarget.hasVInstructionsF16()) {
1361 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1363 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1364 Custom);
1365 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1366 Custom);
1367 if (Subtarget.hasStdExtZfhmin()) {
1369 } else {
1370 // We need to custom legalize f16 build vectors if Zfhmin isn't
1371 // available.
1373 }
1377 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1378 // Don't promote f16 vector operations to f32 if f32 vector type is
1379 // not legal.
1380 // TODO: could split the f16 vector into two vectors and do promotion.
1381 if (!isTypeLegal(F32VecVT))
1382 continue;
1383 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1384 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1385 continue;
1386 }
1387
1388 if (VT.getVectorElementType() == MVT::bf16) {
1390 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1391 if (Subtarget.hasStdExtZfbfmin()) {
1393 } else {
1394 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1395 // available.
1397 }
1399 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1400 Custom);
1401 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1402 // Don't promote f16 vector operations to f32 if f32 vector type is
1403 // not legal.
1404 // TODO: could split the f16 vector into two vectors and do promotion.
1405 if (!isTypeLegal(F32VecVT))
1406 continue;
1407 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1408 // TODO: Promote VP ops to fp32.
1409 continue;
1410 }
1411
1413 Custom);
1414
1419 VT, Custom);
1420
1423 VT, Custom);
1424
1425 setCondCodeAction(VFPCCToExpand, VT, Expand);
1426
1429
1431
1432 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1433
1434 setOperationAction(FloatingPointVPOps, VT, Custom);
1435
1442 VT, Custom);
1443 }
1444
1445 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1446 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1447 if (Subtarget.is64Bit())
1449 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1451 if (Subtarget.hasStdExtZfbfmin())
1453 if (Subtarget.hasStdExtFOrZfinx())
1455 if (Subtarget.hasStdExtDOrZdinx())
1457 }
1458 }
1459
1460 if (Subtarget.hasStdExtA())
1462
1463 if (Subtarget.hasForcedAtomics()) {
1464 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1470 XLenVT, LibCall);
1471 }
1472
1473 if (Subtarget.hasVendorXTHeadMemIdx()) {
1474 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1475 setIndexedLoadAction(im, MVT::i8, Legal);
1476 setIndexedStoreAction(im, MVT::i8, Legal);
1477 setIndexedLoadAction(im, MVT::i16, Legal);
1478 setIndexedStoreAction(im, MVT::i16, Legal);
1479 setIndexedLoadAction(im, MVT::i32, Legal);
1480 setIndexedStoreAction(im, MVT::i32, Legal);
1481
1482 if (Subtarget.is64Bit()) {
1483 setIndexedLoadAction(im, MVT::i64, Legal);
1484 setIndexedStoreAction(im, MVT::i64, Legal);
1485 }
1486 }
1487 }
1488
1489 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1493
1497 }
1498
1499 // Function alignments.
1500 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1501 setMinFunctionAlignment(FunctionAlignment);
1502 // Set preferred alignments.
1505
1511
1512 if (Subtarget.hasStdExtFOrZfinx())
1514
1515 if (Subtarget.hasStdExtZbb())
1517
1518 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1519 Subtarget.hasVInstructions())
1521
1522 if (Subtarget.hasStdExtZbkb())
1524
1525 if (Subtarget.hasStdExtFOrZfinx())
1528 if (Subtarget.hasVInstructions())
1531 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1534 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1538 ISD::VSELECT});
1539
1540 if (Subtarget.hasVendorXTHeadMemPair())
1542 if (Subtarget.useRVVForFixedLengthVectors())
1544
1545 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1546 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1547
1548 // Disable strict node mutation.
1549 IsStrictFPEnabled = true;
1550 EnableExtLdPromotion = true;
1551
1552 // Let the subtarget decide if a predictable select is more expensive than the
1553 // corresponding branch. This information is used in CGP/SelectOpt to decide
1554 // when to convert selects into branches.
1555 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1556
1557 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1558 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1559
1561 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1562 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1563
1565 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1566 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1567
1568 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1569 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1570}
1571
1573 LLVMContext &Context,
1574 EVT VT) const {
1575 if (!VT.isVector())
1576 return getPointerTy(DL);
1577 if (Subtarget.hasVInstructions() &&
1578 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1579 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1581}
1582
// The explicit vector length (EVL) operand of VP SDNodes is always XLenVT
// on RISC-V.
1583MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1584 return Subtarget.getXLenVT();
1585}
1586
1587// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1588bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1589 unsigned VF,
1590 bool IsScalable) const {
1591 if (!Subtarget.hasVInstructions())
1592 return true;
1593
1594 if (!IsScalable)
1595 return true;
1596
1597 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1598 return true;
1599
1600 // Don't allow VF=1 if those types aren't legal.
1601 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1602 return true;
1603
1604 // VLEN=32 support is incomplete.
1605 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1606 return true;
1607
1608 // The maximum VF is for the smallest element width with LMUL=8.
1609 // VF must be a power of 2.
1610 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1611 return VF > MaxVF || !isPowerOf2_32(VF);
1612}
1613
1615 return !Subtarget.hasVInstructions() ||
1616 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1617}
1618
1620 const CallInst &I,
1621 MachineFunction &MF,
1622 unsigned Intrinsic) const {
1623 auto &DL = I.getDataLayout();
1624
1625 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1626 bool IsUnitStrided, bool UsePtrVal = false) {
1628 // We can't use ptrVal if the intrinsic can access memory before the
1629 // pointer. This means we can't use it for strided or indexed intrinsics.
1630 if (UsePtrVal)
1631 Info.ptrVal = I.getArgOperand(PtrOp);
1632 else
1633 Info.fallbackAddressSpace =
1634 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1635 Type *MemTy;
1636 if (IsStore) {
1637 // Store value is the first operand.
1638 MemTy = I.getArgOperand(0)->getType();
1639 } else {
1640 // Use return type. If it's segment load, return type is a struct.
1641 MemTy = I.getType();
1642 if (MemTy->isStructTy())
1643 MemTy = MemTy->getStructElementType(0);
1644 }
1645 if (!IsUnitStrided)
1646 MemTy = MemTy->getScalarType();
1647
1648 Info.memVT = getValueType(DL, MemTy);
1649 if (MemTy->isTargetExtTy()) {
1650 // RISC-V vector tuple type's alignment type should be its element type.
1651 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1652 MemTy = Type::getIntNTy(
1653 MemTy->getContext(),
1654 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1655 ->getZExtValue());
1656 Info.align = DL.getABITypeAlign(MemTy);
1657 } else {
1658 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1659 }
1661 Info.flags |=
1663 return true;
1664 };
1665
1666 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1668
1670 switch (Intrinsic) {
1671 default:
1672 return false;
1673 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1674 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1675 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1676 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1677 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1678 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1679 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1680 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1681 case Intrinsic::riscv_masked_cmpxchg_i32:
1683 Info.memVT = MVT::i32;
1684 Info.ptrVal = I.getArgOperand(0);
1685 Info.offset = 0;
1686 Info.align = Align(4);
1689 return true;
1690 case Intrinsic::riscv_seg2_load:
1691 case Intrinsic::riscv_seg3_load:
1692 case Intrinsic::riscv_seg4_load:
1693 case Intrinsic::riscv_seg5_load:
1694 case Intrinsic::riscv_seg6_load:
1695 case Intrinsic::riscv_seg7_load:
1696 case Intrinsic::riscv_seg8_load:
1697 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1698 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1699 case Intrinsic::riscv_seg2_store:
1700 case Intrinsic::riscv_seg3_store:
1701 case Intrinsic::riscv_seg4_store:
1702 case Intrinsic::riscv_seg5_store:
1703 case Intrinsic::riscv_seg6_store:
1704 case Intrinsic::riscv_seg7_store:
1705 case Intrinsic::riscv_seg8_store:
1706 // Operands are (vec, ..., vec, ptr, vl)
1707 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1708 /*IsStore*/ true,
1709 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1710 case Intrinsic::riscv_vle:
1711 case Intrinsic::riscv_vle_mask:
1712 case Intrinsic::riscv_vleff:
1713 case Intrinsic::riscv_vleff_mask:
1714 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1715 /*IsStore*/ false,
1716 /*IsUnitStrided*/ true,
1717 /*UsePtrVal*/ true);
1718 case Intrinsic::riscv_vse:
1719 case Intrinsic::riscv_vse_mask:
1720 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1721 /*IsStore*/ true,
1722 /*IsUnitStrided*/ true,
1723 /*UsePtrVal*/ true);
1724 case Intrinsic::riscv_vlse:
1725 case Intrinsic::riscv_vlse_mask:
1726 case Intrinsic::riscv_vloxei:
1727 case Intrinsic::riscv_vloxei_mask:
1728 case Intrinsic::riscv_vluxei:
1729 case Intrinsic::riscv_vluxei_mask:
1730 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1731 /*IsStore*/ false,
1732 /*IsUnitStrided*/ false);
1733 case Intrinsic::riscv_vsse:
1734 case Intrinsic::riscv_vsse_mask:
1735 case Intrinsic::riscv_vsoxei:
1736 case Intrinsic::riscv_vsoxei_mask:
1737 case Intrinsic::riscv_vsuxei:
1738 case Intrinsic::riscv_vsuxei_mask:
1739 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1740 /*IsStore*/ true,
1741 /*IsUnitStrided*/ false);
1742 case Intrinsic::riscv_vlseg2:
1743 case Intrinsic::riscv_vlseg3:
1744 case Intrinsic::riscv_vlseg4:
1745 case Intrinsic::riscv_vlseg5:
1746 case Intrinsic::riscv_vlseg6:
1747 case Intrinsic::riscv_vlseg7:
1748 case Intrinsic::riscv_vlseg8:
1749 case Intrinsic::riscv_vlseg2ff:
1750 case Intrinsic::riscv_vlseg3ff:
1751 case Intrinsic::riscv_vlseg4ff:
1752 case Intrinsic::riscv_vlseg5ff:
1753 case Intrinsic::riscv_vlseg6ff:
1754 case Intrinsic::riscv_vlseg7ff:
1755 case Intrinsic::riscv_vlseg8ff:
1756 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1757 /*IsStore*/ false,
1758 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1759 case Intrinsic::riscv_vlseg2_mask:
1760 case Intrinsic::riscv_vlseg3_mask:
1761 case Intrinsic::riscv_vlseg4_mask:
1762 case Intrinsic::riscv_vlseg5_mask:
1763 case Intrinsic::riscv_vlseg6_mask:
1764 case Intrinsic::riscv_vlseg7_mask:
1765 case Intrinsic::riscv_vlseg8_mask:
1766 case Intrinsic::riscv_vlseg2ff_mask:
1767 case Intrinsic::riscv_vlseg3ff_mask:
1768 case Intrinsic::riscv_vlseg4ff_mask:
1769 case Intrinsic::riscv_vlseg5ff_mask:
1770 case Intrinsic::riscv_vlseg6ff_mask:
1771 case Intrinsic::riscv_vlseg7ff_mask:
1772 case Intrinsic::riscv_vlseg8ff_mask:
1773 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1774 /*IsStore*/ false,
1775 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1776 case Intrinsic::riscv_vlsseg2:
1777 case Intrinsic::riscv_vlsseg3:
1778 case Intrinsic::riscv_vlsseg4:
1779 case Intrinsic::riscv_vlsseg5:
1780 case Intrinsic::riscv_vlsseg6:
1781 case Intrinsic::riscv_vlsseg7:
1782 case Intrinsic::riscv_vlsseg8:
1783 case Intrinsic::riscv_vloxseg2:
1784 case Intrinsic::riscv_vloxseg3:
1785 case Intrinsic::riscv_vloxseg4:
1786 case Intrinsic::riscv_vloxseg5:
1787 case Intrinsic::riscv_vloxseg6:
1788 case Intrinsic::riscv_vloxseg7:
1789 case Intrinsic::riscv_vloxseg8:
1790 case Intrinsic::riscv_vluxseg2:
1791 case Intrinsic::riscv_vluxseg3:
1792 case Intrinsic::riscv_vluxseg4:
1793 case Intrinsic::riscv_vluxseg5:
1794 case Intrinsic::riscv_vluxseg6:
1795 case Intrinsic::riscv_vluxseg7:
1796 case Intrinsic::riscv_vluxseg8:
1797 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1798 /*IsStore*/ false,
1799 /*IsUnitStrided*/ false);
1800 case Intrinsic::riscv_vlsseg2_mask:
1801 case Intrinsic::riscv_vlsseg3_mask:
1802 case Intrinsic::riscv_vlsseg4_mask:
1803 case Intrinsic::riscv_vlsseg5_mask:
1804 case Intrinsic::riscv_vlsseg6_mask:
1805 case Intrinsic::riscv_vlsseg7_mask:
1806 case Intrinsic::riscv_vlsseg8_mask:
1807 case Intrinsic::riscv_vloxseg2_mask:
1808 case Intrinsic::riscv_vloxseg3_mask:
1809 case Intrinsic::riscv_vloxseg4_mask:
1810 case Intrinsic::riscv_vloxseg5_mask:
1811 case Intrinsic::riscv_vloxseg6_mask:
1812 case Intrinsic::riscv_vloxseg7_mask:
1813 case Intrinsic::riscv_vloxseg8_mask:
1814 case Intrinsic::riscv_vluxseg2_mask:
1815 case Intrinsic::riscv_vluxseg3_mask:
1816 case Intrinsic::riscv_vluxseg4_mask:
1817 case Intrinsic::riscv_vluxseg5_mask:
1818 case Intrinsic::riscv_vluxseg6_mask:
1819 case Intrinsic::riscv_vluxseg7_mask:
1820 case Intrinsic::riscv_vluxseg8_mask:
1821 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1822 /*IsStore*/ false,
1823 /*IsUnitStrided*/ false);
1824 case Intrinsic::riscv_vsseg2:
1825 case Intrinsic::riscv_vsseg3:
1826 case Intrinsic::riscv_vsseg4:
1827 case Intrinsic::riscv_vsseg5:
1828 case Intrinsic::riscv_vsseg6:
1829 case Intrinsic::riscv_vsseg7:
1830 case Intrinsic::riscv_vsseg8:
1831 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1832 /*IsStore*/ true,
1833 /*IsUnitStrided*/ false);
1834 case Intrinsic::riscv_vsseg2_mask:
1835 case Intrinsic::riscv_vsseg3_mask:
1836 case Intrinsic::riscv_vsseg4_mask:
1837 case Intrinsic::riscv_vsseg5_mask:
1838 case Intrinsic::riscv_vsseg6_mask:
1839 case Intrinsic::riscv_vsseg7_mask:
1840 case Intrinsic::riscv_vsseg8_mask:
1841 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1842 /*IsStore*/ true,
1843 /*IsUnitStrided*/ false);
1844 case Intrinsic::riscv_vssseg2:
1845 case Intrinsic::riscv_vssseg3:
1846 case Intrinsic::riscv_vssseg4:
1847 case Intrinsic::riscv_vssseg5:
1848 case Intrinsic::riscv_vssseg6:
1849 case Intrinsic::riscv_vssseg7:
1850 case Intrinsic::riscv_vssseg8:
1851 case Intrinsic::riscv_vsoxseg2:
1852 case Intrinsic::riscv_vsoxseg3:
1853 case Intrinsic::riscv_vsoxseg4:
1854 case Intrinsic::riscv_vsoxseg5:
1855 case Intrinsic::riscv_vsoxseg6:
1856 case Intrinsic::riscv_vsoxseg7:
1857 case Intrinsic::riscv_vsoxseg8:
1858 case Intrinsic::riscv_vsuxseg2:
1859 case Intrinsic::riscv_vsuxseg3:
1860 case Intrinsic::riscv_vsuxseg4:
1861 case Intrinsic::riscv_vsuxseg5:
1862 case Intrinsic::riscv_vsuxseg6:
1863 case Intrinsic::riscv_vsuxseg7:
1864 case Intrinsic::riscv_vsuxseg8:
1865 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1866 /*IsStore*/ true,
1867 /*IsUnitStrided*/ false);
1868 case Intrinsic::riscv_vssseg2_mask:
1869 case Intrinsic::riscv_vssseg3_mask:
1870 case Intrinsic::riscv_vssseg4_mask:
1871 case Intrinsic::riscv_vssseg5_mask:
1872 case Intrinsic::riscv_vssseg6_mask:
1873 case Intrinsic::riscv_vssseg7_mask:
1874 case Intrinsic::riscv_vssseg8_mask:
1875 case Intrinsic::riscv_vsoxseg2_mask:
1876 case Intrinsic::riscv_vsoxseg3_mask:
1877 case Intrinsic::riscv_vsoxseg4_mask:
1878 case Intrinsic::riscv_vsoxseg5_mask:
1879 case Intrinsic::riscv_vsoxseg6_mask:
1880 case Intrinsic::riscv_vsoxseg7_mask:
1881 case Intrinsic::riscv_vsoxseg8_mask:
1882 case Intrinsic::riscv_vsuxseg2_mask:
1883 case Intrinsic::riscv_vsuxseg3_mask:
1884 case Intrinsic::riscv_vsuxseg4_mask:
1885 case Intrinsic::riscv_vsuxseg5_mask:
1886 case Intrinsic::riscv_vsuxseg6_mask:
1887 case Intrinsic::riscv_vsuxseg7_mask:
1888 case Intrinsic::riscv_vsuxseg8_mask:
1889 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1890 /*IsStore*/ true,
1891 /*IsUnitStrided*/ false);
1892 }
1893}
1894
1896 const AddrMode &AM, Type *Ty,
1897 unsigned AS,
1898 Instruction *I) const {
1899 // No global is ever allowed as a base.
1900 if (AM.BaseGV)
1901 return false;
1902
1903 // None of our addressing modes allows a scalable offset
1904 if (AM.ScalableOffset)
1905 return false;
1906
1907 // RVV instructions only support register addressing.
1908 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1909 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1910
1911 // Require a 12-bit signed offset.
1912 if (!isInt<12>(AM.BaseOffs))
1913 return false;
1914
1915 switch (AM.Scale) {
1916 case 0: // "r+i" or just "i", depending on HasBaseReg.
1917 break;
1918 case 1:
1919 if (!AM.HasBaseReg) // allow "r+i".
1920 break;
1921 return false; // disallow "r+r" or "r+r+i".
1922 default:
1923 return false;
1924 }
1925
1926 return true;
1927}
1928
1930 return isInt<12>(Imm);
1931}
1932
1934 return isInt<12>(Imm);
1935}
1936
1937// On RV32, 64-bit integers are split into their high and low parts and held
1938// in two different registers, so the trunc is free since the low register can
1939// just be used.
1940// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1941// isTruncateFree?
1943 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1944 return false;
1945 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1946 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1947 return (SrcBits == 64 && DestBits == 32);
1948}
1949
1951 // We consider i64->i32 free on RV64 since we have good selection of W
1952 // instructions that make promoting operations back to i64 free in many cases.
1953 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1954 !DstVT.isInteger())
1955 return false;
1956 unsigned SrcBits = SrcVT.getSizeInBits();
1957 unsigned DestBits = DstVT.getSizeInBits();
1958 return (SrcBits == 64 && DestBits == 32);
1959}
1960
// NOTE(review): the first signature line is missing from this listing; the
// tail call below suggests this is isTruncateFree(SDValue Val, EVT VT2) —
// verify against upstream.
1962 EVT SrcVT = Val.getValueType();
1963 // A truncate of a vector shift-right (srl/sra) to half the element width
 // is free: it can be folded into the narrowing vnsrl/vnsra instruction.
1964 if (Subtarget.hasVInstructions() &&
1965 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1966 SrcVT.isVector() && VT2.isVector()) {
1967 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1968 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1969 if (SrcBits == DestBits * 2) {
1970 return true;
1971 }
1972 }
1973 return TargetLowering::isTruncateFree(Val, VT2);
1974}
1975
1977 // Zexts are free if they can be combined with a load.
1978 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1979 // poorly with type legalization of compares preferring sext.
1980 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1981 EVT MemVT = LD->getMemoryVT();
1982 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1983 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1984 LD->getExtensionType() == ISD::ZEXTLOAD))
1985 return true;
1986 }
1987
1988 return TargetLowering::isZExtFree(Val, VT2);
1989}
1990
1992 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1993}
1994
1996 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1997}
1998
2000 return Subtarget.hasStdExtZbb() ||
2001 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2002}
2003
2005 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2006 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2007}
2008
2010 const Instruction &AndI) const {
2011 // We expect to be able to match a bit extraction instruction if the Zbs
2012 // extension is supported and the mask is a power of two. However, we
2013 // conservatively return false if the mask would fit in an ANDI instruction,
2014 // on the basis that it's possible the sinking+duplication of the AND in
2015 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2016 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2017 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2018 return false;
2019 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2020 if (!Mask)
2021 return false;
2022 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2023}
2024
2026 EVT VT = Y.getValueType();
2027
2028 // FIXME: Support vectors once we have tests.
2029 if (VT.isVector())
2030 return false;
2031
2032 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2033 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2034}
2035
2037 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2038 if (Subtarget.hasStdExtZbs())
2039 return X.getValueType().isScalarInteger();
2040 auto *C = dyn_cast<ConstantSDNode>(Y);
2041 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2042 if (Subtarget.hasVendorXTHeadBs())
2043 return C != nullptr;
2044 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2045 return C && C->getAPIntValue().ule(10);
2046}
2047
2049 EVT VT) const {
2050 // Only enable for rvv.
2051 if (!VT.isVector() || !Subtarget.hasVInstructions())
2052 return false;
2053
2054 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2055 return false;
2056
2057 return true;
2058}
2059
2061 Type *Ty) const {
2062 assert(Ty->isIntegerTy());
2063
2064 unsigned BitSize = Ty->getIntegerBitWidth();
2065 if (BitSize > Subtarget.getXLen())
2066 return false;
2067
2068 // Fast path, assume 32-bit immediates are cheap.
2069 int64_t Val = Imm.getSExtValue();
2070 if (isInt<32>(Val))
2071 return true;
2072
2073 // A constant pool entry may be more aligned than the load we're trying to
2074 // replace. If we don't support unaligned scalar mem, prefer the constant
2075 // pool.
2076 // TODO: Can the caller pass down the alignment?
2077 if (!Subtarget.enableUnalignedScalarMem())
2078 return true;
2079
2080 // Prefer to keep the load if it would require many instructions.
2081 // This uses the same threshold we use for constant pools but doesn't
2082 // check useConstantPoolForLargeInts.
2083 // TODO: Should we keep the load only when we're definitely going to emit a
2084 // constant pool?
2085
 // NOTE(review): the declaration of 'Seq' (presumably
 // RISCVMatInt::generateInstSeq(Val, Subtarget)) is missing from this
 // listing — restore it before compiling.
2087 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2088}
2089
2093 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2094 SelectionDAG &DAG) const {
2095 // One interesting pattern that we'd want to form is 'bit extract':
2096 // ((1 >> Y) & 1) ==/!= 0
2097 // But we also need to be careful not to try to reverse that fold.
2098
2099 // Is this '((1 >> Y) & 1)'?
2100 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2101 return false; // Keep the 'bit extract' pattern.
2102
2103 // Will this be '((1 >> Y) & 1)' after the transform?
2104 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2105 return true; // Do form the 'bit extract' pattern.
2106
2107 // If 'X' is a constant, and we transform, then we will immediately
2108 // try to undo the fold, thus causing endless combine loop.
2109 // So only do the transform if X is not a constant. This matches the default
2110 // implementation of this function.
2111 return !XC;
2112}
2113
2115 unsigned Opc = VecOp.getOpcode();
2116
2117 // Assume target opcodes can't be scalarized.
2118 // TODO - do we have any exceptions?
2119 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2120 return false;
2121
2122 // If the vector op is not supported, try to convert to scalar.
2123 EVT VecVT = VecOp.getValueType();
2124 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2125 return true;
2126
2127 // If the vector op is supported, but the scalar op is not, the transform may
2128 // not be worthwhile.
2129 // Permit the transform if the vector binary operation can be converted to a
2130 // scalar binary operation that is custom lowered with an illegal type.
2131 EVT ScalarVT = VecVT.getScalarType();
2132 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2133 isOperationCustom(Opc, ScalarVT);
2134}
2135
2137 const GlobalAddressSDNode *GA) const {
2138 // In order to maximise the opportunity for common subexpression elimination,
2139 // keep a separate ADD node for the global address offset instead of folding
2140 // it in the global address node. Later peephole optimisations may choose to
2141 // fold it back in when profitable.
2142 return false;
2143}
2144
2145// Returns 0-31 if the fli instruction is available for the type and this is
2146// legal FP immediate for the type. Returns -1 otherwise.
2148 if (!Subtarget.hasStdExtZfa())
2149 return -1;
2150
2151 bool IsSupportedVT = false;
2152 if (VT == MVT::f16) {
2153 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2154 } else if (VT == MVT::f32) {
2155 IsSupportedVT = true;
2156 } else if (VT == MVT::f64) {
2157 assert(Subtarget.hasStdExtD() && "Expect D extension");
2158 IsSupportedVT = true;
2159 }
2160
2161 if (!IsSupportedVT)
2162 return -1;
2163
2164 return RISCVLoadFPImm::getLoadFPImm(Imm);
2165}
2166
2168 bool ForCodeSize) const {
2169 bool IsLegalVT = false;
2170 if (VT == MVT::f16)
2171 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2172 else if (VT == MVT::f32)
2173 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2174 else if (VT == MVT::f64)
2175 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2176 else if (VT == MVT::bf16)
2177 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2178
2179 if (!IsLegalVT)
2180 return false;
2181
2182 if (getLegalZfaFPImm(Imm, VT) >= 0)
2183 return true;
2184
2185 // Cannot create a 64 bit floating-point immediate value for rv32.
2186 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2187 // td can handle +0.0 or -0.0 already.
2188 // -0.0 can be created by fmv + fneg.
2189 return Imm.isZero();
2190 }
2191
2192 // Special case: fmv + fneg
2193 if (Imm.isNegZero())
2194 return true;
2195
2196 // Building an integer and then converting requires a fmv at the end of
2197 // the integer sequence. The fmv is not required for Zfinx.
2198 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2199 const int Cost =
2200 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2201 Subtarget.getXLen(), Subtarget);
2202 return Cost <= FPImmCost;
2203}
2204
2205// TODO: This is very conservative.
2207 unsigned Index) const {
 // NOTE(review): the first signature line and the 'if' guarding this early
 // return are missing from this listing — verify against upstream before
 // compiling.
2209 return false;
2210
2211 // Only support extracting a fixed from a fixed vector for now.
2212 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2213 return false;
2214
2215 EVT EltVT = ResVT.getVectorElementType();
2216 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2217
2218 // The smallest type we can slide is i8.
2219 // TODO: We can extract index 0 from a mask vector without a slide.
2220 if (EltVT == MVT::i1)
2221 return false;
2222
2223 unsigned ResElts = ResVT.getVectorNumElements();
2224 unsigned SrcElts = SrcVT.getVectorNumElements();
2225
2226 unsigned MinVLen = Subtarget.getRealMinVLen();
2227 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2228
2229 // If we're extracting only data from the first VLEN bits of the source
2230 // then we can always do this with an m1 vslidedown.vx. Restricting the
2231 // Index ensures we can use a vslidedown.vi.
2232 // TODO: We can generalize this when the exact VLEN is known.
2233 if (Index + ResElts <= MinVLMAX && Index < 31)
2234 return true;
2235
2236 // Conservatively only handle extracting half of a vector.
2237 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2238 // the upper half of a vector until we have more test coverage.
2239 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2240 // a cheap extract. However, this case is important in practice for
2241 // shuffled extracts of longer vectors. How should we resolve this?
2242 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2243}
2244
2247 EVT VT) const {
2248 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2249 // We might still end up using a GPR but that will be decided based on ABI.
2250 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2251 !Subtarget.hasStdExtZfhminOrZhinxmin())
2252 return MVT::f32;
2253
2255
2256 return PartVT;
2257}
2258
2259unsigned
2261 std::optional<MVT> RegisterVT) const {
2262 // Pair inline assembly operand
2263 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2264 *RegisterVT == MVT::Untyped)
2265 return 1;
2266
2267 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2268}
2269
2272 EVT VT) const {
2273 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2274 // We might still end up using a GPR but that will be decided based on ABI.
2275 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2276 !Subtarget.hasStdExtZfhminOrZhinxmin())
2277 return 1;
2278
2280}
2281
2283 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2284 unsigned &NumIntermediates, MVT &RegisterVT) const {
2286 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2287
2288 return NumRegs;
2289}
2290
2291// Changes the condition code and swaps operands if necessary, so the SetCC
2292// operation matches one of the comparisons supported directly by branches
2293// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2294// with 1/-1.
2295static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2296 ISD::CondCode &CC, SelectionDAG &DAG) {
2297 // If this is a single bit test that can't be handled by ANDI, shift the
2298 // bit to be tested to the MSB and perform a signed compare with 0.
2299 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2300 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2301 isa<ConstantSDNode>(LHS.getOperand(1))) {
2302 uint64_t Mask = LHS.getConstantOperandVal(1);
2303 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2304 unsigned ShAmt = 0;
2305 if (isPowerOf2_64(Mask)) {
2307 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2308 } else {
2309 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2310 }
2311
2312 LHS = LHS.getOperand(0);
2313 if (ShAmt != 0)
2314 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2315 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2316 return;
2317 }
2318 }
2319
2320 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2321 int64_t C = RHSC->getSExtValue();
2322 switch (CC) {
2323 default: break;
2324 case ISD::SETGT:
2325 // Convert X > -1 to X >= 0.
2326 if (C == -1) {
2327 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2328 CC = ISD::SETGE;
2329 return;
2330 }
2331 break;
2332 case ISD::SETLT:
2333 // Convert X < 1 to 0 >= X.
2334 if (C == 1) {
2335 RHS = LHS;
2336 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2337 CC = ISD::SETGE;
2338 return;
2339 }
2340 break;
2341 }
2342 }
2343
2344 switch (CC) {
2345 default:
2346 break;
2347 case ISD::SETGT:
2348 case ISD::SETLE:
2349 case ISD::SETUGT:
2350 case ISD::SETULE:
2352 std::swap(LHS, RHS);
2353 break;
2354 }
2355}
2356
2358 if (VT.isRISCVVectorTuple()) {
2359 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2360 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2361 return RISCVII::LMUL_F8;
2362 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2363 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2364 return RISCVII::LMUL_F4;
2365 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2366 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2367 return RISCVII::LMUL_F2;
2368 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2369 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2370 return RISCVII::LMUL_1;
2371 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2372 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2373 return RISCVII::LMUL_2;
2374 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2375 return RISCVII::LMUL_4;
2376 llvm_unreachable("Invalid vector tuple type LMUL.");
2377 }
2378
2379 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2380 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2381 if (VT.getVectorElementType() == MVT::i1)
2382 KnownSize *= 8;
2383
2384 switch (KnownSize) {
2385 default:
2386 llvm_unreachable("Invalid LMUL.");
2387 case 8:
2389 case 16:
2391 case 32:
2393 case 64:
2395 case 128:
2397 case 256:
2399 case 512:
2401 }
2402}
2403
2405 switch (LMul) {
2406 default:
2407 llvm_unreachable("Invalid LMUL.");
2412 return RISCV::VRRegClassID;
2414 return RISCV::VRM2RegClassID;
2416 return RISCV::VRM4RegClassID;
2418 return RISCV::VRM8RegClassID;
2419 }
2420}
2421
2422unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2423 RISCVII::VLMUL LMUL = getLMUL(VT);
2424 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2425 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2426 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2427 LMUL == RISCVII::VLMUL::LMUL_1) {
2428 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2429 "Unexpected subreg numbering");
2430 return RISCV::sub_vrm1_0 + Index;
2431 }
2432 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2433 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2434 "Unexpected subreg numbering");
2435 return RISCV::sub_vrm2_0 + Index;
2436 }
2437 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2438 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2439 "Unexpected subreg numbering");
2440 return RISCV::sub_vrm4_0 + Index;
2441 }
2442 llvm_unreachable("Invalid vector type.");
2443}
2444
2446 if (VT.isRISCVVectorTuple()) {
2447 unsigned NF = VT.getRISCVVectorTupleNumFields();
2448 unsigned RegsPerField =
2449 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2450 (NF * RISCV::RVVBitsPerBlock));
2451 switch (RegsPerField) {
2452 case 1:
2453 if (NF == 2)
2454 return RISCV::VRN2M1RegClassID;
2455 if (NF == 3)
2456 return RISCV::VRN3M1RegClassID;
2457 if (NF == 4)
2458 return RISCV::VRN4M1RegClassID;
2459 if (NF == 5)
2460 return RISCV::VRN5M1RegClassID;
2461 if (NF == 6)
2462 return RISCV::VRN6M1RegClassID;
2463 if (NF == 7)
2464 return RISCV::VRN7M1RegClassID;
2465 if (NF == 8)
2466 return RISCV::VRN8M1RegClassID;
2467 break;
2468 case 2:
2469 if (NF == 2)
2470 return RISCV::VRN2M2RegClassID;
2471 if (NF == 3)
2472 return RISCV::VRN3M2RegClassID;
2473 if (NF == 4)
2474 return RISCV::VRN4M2RegClassID;
2475 break;
2476 case 4:
2477 assert(NF == 2);
2478 return RISCV::VRN2M4RegClassID;
2479 default:
2480 break;
2481 }
2482 llvm_unreachable("Invalid vector tuple type RegClass.");
2483 }
2484
2485 if (VT.getVectorElementType() == MVT::i1)
2486 return RISCV::VRRegClassID;
2487 return getRegClassIDForLMUL(getLMUL(VT));
2488}
2489
2490// Attempt to decompose a subvector insert/extract between VecVT and
2491// SubVecVT via subregister indices. Returns the subregister index that
2492// can perform the subvector insert/extract with the given element index, as
2493// well as the index corresponding to any leftover subvectors that must be
2494// further inserted/extracted within the register class for SubVecVT.
2495std::pair<unsigned, unsigned>
2497 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2498 const RISCVRegisterInfo *TRI) {
2499 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2500 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2501 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2502 "Register classes not ordered");
2503 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2504 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2505
2506 // If VecVT is a vector tuple type, either it's the tuple type with same
2507 // RegClass with SubVecVT or SubVecVT is a actually a subvector of the VecVT.
2508 if (VecVT.isRISCVVectorTuple()) {
2509 if (VecRegClassID == SubRegClassID)
2510 return {RISCV::NoSubRegister, 0};
2511
2512 assert(SubVecVT.isScalableVector() &&
2513 "Only allow scalable vector subvector.");
2514 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2515 "Invalid vector tuple insert/extract for vector and subvector with "
2516 "different LMUL.");
2517 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2518 }
2519
2520 // Try to compose a subregister index that takes us from the incoming
2521 // LMUL>1 register class down to the outgoing one. At each step we half
2522 // the LMUL:
2523 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2524 // Note that this is not guaranteed to find a subregister index, such as
2525 // when we are extracting from one VR type to another.
2526 unsigned SubRegIdx = RISCV::NoSubRegister;
2527 for (const unsigned RCID :
2528 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2529 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2530 VecVT = VecVT.getHalfNumVectorElementsVT();
2531 bool IsHi =
2532 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2533 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2534 getSubregIndexByMVT(VecVT, IsHi));
2535 if (IsHi)
2536 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2537 }
2538 return {SubRegIdx, InsertExtractIdx};
2539}
2540
2541// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2542// stores for those types.
2543bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2544 return !Subtarget.useRVVForFixedLengthVectors() ||
2545 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2546}
2547
2549 if (!ScalarTy.isSimple())
2550 return false;
2551 switch (ScalarTy.getSimpleVT().SimpleTy) {
2552 case MVT::iPTR:
2553 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2554 case MVT::i8:
2555 case MVT::i16:
2556 case MVT::i32:
2557 return true;
2558 case MVT::i64:
2559 return Subtarget.hasVInstructionsI64();
2560 case MVT::f16:
2561 return Subtarget.hasVInstructionsF16Minimal();
2562 case MVT::bf16:
2563 return Subtarget.hasVInstructionsBF16Minimal();
2564 case MVT::f32:
2565 return Subtarget.hasVInstructionsF32();
2566 case MVT::f64:
2567 return Subtarget.hasVInstructionsF64();
2568 default:
2569 return false;
2570 }
2571}
2572
2573
2574unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2575 return NumRepeatedDivisors;
2576}
2577
2579 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2580 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2581 "Unexpected opcode");
2582 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2583 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2585 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2586 if (!II)
2587 return SDValue();
2588 return Op.getOperand(II->VLOperand + 1 + HasChain);
2589}
2590
2592 const RISCVSubtarget &Subtarget) {
2593 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2594 if (!Subtarget.useRVVForFixedLengthVectors())
2595 return false;
2596
2597 // We only support a set of vector types with a consistent maximum fixed size
2598 // across all supported vector element types to avoid legalization issues.
2599 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2600 // fixed-length vector type we support is 1024 bytes.
2601 if (VT.getFixedSizeInBits() > 1024 * 8)
2602 return false;
2603
2604 unsigned MinVLen = Subtarget.getRealMinVLen();
2605
2606 MVT EltVT = VT.getVectorElementType();
2607
2608 // Don't use RVV for vectors we cannot scalarize if required.
2609 switch (EltVT.SimpleTy) {
2610 // i1 is supported but has different rules.
2611 default:
2612 return false;
2613 case MVT::i1:
2614 // Masks can only use a single register.
2615 if (VT.getVectorNumElements() > MinVLen)
2616 return false;
2617 MinVLen /= 8;
2618 break;
2619 case MVT::i8:
2620 case MVT::i16:
2621 case MVT::i32:
2622 break;
2623 case MVT::i64:
2624 if (!Subtarget.hasVInstructionsI64())
2625 return false;
2626 break;
2627 case MVT::f16:
2628 if (!Subtarget.hasVInstructionsF16Minimal())
2629 return false;
2630 break;
2631 case MVT::bf16:
2632 if (!Subtarget.hasVInstructionsBF16Minimal())
2633 return false;
2634 break;
2635 case MVT::f32:
2636 if (!Subtarget.hasVInstructionsF32())
2637 return false;
2638 break;
2639 case MVT::f64:
2640 if (!Subtarget.hasVInstructionsF64())
2641 return false;
2642 break;
2643 }
2644
2645 // Reject elements larger than ELEN.
2646 if (EltVT.getSizeInBits() > Subtarget.getELen())
2647 return false;
2648
2649 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2650 // Don't use RVV for types that don't fit.
2651 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2652 return false;
2653
2654 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2655 // the base fixed length RVV support in place.
2656 if (!VT.isPow2VectorType())
2657 return false;
2658
2659 return true;
2660}
2661
2662bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2663 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2664}
2665
2666// Return the largest legal scalable vector type that matches VT's element type.
2668 const RISCVSubtarget &Subtarget) {
2669 // This may be called before legal types are setup.
2670 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2671 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2672 "Expected legal fixed length vector!");
2673
2674 unsigned MinVLen = Subtarget.getRealMinVLen();
2675 unsigned MaxELen = Subtarget.getELen();
2676
2677 MVT EltVT = VT.getVectorElementType();
2678 switch (EltVT.SimpleTy) {
2679 default:
2680 llvm_unreachable("unexpected element type for RVV container");
2681 case MVT::i1:
2682 case MVT::i8:
2683 case MVT::i16:
2684 case MVT::i32:
2685 case MVT::i64:
2686 case MVT::bf16:
2687 case MVT::f16:
2688 case MVT::f32:
2689 case MVT::f64: {
2690 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2691 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2692 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2693 unsigned NumElts =
2695 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2696 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2697 return MVT::getScalableVectorVT(EltVT, NumElts);
2698 }
2699 }
2700}
2701
2703 const RISCVSubtarget &Subtarget) {
2705 Subtarget);
2706}
2707
2709 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2710}
2711
2712// Grow V to consume an entire RVV register.
2714 const RISCVSubtarget &Subtarget) {
2715 assert(VT.isScalableVector() &&
2716 "Expected to convert into a scalable vector!");
2717 assert(V.getValueType().isFixedLengthVector() &&
2718 "Expected a fixed length vector operand!");
2719 SDLoc DL(V);
2720 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2721 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2722}
2723
2724// Shrink V so it's just big enough to maintain a VT's worth of data.
2726 const RISCVSubtarget &Subtarget) {
2728 "Expected to convert into a fixed length vector!");
2729 assert(V.getValueType().isScalableVector() &&
2730 "Expected a scalable vector operand!");
2731 SDLoc DL(V);
2732 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2733 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2734}
2735
2736/// Return the type of the mask type suitable for masking the provided
2737/// vector type. This is simply an i1 element type vector of the same
2738/// (possibly scalable) length.
2739static MVT getMaskTypeFor(MVT VecVT) {
2740 assert(VecVT.isVector());
2742 return MVT::getVectorVT(MVT::i1, EC);
2743}
2744
2745/// Creates an all ones mask suitable for masking a vector of type VecTy with
2746/// vector length VL. .
2747static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2748 SelectionDAG &DAG) {
2749 MVT MaskVT = getMaskTypeFor(VecVT);
2750 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2751}
2752
2753static std::pair<SDValue, SDValue>
2755 const RISCVSubtarget &Subtarget) {
2756 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2757 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2758 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2759 return {Mask, VL};
2760}
2761
2762static std::pair<SDValue, SDValue>
2763getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2764 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2765 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2766 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2767 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2768 return {Mask, VL};
2769}
2770
2771// Gets the two common "VL" operands: an all-ones mask and the vector length.
2772// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2773// the vector type that the fixed-length vector is contained in. Otherwise if
2774// VecVT is scalable, then ContainerVT should be the same as VecVT.
2775static std::pair<SDValue, SDValue>
2776getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2777 const RISCVSubtarget &Subtarget) {
2778 if (VecVT.isFixedLengthVector())
2779 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2780 Subtarget);
2781 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2782 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2783}
2784
2786 SelectionDAG &DAG) const {
2787 assert(VecVT.isScalableVector() && "Expected scalable vector");
2788 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2789 VecVT.getVectorElementCount());
2790}
2791
2792std::pair<unsigned, unsigned>
2794 const RISCVSubtarget &Subtarget) {
2795 assert(VecVT.isScalableVector() && "Expected scalable vector");
2796
2797 unsigned EltSize = VecVT.getScalarSizeInBits();
2798 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2799
2800 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2801 unsigned MaxVLMAX =
2802 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2803
2804 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2805 unsigned MinVLMAX =
2806 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2807
2808 return std::make_pair(MinVLMAX, MaxVLMAX);
2809}
2810
2811// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2812// of either is (currently) supported. This can get us into an infinite loop
2813// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2814// as a ..., etc.
2815// Until either (or both) of these can reliably lower any node, reporting that
2816// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2817// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2818// which is not desirable.
2820 EVT VT, unsigned DefinedValues) const {
2821 return false;
2822}
2823
2825 // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
2826 // implementation-defined.
2827 if (!VT.isVector())
2829 unsigned DLenFactor = Subtarget.getDLenFactor();
2830 unsigned Cost;
2831 if (VT.isScalableVector()) {
2832 unsigned LMul;
2833 bool Fractional;
2834 std::tie(LMul, Fractional) =
2836 if (Fractional)
2837 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2838 else
2839 Cost = (LMul * DLenFactor);
2840 } else {
2841 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2842 }
2843 return Cost;
2844}
2845
2846
2847/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2848/// is generally quadratic in the number of vreg implied by LMUL. Note that
2849/// operand (index and possibly mask) are handled separately.
2851 return getLMULCost(VT) * getLMULCost(VT);
2852}
2853
2854/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2855/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2856/// or may track the vrgather.vv cost. It is implementation-dependent.
2858 return getLMULCost(VT);
2859}
2860
2861/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2862/// for the type VT. (This does not cover the vslide1up or vslide1down
2863/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2864/// or may track the vrgather.vv cost. It is implementation-dependent.
2866 return getLMULCost(VT);
2867}
2868
2869/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2870/// for the type VT. (This does not cover the vslide1up or vslide1down
2871/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2872/// or may track the vrgather.vv cost. It is implementation-dependent.
2874 return getLMULCost(VT);
2875}
2876
2878 const RISCVSubtarget &Subtarget) {
2879 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2880 // bf16 conversions are always promoted to f32.
2881 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2882 Op.getValueType() == MVT::bf16) {
2883 bool IsStrict = Op->isStrictFPOpcode();
2884
2885 SDLoc DL(Op);
2886 if (IsStrict) {
2887 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
2888 {Op.getOperand(0), Op.getOperand(1)});
2889 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
2890 {Op.getValueType(), MVT::Other},
2891 {Val.getValue(1), Val.getValue(0),
2892 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
2893 }
2894 return DAG.getNode(
2895 ISD::FP_ROUND, DL, Op.getValueType(),
2896 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
2897 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
2898 }
2899
2900 // Other operations are legal.
2901 return Op;
2902}
2903
2905 const RISCVSubtarget &Subtarget) {
2906 // RISC-V FP-to-int conversions saturate to the destination register size, but
2907 // don't produce 0 for nan. We can use a conversion instruction and fix the
2908 // nan case with a compare and a select.
2909 SDValue Src = Op.getOperand(0);
2910
2911 MVT DstVT = Op.getSimpleValueType();
2912 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2913
2914 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2915
2916 if (!DstVT.isVector()) {
2917 // For bf16 or for f16 in absense of Zfh, promote to f32, then saturate
2918 // the result.
2919 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2920 Src.getValueType() == MVT::bf16) {
2921 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2922 }
2923
2924 unsigned Opc;
2925 if (SatVT == DstVT)
2926 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2927 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2929 else
2930 return SDValue();
2931 // FIXME: Support other SatVTs by clamping before or after the conversion.
2932
2933 SDLoc DL(Op);
2934 SDValue FpToInt = DAG.getNode(
2935 Opc, DL, DstVT, Src,
2937
2938 if (Opc == RISCVISD::FCVT_WU_RV64)
2939 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2940
2941 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2942 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2944 }
2945
2946 // Vectors.
2947
2948 MVT DstEltVT = DstVT.getVectorElementType();
2949 MVT SrcVT = Src.getSimpleValueType();
2950 MVT SrcEltVT = SrcVT.getVectorElementType();
2951 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2952 unsigned DstEltSize = DstEltVT.getSizeInBits();
2953
2954 // Only handle saturating to the destination type.
2955 if (SatVT != DstEltVT)
2956 return SDValue();
2957
2958 MVT DstContainerVT = DstVT;
2959 MVT SrcContainerVT = SrcVT;
2960 if (DstVT.isFixedLengthVector()) {
2961 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2962 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2963 assert(DstContainerVT.getVectorElementCount() ==
2964 SrcContainerVT.getVectorElementCount() &&
2965 "Expected same element count");
2966 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2967 }
2968
2969 SDLoc DL(Op);
2970
2971 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2972
2973 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2974 {Src, Src, DAG.getCondCode(ISD::SETNE),
2975 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2976
2977 // Need to widen by more than 1 step, promote the FP type, then do a widening
2978 // convert.
2979 if (DstEltSize > (2 * SrcEltSize)) {
2980 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2981 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2982 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2983 }
2984
2985 MVT CvtContainerVT = DstContainerVT;
2986 MVT CvtEltVT = DstEltVT;
2987 if (SrcEltSize > (2 * DstEltSize)) {
2988 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2989 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2990 }
2991
2992 unsigned RVVOpc =
2994 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2995
2996 while (CvtContainerVT != DstContainerVT) {
2997 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
2998 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2999 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3000 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3002 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3003 }
3004
3005 SDValue SplatZero = DAG.getNode(
3006 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3007 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3008 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3009 Res, DAG.getUNDEF(DstContainerVT), VL);
3010
3011 if (DstVT.isFixedLengthVector())
3012 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3013
3014 return Res;
3015}
3016
3018 const RISCVSubtarget &Subtarget) {
3019 bool IsStrict = Op->isStrictFPOpcode();
3020 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3021
3022 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3023 // bf16 conversions are always promoted to f32.
3024 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3025 SrcVal.getValueType() == MVT::bf16) {
3026 SDLoc DL(Op);
3027 if (IsStrict) {
3028 SDValue Ext =
3029 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3030 {Op.getOperand(0), SrcVal});
3031 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3032 {Ext.getValue(1), Ext.getValue(0)});
3033 }
3034 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3035 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3036 }
3037
3038 // Other operations are legal.
3039 return Op;
3040}
3041
3043 switch (Opc) {
3044 case ISD::FROUNDEVEN:
3046 case ISD::VP_FROUNDEVEN:
3047 return RISCVFPRndMode::RNE;
3048 case ISD::FTRUNC:
3049 case ISD::STRICT_FTRUNC:
3050 case ISD::VP_FROUNDTOZERO:
3051 return RISCVFPRndMode::RTZ;
3052 case ISD::FFLOOR:
3053 case ISD::STRICT_FFLOOR:
3054 case ISD::VP_FFLOOR:
3055 return RISCVFPRndMode::RDN;
3056 case ISD::FCEIL:
3057 case ISD::STRICT_FCEIL:
3058 case ISD::VP_FCEIL:
3059 return RISCVFPRndMode::RUP;
3060 case ISD::FROUND:
3061 case ISD::STRICT_FROUND:
3062 case ISD::VP_FROUND:
3063 return RISCVFPRndMode::RMM;
3064 case ISD::FRINT:
3065 case ISD::VP_FRINT:
3066 return RISCVFPRndMode::DYN;
3067 }
3068
3070}
3071
3072// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3073// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3074// the integer domain and back. Taking care to avoid converting values that are
3075// nan or already correct.
3076static SDValue
3078 const RISCVSubtarget &Subtarget) {
3079 MVT VT = Op.getSimpleValueType();
3080 assert(VT.isVector() && "Unexpected type");
3081
3082 SDLoc DL(Op);
3083
3084 SDValue Src = Op.getOperand(0);
3085
3086 MVT ContainerVT = VT;
3087 if (VT.isFixedLengthVector()) {
3088 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3089 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3090 }
3091
3092 SDValue Mask, VL;
3093 if (Op->isVPOpcode()) {
3094 Mask = Op.getOperand(1);
3095 if (VT.isFixedLengthVector())
3096 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3097 Subtarget);
3098 VL = Op.getOperand(2);
3099 } else {
3100 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3101 }
3102
3103 // Freeze the source since we are increasing the number of uses.
3104 Src = DAG.getFreeze(Src);
3105
3106 // We do the conversion on the absolute value and fix the sign at the end.
3107 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3108
3109 // Determine the largest integer that can be represented exactly. This and
3110 // values larger than it don't have any fractional bits so don't need to
3111 // be converted.
3112 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3113 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3114 APFloat MaxVal = APFloat(FltSem);
3115 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3116 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3117 SDValue MaxValNode =
3118 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3119 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3120 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3121
3122 // If abs(Src) was larger than MaxVal or nan, keep it.
3123 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3124 Mask =
3125 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3126 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3127 Mask, Mask, VL});
3128
3129 // Truncate to integer and convert back to FP.
3130 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3131 MVT XLenVT = Subtarget.getXLenVT();
3132 SDValue Truncated;
3133
3134 switch (Op.getOpcode()) {
3135 default:
3136 llvm_unreachable("Unexpected opcode");
3137 case ISD::FRINT:
3138 case ISD::VP_FRINT:
3139 case ISD::FCEIL:
3140 case ISD::VP_FCEIL:
3141 case ISD::FFLOOR:
3142 case ISD::VP_FFLOOR:
3143 case ISD::FROUND:
3144 case ISD::FROUNDEVEN:
3145 case ISD::VP_FROUND:
3146 case ISD::VP_FROUNDEVEN:
3147 case ISD::VP_FROUNDTOZERO: {
3150 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3151 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3152 break;
3153 }
3154 case ISD::FTRUNC:
3155 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3156 Mask, VL);
3157 break;
3158 case ISD::FNEARBYINT:
3159 case ISD::VP_FNEARBYINT:
3160 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3161 Mask, VL);
3162 break;
3163 }
3164
3165 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3166 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3167 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3168 Mask, VL);
3169
3170 // Restore the original sign so that -0.0 is preserved.
3171 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3172 Src, Src, Mask, VL);
3173
3174 if (!VT.isFixedLengthVector())
3175 return Truncated;
3176
3177 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3178}
3179
3180// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3181// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
3182// qNan and coverting the new source to integer and back to FP.
3183static SDValue
3185 const RISCVSubtarget &Subtarget) {
3186 SDLoc DL(Op);
3187 MVT VT = Op.getSimpleValueType();
3188 SDValue Chain = Op.getOperand(0);
3189 SDValue Src = Op.getOperand(1);
3190
3191 MVT ContainerVT = VT;
3192 if (VT.isFixedLengthVector()) {
3193 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3194 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3195 }
3196
3197 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3198
3199 // Freeze the source since we are increasing the number of uses.
3200 Src = DAG.getFreeze(Src);
3201
3202 // Covert sNan to qNan by executing x + x for all unordered elemenet x in Src.
3203 MVT MaskVT = Mask.getSimpleValueType();
3205 DAG.getVTList(MaskVT, MVT::Other),
3206 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3207 DAG.getUNDEF(MaskVT), Mask, VL});
3208 Chain = Unorder.getValue(1);
3210 DAG.getVTList(ContainerVT, MVT::Other),
3211 {Chain, Src, Src, Src, Unorder, VL});
3212 Chain = Src.getValue(1);
3213
3214 // We do the conversion on the absolute value and fix the sign at the end.
3215 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3216
3217 // Determine the largest integer that can be represented exactly. This and
3218 // values larger than it don't have any fractional bits so don't need to
3219 // be converted.
3220 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3221 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3222 APFloat MaxVal = APFloat(FltSem);
3223 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3224 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3225 SDValue MaxValNode =
3226 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3227 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3228 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3229
3230 // If abs(Src) was larger than MaxVal or nan, keep it.
3231 Mask = DAG.getNode(
3232 RISCVISD::SETCC_VL, DL, MaskVT,
3233 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3234
3235 // Truncate to integer and convert back to FP.
3236 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3237 MVT XLenVT = Subtarget.getXLenVT();
3238 SDValue Truncated;
3239
3240 switch (Op.getOpcode()) {
3241 default:
3242 llvm_unreachable("Unexpected opcode");
3243 case ISD::STRICT_FCEIL:
3244 case ISD::STRICT_FFLOOR:
3245 case ISD::STRICT_FROUND:
3249 Truncated = DAG.getNode(
3250 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3251 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3252 break;
3253 }
3254 case ISD::STRICT_FTRUNC:
3255 Truncated =
3257 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3258 break;
3261 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3262 Mask, VL);
3263 break;
3264 }
3265 Chain = Truncated.getValue(1);
3266
3267 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3268 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3269 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3270 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3271 Truncated, Mask, VL);
3272 Chain = Truncated.getValue(1);
3273 }
3274
3275 // Restore the original sign so that -0.0 is preserved.
3276 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3277 Src, Src, Mask, VL);
3278
3279 if (VT.isFixedLengthVector())
3280 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3281 return DAG.getMergeValues({Truncated, Chain}, DL);
3282}
3283
3284static SDValue
3286 const RISCVSubtarget &Subtarget) {
3287 MVT VT = Op.getSimpleValueType();
3288 if (VT.isVector())
3289 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3290
3291 if (DAG.shouldOptForSize())
3292 return SDValue();
3293
3294 SDLoc DL(Op);
3295 SDValue Src = Op.getOperand(0);
3296
3297 // Create an integer the size of the mantissa with the MSB set. This and all
3298 // values larger than it don't have any fractional bits so don't need to be
3299 // converted.
3300 const fltSemantics &FltSem = VT.getFltSemantics();
3301 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3302 APFloat MaxVal = APFloat(FltSem);
3303 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3304 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3305 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3306
3308 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3309 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3310}
3311
3312 // Expand vector LRINT and LLRINT by converting to the integer domain.
3314                               const RISCVSubtarget &Subtarget) {
3315   MVT VT = Op.getSimpleValueType();
3316   assert(VT.isVector() && "Unexpected type");
3317 
3318   SDLoc DL(Op);
3319   SDValue Src = Op.getOperand(0);
3320   MVT ContainerVT = VT;
3321 
     // Fixed-length vectors are processed in an equivalent scalable container
     // type and converted back at the end.
3322   if (VT.isFixedLengthVector()) {
3323     ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3324     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3325   }
3326 
3327   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
     // VL-predicated FP->int conversion carrying an explicit rounding-mode
     // operand (the operand between Mask and VL is on an elided line —
     // presumably the dynamic FRM immediate; confirm against full source).
3328   SDValue Truncated = DAG.getNode(
3329       RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
3331       VL);
3332 
3333   if (!VT.isFixedLengthVector())
3334     return Truncated;
3335 
3336   return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3337 }
3338
// Build a RISCVISD::VSLIDEDOWN_VL node: {Passthru, Op, Offset, Mask, VL,
// Policy}. NOTE(review): when Passthru is undef, the elided line between
// the isUndef check and the PolicyOp creation appears to override Policy
// (likely to tail/mask-agnostic) — confirm against the full source.
3339 static SDValue
3341             const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3342             SDValue Offset, SDValue Mask, SDValue VL,
3344   if (Passthru.isUndef())
3346   SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3347   SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3348   return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3349 }
3350
// Build a RISCVISD::VSLIDEUP_VL node: {Passthru, Op, Offset, Mask, VL,
// Policy}. Mirrors getVSlidedown above; the same undef-passthru policy
// override applies (elided line — confirm against the full source).
3351 static SDValue
3352 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3353             EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3354             SDValue VL,
3356   if (Passthru.isUndef())
3358   SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3359   SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3360   return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3361 }
3362
// Return the LMUL=1 vector MVT corresponding to VT. NOTE(review): the
// assertion text and computation lines are elided in this view; confirm the
// exact construction against the full source.
3363 static MVT getLMUL1VT(MVT VT) {
3365          "Unexpected vector MVT");
3369 }
3370
  // Constant offset added to every element of the matched index sequence;
  // isSimpleVIDSequence below stores its SeqAddend here.
3374   int64_t Addend;
3375 };
3376
// If APF is exactly representable as a signed integer of the given bit width
// (the elided second parameter), return its bit pattern; otherwise return
// std::nullopt.
3377 static std::optional<APInt> getExactInteger(const APFloat &APF,
3379   // We will use a SINT_TO_FP to materialize this constant so we should use a
3380   // signed APSInt here.
3381   APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3382   // We use an arbitrary rounding mode here. If a floating-point is an exact
3383   // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3384   // the rounding mode changes the output value, then it is not an exact
3385   // integer.
3387   bool IsExact;
3388   // If it is out of signed integer range, it will return an invalid operation.
3389   // If it is not an exact integer, IsExact is false.
3390   if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3392        !IsExact)
3393     return std::nullopt;
     // Strip the APSInt signedness wrapper and return the raw bits.
3394   return ValInt.extractBits(BitWidth, 0);
3395 }
3396
3397 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3398 // to the (non-zero) step S and start value X. This can be then lowered as the
3399 // RVV sequence (VID * S) + X, for example.
3400 // The step S is represented as an integer numerator divided by a positive
3401 // denominator. Note that the implementation currently only identifies
3402 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
3403 // cannot detect 2/3, for example.
3404 // Note that this method will also match potentially unappealing index
3405 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3406 // determine whether this is worth generating code for.
3407 //
3408 // EltSizeInBits is the size of the type that the sequence will be calculated
3409 // in, i.e. SEW for build_vectors or XLEN for address calculations.
3410 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3411                                                       unsigned EltSizeInBits) {
3412   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3413   if (!cast<BuildVectorSDNode>(Op)->isConstant())
3414     return std::nullopt;
3415   bool IsInteger = Op.getValueType().isInteger();
3416 
3417   std::optional<unsigned> SeqStepDenom;
3418   std::optional<APInt> SeqStepNum;
3419   std::optional<APInt> SeqAddend;
3420   std::optional<std::pair<APInt, unsigned>> PrevElt;
3421   assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3422 
3423   // First extract the ops into a list of constant integer values. This may not
3424   // be possible for floats if they're not all representable as integers.
     // (The Elts container declaration is on an elided line; each slot holds
     // std::optional<APInt>, std::nullopt for undef elements.)
3426   const unsigned OpSize = Op.getScalarValueSizeInBits();
3427   for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3428     if (Elt.isUndef()) {
3429       Elts[Idx] = std::nullopt;
3430       continue;
3431     }
3432     if (IsInteger) {
       // Truncate to the source element width, then zero-extend to the width
       // the sequence is evaluated in.
3433       Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3434     } else {
3435       auto ExactInteger =
3436           getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3437       if (!ExactInteger)
3438         return std::nullopt;
3439       Elts[Idx] = *ExactInteger;
3440     }
3441   }
3442 
3443   for (auto [Idx, Elt] : enumerate(Elts)) {
3444     // Assume undef elements match the sequence; we just have to be careful
3445     // when interpolating across them.
3446     if (!Elt)
3447       continue;
3448 
3449     if (PrevElt) {
3450       // Calculate the step since the last non-undef element, and ensure
3451       // it's consistent across the entire sequence.
3452       unsigned IdxDiff = Idx - PrevElt->second;
3453       APInt ValDiff = *Elt - PrevElt->first;
3454 
3455       // A zero-value value difference means that we're somewhere in the middle
3456       // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3457       // step change before evaluating the sequence.
3458       if (ValDiff == 0)
3459         continue;
3460 
3461       int64_t Remainder = ValDiff.srem(IdxDiff);
3462       // Normalize the step if it's greater than 1.
3463       if (Remainder != ValDiff.getSExtValue()) {
3464         // The difference must cleanly divide the element span.
3465         if (Remainder != 0)
3466           return std::nullopt;
3467         ValDiff = ValDiff.sdiv(IdxDiff);
3468         IdxDiff = 1;
3469       }
3470 
3471       if (!SeqStepNum)
3472         SeqStepNum = ValDiff;
3473       else if (ValDiff != SeqStepNum)
3474         return std::nullopt;
3475 
3476       if (!SeqStepDenom)
3477         SeqStepDenom = IdxDiff;
3478       else if (IdxDiff != *SeqStepDenom)
3479         return std::nullopt;
3480     }
3481 
3482     // Record this non-undef element for later.
3483     if (!PrevElt || PrevElt->first != *Elt)
3484       PrevElt = std::make_pair(*Elt, Idx);
3485   }
3486 
3487   // We need to have logged a step for this to count as a legal index sequence.
3488   if (!SeqStepNum || !SeqStepDenom)
3489     return std::nullopt;
3490 
3491   // Loop back through the sequence and validate elements we might have skipped
3492   // while waiting for a valid step. While doing this, log any sequence addend.
3493   for (auto [Idx, Elt] : enumerate(Elts)) {
3494     if (!Elt)
3495       continue;
3496     APInt ExpectedVal =
3497         (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3498          *SeqStepNum)
3499             .sdiv(*SeqStepDenom);
3500 
3501     APInt Addend = *Elt - ExpectedVal;
3502     if (!SeqAddend)
3503       SeqAddend = Addend;
3504     else if (Addend != SeqAddend)
3505       return std::nullopt;
3506   }
3507 
3508   assert(SeqAddend && "Must have an addend if we have a step");
3509 
3510   return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3511                      SeqAddend->getSExtValue()};
3512 }
3513
3514// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3515// and lower it as a VRGATHER_VX_VL from the source vector.
3516static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3517 SelectionDAG &DAG,
3518 const RISCVSubtarget &Subtarget) {
3519 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3520 return SDValue();
3521 SDValue Src = SplatVal.getOperand(0);
3522 // Don't perform this optimization for i1 vectors, or if the element types are
3523 // different
3524 // FIXME: Support i1 vectors, maybe by promoting to i8?
3525 MVT EltTy = VT.getVectorElementType();
3526 MVT SrcVT = Src.getSimpleValueType();
3527 if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType())
3528 return SDValue();
3529 SDValue Idx = SplatVal.getOperand(1);
3530 // The index must be a legal type.
3531 if (Idx.getValueType() != Subtarget.getXLenVT())
3532 return SDValue();
3533
3534 // Check that we know Idx lies within VT
3535 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3536 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3537 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3538 return SDValue();
3539 }
3540
3541 // Convert fixed length vectors to scalable
3542 MVT ContainerVT = VT;
3543 if (VT.isFixedLengthVector())
3544 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3545
3546 MVT SrcContainerVT = SrcVT;
3547 if (SrcVT.isFixedLengthVector()) {
3548 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3549 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3550 }
3551
3552 // Put Vec in a VT sized vector
3553 if (SrcContainerVT.getVectorMinNumElements() <
3554 ContainerVT.getVectorMinNumElements())
3555 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3556 DAG.getUNDEF(ContainerVT), Src,
3557 DAG.getVectorIdxConstant(0, DL));
3558 else
3559 Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Src,
3560 DAG.getVectorIdxConstant(0, DL));
3561
3562 // We checked that Idx fits inside VT earlier
3563 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3564 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3565 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3566 if (VT.isFixedLengthVector())
3567 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3568 return Gather;
3569}
3570
3571 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3572 /// which constitute a large proportion of the elements. In such cases we can
3573 /// splat a vector with the dominant element and make up the shortfall with
3574 /// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3575 /// Note that this includes vectors of 2 elements by association. The
3576 /// upper-most element is the "dominant" one, allowing us to use a splat to
3577 /// "insert" the upper element, and an insert of the lower element at position
3578 /// 0, which improves codegen.
3580                                                  const RISCVSubtarget &Subtarget) {
3581   MVT VT = Op.getSimpleValueType();
3582   assert(VT.isFixedLengthVector() && "Unexpected vector!");
3583 
3584   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3585 
3586   SDLoc DL(Op);
3587   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3588 
3589   MVT XLenVT = Subtarget.getXLenVT();
3590   unsigned NumElts = Op.getNumOperands();
3591 
3592   SDValue DominantValue;
3593   unsigned MostCommonCount = 0;
3594   DenseMap<SDValue, unsigned> ValueCounts;
3595   unsigned NumUndefElts =
3596       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3597 
3598   // Track the number of scalar loads we know we'd be inserting, estimated as
3599   // any non-zero floating-point constant. Other kinds of element are either
3600   // already in registers or are materialized on demand. The threshold at which
3601   // a vector load is more desirable than several scalar materializion and
3602   // vector-insertion instructions is not known.
3603   unsigned NumScalarLoads = 0;
3604 
     // One pass to count occurrences of each distinct defined value and find
     // the dominant one.
3605   for (SDValue V : Op->op_values()) {
3606     if (V.isUndef())
3607       continue;
3608 
3609     unsigned &Count = ValueCounts[V];
3610     if (0 == Count)
3611       if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3612         NumScalarLoads += !CFP->isExactlyValue(+0.0);
3613 
3614     // Is this value dominant? In case of a tie, prefer the highest element as
3615     // it's cheaper to insert near the beginning of a vector than it is at the
3616     // end.
3617     if (++Count >= MostCommonCount) {
3618       DominantValue = V;
3619       MostCommonCount = Count;
3620     }
3621   }
3622 
3623   assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3624   unsigned NumDefElts = NumElts - NumUndefElts;
3625   unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3626 
3627   // Don't perform this optimization when optimizing for size, since
3628   // materializing elements and inserting them tends to cause code bloat.
3629   if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3630       (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3631       ((MostCommonCount > DominantValueCountThreshold) ||
3632        (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3633     // Start by splatting the most common element.
3634     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3635 
3636     DenseSet<SDValue> Processed{DominantValue};
3637 
3638     // We can handle an insert into the last element (of a splat) via
3639     // v(f)slide1down. This is slightly better than the vslideup insert
3640     // lowering as it avoids the need for a vector group temporary. It
3641     // is also better than using vmerge.vx as it avoids the need to
3642     // materialize the mask in a vector register.
3643     if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3644         !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3645         LastOp != DominantValue) {
3646       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
       // The opcode chosen on the elided line selects between the FP and
       // integer vslide1down variants based on the element type.
3647       auto OpCode =
3649       if (!VT.isFloatingPoint())
3650         LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3651       Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3652                         LastOp, Mask, VL);
3653       Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3654       Processed.insert(LastOp);
3655     }
3656 
3657     MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3658     for (const auto &OpIdx : enumerate(Op->ops())) {
3659       const SDValue &V = OpIdx.value();
3660       if (V.isUndef() || !Processed.insert(V).second)
3661         continue;
3662       if (ValueCounts[V] == 1) {
3663         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3664                           DAG.getVectorIdxConstant(OpIdx.index(), DL));
3665       } else {
3666         // Blend in all instances of this value using a VSELECT, using a
3667         // mask where each bit signals whether that element is the one
3668         // we're after.
3670         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3671           return DAG.getConstant(V == V1, DL, XLenVT);
3672         });
3673         Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3674                           DAG.getBuildVector(SelMaskTy, DL, Ops),
3675                           DAG.getSplatBuildVector(VT, DL, V), Vec);
3676       }
3677     }
3678 
3679     return Vec;
3680   }
3681 
3682   return SDValue();
3683 }
3684
// Lower a fixed-length all-constant BUILD_VECTOR (lowerBuildVectorOfConstants;
// signature line elided in this view — it is called with (Op, DAG, Subtarget)
// from lowerBUILD_VECTOR below). Strategies are tried in order:
//  1. i1 masks: vmclr/vmset for all-zeros/all-ones, else build via scalar
//     integer chunks.
//  2. Splats via vmv.v.x / vfmv.v.f.
//  3. Arithmetic index sequences via vid.v plus scalar adjustments.
//  4. Very small vectors packed into a single scalar insert.
//  5. "Hidden" splats at a wider element type.
//  6. Narrowing to <N x i8> + sign-extend when sign bits allow.
//  7. Dominant-value splat + inserts.
// Returning SDValue() falls back to generic constant-pool lowering.
3686                                         const RISCVSubtarget &Subtarget) {
3687   MVT VT = Op.getSimpleValueType();
3688   assert(VT.isFixedLengthVector() && "Unexpected vector!");
3689 
3690   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3691 
3692   SDLoc DL(Op);
3693   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3694 
3695   MVT XLenVT = Subtarget.getXLenVT();
3696   unsigned NumElts = Op.getNumOperands();
3697 
3698   if (VT.getVectorElementType() == MVT::i1) {
3699     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3700       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3701       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3702     }
3703 
3704     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3705       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3706       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3707     }
3708 
3709     // Lower constant mask BUILD_VECTORs via an integer vector type, in
3710     // scalar integer chunks whose bit-width depends on the number of mask
3711     // bits and XLEN.
3712     // First, determine the most appropriate scalar integer type to use. This
3713     // is at most XLenVT, but may be shrunk to a smaller vector element type
3714     // according to the size of the final vector - use i8 chunks rather than
3715     // XLenVT if we're producing a v8i1. This results in more consistent
3716     // codegen across RV32 and RV64.
3717     unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3718     NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3719     // If we have to use more than one INSERT_VECTOR_ELT then this
3720     // optimization is likely to increase code size; avoid peforming it in
3721     // such a case. We can use a load from a constant pool in this case.
3722     if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3723       return SDValue();
3724     // Now we can create our integer vector type. Note that it may be larger
3725     // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3726     unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3727     MVT IntegerViaVecVT =
3728       MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3729                        IntegerViaVecElts);
3730 
3731     uint64_t Bits = 0;
3732     unsigned BitPos = 0, IntegerEltIdx = 0;
3733     SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3734 
       // Accumulate the mask bits, one scalar chunk at a time; undef mask
       // elements are treated as 0.
3735     for (unsigned I = 0; I < NumElts;) {
3736       SDValue V = Op.getOperand(I);
3737       bool BitValue = !V.isUndef() && V->getAsZExtVal();
3738       Bits |= ((uint64_t)BitValue << BitPos);
3739       ++BitPos;
3740       ++I;
3741 
3742       // Once we accumulate enough bits to fill our scalar type or process the
3743       // last element, insert into our vector and clear our accumulated data.
3744       if (I % NumViaIntegerBits == 0 || I == NumElts) {
3745         if (NumViaIntegerBits <= 32)
3746           Bits = SignExtend64<32>(Bits);
3747         SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3748         Elts[IntegerEltIdx] = Elt;
3749         Bits = 0;
3750         BitPos = 0;
3751         IntegerEltIdx++;
3752       }
3753     }
3754 
3755     SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3756 
3757     if (NumElts < NumViaIntegerBits) {
3758       // If we're producing a smaller vector than our minimum legal integer
3759       // type, bitcast to the equivalent (known-legal) mask type, and extract
3760       // our final mask.
3761       assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3762       Vec = DAG.getBitcast(MVT::v8i1, Vec);
3763       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3764                         DAG.getConstant(0, DL, XLenVT));
3765     } else {
3766       // Else we must have produced an integer type with the same size as the
3767       // mask type; bitcast for the final result.
3768       assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3769       Vec = DAG.getBitcast(VT, Vec);
3770     }
3771 
3772     return Vec;
3773   }
3774 
     // Constant splats: vfmv.v.f for FP, vmv.v.x (elided alternative on the
     // next line) for integers, any-extending the scalar to XLEN first.
3775   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3776     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3778     if (!VT.isFloatingPoint())
3779       Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3780     Splat =
3781         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3782     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3783   }
3784 
3785   // Try and match index sequences, which we can lower to the vid instruction
3786   // with optional modifications. An all-undef vector is matched by
3787   // getSplatValue, above.
3788   if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3789     int64_t StepNumerator = SimpleVID->StepNumerator;
3790     unsigned StepDenominator = SimpleVID->StepDenominator;
3791     int64_t Addend = SimpleVID->Addend;
3792 
3793     assert(StepNumerator != 0 && "Invalid step");
3794     bool Negate = false;
3795     int64_t SplatStepVal = StepNumerator;
3796     unsigned StepOpcode = ISD::MUL;
3797     // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3798     // anyway as the shift of 63 won't fit in uimm5.
3799     if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3800         isPowerOf2_64(std::abs(StepNumerator))) {
3801       Negate = StepNumerator < 0;
3802       StepOpcode = ISD::SHL;
3803       SplatStepVal = Log2_64(std::abs(StepNumerator));
3804     }
3805 
3806     // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
3807     // threshold since it's the immediate value many RVV instructions accept.
3808     // There is no vmul.vi instruction so ensure multiply constant can fit in
3809     // a single addi instruction.
3810     if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3811          (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3812         isPowerOf2_32(StepDenominator) &&
3813         (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
       // VIDVT (initializer elided) is the integer type the vid computation
       // is performed in before any FP conversion at the end.
3814       MVT VIDVT =
3816       MVT VIDContainerVT =
3817           getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3818       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3819       // Convert right out of the scalable type so we can use standard ISD
3820       // nodes for the rest of the computation. If we used scalable types with
3821       // these, we'd lose the fixed-length vector info and generate worse
3822       // vsetvli code.
3823       VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3824       if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3825           (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3826         SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3827         VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3828       }
3829       if (StepDenominator != 1) {
3830         SDValue SplatStep =
3831             DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3832         VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3833       }
3834       if (Addend != 0 || Negate) {
3835         SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3836         VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3837                           VID);
3838       }
3839       if (VT.isFloatingPoint()) {
3840         // TODO: Use vfwcvt to reduce register pressure.
3841         VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3842       }
3843       return VID;
3844     }
3845   }
3846 
3847   // For very small build_vectors, use a single scalar insert of a constant.
3848   // TODO: Base this on constant rematerialization cost, not size.
3849   const unsigned EltBitSize = VT.getScalarSizeInBits();
     // (The second half of this condition is on an elided line.)
3850   if (VT.getSizeInBits() <= 32 &&
3852     MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3853     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3854            "Unexpected sequence type");
3855     // If we can use the original VL with the modified element type, this
3856     // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3857     // be moved into InsertVSETVLI?
3858     unsigned ViaVecLen =
3859         (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3860     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3861 
3862     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3863     uint64_t SplatValue = 0;
3864     // Construct the amalgamated value at this larger vector type.
3865     for (const auto &OpIdx : enumerate(Op->op_values())) {
3866       const auto &SeqV = OpIdx.value();
3867       if (!SeqV.isUndef())
3868         SplatValue |=
3869             ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3870     }
3871 
3872     // On RV64, sign-extend from 32 to 64 bits where possible in order to
3873     // achieve better constant materializion.
3874     // On RV32, we need to sign-extend to use getSignedConstant.
3875     if (ViaIntVT == MVT::i32)
3876       SplatValue = SignExtend64<32>(SplatValue);
3877 
3878     SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3879                               DAG.getUNDEF(ViaVecVT),
3880                               DAG.getSignedConstant(SplatValue, DL, XLenVT),
3881                               DAG.getVectorIdxConstant(0, DL));
3882     if (ViaVecLen != 1)
3884                         MVT::getVectorVT(ViaIntVT, 1), Vec,
3885                         DAG.getConstant(0, DL, XLenVT));
3886     return DAG.getBitcast(VT, Vec);
3887   }
3888 
3889 
3890   // Attempt to detect "hidden" splats, which only reveal themselves as splats
3891   // when re-interpreted as a vector with a larger element type. For example,
3892   // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3893   // could be instead splat as
3894   // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3895   // TODO: This optimization could also work on non-constant splats, but it
3896   // would require bit-manipulation instructions to construct the splat value.
3897   SmallVector<SDValue> Sequence;
3898   const auto *BV = cast<BuildVectorSDNode>(Op);
3899   if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3901       BV->getRepeatedSequence(Sequence) &&
3902       (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3903     unsigned SeqLen = Sequence.size();
3904     MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3905     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3906             ViaIntVT == MVT::i64) &&
3907            "Unexpected sequence type");
3908 
3909     // If we can use the original VL with the modified element type, this
3910     // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3911     // be moved into InsertVSETVLI?
3912     const unsigned RequiredVL = NumElts / SeqLen;
3913     const unsigned ViaVecLen =
3914         (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3915         NumElts : RequiredVL;
3916     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3917 
3918     unsigned EltIdx = 0;
3919     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3920     uint64_t SplatValue = 0;
3921     // Construct the amalgamated value which can be splatted as this larger
3922     // vector type.
3923     for (const auto &SeqV : Sequence) {
3924       if (!SeqV.isUndef())
3925         SplatValue |=
3926             ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3927       EltIdx++;
3928     }
3929 
3930     // On RV64, sign-extend from 32 to 64 bits where possible in order to
3931     // achieve better constant materializion.
3932     // On RV32, we need to sign-extend to use getSignedConstant.
3933     if (ViaIntVT == MVT::i32)
3934       SplatValue = SignExtend64<32>(SplatValue);
3935 
3936     // Since we can't introduce illegal i64 types at this stage, we can only
3937     // perform an i64 splat on RV32 if it is its own sign-extended value. That
3938     // way we can use RVV instructions to splat.
3939     assert((ViaIntVT.bitsLE(XLenVT) ||
3940             (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3941            "Unexpected bitcast sequence");
3942     if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3943       SDValue ViaVL =
3944           DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3945       MVT ViaContainerVT =
3946           getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3947       SDValue Splat =
3948           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3949                       DAG.getUNDEF(ViaContainerVT),
3950                       DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3951       Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3952       if (ViaVecLen != RequiredVL)
3954                             MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3955                             DAG.getConstant(0, DL, XLenVT));
3956       return DAG.getBitcast(VT, Splat);
3957     }
3958   }
3959 
3960   // If the number of signbits allows, see if we can lower as a <N x i8>.
3961   // Our main goal here is to reduce LMUL (and thus work) required to
3962   // build the constant, but we will also narrow if the resulting
3963   // narrow vector is known to materialize cheaply.
3964   // TODO: We really should be costing the smaller vector. There are
3965   // profitable cases this misses.
3966   if (EltBitSize > 8 && VT.isInteger() &&
3967       (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3968       DAG.ComputeMaxSignificantBits(Op) <= 8) {
3969     SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3970                                         DL, Op->ops());
3971     Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3972                                      Source, DAG, Subtarget);
3973     SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3974     return convertFromScalableVector(VT, Res, DAG, Subtarget);
3975   }
3976 
3977   if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3978     return Res;
3979 
3980   // For constant vectors, use generic constant pool lowering. Otherwise,
3981   // we'd have to materialize constants in GPRs just to move them into the
3982   // vector.
3983   return SDValue();
3984 }
3985
3986static unsigned getPACKOpcode(unsigned DestBW,
3987 const RISCVSubtarget &Subtarget) {
3988 switch (DestBW) {
3989 default:
3990 llvm_unreachable("Unsupported pack size");
3991 case 16:
3992 return RISCV::PACKH;
3993 case 32:
3994 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3995 case 64:
3996 assert(Subtarget.is64Bit());
3997 return RISCV::PACK;
3998 }
3999}
4000
4001 /// Double the element size of the build vector to reduce the number
4002 /// of vslide1down in the build vector chain. In the worst case, this
4003 /// trades three scalar operations for 1 vector operation. Scalar
4004 /// operations are generally lower latency, and for out-of-order cores
4005 /// we also benefit from additional parallelism.
4007                                          const RISCVSubtarget &Subtarget) {
4008   SDLoc DL(Op);
4009   MVT VT = Op.getSimpleValueType();
4010   assert(VT.isFixedLengthVector() && "Unexpected vector!");
4011   MVT ElemVT = VT.getVectorElementType();
     // Only integer elements can be packed with shifts/masks or pack insns.
4012   if (!ElemVT.isInteger())
4013     return SDValue();
4014 
4015   // TODO: Relax these architectural restrictions, possibly with costing
4016   // of the actual instructions required.
4017   if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4018     return SDValue();
4019 
4020   unsigned NumElts = VT.getVectorNumElements();
4021   unsigned ElemSizeInBits = ElemVT.getSizeInBits();
     // The doubled element must still fit in both a scalar register and a
     // legal vector element, and we need an even element count to pair up.
4022   if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4023       NumElts % 2 != 0)
4024     return SDValue();
4025 
4026   // Produce [B,A] packed into a type twice as wide. Note that all
4027   // scalars are XLenVT, possibly masked (see below).
4028   MVT XLenVT = Subtarget.getXLenVT();
4029   SDValue Mask = DAG.getConstant(
4030       APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4031   auto pack = [&](SDValue A, SDValue B) {
4032     // Bias the scheduling of the inserted operations to near the
4033     // definition of the element - this tends to reduce register
4034     // pressure overall.
4035     SDLoc ElemDL(B);
4036     if (Subtarget.hasStdExtZbkb())
4037       // Note that we're relying on the high bits of the result being
4038       // don't care. For PACKW, the result is *sign* extended.
4039       return SDValue(
4040           DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4041                              ElemDL, XLenVT, A, B),
4042           0);
4043 
     // Without Zbkb, emit (A & Mask) | ((B & Mask) << ElemSizeInBits). The
     // final getNode argument is on an elided line.
4044     A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4045     B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4046     SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4047     return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4048                        DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4050   };
4051 
4052   SmallVector<SDValue> NewOperands;
4053   NewOperands.reserve(NumElts / 2);
4054   for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4055     NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4056   assert(NumElts == NewOperands.size() * 2);
4057   MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4058   MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
     // Build the half-length, double-width vector and reinterpret it as the
     // original type.
4059   return DAG.getNode(ISD::BITCAST, DL, VT,
4060                      DAG.getBuildVector(WideVecVT, DL, NewOperands));
4061 }
4062
4064 const RISCVSubtarget &Subtarget) {
4065 MVT VT = Op.getSimpleValueType();
4066 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4067
4068 MVT EltVT = VT.getVectorElementType();
4069 MVT XLenVT = Subtarget.getXLenVT();
4070
4071 SDLoc DL(Op);
4072
4073 // Proper support for f16 requires Zvfh. bf16 always requires special
4074 // handling. We need to cast the scalar to integer and create an integer
4075 // build_vector.
4076 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4077 MVT IVT = VT.changeVectorElementType(MVT::i16);
4079 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4080 SDValue Elem = Op.getOperand(I);
4081 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4082 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4083 // Called by LegalizeDAG, we need to use XLenVT operations since we
4084 // can't create illegal types.
4085 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4086 // Manually constant fold so the integer build_vector can be lowered
4087 // better. Waiting for DAGCombine will be too late.
4088 APInt V =
4089 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4090 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4091 } else {
4092 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4093 }
4094 } else {
4095 // Called by scalar type legalizer, we can use i16.
4096 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4097 }
4098 }
4099 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4100 return DAG.getBitcast(VT, Res);
4101 }
4102
4103 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4105 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4106
4107 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4108
4109 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4110
4111 if (VT.getVectorElementType() == MVT::i1) {
4112 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4113 // vector type, we have a legal equivalently-sized i8 type, so we can use
4114 // that.
4115 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4116 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4117
4118 SDValue WideVec;
4119 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4120 // For a splat, perform a scalar truncate before creating the wider
4121 // vector.
4122 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4123 DAG.getConstant(1, DL, Splat.getValueType()));
4124 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4125 } else {
4126 SmallVector<SDValue, 8> Ops(Op->op_values());
4127 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4128 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4129 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4130 }
4131
4132 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4133 }
4134
4135 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4136 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4137 return Gather;
4138 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4140 if (!VT.isFloatingPoint())
4141 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4142 Splat =
4143 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4144 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4145 }
4146
4147 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4148 return Res;
4149
4150 // If we're compiling for an exact VLEN value, we can split our work per
4151 // register in the register group.
4152 if (const auto VLen = Subtarget.getRealVLen();
4153 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4154 MVT ElemVT = VT.getVectorElementType();
4155 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4156 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4157 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4158 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4159 assert(M1VT == getLMUL1VT(M1VT));
4160
4161 // The following semantically builds up a fixed length concat_vector
4162 // of the component build_vectors. We eagerly lower to scalable and
4163 // insert_subvector here to avoid DAG combining it back to a large
4164 // build_vector.
4165 SmallVector<SDValue> BuildVectorOps(Op->ops());
4166 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4167 SDValue Vec = DAG.getUNDEF(ContainerVT);
4168 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4169 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4170 SDValue SubBV =
4171 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4172 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4173 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4174 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4175 DAG.getVectorIdxConstant(InsertIdx, DL));
4176 }
4177 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4178 }
4179
4180 // If we're about to resort to vslide1down (or stack usage), pack our
4181 // elements into the widest scalar type we can. This will force a VL/VTYPE
4182 // toggle, but reduces the critical path, the number of vslide1down ops
4183 // required, and possibly enables scalar folds of the values.
4184 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4185 return Res;
4186
4187 // For m1 vectors, if we have non-undef values in both halves of our vector,
4188 // split the vector into low and high halves, build them separately, then
4189 // use a vselect to combine them. For long vectors, this cuts the critical
4190 // path of the vslide1down sequence in half, and gives us an opportunity
4191 // to special case each half independently. Note that we don't change the
4192 // length of the sub-vectors here, so if both fallback to the generic
4193 // vslide1down path, we should be able to fold the vselect into the final
4194 // vslidedown (for the undef tail) for the first half w/ masking.
4195 unsigned NumElts = VT.getVectorNumElements();
4196 unsigned NumUndefElts =
4197 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4198 unsigned NumDefElts = NumElts - NumUndefElts;
4199 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4200 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4201 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4202 SmallVector<SDValue> MaskVals;
4203 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4204 SubVecAOps.reserve(NumElts);
4205 SubVecBOps.reserve(NumElts);
4206 for (unsigned i = 0; i < NumElts; i++) {
4207 SDValue Elem = Op->getOperand(i);
4208 if (i < NumElts / 2) {
4209 SubVecAOps.push_back(Elem);
4210 SubVecBOps.push_back(UndefElem);
4211 } else {
4212 SubVecAOps.push_back(UndefElem);
4213 SubVecBOps.push_back(Elem);
4214 }
4215 bool SelectMaskVal = (i < NumElts / 2);
4216 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4217 }
4218 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4219 MaskVals.size() == NumElts);
4220
4221 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4222 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4223 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4224 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4225 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4226 }
4227
4228 // Cap the cost at a value linear to the number of elements in the vector.
4229 // The default lowering is to use the stack. The vector store + scalar loads
4230 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4231 // being (at least) linear in LMUL. As a result, using the vslidedown
4232 // lowering for every element ends up being VL*LMUL..
4233 // TODO: Should we be directly costing the stack alternative? Doing so might
4234 // give us a more accurate upper bound.
4235 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4236
4237 // TODO: unify with TTI getSlideCost.
4238 InstructionCost PerSlideCost = 1;
4239 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4240 default: break;
4242 PerSlideCost = 2;
4243 break;
4245 PerSlideCost = 4;
4246 break;
4248 PerSlideCost = 8;
4249 break;
4250 }
4251
4252 // TODO: Should we be using the build instseq then cost + evaluate scheme
4253 // we use for integer constants here?
4254 unsigned UndefCount = 0;
4255 for (const SDValue &V : Op->ops()) {
4256 if (V.isUndef()) {
4257 UndefCount++;
4258 continue;
4259 }
4260 if (UndefCount) {
4261 LinearBudget -= PerSlideCost;
4262 UndefCount = 0;
4263 }
4264 LinearBudget -= PerSlideCost;
4265 }
4266 if (UndefCount) {
4267 LinearBudget -= PerSlideCost;
4268 }
4269
4270 if (LinearBudget < 0)
4271 return SDValue();
4272
4273 assert((!VT.isFloatingPoint() ||
4274 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4275 "Illegal type which will result in reserved encoding");
4276
4277 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4278
4279 SDValue Vec;
4280 UndefCount = 0;
4281 for (SDValue V : Op->ops()) {
4282 if (V.isUndef()) {
4283 UndefCount++;
4284 continue;
4285 }
4286
4287 // Start our sequence with a TA splat in the hopes that hardware is able to
4288 // recognize there's no dependency on the prior value of our temporary
4289 // register.
4290 if (!Vec) {
4291 Vec = DAG.getSplatVector(VT, DL, V);
4292 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4293 UndefCount = 0;
4294 continue;
4295 }
4296
4297 if (UndefCount) {
4298 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4299 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4300 Vec, Offset, Mask, VL, Policy);
4301 UndefCount = 0;
4302 }
4303 auto OpCode =
4305 if (!VT.isFloatingPoint())
4306 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4307 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4308 V, Mask, VL);
4309 }
4310 if (UndefCount) {
4311 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4312 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4313 Vec, Offset, Mask, VL, Policy);
4314 }
4315 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4316}
4317
// Lower a splat of the 64-bit scalar value (Hi:Lo) into vector VT with vector
// length VL on RV32. Tries constant and sign-extension tricks so a single
// vmv.v.x suffices before falling back to a stack store + strided load.
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
                                   SDValue Lo, SDValue Hi, SDValue VL,
                                   SelectionDAG &DAG) {
  if (!Passthru)
    Passthru = DAG.getUNDEF(VT);
  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
    int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
    int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
    // If Hi constant is all the same sign bit as Lo, lower this as a custom
    // node in order to try and match RVV vector/scalar instructions.
    if ((LoC >> 31) == HiC)
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

    // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
    // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
    // vlmax vsetvli or vsetivli to change the VL.
    // FIXME: Support larger constants?
    // FIXME: Support non-constant VLs by saturating?
    if (LoC == HiC) {
      SDValue NewVL;
      // VLMAX is encoded either as an all-ones immediate or as register X0.
      if (isAllOnesConstant(VL) ||
          (isa<RegisterSDNode>(VL) &&
           cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
        NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
      else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
        // The EEW=32 vector has twice as many elements, so double VL. The
        // doubled value still fits the 5-bit vsetivli immediate (VL < 16).
        NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);

      if (NewVL) {
        MVT InterVT =
            MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
        auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
                                    DAG.getUNDEF(InterVT), Lo, NewVL);
        return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
      }
    }
  }

  // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
  if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
      isa<ConstantSDNode>(Hi.getOperand(1)) &&
      Hi.getConstantOperandVal(1) == 31)
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

  // If the hi bits of the splat are undefined, then it's fine to just splat Lo
  // even if it might be sign extended.
  if (Hi.isUndef())
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

  // Fall back to a stack store and stride x0 vector load.
  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
                     Hi, VL);
}
4370
4371// Called by type legalization to handle splat of i64 on RV32.
4372// FIXME: We can optimize this when the type has sign or zero bits in one
4373// of the halves.
4374static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4375 SDValue Scalar, SDValue VL,
4376 SelectionDAG &DAG) {
4377 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4378 SDValue Lo, Hi;
4379 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4380 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4381}
4382
4383// This function lowers a splat of a scalar operand Splat with the vector
4384// length VL. It ensures the final sequence is type legal, which is useful when
4385// lowering a splat after type legalization.
4386static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4387 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4388 const RISCVSubtarget &Subtarget) {
4389 bool HasPassthru = Passthru && !Passthru.isUndef();
4390 if (!HasPassthru && !Passthru)
4391 Passthru = DAG.getUNDEF(VT);
4392
4393 MVT EltVT = VT.getVectorElementType();
4394 MVT XLenVT = Subtarget.getXLenVT();
4395
4396 if (VT.isFloatingPoint()) {
4397 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4398 EltVT == MVT::bf16) {
4399 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4400 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4401 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4402 else
4403 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4404 MVT IVT = VT.changeVectorElementType(MVT::i16);
4405 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4406 SDValue Splat =
4407 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4408 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4409 }
4410 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4411 }
4412
4413 // Simplest case is that the operand needs to be promoted to XLenVT.
4414 if (Scalar.getValueType().bitsLE(XLenVT)) {
4415 // If the operand is a constant, sign extend to increase our chances
4416 // of being able to use a .vi instruction. ANY_EXTEND would become a
4417 // a zero extend and the simm5 check in isel would fail.
4418 // FIXME: Should we ignore the upper bits in isel instead?
4419 unsigned ExtOpc =
4420 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4421 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4422 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4423 }
4424
4425 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4426 "Unexpected scalar for splat lowering!");
4427
4428 if (isOneConstant(VL) && isNullConstant(Scalar))
4429 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4430 DAG.getConstant(0, DL, XLenVT), VL);
4431
4432 // Otherwise use the more complicated splatting algorithm.
4433 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4434}
4435
// This function lowers an insert of a scalar operand Scalar into lane
// 0 of the vector regardless of the value of VL. The contents of the
// remaining lanes of the result vector are unspecified. VL is assumed
// to be non-zero.
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
                                 const SDLoc &DL, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() && "Expect VT is scalable vector type.");

  const MVT XLenVT = Subtarget.getXLenVT();
  SDValue Passthru = DAG.getUNDEF(VT);

  // If the scalar was just extracted from lane 0 of another vector, try to
  // reuse that vector directly and skip the scalar move entirely.
  if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      isNullConstant(Scalar.getOperand(1))) {
    SDValue ExtractedVal = Scalar.getOperand(0);
    // The element types must be the same.
    if (ExtractedVal.getValueType().getVectorElementType() ==
        VT.getVectorElementType()) {
      MVT ExtractedVT = ExtractedVal.getSimpleValueType();
      MVT ExtractedContainerVT = ExtractedVT;
      // Bring a fixed-length source into its scalable container first.
      if (ExtractedContainerVT.isFixedLengthVector()) {
        ExtractedContainerVT = getContainerForFixedLengthVector(
            DAG, ExtractedContainerVT, Subtarget);
        ExtractedVal = convertToScalableVector(ExtractedContainerVT,
                                               ExtractedVal, DAG, Subtarget);
      }
      // Widen (insert into undef) or narrow (extract prefix) the source to VT.
      if (ExtractedContainerVT.bitsLE(VT))
        return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
                           ExtractedVal, DAG.getVectorIdxConstant(0, DL));
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
                         DAG.getVectorIdxConstant(0, DL));
    }
  }


  if (VT.isFloatingPoint())
    return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
                       DAG.getUNDEF(VT), Scalar, VL);

  // Avoid the tricky legalization cases by falling back to using the
  // splat code which already handles it gracefully.
  if (!Scalar.getValueType().bitsLE(XLenVT))
    return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
                            DAG.getConstant(1, DL, XLenVT),
                            VT, DL, DAG, Subtarget);

  // If the operand is a constant, sign extend to increase our chances
  // of being able to use a .vi instruction. ANY_EXTEND would become a
  // a zero extend and the simm5 check in isel would fail.
  // FIXME: Should we ignore the upper bits in isel instead?
  unsigned ExtOpc =
      isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
                     VL);
}
4492
// Can this shuffle be performed on exactly one (possibly larger) input?
// Returns the single source vector on success, or an empty SDValue.
static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
                                   SDValue V2) {

  // Trivially single-source if the second operand is undef, as long as we're
  // not already at the largest register group size.
  if (V2.isUndef() &&
      RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8)
    return V1;

  // Both input must be extracts.
  if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return SDValue();

  // Extracting from the same source.
  SDValue Src = V1.getOperand(0);
  if (Src != V2.getOperand(0))
    return SDValue();

  // Src needs to have twice the number of elements.
  unsigned NumElts = VT.getVectorNumElements();
  if (!Src.getValueType().isFixedLengthVector() ||
      Src.getValueType().getVectorNumElements() != (NumElts * 2))
    return SDValue();

  // The extracts must extract the two halves of the source.
  if (V1.getConstantOperandVal(1) != 0 ||
      V2.getConstantOperandVal(1) != NumElts)
    return SDValue();

  return Src;
}
4524
4525/// Is this shuffle interleaving contiguous elements from one vector into the
4526/// even elements and contiguous elements from another vector into the odd
4527/// elements. \p EvenSrc will contain the element that should be in the first
4528/// even element. \p OddSrc will contain the element that should be in the first
4529/// odd element. These can be the first element in a source or the element half
4530/// way through the source.
4531static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4532 int &OddSrc, const RISCVSubtarget &Subtarget) {
4533 // We need to be able to widen elements to the next larger integer type.
4534 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4535 return false;
4536
4537 int Size = Mask.size();
4538 int NumElts = VT.getVectorNumElements();
4539 assert(Size == (int)NumElts && "Unexpected mask size");
4540
4541 SmallVector<unsigned, 2> StartIndexes;
4542 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4543 return false;
4544
4545 EvenSrc = StartIndexes[0];
4546 OddSrc = StartIndexes[1];
4547
4548 // One source should be low half of first vector.
4549 if (EvenSrc != 0 && OddSrc != 0)
4550 return false;
4551
4552 // Subvectors will be subtracted from either at the start of the two input
4553 // vectors, or at the start and middle of the first vector if it's an unary
4554 // interleave.
4555 // In both cases, HalfNumElts will be extracted.
4556 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4557 // we'll create an illegal extract_subvector.
4558 // FIXME: We could support other values using a slidedown first.
4559 int HalfNumElts = NumElts / 2;
4560 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4561}
4562
4563/// Match shuffles that concatenate two vectors, rotate the concatenation,
4564/// and then extract the original number of elements from the rotated result.
4565/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4566/// returned rotation amount is for a rotate right, where elements move from
4567/// higher elements to lower elements. \p LoSrc indicates the first source
4568/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4569/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4570/// 0 or 1 if a rotation is found.
4571///
4572/// NOTE: We talk about rotate to the right which matches how bit shift and
4573/// rotate instructions are described where LSBs are on the right, but LLVM IR
4574/// and the table below write vectors with the lowest elements on the left.
4575static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4576 int Size = Mask.size();
4577
4578 // We need to detect various ways of spelling a rotation:
4579 // [11, 12, 13, 14, 15, 0, 1, 2]
4580 // [-1, 12, 13, 14, -1, -1, 1, -1]
4581 // [-1, -1, -1, -1, -1, -1, 1, 2]
4582 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4583 // [-1, 4, 5, 6, -1, -1, 9, -1]
4584 // [-1, 4, 5, 6, -1, -1, -1, -1]
4585 int Rotation = 0;
4586 LoSrc = -1;
4587 HiSrc = -1;
4588 for (int i = 0; i != Size; ++i) {
4589 int M = Mask[i];
4590 if (M < 0)
4591 continue;
4592
4593 // Determine where a rotate vector would have started.
4594 int StartIdx = i - (M % Size);
4595 // The identity rotation isn't interesting, stop.
4596 if (StartIdx == 0)
4597 return -1;
4598
4599 // If we found the tail of a vector the rotation must be the missing
4600 // front. If we found the head of a vector, it must be how much of the
4601 // head.
4602 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4603
4604 if (Rotation == 0)
4605 Rotation = CandidateRotation;
4606 else if (Rotation != CandidateRotation)
4607 // The rotations don't match, so we can't match this mask.
4608 return -1;
4609
4610 // Compute which value this mask is pointing at.
4611 int MaskSrc = M < Size ? 0 : 1;
4612
4613 // Compute which of the two target values this index should be assigned to.
4614 // This reflects whether the high elements are remaining or the low elemnts
4615 // are remaining.
4616 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4617
4618 // Either set up this value if we've not encountered it before, or check
4619 // that it remains consistent.
4620 if (TargetSrc < 0)
4621 TargetSrc = MaskSrc;
4622 else if (TargetSrc != MaskSrc)
4623 // This may be a rotation, but it pulls from the inputs in some
4624 // unsupported interleaving.
4625 return -1;
4626 }
4627
4628 // Check that we successfully analyzed the mask, and normalize the results.
4629 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4630 assert((LoSrc >= 0 || HiSrc >= 0) &&
4631 "Failed to find a rotated input vector!");
4632
4633 return Rotation;
4634}
4635
// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
// 2, 4, 8 and the integer type Factor-times larger than VT's
// element type must be a legal element type.
// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
//                          -> [p, q, r, s] (Factor=2, Index=1)
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
                                            SDValue Src, unsigned Factor,
                                            unsigned Index, SelectionDAG &DAG) {
  unsigned EltBits = VT.getScalarSizeInBits();
  ElementCount SrcEC = Src.getValueType().getVectorElementCount();
  // View the source as a vector of Factor-times-wider integers; each wide
  // element then holds one whole interleave group.
  MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
                                   SrcEC.divideCoefficientBy(Factor));
  MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
                               SrcEC.divideCoefficientBy(Factor));
  Src = DAG.getBitcast(WideSrcVT, Src);

  // Shift the requested lane of each group down to bit 0, then truncate to
  // keep just that lane.
  unsigned Shift = Index * EltBits;
  SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
                            DAG.getConstant(Shift, DL, WideSrcVT));
  Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
  // The deinterleaved value has fewer elements than VT; place it at the front
  // of an undef vector of VT's integer equivalent before casting back.
  MVT IntVT = VT.changeVectorElementTypeToInteger();
  Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res,
                    DAG.getVectorIdxConstant(0, DL));
  return DAG.getBitcast(VT, Res);
}
4661
// Lower the following shuffle to vslidedown.
// a)
// t49: v8i8 = extract_subvector t13, Constant:i64<0>
// t109: v8i8 = extract_subvector t13, Constant:i64<8>
// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
// b)
// t69: v16i16 = extract_subvector t68, Constant:i64<0>
// t23: v8i16 = extract_subvector t69, Constant:i64<0>
// t29: v4i16 = extract_subvector t23, Constant:i64<4>
// t26: v8i16 = extract_subvector t69, Constant:i64<8>
// t30: v4i16 = extract_subvector t26, Constant:i64<0>
// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
                                               SDValue V1, SDValue V2,
                                               ArrayRef<int> Mask,
                                               const RISCVSubtarget &Subtarget,
                                               SelectionDAG &DAG) {
  // Walk up a chain of EXTRACT_SUBVECTORs, accumulating the total element
  // offset, and return the ultimate (non-extract) source plus that offset.
  auto findNonEXTRACT_SUBVECTORParent =
      [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
    uint64_t Offset = 0;
    while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
           // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
           // a scalable vector. But we don't want to match the case.
           Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
      Offset += Parent.getConstantOperandVal(1);
      Parent = Parent.getOperand(0);
    }
    return std::make_pair(Parent, Offset);
  };

  auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
  auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);

  // Extracting from the same source.
  SDValue Src = V1Src;
  if (Src != V2Src)
    return SDValue();

  // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
  SmallVector<int, 16> NewMask(Mask);
  for (size_t i = 0; i != NewMask.size(); ++i) {
    if (NewMask[i] == -1)
      continue;

    if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
      // Element read from operand 0: offset by V1's accumulated extract index.
      NewMask[i] = NewMask[i] + V1IndexOffset;
    } else {
      // Minus NewMask.size() is needed. Otherwise, the b case would be
      // <5,6,7,12> instead of <5,6,7,8>.
      NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
    }
  }

  // First index must be known and non-zero. It will be used as the slidedown
  // amount.
  if (NewMask[0] <= 0)
    return SDValue();

  // NewMask is also continuous.
  for (unsigned i = 1; i != NewMask.size(); ++i)
    if (NewMask[i - 1] + 1 != NewMask[i])
      return SDValue();

  MVT XLenVT = Subtarget.getXLenVT();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  // Slide the whole source down by the first mask index, then take the low
  // VT-sized prefix as the shuffle result.
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                    convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
                    DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
  return DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VT,
      convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
      DAG.getConstant(0, DL, XLenVT));
}
4738
// Because vslideup leaves the destination elements at the start intact, we can
// use it to perform shuffles that insert subvectors:
//
// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
// ->
// vsetvli zero, 8, e8, mf2, ta, ma
// vslideup.vi v8, v9, 4
//
// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
// ->
// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.v1 v8, v9, 2
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
                                             SDValue V1, SDValue V2,
                                             ArrayRef<int> Mask,
                                             const RISCVSubtarget &Subtarget,
                                             SelectionDAG &DAG) {
  unsigned NumElts = VT.getVectorNumElements();
  int NumSubElts, Index;
  if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
                                                Index))
    return SDValue();

  // Figure out which operand stays in place and which supplies the inserted
  // subvector.
  bool OpsSwapped = Mask[Index] < (int)NumElts;
  SDValue InPlace = OpsSwapped ? V2 : V1;
  SDValue ToInsert = OpsSwapped ? V1 : V2;

  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
  // We slide up by the index that the subvector is being inserted at, and set
  // VL to the index + the number of elements being inserted.
  unsigned Policy =
      RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
  // If we're adding a suffix to the in place vector, i.e. inserting right
  // up to the very end of it, then we don't actually care about the tail.
  if (NumSubElts + Index >= (int)NumElts)
    Policy |= RISCVII::TAIL_AGNOSTIC;

  InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
  ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
  SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);

  SDValue Res;
  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  if (Index == 0)
    Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
                      VL);
  else
    Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
                      DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
  return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
4792
/// Match v(f)slide1up/down idioms.  These operations involve sliding
/// N-1 elements to make room for an inserted scalar at one end.
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
                                            SDValue V1, SDValue V2,
                                            ArrayRef<int> Mask,
                                            const RISCVSubtarget &Subtarget,
                                            SelectionDAG &DAG) {
  bool OpsSwapped = false;
  // Canonicalize so V1 is the build_vector supplying the inserted scalar.
  if (!isa<BuildVectorSDNode>(V1)) {
    if (!isa<BuildVectorSDNode>(V2))
      return SDValue();
    std::swap(V1, V2);
    OpsSwapped = true;
  }
  SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
  if (!Splat)
    return SDValue();

  // Return true if the mask could describe a slide of Mask.size() - 1
  // elements from concat_vector(V1, V2)[Base:] to [Offset:].
  auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
    const unsigned S = (Offset > 0) ? 0 : -Offset;
    const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
    for (unsigned i = S; i != E; ++i)
      if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
        return false;
    return true;
  };

  const unsigned NumElts = VT.getVectorNumElements();
  bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
  if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
    return SDValue();

  const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
  // Inserted lane must come from splat, undef scalar is legal but not profitable.
  if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
    return SDValue();

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
  // vslide1{down,up}.vx instead.
  if (VT.getVectorElementType() == MVT::bf16 ||
      (VT.getVectorElementType() == MVT::f16 &&
       !Subtarget.hasVInstructionsF16())) {
    MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
    Splat =
        DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
    V2 = DAG.getBitcast(
        IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
    SDValue Vec = DAG.getNode(
        IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
        IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
    Vec = DAG.getBitcast(ContainerVT, Vec);
    return convertFromScalableVector(VT, Vec, DAG, Subtarget);
  }

  auto OpCode = IsVSlidedown ?
    RISCVISD::VSLIDE1DOWN_VL :
    RISCVISD::VSLIDE1UP_VL;
  // Integer slide1 instructions take the scalar in a GPR; widen it to XLen.
  if (!VT.isFloatingPoint())
    Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
  auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT),
                         convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
                         Splat, TrueMask, VL);
  return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
4863
4864// Match a mask which "spreads" the leading elements of a vector evenly
4865// across the result. Factor is the spread amount, and Index is the
4866// offset applied. (on success, Index < Factor) This is the inverse
4867// of a deinterleave with the same Factor and Index. This is analogous
4868// to an interleave, except that all but one lane is undef.
4869static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
4870 SmallVector<bool> LaneIsUndef(Factor, true);
4871 for (unsigned i = 0; i < Mask.size(); i++)
4872 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4873
4874 bool Found = false;
4875 for (unsigned i = 0; i < Factor; i++) {
4876 if (LaneIsUndef[i])
4877 continue;
4878 if (Found)
4879 return false;
4880 Index = i;
4881 Found = true;
4882 }
4883 if (!Found)
4884 return false;
4885
4886 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
4887 unsigned j = i * Factor + Index;
4888 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
4889 return false;
4890 }
4891 return true;
4892}
4893
// Given a vector a, b, c, d return a vector Factor times longer
// with Factor-1 undef's between elements. Ex:
// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
                                 const SDLoc &DL, SelectionDAG &DAG) {

  MVT VT = V.getSimpleValueType();
  unsigned EltBits = VT.getScalarSizeInBits();
  ElementCount EC = VT.getVectorElementCount();
  // Operate on the integer form so the zext/shl below are well defined for
  // any element type.
  V = DAG.getBitcast(VT.changeTypeToInteger(), V);

  MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);

  // Zero-extending puts each element in lane 0 of its Factor-wide group; a
  // shift then moves it to lane Index.
  SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
  // TODO: On rv32, the constant becomes a splat_vector_parts which does not
  // allow the SHL to fold away if Index is 0.
  if (Index != 0)
    Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
                         DAG.getConstant(EltBits * Index, DL, WideVT));
  // Make sure to use original element type
  MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
                                  EC.multiplyCoefficientBy(Factor));
  return DAG.getBitcast(ResultVT, Result);
}
4919
// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
// to create an interleaved vector of <[vscale x] n*2 x ty>.
// This requires that the size of ty is less than the subtarget's maximum ELEN.
// NOTE(review): the first line of the signature (taking EvenV and OddV) is
// elided in this excerpt.
                                     const SDLoc &DL, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {

  // FIXME: Not only does this optimize the code, it fixes some correctness
  // issues because MIR does not have freeze.
  // A fully-undef side degenerates into a spread(2) of the other operand.
  if (EvenV.isUndef())
    return getWideningSpread(OddV, 2, 1, DL, DAG);
  if (OddV.isUndef())
    return getWideningSpread(EvenV, 2, 0, DL, DAG);

  MVT VecVT = EvenV.getSimpleValueType();
  MVT VecContainerVT = VecVT; // <vscale x n x ty>
  // Convert fixed vectors to scalable if needed
  if (VecContainerVT.isFixedLengthVector()) {
    VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
    EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
    OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
  }

  assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());

  // We're working with a vector of the same size as the resulting
  // interleaved vector, but with half the number of elements and
  // twice the SEW (Hence the restriction on not using the maximum
  // ELEN)
  // NOTE(review): the head of the WideVT initializer is elided in this
  // excerpt; only its continuation line is visible below.
  MVT WideVT =
                     VecVT.getVectorElementCount());
  MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
  if (WideContainerVT.isFixedLengthVector())
    WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);

  // Bitcast the input vectors to integers in case they are FP
  VecContainerVT = VecContainerVT.changeTypeToInteger();
  EvenV = DAG.getBitcast(VecContainerVT, EvenV);
  OddV = DAG.getBitcast(VecContainerVT, OddV);

  auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
  SDValue Passthru = DAG.getUNDEF(WideContainerVT);

  SDValue Interleaved;
  if (Subtarget.hasStdExtZvbb()) {
    // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
    SDValue OffsetVec =
        DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
    Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
                              OffsetVec, Passthru, Mask, VL);
    Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
                              Interleaved, EvenV, Passthru, Mask, VL);
  } else {
    // FIXME: We should freeze the odd vector here. We already handled the case
    // of provably undef/poison above.

    // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
    // vwaddu.vv
    Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
                              OddV, Passthru, Mask, VL);

    // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
    SDValue AllOnesVec = DAG.getSplatVector(
        VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
    SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
                                  OddV, AllOnesVec, Passthru, Mask, VL);

    // Add the two together so we get
    //   (OddV * 0xff...ff) + (OddV + EvenV)
    // = (OddV * 0x100...00) + EvenV
    // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
    // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
    Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
                              Interleaved, OddsMul, Passthru, Mask, VL);
  }

  // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
  MVT ResultContainerVT = MVT::getVectorVT(
      VecVT.getVectorElementType(), // Make sure to use original type
      VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
  Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);

  // Convert back to a fixed vector if needed
  // NOTE(review): the remainder of the ResultVT initializer is elided in this
  // excerpt.
  MVT ResultVT =
  if (ResultVT.isFixedLengthVector())
    Interleaved =
        convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);

  return Interleaved;
}
5013
// If we have a vector of bits that we want to reverse, we can use a vbrev on a
// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
// Returns SDValue() if the pattern or required extensions do not apply.
// NOTE(review): the first line of the signature (taking the
// ShuffleVectorSDNode *SVN) is elided in this excerpt.
                                      SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);
  MVT VT = SVN->getSimpleValueType(0);
  SDValue V = SVN->getOperand(0);
  unsigned NumElts = VT.getVectorNumElements();

  // This lowering only applies to i1 element vectors.
  assert(VT.getVectorElementType() == MVT::i1);

  // NOTE(review): the head of this condition (apparently the reverse-mask
  // check on SVN->getMask()) is elided in this excerpt; only single-operand
  // shuffles qualify.
                     SVN->getMask().size()) ||
      !SVN->getOperand(1).isUndef())
    return SDValue();

  // Use at least an i8 element; round the bit count up to a power of two.
  unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
  EVT ViaVT = EVT::getVectorVT(
      *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
  EVT ViaBitVT =
      EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());

  // If we don't have zvbb or the larger element type > ELEN, the operation will
  // be illegal.
  // NOTE(review): the head of this legality check is elided in this excerpt.
                                                          ViaVT) ||
      !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
    return SDValue();

  // If the bit vector doesn't fit exactly into the larger element type, we need
  // to insert it into the larger vector and then shift up the reversed bits
  // afterwards to get rid of the gap introduced.
  if (ViaEltSize > NumElts)
    V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
                    V, DAG.getVectorIdxConstant(0, DL));

  SDValue Res =
      DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));

  // Shift up the reversed bits if the vector didn't exactly fit into the larger
  // element type.
  if (ViaEltSize > NumElts)
    Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
                      DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));

  Res = DAG.getBitcast(ViaBitVT, Res);

  // Extract the original narrow vector back out if we had to widen above.
  if (ViaEltSize > NumElts)
    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                      DAG.getVectorIdxConstant(0, DL));
  return Res;
}
5067
// Check whether this shuffle can be performed as a bit rotate on groups of
// adjacent elements viewed as a single wider integer element; on success,
// RotateVT is that wider vector type and RotateAmt the rotate amount in bits.
// NOTE(review): the first line of the signature (taking the
// ShuffleVectorSDNode *SVN) is elided in this excerpt.
                              SelectionDAG &DAG,
                              const RISCVSubtarget &Subtarget,
                              MVT &RotateVT, unsigned &RotateAmt) {
  SDLoc DL(SVN);

  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned NumSubElts;
  // Require at least 2 elements per rotated group (the MinSubElts argument).
  if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
                                          NumElts, NumSubElts, RotateAmt))
    return false;
  // Each group of NumSubElts elements becomes one wide integer element.
  RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
                              NumElts / NumSubElts);

  // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
  return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
}
5087
// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
// Returns SDValue() when the mask is not a legal bit rotate.
// NOTE(review): the first line of the signature (taking the
// ShuffleVectorSDNode *SVN) is elided in this excerpt.
                                            SelectionDAG &DAG,
                                            const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);

  EVT VT = SVN->getValueType(0);
  unsigned RotateAmt;
  MVT RotateVT;
  if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
    return SDValue();

  // Reinterpret the single source operand at the wider rotate element type.
  SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));

  SDValue Rotate;
  // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
  // so canonicalize to vrev8.
  if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
    Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
  else
    Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
                         DAG.getConstant(RotateAmt, DL, RotateVT));

  // Cast back to the original shuffle result type.
  return DAG.getBitcast(VT, Rotate);
}
5115
// If compiling with an exactly known VLEN, see if we can split a
// shuffle on m2 or larger into a small number of m1 sized shuffles
// which write each destination registers exactly once.
// NOTE(review): the first line of the signature (taking the
// ShuffleVectorSDNode *SVN) is elided in this excerpt.
                                              SelectionDAG &DAG,
                                              const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);
  MVT VT = SVN->getSimpleValueType(0);
  SDValue V1 = SVN->getOperand(0);
  SDValue V2 = SVN->getOperand(1);
  ArrayRef<int> Mask = SVN->getMask();

  // If we don't know exact data layout, not much we can do. If this
  // is already m1 or smaller, no point in splitting further.
  const auto VLen = Subtarget.getRealVLen();
  if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
    return SDValue();

  // Avoid picking up bitrotate patterns which we have a linear-in-lmul
  // expansion for.
  unsigned RotateAmt;
  MVT RotateVT;
  if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
    return SDValue();

  MVT ElemVT = VT.getVectorElementType();
  unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();

  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
  MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
  assert(M1VT == getLMUL1VT(M1VT));
  unsigned NumOpElts = M1VT.getVectorMinNumElements();
  unsigned NumElts = ContainerVT.getVectorMinNumElements();
  unsigned NumOfSrcRegs = NumElts / NumOpElts;
  unsigned NumOfDestRegs = NumElts / NumOpElts;
  // The following semantically builds up a fixed length concat_vector
  // of the component shuffle_vectors. We eagerly lower to scalable here
  // to avoid DAG combining it back to a large shuffle_vector again.
  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
  V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
  // NOTE(review): the declaration head of Operands (a per-destination-register
  // list of (Idx1, Idx2, SubMask) tuples, judging by its uses below) and the
  // head of the call that populates it are elided in this excerpt.
      Operands;
      Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
      [&]() { Operands.emplace_back(); },
      [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
        Operands.emplace_back().emplace_back(
            SrcVecIdx, UINT_MAX,
            SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
      },
      [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
        if (NewReg)
          Operands.emplace_back();
        Operands.back().emplace_back(
            Idx1, Idx2, SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
      });
  assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
  // Note: check that we do not emit too many shuffles here to prevent code
  // size explosion.
  // TODO: investigate, if it can be improved by extra analysis of the masks to
  // check if the code is more profitable.
  unsigned NumShuffles = std::accumulate(
      Operands.begin(), Operands.end(), 0u,
      [&](unsigned N,
          ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
        if (Data.empty())
          return N;
        N += Data.size();
        for (const auto &P : Data) {
          unsigned Idx2 = std::get<1>(P);
          ArrayRef<int> Mask = std::get<2>(P);
          // Two-source sub-shuffles count double; identity single-source
          // sub-shuffles are free (a plain vector register copy).
          if (Idx2 != UINT_MAX)
            ++N;
          else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
            --N;
        }
        return N;
      });
  if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
      (NumOfDestRegs <= 2 && NumShuffles >= 4))
    return SDValue();
  // Pull one m1-sized sub-register out of a (scalable) source operand.
  auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
    SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
                                 DAG.getVectorIdxConstant(ExtractIdx, DL));
    SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
    return SubVec;
  };
  // NOTE(review): the continuation of this lambda's parameter list (the
  // ArrayRef<int> Mask parameter) is elided in this excerpt.
  auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
    SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
    return SubVec;
  };
  SDValue Vec = DAG.getUNDEF(ContainerVT);
  for (auto [I, Data] : enumerate(Operands)) {
    if (Data.empty())
      continue;
    // NOTE(review): the declaration of Values (an index -> extracted SDValue
    // map, judging by its uses below) is elided in this excerpt.
    for (unsigned I : seq<unsigned>(Data.size())) {
      const auto &[Idx1, Idx2, _] = Data[I];
      if (Values.contains(Idx1)) {
        assert(Idx2 != UINT_MAX && Values.contains(Idx2) &&
               "Expected both indices to be extracted already.");
        break;
      }
      SDValue V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
                               (Idx1 % NumOfSrcRegs) * NumOpElts);
      Values[Idx1] = V;
      if (Idx2 != UINT_MAX)
        Values[Idx2] = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
                                    (Idx2 % NumOfSrcRegs) * NumOpElts);
    }
    SDValue V;
    // Chain the sub-shuffles for this destination register, feeding each
    // result back in under Idx1 so later tuples can reuse it.
    for (const auto &[Idx1, Idx2, Mask] : Data) {
      SDValue V1 = Values.at(Idx1);
      SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
      V = PerformShuffle(V1, V2, Mask);
      Values[Idx1] = V;
    }

    unsigned InsertIdx = I * NumOpElts;
    V = convertToScalableVector(M1VT, V, DAG, Subtarget);
    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, V,
                      DAG.getVectorIdxConstant(InsertIdx, DL));
  }
  return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
5243
// Matches a subset of compress masks with a contiguous prefix of output
// elements. This could be extended to allow gaps by deciding which
// source elements to spuriously demand.
// NOTE(review): the signature line (taking an ArrayRef<int> Mask, judging by
// the body) is elided in this excerpt.
  int Last = -1;
  bool SawUndef = false;
  for (unsigned i = 0; i < Mask.size(); i++) {
    // Undef entries are only permitted as a trailing suffix.
    if (Mask[i] == -1) {
      SawUndef = true;
      continue;
    }
    if (SawUndef)
      return false;
    // Each kept element must come from at-or-after its output position...
    if (i > (unsigned)Mask[i])
      return false;
    // ...and source indices must be strictly increasing (no duplicates or
    // reordering, as vcompress preserves source order).
    if (Mask[i] <= Last)
      return false;
    Last = Mask[i];
  }
  return true;
}
5265
/// Given a shuffle where the indices are disjoint between the two sources,
/// e.g.:
///
/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
///
/// Merge the two sources into one and do a single source shuffle:
///
/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
///
/// A vselect will either be merged into a masked instruction or be lowered as a
/// vmerge.vvm, which is cheaper than a vrgather.vv.
/// Returns SDValue() when the lane usage of the two sources is not disjoint.
/// NOTE(review): the first line of the signature (taking the
/// ShuffleVectorSDNode *SVN) is elided in this excerpt.
                                              SelectionDAG &DAG,
                                              const RISCVSubtarget &Subtarget) {
  MVT VT = SVN->getSimpleValueType(0);
  MVT XLenVT = Subtarget.getXLenVT();
  SDLoc DL(SVN);

  const ArrayRef<int> Mask = SVN->getMask();

  // Work out which source each lane will come from.
  SmallVector<int, 16> Srcs(Mask.size(), -1);

  for (int Idx : Mask) {
    if (Idx == -1)
      continue;
    unsigned SrcIdx = Idx % Mask.size();
    int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
    if (Srcs[SrcIdx] == -1)
      // Mark this source as using this lane.
      Srcs[SrcIdx] = Src;
    else if (Srcs[SrcIdx] != Src)
      // The other source is using this lane: not disjoint.
      return SDValue();
  }

  // Build the i1 select mask: true picks operand 0, false picks operand 1.
  SmallVector<SDValue> SelectMaskVals;
  for (int Lane : Srcs) {
    if (Lane == -1)
      SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
    else
      SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
  }
  MVT MaskVT = VT.changeVectorElementType(MVT::i1);
  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
  SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
                               SVN->getOperand(0), SVN->getOperand(1));

  // Move all indices relative to the first source.
  SmallVector<int> NewMask(Mask.size());
  for (unsigned I = 0; I < Mask.size(); I++) {
    if (Mask[I] == -1)
      NewMask[I] = -1;
    else
      NewMask[I] = Mask[I] % Mask.size();
  }

  // Single-source shuffle of the merged vector.
  return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
}
5326
/// Try to widen element type to get a new mask value for a better permutation
/// sequence. This doesn't try to inspect the widened mask for profitability;
/// we speculate the widened form is equal or better. This has the effect of
/// reducing mask constant sizes - allowing cheaper materialization sequences
/// - and index sequence sizes - reducing register pressure and materialization
/// cost, at the cost of (possibly) an extra VTYPE toggle.
/// Returns SDValue() when the mask or type cannot be widened.
/// NOTE(review): the signature line (taking the shuffle SDValue Op and the
/// SelectionDAG, judging by the body) is elided in this excerpt.
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT ScalarVT = VT.getVectorElementType();
  unsigned ElementSize = ScalarVT.getFixedSizeInBits();
  SDValue V0 = Op.getOperand(0);
  SDValue V1 = Op.getOperand(1);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();

  // Avoid wasted work leading to isTypeLegal check failing below
  if (ElementSize > 32)
    return SDValue();

  // The mask itself must be expressible at twice the element width.
  SmallVector<int, 8> NewMask;
  if (!widenShuffleMaskElts(Mask, NewMask))
    return SDValue();

  // Keep FP-ness of the element type so the bitcasts below stay legal.
  MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
                                      : MVT::getIntegerVT(ElementSize * 2);
  MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
  if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
    return SDValue();
  V0 = DAG.getBitcast(NewVT, V0);
  V1 = DAG.getBitcast(NewVT, V1);
  return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
}
5359
5361 const RISCVSubtarget &Subtarget) {
5362 SDValue V1 = Op.getOperand(0);
5363 SDValue V2 = Op.getOperand(1);
5364 SDLoc DL(Op);
5365 MVT XLenVT = Subtarget.getXLenVT();
5366 MVT VT = Op.getSimpleValueType();
5367 unsigned NumElts = VT.getVectorNumElements();
5368 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5369
5370 if (VT.getVectorElementType() == MVT::i1) {
5371 // Lower to a vror.vi of a larger element type if possible before we promote
5372 // i1s to i8s.
5373 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5374 return V;
5375 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5376 return V;
5377
5378 // Promote i1 shuffle to i8 shuffle.
5379 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5380 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5381 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5382 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5383 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5384 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5385 ISD::SETNE);
5386 }
5387
5388 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5389
5390 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5391
5392 if (SVN->isSplat()) {
5393 const int Lane = SVN->getSplatIndex();
5394 if (Lane >= 0) {
5395 MVT SVT = VT.getVectorElementType();
5396
5397 // Turn splatted vector load into a strided load with an X0 stride.
5398 SDValue V = V1;
5399 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5400 // with undef.
5401 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5402 int Offset = Lane;
5403 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5404 int OpElements =
5405 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5406 V = V.getOperand(Offset / OpElements);
5407 Offset %= OpElements;
5408 }
5409
5410 // We need to ensure the load isn't atomic or volatile.
5411 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5412 auto *Ld = cast<LoadSDNode>(V);
5413 Offset *= SVT.getStoreSize();
5414 SDValue NewAddr = DAG.getMemBasePlusOffset(
5415 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5416
5417 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5418 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5419 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5420 SDValue IntID =
5421 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5422 SDValue Ops[] = {Ld->getChain(),
5423 IntID,
5424 DAG.getUNDEF(ContainerVT),
5425 NewAddr,
5426 DAG.getRegister(RISCV::X0, XLenVT),
5427 VL};
5428 SDValue NewLoad = DAG.getMemIntrinsicNode(
5429 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5431 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5432 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5433 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5434 }
5435
5436 MVT SplatVT = ContainerVT;
5437
5438 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5439 if (SVT == MVT::bf16 ||
5440 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5441 SVT = MVT::i16;
5442 SplatVT = ContainerVT.changeVectorElementType(SVT);
5443 }
5444
5445 // Otherwise use a scalar load and splat. This will give the best
5446 // opportunity to fold a splat into the operation. ISel can turn it into
5447 // the x0 strided load if we aren't able to fold away the select.
5448 if (SVT.isFloatingPoint())
5449 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5450 Ld->getPointerInfo().getWithOffset(Offset),
5451 Ld->getOriginalAlign(),
5452 Ld->getMemOperand()->getFlags());
5453 else
5454 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5455 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5456 Ld->getOriginalAlign(),
5457 Ld->getMemOperand()->getFlags());
5459
5460 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5462 SDValue Splat =
5463 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5464 Splat = DAG.getBitcast(ContainerVT, Splat);
5465 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5466 }
5467
5468 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5469 assert(Lane < (int)NumElts && "Unexpected lane!");
5470 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5471 V1, DAG.getConstant(Lane, DL, XLenVT),
5472 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5473 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5474 }
5475 }
5476
5477 // For exact VLEN m2 or greater, try to split to m1 operations if we
5478 // can split cleanly.
5479 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5480 return V;
5481
5482 ArrayRef<int> Mask = SVN->getMask();
5483
5484 if (SDValue V =
5485 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5486 return V;
5487
5488 if (SDValue V =
5489 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5490 return V;
5491
5492 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5493 // available.
5494 if (Subtarget.hasStdExtZvkb())
5495 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5496 return V;
5497
5498 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5499 // be undef which can be handled with a single SLIDEDOWN/UP.
5500 int LoSrc, HiSrc;
5501 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5502 if (Rotation > 0) {
5503 SDValue LoV, HiV;
5504 if (LoSrc >= 0) {
5505 LoV = LoSrc == 0 ? V1 : V2;
5506 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5507 }
5508 if (HiSrc >= 0) {
5509 HiV = HiSrc == 0 ? V1 : V2;
5510 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5511 }
5512
5513 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5514 // to slide LoV up by (NumElts - Rotation).
5515 unsigned InvRotate = NumElts - Rotation;
5516
5517 SDValue Res = DAG.getUNDEF(ContainerVT);
5518 if (HiV) {
5519 // Even though we could use a smaller VL, don't to avoid a vsetivli
5520 // toggle.
5521 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5522 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5523 }
5524 if (LoV)
5525 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5526 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5528
5529 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5530 }
5531
5532 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5533 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5534
5535 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5536 // use shift and truncate to perform the shuffle.
5537 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5538 // shift-and-trunc reducing total cost for everything except an mf8 result.
5539 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5540 // to do the entire operation.
5541 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5542 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5543 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5544 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5545 unsigned Index = 0;
5546 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5547 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5548 if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2))
5549 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5550 }
5551 }
5552 }
5553
5554 if (SDValue V =
5555 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5556 return V;
5557
5558 // Detect an interleave shuffle and lower to
5559 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5560 int EvenSrc, OddSrc;
5561 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5562 // Extract the halves of the vectors.
5563 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5564
5565 // Recognize if one half is actually undef; the matching above will
5566 // otherwise reuse the even stream for the undef one. This improves
5567 // spread(2) shuffles.
5568 bool LaneIsUndef[2] = { true, true};
5569 for (unsigned i = 0; i < Mask.size(); i++)
5570 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5571
5572 int Size = Mask.size();
5573 SDValue EvenV, OddV;
5574 if (LaneIsUndef[0]) {
5575 EvenV = DAG.getUNDEF(HalfVT);
5576 } else {
5577 assert(EvenSrc >= 0 && "Undef source?");
5578 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5579 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5580 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5581 }
5582
5583 if (LaneIsUndef[1]) {
5584 OddV = DAG.getUNDEF(HalfVT);
5585 } else {
5586 assert(OddSrc >= 0 && "Undef source?");
5587 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5588 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5589 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5590 }
5591
5592 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5593 }
5594
5595
5596 // Handle any remaining single source shuffles
5597 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5598 if (V2.isUndef()) {
5599 // We might be able to express the shuffle as a bitrotate. But even if we
5600 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5601 // shifts and a vor will have a higher throughput than a vrgather.
5602 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5603 return V;
5604
5605 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
5606 // is fully covered in interleave(2) above, so it is ignored here.
5607 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5608 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5609 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5610 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5611 unsigned Index;
5612 if (isSpreadMask(Mask, Factor, Index)) {
5613 MVT NarrowVT =
5614 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
5615 SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1,
5616 DAG.getVectorIdxConstant(0, DL));
5617 return getWideningSpread(Src, Factor, Index, DL, DAG);
5618 }
5619 }
5620 }
5621
5622 // Before hitting generic lowering fallbacks, try to widen the mask
5623 // to a wider SEW.
5624 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5625 return V;
5626
5627 // Can we generate a vcompress instead of a vrgather? These scale better
5628 // at high LMUL, at the cost of not being able to fold a following select
5629 // into them. The mask constants are also smaller than the index vector
5630 // constants, and thus easier to materialize.
5631 if (isCompressMask(Mask)) {
5632 SmallVector<SDValue> MaskVals(NumElts,
5633 DAG.getConstant(false, DL, XLenVT));
5634 for (auto Idx : Mask) {
5635 if (Idx == -1)
5636 break;
5637 assert(Idx >= 0 && (unsigned)Idx < NumElts);
5638 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
5639 }
5640 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5641 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5642 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
5643 DAG.getUNDEF(VT));
5644 }
5645
5646 if (VT.getScalarSizeInBits() == 8 &&
5647 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5648 // On such a vector we're unable to use i8 as the index type.
5649 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5650 // may involve vector splitting if we're already at LMUL=8, or our
5651 // user-supplied maximum fixed-length LMUL.
5652 return SDValue();
5653 }
5654
5655 // Base case for the two operand recursion below - handle the worst case
5656 // single source shuffle.
5657 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5658 MVT IndexVT = VT.changeTypeToInteger();
5659 // Since we can't introduce illegal index types at this stage, use i16 and
5660 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5661 // than XLenVT.
5662 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5663 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5664 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5665 }
5666
5667 // If the mask allows, we can do all the index computation in 16 bits. This
5668 // requires less work and less register pressure at high LMUL, and creates
5669 // smaller constants which may be cheaper to materialize.
5670 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5671 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5672 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5673 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5674 }
5675
5676 MVT IndexContainerVT =
5677 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5678
5679 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5680 SmallVector<SDValue> GatherIndicesLHS;
5681 for (int MaskIndex : Mask) {
5682 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5683 GatherIndicesLHS.push_back(IsLHSIndex
5684 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5685 : DAG.getUNDEF(XLenVT));
5686 }
5687 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5688 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5689 Subtarget);
5690 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5691 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5692 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5693 }
5694
5695 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5696 // merged with a second vrgather.
5697 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5698
5699 // Now construct the mask that will be used by the blended vrgather operation.
5700 // Construct the appropriate indices into each vector.
5701 for (int MaskIndex : Mask) {
5702 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5703 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5704 ? MaskIndex : -1);
5705 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5706 }
5707
5708 // If the mask indices are disjoint between the two sources, we can lower it
5709 // as a vselect + a single source vrgather.vv. Don't do this if we think the
5710 // operands may end up being lowered to something cheaper than a vrgather.vv.
5711 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
5712 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS.data(), VT) &&
5713 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS.data(), VT) &&
5714 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
5715 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
5716 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
5717 return V;
5718
5719 // Before hitting generic lowering fallbacks, try to widen the mask
5720 // to a wider SEW.
5721 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5722 return V;
5723
5724 // Try to pick a profitable operand order.
5725 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5726 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5727
5728 // Recursively invoke lowering for each operand if we had two
5729 // independent single source shuffles, and then combine the result via a
5730 // vselect. Note that the vselect will likely be folded back into the
5731 // second permute (vrgather, or other) by the post-isel combine.
5732 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5733 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5734
5735 SmallVector<SDValue> MaskVals;
5736 for (int MaskIndex : Mask) {
5737 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5738 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5739 }
5740
5741 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5742 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5743 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5744
5745 if (SwapOps)
5746 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5747 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5748}
5749
// NOTE(review): the function header was lost in this listing; from the
// parameters used (M, VT) this is presumably
// RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) —
// confirm against upstream.
  // Only support legal VTs for other shuffles for now.
  if (!isTypeLegal(VT))
    return false;

  // Support splats for any type. These should type legalize well.
  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
    return true;

  MVT SVT = VT.getSimpleVT();

  // Not for i1 vectors.
  if (SVT.getScalarType() == MVT::i1)
    return false;

  // Beyond splats, only element rotations and interleave shuffles are
  // recognized as legal here; everything else falls back to generic lowering.
  int Dummy1, Dummy2;
  return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
         isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
}
5769
// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
// the exponent. Handles both the plain ISD opcodes and their VP (vector
// predicated) variants; for VP ops the mask and EVL are taken from the node's
// extra operands.
SDValue
RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
                                               SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  unsigned EltSize = VT.getScalarSizeInBits();
  SDValue Src = Op.getOperand(0);
  SDLoc DL(Op);
  MVT ContainerVT = VT;

  // VP opcodes carry an explicit mask (operand 1) and vector length
  // (operand 2); fixed-length masks are promoted to scalable containers.
  SDValue Mask, VL;
  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(1);
    if (VT.isFixedLengthVector())
      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
                                     Subtarget);
    VL = Op.getOperand(2);
  }

  // We choose FP type that can represent the value if possible. Otherwise, we
  // use rounding to zero conversion for correct exponent of the result.
  // TODO: Use f16 for i8 when possible?
  MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
  if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
    FloatEltVT = MVT::f32;
  MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());

  // Legal types should have been checked in the RISCVTargetLowering
  // constructor.
  // TODO: Splitting may make sense in some cases.
  assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
         "Expected legal float type!");

  // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
  // The trailing zero count is equal to log2 of this single bit value.
  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
    SDValue Neg = DAG.getNegative(Src, DL, VT);
    Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
  } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
    // Same X & -X trick, expressed with predicated sub/and for the VP form.
    SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
                              Src, Mask, VL);
    Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
  }

  // We have a legal FP type, convert to it.
  SDValue FloatVal;
  if (FloatVT.bitsGT(VT)) {
    // The FP type can represent every source value exactly; a plain
    // unsigned-to-FP conversion preserves the exponent we need.
    if (Op->isVPOpcode())
      FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
    else
      FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
  } else {
    // Use RTZ to avoid rounding influencing exponent of FloatVal.
    if (VT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    }
    if (!Op->isVPOpcode())
      std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
    // NOTE(review): the initializer of RTZRM was lost in this listing;
    // presumably a target constant selecting the round-toward-zero rounding
    // mode for VFCVT_RM_F_XU_VL — confirm against upstream.
    SDValue RTZRM =
    MVT ContainerFloatVT =
        MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
    FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
                           Src, Mask, RTZRM, VL);
    if (VT.isFixedLengthVector())
      FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
  }
  // Bitcast to integer and shift the exponent to the LSB.
  EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
  SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
  // Mantissa widths: 52 fraction bits for f64, 23 for f32.
  unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;

  SDValue Exp;
  // Restore back to original type. Truncation after SRL is to generate vnsrl.
  if (Op->isVPOpcode()) {
    Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
                      DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
    Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
  } else {
    Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
                      DAG.getConstant(ShiftAmt, DL, IntVT));
    if (IntVT.bitsLT(VT))
      Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
    else if (IntVT.bitsGT(VT))
      Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
  }

  // The exponent contains log2 of the value in biased form.
  unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
  // For trailing zeros, we just need to subtract the bias.
  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return DAG.getNode(ISD::SUB, DL, VT, Exp,
                       DAG.getConstant(ExponentBias, DL, VT));
  if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
    return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
                       DAG.getConstant(ExponentBias, DL, VT), Mask, VL);

  // For leading zeros, we need to remove the bias and convert from log2 to
  // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
  unsigned Adjust = ExponentBias + (EltSize - 1);
  SDValue Res;
  if (Op->isVPOpcode())
    Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
                      Mask, VL);
  else
    Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);

  // The above result with zero input equals to Adjust which is greater than
  // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
  if (Op.getOpcode() == ISD::CTLZ)
    Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
  else if (Op.getOpcode() == ISD::VP_CTLZ)
    Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
                      DAG.getConstant(EltSize, DL, VT), Mask, VL);
  return Res;
}
5888
5889SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5890 SelectionDAG &DAG) const {
5891 SDLoc DL(Op);
5892 MVT XLenVT = Subtarget.getXLenVT();
5893 SDValue Source = Op->getOperand(0);
5894 MVT SrcVT = Source.getSimpleValueType();
5895 SDValue Mask = Op->getOperand(1);
5896 SDValue EVL = Op->getOperand(2);
5897
5898 if (SrcVT.isFixedLengthVector()) {
5899 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5900 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5901 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5902 Subtarget);
5903 SrcVT = ContainerVT;
5904 }
5905
5906 // Convert to boolean vector.
5907 if (SrcVT.getScalarType() != MVT::i1) {
5908 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5909 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5910 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5911 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5912 DAG.getUNDEF(SrcVT), Mask, EVL});
5913 }
5914
5915 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5916 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5917 // In this case, we can interpret poison as -1, so nothing to do further.
5918 return Res;
5919
5920 // Convert -1 to VL.
5921 SDValue SetCC =
5922 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5923 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5924 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5925}
5926
// While RVV has alignment restrictions, we should always be able to load as a
// legal equivalently-sized byte-typed vector instead. This method is
// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
// the load is already correctly-aligned, it returns SDValue().
SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Load = cast<LoadSDNode>(Op);
  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");

  // NOTE(review): the head of this condition was lost in this listing;
  // presumably an allowsMemoryAccessForAlignment(...) query — confirm against
  // upstream. When the access is already permitted, no expansion is needed.
                                        Load->getMemoryVT(),
                                        *Load->getMemOperand()))
    return SDValue();

  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  unsigned EltSizeBits = VT.getScalarSizeInBits();
  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV load type");
  // Re-express as a vector of i8 with the same total size; i8 accesses have
  // no alignment restriction.
  MVT NewVT =
      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
  assert(NewVT.isValid() &&
         "Expecting equally-sized RVV vector types to be legal");
  SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
                          Load->getPointerInfo(), Load->getOriginalAlign(),
                          Load->getMemOperand()->getFlags());
  // Return both the bitcast value and the load's chain.
  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
}
5955
// While RVV has alignment restrictions, we should always be able to store as a
// legal equivalently-sized byte-typed vector instead. This method is
// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
// returns SDValue() if the store is already correctly aligned.
SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
                                                     SelectionDAG &DAG) const {
  auto *Store = cast<StoreSDNode>(Op);
  assert(Store && Store->getValue().getValueType().isVector() &&
         "Expected vector store");

  // NOTE(review): the head of this condition was lost in this listing;
  // presumably an allowsMemoryAccessForAlignment(...) query — confirm against
  // upstream. When the access is already permitted, no expansion is needed.
                                        Store->getMemoryVT(),
                                        *Store->getMemOperand()))
    return SDValue();

  SDLoc DL(Op);
  SDValue StoredVal = Store->getValue();
  MVT VT = StoredVal.getSimpleValueType();
  unsigned EltSizeBits = VT.getScalarSizeInBits();
  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV store type");
  // Same trick as the load path: store as an equally-sized i8 vector, which
  // carries no alignment restriction.
  MVT NewVT =
      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
  assert(NewVT.isValid() &&
         "Expecting equally-sized RVV vector types to be legal");
  StoredVal = DAG.getBitcast(NewVT, StoredVal);
  return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
                      Store->getPointerInfo(), Store->getOriginalAlign(),
                      Store->getMemOperand()->getFlags());
}
5986
// NOTE(review): the first line of this static helper's signature was lost in
// this listing; presumably
// static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, ...) —
// confirm against upstream. Decides whether an i64 constant should be kept
// for isel (return Op) or sent to the constant pool (return SDValue()).
                             const RISCVSubtarget &Subtarget) {
  assert(Op.getValueType() == MVT::i64 && "Unexpected VT");

  int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();

  // All simm32 constants should be handled by isel.
  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
  // this check redundant, but small immediates are common so this check
  // should have better compile time.
  if (isInt<32>(Imm))
    return Op;

  // We only need to cost the immediate, if constant pool lowering is enabled.
  if (!Subtarget.useConstantPoolForLargeInts())
    return Op;

  // NOTE(review): the definition of Seq was lost in this listing; presumably
  // the RISCVMatInt instruction sequence materializing Imm — confirm against
  // upstream. Cheap-to-materialize constants are left for isel.
  if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
    return Op;

  // Optimizations below are disabled for opt size. If we're optimizing for
  // size, use a constant pool.
  if (DAG.shouldOptForSize())
    return SDValue();

  // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
  // that if it will avoid a constant pool.
  // It will require an extra temporary register though.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // low and high 32 bits are the same and bit 31 and 63 are set.
  unsigned ShiftAmt, AddOpc;
  RISCVMatInt::InstSeq SeqLo =
      RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
  if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
    return Op;

  return SDValue();
}
6026
6027SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6028 SelectionDAG &DAG) const {
6029 MVT VT = Op.getSimpleValueType();
6030 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6031
6032 // Can this constant be selected by a Zfa FLI instruction?
6033 bool Negate = false;
6034 int Index = getLegalZfaFPImm(Imm, VT);
6035
6036 // If the constant is negative, try negating.
6037 if (Index < 0 && Imm.isNegative()) {
6038 Index = getLegalZfaFPImm(-Imm, VT);
6039 Negate = true;
6040 }
6041
6042 // If we couldn't find a FLI lowering, fall back to generic code.
6043 if (Index < 0)
6044 return SDValue();
6045
6046 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6047 SDLoc DL(Op);
6048 SDValue Const =
6049 DAG.getNode(RISCVISD::FLI, DL, VT,
6050 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6051 if (!Negate)
6052 return Const;
6053
6054 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6055}
6056
// NOTE(review): the first line of this static helper's signature was lost in
// this listing; presumably
// static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, ...) —
// confirm against upstream. Lowers ATOMIC_FENCE, eliding fences that the
// memory model makes unnecessary.
                               const RISCVSubtarget &Subtarget) {
  SDLoc dl(Op);
  // Operand 1 encodes the ordering, operand 2 the synchronization scope.
  AtomicOrdering FenceOrdering =
      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  if (Subtarget.hasStdExtZtso()) {
    // The only fence that needs an instruction is a sequentially-consistent
    // cross-thread fence.
    if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
        FenceSSID == SyncScope::System)
      return Op;

    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
  }

  // singlethread fences only synchronize with signal handlers on the same
  // thread and thus only need to preserve instruction order, not actually
  // enforce memory ordering.
  if (FenceSSID == SyncScope::SingleThread)
    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));

  // All other fences are kept and selected to a real fence instruction.
  return Op;
}
6085
// Lower ISD::IS_FPCLASS (and VP_IS_FPCLASS) to the fclass/vfclass
// instructions: translate the IR fcXXX test mask into the RISC-V fclass
// result-bit mask, classify, then test the relevant bits.
SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned Check = Op.getConstantOperandVal(1);
  unsigned TDCMask = 0;
  if (Check & fcSNan)
    TDCMask |= RISCV::FPMASK_Signaling_NaN;
  if (Check & fcQNan)
    TDCMask |= RISCV::FPMASK_Quiet_NaN;
  // NOTE(review): the FPMASK assignments for the six cases below were lost in
  // this listing; presumably the matching RISCV::FPMASK_* OR-ins — confirm
  // against upstream.
  if (Check & fcPosInf)
  if (Check & fcNegInf)
  if (Check & fcPosNormal)
  if (Check & fcNegNormal)
  if (Check & fcPosSubnormal)
  if (Check & fcNegSubnormal)
  if (Check & fcPosZero)
    TDCMask |= RISCV::FPMASK_Positive_Zero;
  if (Check & fcNegZero)
    TDCMask |= RISCV::FPMASK_Negative_Zero;

  // A single-bit mask lets us use an equality compare instead of AND+NE.
  bool IsOneBitMask = isPowerOf2_32(TDCMask);

  SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);

  if (VT.isVector()) {
    SDValue Op0 = Op.getOperand(0);
    MVT VT0 = Op.getOperand(0).getSimpleValueType();

    if (VT.isScalableVector()) {
      // NOTE(review): the definition of DstVT was lost in this listing;
      // presumably VT0 with its element type changed to integer — confirm
      // against upstream.
      auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
      if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
        Mask = Op.getOperand(2);
        VL = Op.getOperand(3);
      }
      SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
                                    VL, Op->getFlags());
      // NOTE(review): the trailing condition-code argument of this getSetCC
      // call was lost in this listing — confirm against upstream.
      if (IsOneBitMask)
        return DAG.getSetCC(DL, VT, FPCLASS,
                            DAG.getConstant(TDCMask, DL, DstVT),
      SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
                                DAG.getConstant(TDCMask, DL, DstVT));
      return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
                          ISD::SETNE);
    }

    // Fixed-length path: classify in the scalable container types.
    MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
    MVT ContainerVT = getContainerForFixedLengthVector(VT);
    MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
    auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
    if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
      Mask = Op.getOperand(2);
      MVT MaskContainerVT =
          getContainerForFixedLengthVector(Mask.getSimpleValueType());
      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
      VL = Op.getOperand(3);
    }
    Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);

    SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
                                  Mask, VL, Op->getFlags());

    // Splat the test mask across the vector for the compare/AND below.
    TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
                           DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
    if (IsOneBitMask) {
      SDValue VMSEQ =
          DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
                      {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
                       DAG.getUNDEF(ContainerVT), Mask, VL});
      return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
    }
    SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
                              TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);

    SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
    SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
                            DAG.getUNDEF(ContainerDstVT), SplatZero, VL);

    SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
                                {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
                                 DAG.getUNDEF(ContainerVT), Mask, VL});
    return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
  }

  // Scalar path: fclass into XLenVT, then mask-test the classification bits.
  // NOTE(review): the trailing condition-code argument of this getSetCC call
  // was lost in this listing — confirm against upstream.
  SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
  SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
  SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
  return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
}
6185
// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
// operations propagate nans.
// NOTE(review): the first line of this static helper's signature was lost in
// this listing; presumably
// static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, ...) —
// confirm against upstream.
                                        const RISCVSubtarget &Subtarget) {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  SDValue X = Op.getOperand(0);
  SDValue Y = Op.getOperand(1);

  if (!VT.isVector()) {
    MVT XLenVT = Subtarget.getXLenVT();

    // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
    // ensures that when one input is a nan, the other will also be a nan
    // allowing the nan to propagate. If both inputs are nan, this will swap the
    // inputs which is harmless.

    SDValue NewY = Y;
    if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
      // X == X (ordered-equal) is false exactly when X is NaN.
      SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
      NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
    }

    SDValue NewX = X;
    if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
      SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
      NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
    }

    unsigned Opc =
        Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
    return DAG.getNode(Opc, DL, VT, NewX, NewY);
  }

  // Check no NaNs before converting to fixed vector scalable.
  bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
  bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
    X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
    Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
  }

  // VP ops supply mask/VL as operands 2 and 3; otherwise use defaults.
  SDValue Mask, VL;
  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(2);
    if (VT.isFixedLengthVector())
      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
                                     Subtarget);
    VL = Op.getOperand(3);
  } else {
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  }

  // Same NaN-swapping trick as the scalar path, done with vmerge.
  SDValue NewY = Y;
  if (!XIsNeverNan) {
    SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
                                    {X, X, DAG.getCondCode(ISD::SETOEQ),
                                     DAG.getUNDEF(ContainerVT), Mask, VL});
    NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
                       DAG.getUNDEF(ContainerVT), VL);
  }

  SDValue NewX = X;
  if (!YIsNeverNan) {
    SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
                                    {Y, Y, DAG.getCondCode(ISD::SETOEQ),
                                     DAG.getUNDEF(ContainerVT), Mask, VL});
    NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
                       DAG.getUNDEF(ContainerVT), VL);
  }

  // NOTE(review): the two result lines of this opcode selection were lost in
  // this listing; presumably VFMAX_VL / VFMIN_VL — confirm against upstream.
  unsigned Opc =
      Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
  SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
                            DAG.getUNDEF(ContainerVT), Mask, VL);
  if (VT.isFixedLengthVector())
    Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
  return Res;
}
6271
// NOTE(review): the first line of this static helper's signature was lost in
// this listing; presumably
// static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, ...) —
// confirm against upstream. Lowers scalar f16/bf16 FABS/FNEG by moving the
// bits to a GPR, masking/flipping the sign bit, and moving back.
                                const RISCVSubtarget &Subtarget) {
  bool IsFABS = Op.getOpcode() == ISD::FABS;
  assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
         "Wrong opcode for lowering FABS or FNEG.");

  MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");

  SDLoc DL(Op);
  // Move the 16-bit FP value into an integer register (any-extended).
  SDValue Fmv =
      DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));

  // FABS clears the sign bit (AND with 0x7fff); FNEG flips it (XOR with
  // 0x8000). Sign-extend the 16-bit mask to XLen.
  APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
  Mask = Mask.sext(Subtarget.getXLen());

  unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
  SDValue Logic =
      DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
  return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
}
6294
// NOTE(review): the first line of this static helper's signature was lost in
// this listing; presumably
// static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, ...) —
// confirm against upstream. Lowers f16/bf16 FCOPYSIGN entirely in integer
// registers: extract the sign bit of Sign, clear the sign bit of Mag, OR
// them together, and move back to FP.
                              const RISCVSubtarget &Subtarget) {
  assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");

  MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");

  SDValue Mag = Op.getOperand(0);
  SDValue Sign = Op.getOperand(1);

  SDLoc DL(Op);

  // Get sign bit into an integer value.
  SDValue SignAsInt;
  unsigned SignSize = Sign.getValueSizeInBits();
  if (SignSize == Subtarget.getXLen()) {
    SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
  } else if (SignSize == 16) {
    SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
  } else if (SignSize == 32) {
    SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
  } else if (SignSize == 64) {
    assert(XLenVT == MVT::i32 && "Unexpected type");
    // Copy the upper word to integer.
    SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
                    .getValue(1);
    SignSize = 32;
  } else
    llvm_unreachable("Unexpected sign size");

  // Get the signbit at the right position for MagAsInt.
  int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
  if (ShiftAmount > 0) {
    SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
                            DAG.getConstant(ShiftAmount, DL, XLenVT));
  } else if (ShiftAmount < 0) {
    SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
                            DAG.getConstant(-ShiftAmount, DL, XLenVT));
  }

  // Mask the sign bit and any bits above it. The extra bits will be dropped
  // when we convert back to FP.
  SDValue SignMask = DAG.getConstant(
      APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
  SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);

  // Transform Mag value to integer, and clear the sign bit.
  SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
  SDValue ClearSignMask = DAG.getConstant(
      APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
  SDValue ClearedSign =
      DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);

  // NOTE(review): the trailing argument of this OR (after SignBit) was lost
  // in this listing; presumably a node-flags argument — confirm against
  // upstream.
  SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,

  return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
}
6354
/// Get a RISC-V target specified VL op for a given SDNode.
/// Maps a generic ISD (or VP_*) opcode to the corresponding RISCVISD *_VL
/// node used by the fixed-length/VP lowering paths. Unhandled opcodes are a
/// programming error (llvm_unreachable).
static unsigned getRISCVVLOp(SDValue Op) {
// OP_CASE maps ISD::NODE -> RISCVISD::NODE_VL; VP_CASE maps the predicated
// ISD::VP_NODE to the same RISCVISD::NODE_VL.
#define OP_CASE(NODE)                                                          \
  case ISD::NODE:                                                              \
    return RISCVISD::NODE##_VL;
#define VP_CASE(NODE)                                                          \
  case ISD::VP_##NODE:                                                         \
    return RISCVISD::NODE##_VL;
  // clang-format off
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
  OP_CASE(ADD)
  OP_CASE(SUB)
  OP_CASE(MUL)
  OP_CASE(MULHS)
  OP_CASE(MULHU)
  OP_CASE(SDIV)
  OP_CASE(SREM)
  OP_CASE(UDIV)
  OP_CASE(UREM)
  OP_CASE(SHL)
  OP_CASE(SRA)
  OP_CASE(SRL)
  OP_CASE(ROTL)
  OP_CASE(ROTR)
  OP_CASE(BSWAP)
  OP_CASE(CTTZ)
  OP_CASE(CTLZ)
  OP_CASE(CTPOP)
  OP_CASE(BITREVERSE)
  OP_CASE(SADDSAT)
  OP_CASE(UADDSAT)
  OP_CASE(SSUBSAT)
  OP_CASE(USUBSAT)
  OP_CASE(AVGFLOORS)
  OP_CASE(AVGFLOORU)
  OP_CASE(AVGCEILS)
  OP_CASE(AVGCEILU)
  OP_CASE(FADD)
  OP_CASE(FSUB)
  OP_CASE(FMUL)
  OP_CASE(FDIV)
  OP_CASE(FNEG)
  OP_CASE(FABS)
  OP_CASE(FSQRT)
  OP_CASE(SMIN)
  OP_CASE(SMAX)
  OP_CASE(UMIN)
  OP_CASE(UMAX)
  OP_CASE(STRICT_FADD)
  OP_CASE(STRICT_FSUB)
  OP_CASE(STRICT_FMUL)
  OP_CASE(STRICT_FDIV)
  OP_CASE(STRICT_FSQRT)
  VP_CASE(ADD)        // VP_ADD
  VP_CASE(SUB)        // VP_SUB
  VP_CASE(MUL)        // VP_MUL
  VP_CASE(SDIV)       // VP_SDIV
  VP_CASE(SREM)       // VP_SREM
  VP_CASE(UDIV)       // VP_UDIV
  VP_CASE(UREM)       // VP_UREM
  VP_CASE(SHL)        // VP_SHL
  VP_CASE(FADD)       // VP_FADD
  VP_CASE(FSUB)       // VP_FSUB
  VP_CASE(FMUL)       // VP_FMUL
  VP_CASE(FDIV)       // VP_FDIV
  VP_CASE(FNEG)       // VP_FNEG
  VP_CASE(FABS)       // VP_FABS
  VP_CASE(SMIN)       // VP_SMIN
  VP_CASE(SMAX)       // VP_SMAX
  VP_CASE(UMIN)       // VP_UMIN
  VP_CASE(UMAX)       // VP_UMAX
  VP_CASE(FCOPYSIGN)  // VP_FCOPYSIGN
  VP_CASE(SETCC)      // VP_SETCC
  VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
  VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
  VP_CASE(BITREVERSE) // VP_BITREVERSE
  VP_CASE(SADDSAT)    // VP_SADDSAT
  VP_CASE(UADDSAT)    // VP_UADDSAT
  VP_CASE(SSUBSAT)    // VP_SSUBSAT
  VP_CASE(USUBSAT)    // VP_USUBSAT
  VP_CASE(BSWAP)      // VP_BSWAP
  VP_CASE(CTLZ)       // VP_CTLZ
  VP_CASE(CTTZ)       // VP_CTTZ
  VP_CASE(CTPOP)      // VP_CTPOP
  // NOTE(review): several case labels / return lines in the region below were
  // lost in this listing (e.g. the non-VP *_ZERO_UNDEF case labels, the
  // STRICT_FMA return, and the FP_TO_SINT/UINT and LRINT returns) — confirm
  // against upstream.
  case ISD::VP_CTLZ_ZERO_UNDEF:
    return RISCVISD::CTLZ_VL;
  case ISD::VP_CTTZ_ZERO_UNDEF:
    return RISCVISD::CTTZ_VL;
  case ISD::FMA:
  case ISD::VP_FMA:
    return RISCVISD::VFMADD_VL;
  case ISD::STRICT_FMA:
  case ISD::AND:
  case ISD::VP_AND:
    // i1 vectors use mask-register logic ops instead of the integer VL form.
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return RISCVISD::VMAND_VL;
    return RISCVISD::AND_VL;
  case ISD::OR:
  case ISD::VP_OR:
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return RISCVISD::VMOR_VL;
    return RISCVISD::OR_VL;
  case ISD::XOR:
  case ISD::VP_XOR:
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return RISCVISD::VMXOR_VL;
    return RISCVISD::XOR_VL;
  case ISD::VP_SELECT:
  case ISD::VP_MERGE:
    return RISCVISD::VMERGE_VL;
  case ISD::VP_SRA:
    return RISCVISD::SRA_VL;
  case ISD::VP_SRL:
    return RISCVISD::SRL_VL;
  case ISD::VP_SQRT:
    return RISCVISD::FSQRT_VL;
  case ISD::VP_SIGN_EXTEND:
    return RISCVISD::VSEXT_VL;
  case ISD::VP_ZERO_EXTEND:
    return RISCVISD::VZEXT_VL;
  case ISD::VP_FP_TO_SINT:
  case ISD::VP_FP_TO_UINT:
  case ISD::FMINNUM:
  case ISD::VP_FMINNUM:
    return RISCVISD::VFMIN_VL;
  case ISD::FMAXNUM:
  case ISD::VP_FMAXNUM:
    return RISCVISD::VFMAX_VL;
  case ISD::LRINT:
  case ISD::VP_LRINT:
  case ISD::LLRINT:
  case ISD::VP_LLRINT:
  }
  // clang-format on
#undef OP_CASE
#undef VP_CASE
}
6500
/// Return true if a RISC-V target specified op has a passthru operand.
/// Checked by range over contiguous blocks of RISCVISD opcodes, so new target
/// ops must keep the enum layout (or update this function).
static bool hasPassthruOp(unsigned Opcode) {
  // NOTE(review): the second half of this assert's condition and the numeric
  // conditions of the static_assert below were lost in this listing —
  // confirm against upstream.
  assert(Opcode > RISCVISD::FIRST_NUMBER &&
         "not a RISC-V target specific op");
  static_assert(
      "adding target specific op should update this function");
  if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
    return true;
  if (Opcode == RISCVISD::FCOPYSIGN_VL)
    return true;
  if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
    return true;
  if (Opcode == RISCVISD::SETCC_VL)
    return true;
  if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
    return true;
  if (Opcode == RISCVISD::VMERGE_VL)
    return true;
  return false;
}
6524
/// Return true if a RISC-V target specified op has a mask operand.
/// Like hasPassthruOp, relies on the RISCVISD enum keeping related opcodes
/// contiguous.
static bool hasMaskOp(unsigned Opcode) {
  // NOTE(review): the second half of this assert's condition, the numeric
  // conditions of the static_assert, and the upper bound of the final range
  // check were lost in this listing — confirm against upstream.
  assert(Opcode > RISCVISD::FIRST_NUMBER &&
         "not a RISC-V target specific op");
  static_assert(
      "adding target specific op should update this function");
  if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
    return true;
  if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
    return true;
  if (Opcode >= RISCVISD::STRICT_FADD_VL &&
    return true;
  return false;
}
6543
// NOTE(review): the first line of this static helper's signature was lost in
// this listing; presumably
// static bool isPromotedOpNeedingSplit(SDValue Op, ...) — confirm against
// upstream. Returns true for nxv32f16 without full f16 vector support, and
// for nxv32bf16; such ops must be split before element-type promotion since
// the promoted (f32) type would exceed the largest legal vector.
                                     const RISCVSubtarget &Subtarget) {
  if (Op.getValueType() == MVT::nxv32f16 &&
      (Subtarget.hasVInstructionsF16Minimal() &&
       !Subtarget.hasVInstructionsF16()))
    return true;
  if (Op.getValueType() == MVT::nxv32bf16)
    return true;
  return false;
}
6554
// NOTE(review): the signature line and the two operand-array declarations of
// this static helper were lost in this listing; presumably
// static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) with two
// SmallVector<SDValue> operand lists — confirm against upstream.
// Splits a vector operation in half: each vector operand is split into
// lo/hi, scalar operands are duplicated, and the results re-concatenated.
  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
  SDLoc DL(Op);


  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
    // Non-vector operands (scalars, condition codes, ...) are shared by both
    // halves.
    if (!Op.getOperand(j).getValueType().isVector()) {
      LoOperands[j] = Op.getOperand(j);
      HiOperands[j] = Op.getOperand(j);
      continue;
    }
    std::tie(LoOperands[j], HiOperands[j]) =
        DAG.SplitVector(Op.getOperand(j), DL);
  }

  SDValue LoRes =
      DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
  SDValue HiRes =
      DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
}
6579
// NOTE(review): the signature line and the two operand-array declarations of
// this static helper were lost in this listing; presumably
// static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) with two
// SmallVector<SDValue> operand lists — confirm against upstream.
// Like SplitVectorOp, but for VP nodes: the explicit-vector-length operand
// must be split with SplitEVL rather than duplicated or vector-split.
  assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
  SDLoc DL(Op);


  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
    // The EVL operand gets its own splitting rule.
    if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
      std::tie(LoOperands[j], HiOperands[j]) =
          DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
      continue;
    }
    // Scalars are shared; vectors are split lo/hi.
    if (!Op.getOperand(j).getValueType().isVector()) {
      LoOperands[j] = Op.getOperand(j);
      HiOperands[j] = Op.getOperand(j);
      continue;
    }
    std::tie(LoOperands[j], HiOperands[j]) =
        DAG.SplitVector(Op.getOperand(j), DL);
  }

  SDValue LoRes =
      DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
  SDValue HiRes =
      DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
}
6610
  // Split a VP reduction into two chained half-width reductions.
  // Operand layout (as used below): 0 = start value, 1 = source vector,
  // 2 = mask, 3 = EVL. The low half reduces with the original start value;
  // its result then becomes the start value of the high-half reduction, so
  // the final node yields the reduction over the whole vector.
  // NOTE(review): the function's signature line was dropped during
  // extraction — confirm against the upstream source.
  SDLoc DL(Op);

  auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
  auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
  // The EVL is divided between the halves based on the source vector type.
  auto [EVLLo, EVLHi] =
      DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);

  SDValue ResLo =
      DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                  {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
  // Chain the halves: ResLo is the accumulator feeding the high half.
  return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                     {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
}
6625
  // Split a strict-FP vector operation (which produces {value, chain}) into
  // low/high halves. The halves must be sequenced through the FP exception
  // chain: the low half's output chain feeds the high half's input chain,
  // and the merged result pairs the concatenated value with the high half's
  // final chain.
  // NOTE(review): the function's signature line and the declarations of
  // LoOperands/HiOperands were dropped during extraction — confirm against
  // the upstream source.

  assert(Op->isStrictFPOpcode());

  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));

  // Each half keeps the second result type (the chain, Op->getValueType(1)).
  SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
  SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));

  SDLoc DL(Op);

  // Partition operands: vectors split in half, scalars (including the
  // incoming chain at index 0) duplicated into both lists.
  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
    if (!Op.getOperand(j).getValueType().isVector()) {
      LoOperands[j] = Op.getOperand(j);
      HiOperands[j] = Op.getOperand(j);
      continue;
    }
    std::tie(LoOperands[j], HiOperands[j]) =
        DAG.SplitVector(Op.getOperand(j), DL);
  }

  SDValue LoRes =
      DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
  // Thread the low half's output chain into the high half so the two
  // strict-FP operations are ordered with respect to each other.
  HiOperands[0] = LoRes.getValue(1);
  SDValue HiRes =
      DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());

  // Merge: concatenated value result plus the final (high half) chain.
  SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
                          LoRes.getValue(0), HiRes.getValue(0));
  return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
}
6660
6662 SelectionDAG &DAG) const {
6663 switch (Op.getOpcode()) {
6664 default:
6665 report_fatal_error("unimplemented operand");
6666 case ISD::ATOMIC_FENCE:
6667 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6668 case ISD::GlobalAddress:
6669 return lowerGlobalAddress(Op, DAG);
6670 case ISD::BlockAddress:
6671 return lowerBlockAddress(Op, DAG);
6672 case ISD::ConstantPool:
6673 return lowerConstantPool(Op, DAG);
6674 case ISD::JumpTable:
6675 return lowerJumpTable(Op, DAG);
6677 return lowerGlobalTLSAddress(Op, DAG);
6678 case ISD::Constant:
6679 return lowerConstant(Op, DAG, Subtarget);
6680 case ISD::ConstantFP:
6681 return lowerConstantFP(Op, DAG);
6682 case ISD::SELECT:
6683 return lowerSELECT(Op, DAG);
6684 case ISD::BRCOND:
6685 return lowerBRCOND(Op, DAG);
6686 case ISD::VASTART:
6687 return lowerVASTART(Op, DAG);
6688 case ISD::FRAMEADDR:
6689 return lowerFRAMEADDR(Op, DAG);
6690 case ISD::RETURNADDR:
6691 return lowerRETURNADDR(Op, DAG);
6692 case ISD::SHL_PARTS:
6693 return lowerShiftLeftParts(Op, DAG);
6694 case ISD::SRA_PARTS:
6695 return lowerShiftRightParts(Op, DAG, true);
6696 case ISD::SRL_PARTS:
6697 return lowerShiftRightParts(Op, DAG, false);
6698 case ISD::ROTL:
6699 case ISD::ROTR:
6700 if (Op.getValueType().isFixedLengthVector()) {
6701 assert(Subtarget.hasStdExtZvkb());
6702 return lowerToScalableOp(Op, DAG);
6703 }
6704 assert(Subtarget.hasVendorXTHeadBb() &&
6705 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6706 "Unexpected custom legalization");
6707 // XTHeadBb only supports rotate by constant.
6708 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6709 return SDValue();
6710 return Op;
6711 case ISD::BITCAST: {
6712 SDLoc DL(Op);
6713 EVT VT = Op.getValueType();
6714 SDValue Op0 = Op.getOperand(0);
6715 EVT Op0VT = Op0.getValueType();
6716 MVT XLenVT = Subtarget.getXLenVT();
6717 if (Op0VT == MVT::i16 &&
6718 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
6719 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6720 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6721 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
6722 }
6723 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6724 Subtarget.hasStdExtFOrZfinx()) {
6725 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6726 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6727 }
6728 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
6729 Subtarget.hasStdExtDOrZdinx()) {
6730 SDValue Lo, Hi;
6731 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6732 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6733 }
6734
6735 // Consider other scalar<->scalar casts as legal if the types are legal.
6736 // Otherwise expand them.
6737 if (!VT.isVector() && !Op0VT.isVector()) {
6738 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6739 return Op;
6740 return SDValue();
6741 }
6742
6743 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6744 "Unexpected types");
6745
6746 if (VT.isFixedLengthVector()) {
6747 // We can handle fixed length vector bitcasts with a simple replacement
6748 // in isel.
6749 if (Op0VT.isFixedLengthVector())
6750 return Op;
6751 // When bitcasting from scalar to fixed-length vector, insert the scalar
6752 // into a one-element vector of the result type, and perform a vector
6753 // bitcast.
6754 if (!Op0VT.isVector()) {
6755 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6756 if (!isTypeLegal(BVT))
6757 return SDValue();
6758 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6759 DAG.getUNDEF(BVT), Op0,
6760 DAG.getVectorIdxConstant(0, DL)));
6761 }
6762 return SDValue();
6763 }
6764 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6765 // thus: bitcast the vector to a one-element vector type whose element type
6766 // is the same as the result type, and extract the first element.
6767 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6768 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6769 if (!isTypeLegal(BVT))
6770 return SDValue();
6771 SDValue BVec = DAG.getBitcast(BVT, Op0);
6772 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6773 DAG.getVectorIdxConstant(0, DL));
6774 }
6775 return SDValue();
6776 }
6778 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6780 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6782 return LowerINTRINSIC_VOID(Op, DAG);
6783 case ISD::IS_FPCLASS:
6784 return LowerIS_FPCLASS(Op, DAG);
6785 case ISD::BITREVERSE: {
6786 MVT VT = Op.getSimpleValueType();
6787 if (VT.isFixedLengthVector()) {
6788 assert(Subtarget.hasStdExtZvbb());
6789 return lowerToScalableOp(Op, DAG);
6790 }
6791 SDLoc DL(Op);
6792 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6793 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6794 // Expand bitreverse to a bswap(rev8) followed by brev8.
6795 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6796 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6797 }
6798 case ISD::TRUNCATE:
6801 // Only custom-lower vector truncates
6802 if (!Op.getSimpleValueType().isVector())
6803 return Op;
6804 return lowerVectorTruncLike(Op, DAG);
6805 case ISD::ANY_EXTEND:
6806 case ISD::ZERO_EXTEND:
6807 if (Op.getOperand(0).getValueType().isVector() &&
6808 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6809 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6810 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6811 case ISD::SIGN_EXTEND:
6812 if (Op.getOperand(0).getValueType().isVector() &&
6813 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6814 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6815 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6817 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6819 return lowerINSERT_VECTOR_ELT(Op, DAG);
6821 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6822 case ISD::SCALAR_TO_VECTOR: {
6823 MVT VT = Op.getSimpleValueType();
6824 SDLoc DL(Op);
6825 SDValue Scalar = Op.getOperand(0);
6826 if (VT.getVectorElementType() == MVT::i1) {
6827 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6828 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6829 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6830 }
6831 MVT ContainerVT = VT;
6832 if (VT.isFixedLengthVector())
6833 ContainerVT = getContainerForFixedLengthVector(VT);
6834 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6835
6836 SDValue V;
6837 if (VT.isFloatingPoint()) {
6838 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
6839 DAG.getUNDEF(ContainerVT), Scalar, VL);
6840 } else {
6841 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6842 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6843 DAG.getUNDEF(ContainerVT), Scalar, VL);
6844 }
6845 if (VT.isFixedLengthVector())
6846 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6847 return V;
6848 }
6849 case ISD::VSCALE: {
6850 MVT XLenVT = Subtarget.getXLenVT();
6851 MVT VT = Op.getSimpleValueType();
6852 SDLoc DL(Op);
6853 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6854 // We define our scalable vector types for lmul=1 to use a 64 bit known
6855 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6856 // vscale as VLENB / 8.
6857 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6858 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6859 report_fatal_error("Support for VLEN==32 is incomplete.");
6860 // We assume VLENB is a multiple of 8. We manually choose the best shift
6861 // here because SimplifyDemandedBits isn't always able to simplify it.
6862 uint64_t Val = Op.getConstantOperandVal(0);
6863 if (isPowerOf2_64(Val)) {
6864 uint64_t Log2 = Log2_64(Val);
6865 if (Log2 < 3)
6866 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6867 DAG.getConstant(3 - Log2, DL, VT));
6868 else if (Log2 > 3)
6869 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6870 DAG.getConstant(Log2 - 3, DL, XLenVT));
6871 } else if ((Val % 8) == 0) {
6872 // If the multiplier is a multiple of 8, scale it down to avoid needing
6873 // to shift the VLENB value.
6874 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6875 DAG.getConstant(Val / 8, DL, XLenVT));
6876 } else {
6877 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6878 DAG.getConstant(3, DL, XLenVT));
6879 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6880 DAG.getConstant(Val, DL, XLenVT));
6881 }
6882 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6883 }
6884 case ISD::FPOWI: {
6885 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6886 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6887 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6888 Op.getOperand(1).getValueType() == MVT::i32) {
6889 SDLoc DL(Op);
6890 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6891 SDValue Powi =
6892 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6893 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6894 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6895 }
6896 return SDValue();
6897 }
6898 case ISD::FMAXIMUM:
6899 case ISD::FMINIMUM:
6900 if (isPromotedOpNeedingSplit(Op, Subtarget))
6901 return SplitVectorOp(Op, DAG);
6902 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6903 case ISD::FP_EXTEND:
6904 case ISD::FP_ROUND:
6905 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6908 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6909 case ISD::SINT_TO_FP:
6910 case ISD::UINT_TO_FP:
6911 if (Op.getValueType().isVector() &&
6912 ((Op.getValueType().getScalarType() == MVT::f16 &&
6913 (Subtarget.hasVInstructionsF16Minimal() &&
6914 !Subtarget.hasVInstructionsF16())) ||
6915 Op.getValueType().getScalarType() == MVT::bf16)) {
6916 if (isPromotedOpNeedingSplit(Op, Subtarget))
6917 return SplitVectorOp(Op, DAG);
6918 // int -> f32
6919 SDLoc DL(Op);
6920 MVT NVT =
6921 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6922 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6923 // f32 -> [b]f16
6924 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6925 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6926 }
6927 [[fallthrough]];
6928 case ISD::FP_TO_SINT:
6929 case ISD::FP_TO_UINT:
6930 if (SDValue Op1 = Op.getOperand(0);
6931 Op1.getValueType().isVector() &&
6932 ((Op1.getValueType().getScalarType() == MVT::f16 &&
6933 (Subtarget.hasVInstructionsF16Minimal() &&
6934 !Subtarget.hasVInstructionsF16())) ||
6935 Op1.getValueType().getScalarType() == MVT::bf16)) {
6936 if (isPromotedOpNeedingSplit(Op1, Subtarget))
6937 return SplitVectorOp(Op, DAG);
6938 // [b]f16 -> f32
6939 SDLoc DL(Op);
6940 MVT NVT = MVT::getVectorVT(MVT::f32,
6941 Op1.getValueType().getVectorElementCount());
6942 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6943 // f32 -> int
6944 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6945 }
6946 [[fallthrough]];
6951 // RVV can only do fp<->int conversions to types half/double the size as
6952 // the source. We custom-lower any conversions that do two hops into
6953 // sequences.
6954 MVT VT = Op.getSimpleValueType();
6955 if (VT.isScalarInteger())
6956 return lowerFP_TO_INT(Op, DAG, Subtarget);
6957 bool IsStrict = Op->isStrictFPOpcode();
6958 SDValue Src = Op.getOperand(0 + IsStrict);
6959 MVT SrcVT = Src.getSimpleValueType();
6960 if (SrcVT.isScalarInteger())
6961 return lowerINT_TO_FP(Op, DAG, Subtarget);
6962 if (!VT.isVector())
6963 return Op;
6964 SDLoc DL(Op);
6965 MVT EltVT = VT.getVectorElementType();
6966 MVT SrcEltVT = SrcVT.getVectorElementType();
6967 unsigned EltSize = EltVT.getSizeInBits();
6968 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6969 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6970 "Unexpected vector element types");
6971
6972 bool IsInt2FP = SrcEltVT.isInteger();
6973 // Widening conversions
6974 if (EltSize > (2 * SrcEltSize)) {
6975 if (IsInt2FP) {
6976 // Do a regular integer sign/zero extension then convert to float.
6977 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6979 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6980 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6983 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6984 if (IsStrict)
6985 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6986 Op.getOperand(0), Ext);
6987 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6988 }
6989 // FP2Int
6990 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6991 // Do one doubling fp_extend then complete the operation by converting
6992 // to int.
6993 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6994 if (IsStrict) {
6995 auto [FExt, Chain] =
6996 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6997 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6998 }
6999 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7000 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7001 }
7002
7003 // Narrowing conversions
7004 if (SrcEltSize > (2 * EltSize)) {
7005 if (IsInt2FP) {
7006 // One narrowing int_to_fp, then an fp_round.
7007 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7008 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7009 if (IsStrict) {
7010 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7011 DAG.getVTList(InterimFVT, MVT::Other),
7012 Op.getOperand(0), Src);
7013 SDValue Chain = Int2FP.getValue(1);
7014 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7015 }
7016 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7017 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7018 }
7019 // FP2Int
7020 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7021 // representable by the integer, the result is poison.
7022 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7024 if (IsStrict) {
7025 SDValue FP2Int =
7026 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7027 Op.getOperand(0), Src);
7028 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7029 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7030 }
7031 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7032 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7033 }
7034
7035 // Scalable vectors can exit here. Patterns will handle equally-sized
7036 // conversions halving/doubling ones.
7037 if (!VT.isFixedLengthVector())
7038 return Op;
7039
7040 // For fixed-length vectors we lower to a custom "VL" node.
7041 unsigned RVVOpc = 0;
7042 switch (Op.getOpcode()) {
7043 default:
7044 llvm_unreachable("Impossible opcode");
7045 case ISD::FP_TO_SINT:
7047 break;
7048 case ISD::FP_TO_UINT:
7050 break;
7051 case ISD::SINT_TO_FP:
7052 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7053 break;
7054 case ISD::UINT_TO_FP:
7055 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7056 break;
7059 break;
7062 break;
7065 break;
7068 break;
7069 }
7070
7071 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7072 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7073 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7074 "Expected same element count");
7075
7076 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7077
7078 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7079 if (IsStrict) {
7080 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7081 Op.getOperand(0), Src, Mask, VL);
7082 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7083 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7084 }
7085 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7086 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7087 }
7090 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7091 case ISD::FP_TO_BF16: {
7092 // Custom lower to ensure the libcall return is passed in an FPR on hard
7093 // float ABIs.
7094 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7095 SDLoc DL(Op);
7096 MakeLibCallOptions CallOptions;
7097 RTLIB::Libcall LC =
7098 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7099 SDValue Res =
7100 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7101 if (Subtarget.is64Bit())
7102 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7103 return DAG.getBitcast(MVT::i32, Res);
7104 }
7105 case ISD::BF16_TO_FP: {
7106 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7107 MVT VT = Op.getSimpleValueType();
7108 SDLoc DL(Op);
7109 Op = DAG.getNode(
7110 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7111 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7112 SDValue Res = Subtarget.is64Bit()
7113 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7114 : DAG.getBitcast(MVT::f32, Op);
7115 // fp_extend if the target VT is bigger than f32.
7116 if (VT != MVT::f32)
7117 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7118 return Res;
7119 }
7121 case ISD::FP_TO_FP16: {
7122 // Custom lower to ensure the libcall return is passed in an FPR on hard
7123 // float ABIs.
7124 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7125 SDLoc DL(Op);
7126 MakeLibCallOptions CallOptions;
7127 bool IsStrict = Op->isStrictFPOpcode();
7128 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7129 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7130 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7131 SDValue Res;
7132 std::tie(Res, Chain) =
7133 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7134 if (Subtarget.is64Bit())
7135 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7136 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7137 if (IsStrict)
7138 return DAG.getMergeValues({Result, Chain}, DL);
7139 return Result;
7140 }
7142 case ISD::FP16_TO_FP: {
7143 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7144 // float ABIs.
7145 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7146 SDLoc DL(Op);
7147 MakeLibCallOptions CallOptions;
7148 bool IsStrict = Op->isStrictFPOpcode();
7149 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7150 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7151 SDValue Arg = Subtarget.is64Bit()
7152 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7153 : DAG.getBitcast(MVT::f32, Op0);
7154 SDValue Res;
7155 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7156 CallOptions, DL, Chain);
7157 if (IsStrict)
7158 return DAG.getMergeValues({Res, Chain}, DL);
7159 return Res;
7160 }
7161 case ISD::FTRUNC:
7162 case ISD::FCEIL:
7163 case ISD::FFLOOR:
7164 case ISD::FNEARBYINT:
7165 case ISD::FRINT:
7166 case ISD::FROUND:
7167 case ISD::FROUNDEVEN:
7168 if (isPromotedOpNeedingSplit(Op, Subtarget))
7169 return SplitVectorOp(Op, DAG);
7170 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7171 case ISD::LRINT:
7172 case ISD::LLRINT:
7173 if (Op.getValueType().isVector())
7174 return lowerVectorXRINT(Op, DAG, Subtarget);
7175 [[fallthrough]];
7176 case ISD::LROUND:
7177 case ISD::LLROUND: {
7178 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7179 "Unexpected custom legalisation");
7180 SDLoc DL(Op);
7181 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7182 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7183 }
7184 case ISD::STRICT_LRINT:
7185 case ISD::STRICT_LLRINT:
7186 case ISD::STRICT_LROUND:
7187 case ISD::STRICT_LLROUND: {
7188 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7189 "Unexpected custom legalisation");
7190 SDLoc DL(Op);
7191 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7192 {Op.getOperand(0), Op.getOperand(1)});
7193 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7194 {Ext.getValue(1), Ext.getValue(0)});
7195 }
7196 case ISD::VECREDUCE_ADD:
7201 return lowerVECREDUCE(Op, DAG);
7202 case ISD::VECREDUCE_AND:
7203 case ISD::VECREDUCE_OR:
7204 case ISD::VECREDUCE_XOR:
7205 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7206 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7207 return lowerVECREDUCE(Op, DAG);
7214 return lowerFPVECREDUCE(Op, DAG);
7215 case ISD::VP_REDUCE_ADD:
7216 case ISD::VP_REDUCE_UMAX:
7217 case ISD::VP_REDUCE_SMAX:
7218 case ISD::VP_REDUCE_UMIN:
7219 case ISD::VP_REDUCE_SMIN:
7220 case ISD::VP_REDUCE_FADD:
7221 case ISD::VP_REDUCE_SEQ_FADD:
7222 case ISD::VP_REDUCE_FMIN:
7223 case ISD::VP_REDUCE_FMAX:
7224 case ISD::VP_REDUCE_FMINIMUM:
7225 case ISD::VP_REDUCE_FMAXIMUM:
7226 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7227 return SplitVectorReductionOp(Op, DAG);
7228 return lowerVPREDUCE(Op, DAG);
7229 case ISD::VP_REDUCE_AND:
7230 case ISD::VP_REDUCE_OR:
7231 case ISD::VP_REDUCE_XOR:
7232 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7233 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7234 return lowerVPREDUCE(Op, DAG);
7235 case ISD::VP_CTTZ_ELTS:
7236 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7237 return lowerVPCttzElements(Op, DAG);
7238 case ISD::UNDEF: {
7239 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7240 return convertFromScalableVector(Op.getSimpleValueType(),
7241 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7242 }
7244 return lowerINSERT_SUBVECTOR(Op, DAG);
7246 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7248 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7250 return lowerVECTOR_INTERLEAVE(Op, DAG);
7251 case ISD::STEP_VECTOR:
7252 return lowerSTEP_VECTOR(Op, DAG);
7254 return lowerVECTOR_REVERSE(Op, DAG);
7255 case ISD::VECTOR_SPLICE:
7256 return lowerVECTOR_SPLICE(Op, DAG);
7257 case ISD::BUILD_VECTOR:
7258 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7259 case ISD::SPLAT_VECTOR: {
7260 MVT VT = Op.getSimpleValueType();
7261 MVT EltVT = VT.getVectorElementType();
7262 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7263 EltVT == MVT::bf16) {
7264 SDLoc DL(Op);
7265 SDValue Elt;
7266 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7267 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7268 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7269 Op.getOperand(0));
7270 else
7271 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7272 MVT IVT = VT.changeVectorElementType(MVT::i16);
7273 return DAG.getNode(ISD::BITCAST, DL, VT,
7274 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7275 }
7276
7277 if (EltVT == MVT::i1)
7278 return lowerVectorMaskSplat(Op, DAG);
7279 return SDValue();
7280 }
7282 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7283 case ISD::CONCAT_VECTORS: {
7284 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7285 // better than going through the stack, as the default expansion does.
7286 SDLoc DL(Op);
7287 MVT VT = Op.getSimpleValueType();
7288 MVT ContainerVT = VT;
7289 if (VT.isFixedLengthVector())
7290 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7291
7292 // Recursively split concat_vectors with more than 2 operands:
7293 //
7294 // concat_vector op1, op2, op3, op4
7295 // ->
7296 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7297 //
7298 // This reduces the length of the chain of vslideups and allows us to
7299 // perform the vslideups at a smaller LMUL, limited to MF2.
7300 if (Op.getNumOperands() > 2 &&
7301 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
7302 MVT HalfVT = VT.getHalfNumVectorElementsVT();
7304 size_t HalfNumOps = Op.getNumOperands() / 2;
7305 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7306 Op->ops().take_front(HalfNumOps));
7307 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7308 Op->ops().drop_front(HalfNumOps));
7309 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7310 }
7311
7312 unsigned NumOpElts =
7313 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7314 SDValue Vec = DAG.getUNDEF(VT);
7315 for (const auto &OpIdx : enumerate(Op->ops())) {
7316 SDValue SubVec = OpIdx.value();
7317 // Don't insert undef subvectors.
7318 if (SubVec.isUndef())
7319 continue;
7320 Vec =
7321 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
7322 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
7323 }
7324 return Vec;
7325 }
7326 case ISD::LOAD: {
7327 auto *Load = cast<LoadSDNode>(Op);
7328 EVT VecTy = Load->getMemoryVT();
7329 // Handle normal vector tuple load.
7330 if (VecTy.isRISCVVectorTuple()) {
7331 SDLoc DL(Op);
7332 MVT XLenVT = Subtarget.getXLenVT();
7333 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7334 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7335 unsigned NumElts = Sz / (NF * 8);
7336 int Log2LMUL = Log2_64(NumElts) - 3;
7337
7338 auto Flag = SDNodeFlags();
7339 Flag.setNoUnsignedWrap(true);
7340 SDValue Ret = DAG.getUNDEF(VecTy);
7341 SDValue BasePtr = Load->getBasePtr();
7342 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7343 VROffset =
7344 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7345 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7346 SmallVector<SDValue, 8> OutChains;
7347
7348 // Load NF vector registers and combine them to a vector tuple.
7349 for (unsigned i = 0; i < NF; ++i) {
7350 SDValue LoadVal = DAG.getLoad(
7351 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
7352 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
7353 OutChains.push_back(LoadVal.getValue(1));
7354 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal,
7355 DAG.getVectorIdxConstant(i, DL));
7356 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7357 }
7358 return DAG.getMergeValues(
7359 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
7360 }
7361
7362 if (auto V = expandUnalignedRVVLoad(Op, DAG))
7363 return V;
7364 if (Op.getValueType().isFixedLengthVector())
7365 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7366 return Op;
7367 }
7368 case ISD::STORE: {
7369 auto *Store = cast<StoreSDNode>(Op);
7370 SDValue StoredVal = Store->getValue();
7371 EVT VecTy = StoredVal.getValueType();
7372 // Handle normal vector tuple store.
7373 if (VecTy.isRISCVVectorTuple()) {
7374 SDLoc DL(Op);
7375 MVT XLenVT = Subtarget.getXLenVT();
7376 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7377 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7378 unsigned NumElts = Sz / (NF * 8);
7379 int Log2LMUL = Log2_64(NumElts) - 3;
7380
7381 auto Flag = SDNodeFlags();
7382 Flag.setNoUnsignedWrap(true);
7383 SDValue Ret;
7384 SDValue Chain = Store->getChain();
7385 SDValue BasePtr = Store->getBasePtr();
7386 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7387 VROffset =
7388 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7389 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7390
7391 // Extract subregisters in a vector tuple and store them individually.
7392 for (unsigned i = 0; i < NF; ++i) {
7393 auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
7394 MVT::getScalableVectorVT(MVT::i8, NumElts),
7395 StoredVal, DAG.getVectorIdxConstant(i, DL));
7396 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
7397 MachinePointerInfo(Store->getAddressSpace()),
7398 Store->getOriginalAlign(),
7399 Store->getMemOperand()->getFlags());
7400 Chain = Ret.getValue(0);
7401 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7402 }
7403 return Ret;
7404 }
7405
7406 if (auto V = expandUnalignedRVVStore(Op, DAG))
7407 return V;
7408 if (Op.getOperand(1).getValueType().isFixedLengthVector())
7409 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
7410 return Op;
7411 }
7412 case ISD::MLOAD:
7413 case ISD::VP_LOAD:
7414 return lowerMaskedLoad(Op, DAG);
7415 case ISD::MSTORE:
7416 case ISD::VP_STORE:
7417 return lowerMaskedStore(Op, DAG);
7419 return lowerVectorCompress(Op, DAG);
7420 case ISD::SELECT_CC: {
7421 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
7422 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
7423 // into separate SETCC+SELECT just like LegalizeDAG.
7424 SDValue Tmp1 = Op.getOperand(0);
7425 SDValue Tmp2 = Op.getOperand(1);
7426 SDValue True = Op.getOperand(2);
7427 SDValue False = Op.getOperand(3);
7428 EVT VT = Op.getValueType();
7429 SDValue CC = Op.getOperand(4);
7430 EVT CmpVT = Tmp1.getValueType();
7431 EVT CCVT =
7432 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
7433 SDLoc DL(Op);
7434 SDValue Cond =
7435 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
7436 return DAG.getSelect(DL, VT, Cond, True, False);
7437 }
7438 case ISD::SETCC: {
7439 MVT OpVT = Op.getOperand(0).getSimpleValueType();
7440 if (OpVT.isScalarInteger()) {
7441 MVT VT = Op.getSimpleValueType();
7442 SDValue LHS = Op.getOperand(0);
7443 SDValue RHS = Op.getOperand(1);
7444 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7445 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
7446 "Unexpected CondCode");
7447
7448 SDLoc DL(Op);
7449
7450 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
7451 // convert this to the equivalent of (set(u)ge X, C+1) by using
7452 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
7453 // in a register.
7454 if (isa<ConstantSDNode>(RHS)) {
7455 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
7456 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
7457 // If this is an unsigned compare and the constant is -1, incrementing
7458 // the constant would change behavior. The result should be false.
7459 if (CCVal == ISD::SETUGT && Imm == -1)
7460 return DAG.getConstant(0, DL, VT);
7461 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
7462 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7463 SDValue SetCC = DAG.getSetCC(
7464 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
7465 return DAG.getLogicalNOT(DL, SetCC, VT);
7466 }
7467 }
7468
7469 // Not a constant we could handle, swap the operands and condition code to
7470 // SETLT/SETULT.
7471 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7472 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
7473 }
7474
7475 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7476 return SplitVectorOp(Op, DAG);
7477
7478 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
7479 }
7480 case ISD::ADD:
7481 case ISD::SUB:
7482 case ISD::MUL:
7483 case ISD::MULHS:
7484 case ISD::MULHU:
7485 case ISD::AND:
7486 case ISD::OR:
7487 case ISD::XOR:
7488 case ISD::SDIV:
7489 case ISD::SREM:
7490 case ISD::UDIV:
7491 case ISD::UREM:
7492 case ISD::BSWAP:
7493 case ISD::CTPOP:
7494 return lowerToScalableOp(Op, DAG);
7495 case ISD::SHL:
7496 case ISD::SRA:
7497 case ISD::SRL:
7498 if (Op.getSimpleValueType().isFixedLengthVector())
7499 return lowerToScalableOp(Op, DAG);
7500 // This can be called for an i32 shift amount that needs to be promoted.
7501 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7502 "Unexpected custom legalisation");
7503 return SDValue();
7504 case ISD::FABS:
7505 case ISD::FNEG:
7506 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7507 return lowerFABSorFNEG(Op, DAG, Subtarget);
7508 [[fallthrough]];
7509 case ISD::FADD:
7510 case ISD::FSUB:
7511 case ISD::FMUL:
7512 case ISD::FDIV:
7513 case ISD::FSQRT:
7514 case ISD::FMA:
7515 case ISD::FMINNUM:
7516 case ISD::FMAXNUM:
7517 if (isPromotedOpNeedingSplit(Op, Subtarget))
7518 return SplitVectorOp(Op, DAG);
7519 [[fallthrough]];
7520 case ISD::AVGFLOORS:
7521 case ISD::AVGFLOORU:
7522 case ISD::AVGCEILS:
7523 case ISD::AVGCEILU:
7524 case ISD::SMIN:
7525 case ISD::SMAX:
7526 case ISD::UMIN:
7527 case ISD::UMAX:
7528 case ISD::UADDSAT:
7529 case ISD::USUBSAT:
7530 case ISD::SADDSAT:
7531 case ISD::SSUBSAT:
7532 return lowerToScalableOp(Op, DAG);
7533 case ISD::ABDS:
7534 case ISD::ABDU: {
7535 SDLoc dl(Op);
7536 EVT VT = Op->getValueType(0);
7537 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7538 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7539 bool IsSigned = Op->getOpcode() == ISD::ABDS;
7540
7541 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7542 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
7543 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7544 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7545 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7546 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7547 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7548 }
7549 case ISD::ABS:
7550 case ISD::VP_ABS:
7551 return lowerABS(Op, DAG);
7552 case ISD::CTLZ:
7554 case ISD::CTTZ:
7556 if (Subtarget.hasStdExtZvbb())
7557 return lowerToScalableOp(Op, DAG);
7558 assert(Op.getOpcode() != ISD::CTTZ);
7559 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7560 case ISD::VSELECT:
7561 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7562 case ISD::FCOPYSIGN:
7563 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7564 return lowerFCOPYSIGN(Op, DAG, Subtarget);
7565 if (isPromotedOpNeedingSplit(Op, Subtarget))
7566 return SplitVectorOp(Op, DAG);
7567 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7568 case ISD::STRICT_FADD:
7569 case ISD::STRICT_FSUB:
7570 case ISD::STRICT_FMUL:
7571 case ISD::STRICT_FDIV:
7572 case ISD::STRICT_FSQRT:
7573 case ISD::STRICT_FMA:
7574 if (isPromotedOpNeedingSplit(Op, Subtarget))
7575 return SplitStrictFPVectorOp(Op, DAG);
7576 return lowerToScalableOp(Op, DAG);
7577 case ISD::STRICT_FSETCC:
7579 return lowerVectorStrictFSetcc(Op, DAG);
7580 case ISD::STRICT_FCEIL:
7581 case ISD::STRICT_FRINT:
7582 case ISD::STRICT_FFLOOR:
7583 case ISD::STRICT_FTRUNC:
7585 case ISD::STRICT_FROUND:
7587 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7588 case ISD::MGATHER:
7589 case ISD::VP_GATHER:
7590 return lowerMaskedGather(Op, DAG);
7591 case ISD::MSCATTER:
7592 case ISD::VP_SCATTER:
7593 return lowerMaskedScatter(Op, DAG);
7594 case ISD::GET_ROUNDING:
7595 return lowerGET_ROUNDING(Op, DAG);
7596 case ISD::SET_ROUNDING:
7597 return lowerSET_ROUNDING(Op, DAG);
7598 case ISD::EH_DWARF_CFA:
7599 return lowerEH_DWARF_CFA(Op, DAG);
7600 case ISD::VP_MERGE:
7601 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7602 return lowerVPMergeMask(Op, DAG);
7603 [[fallthrough]];
7604 case ISD::VP_SELECT:
7605 case ISD::VP_ADD:
7606 case ISD::VP_SUB:
7607 case ISD::VP_MUL:
7608 case ISD::VP_SDIV:
7609 case ISD::VP_UDIV:
7610 case ISD::VP_SREM:
7611 case ISD::VP_UREM:
7612 case ISD::VP_UADDSAT:
7613 case ISD::VP_USUBSAT:
7614 case ISD::VP_SADDSAT:
7615 case ISD::VP_SSUBSAT:
7616 case ISD::VP_LRINT:
7617 case ISD::VP_LLRINT:
7618 return lowerVPOp(Op, DAG);
7619 case ISD::VP_AND:
7620 case ISD::VP_OR:
7621 case ISD::VP_XOR:
7622 return lowerLogicVPOp(Op, DAG);
7623 case ISD::VP_FADD:
7624 case ISD::VP_FSUB:
7625 case ISD::VP_FMUL:
7626 case ISD::VP_FDIV:
7627 case ISD::VP_FNEG:
7628 case ISD::VP_FABS:
7629 case ISD::VP_SQRT:
7630 case ISD::VP_FMA:
7631 case ISD::VP_FMINNUM:
7632 case ISD::VP_FMAXNUM:
7633 case ISD::VP_FCOPYSIGN:
7634 if (isPromotedOpNeedingSplit(Op, Subtarget))
7635 return SplitVPOp(Op, DAG);
7636 [[fallthrough]];
7637 case ISD::VP_SRA:
7638 case ISD::VP_SRL:
7639 case ISD::VP_SHL:
7640 return lowerVPOp(Op, DAG);
7641 case ISD::VP_IS_FPCLASS:
7642 return LowerIS_FPCLASS(Op, DAG);
7643 case ISD::VP_SIGN_EXTEND:
7644 case ISD::VP_ZERO_EXTEND:
7645 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7646 return lowerVPExtMaskOp(Op, DAG);
7647 return lowerVPOp(Op, DAG);
7648 case ISD::VP_TRUNCATE:
7649 return lowerVectorTruncLike(Op, DAG);
7650 case ISD::VP_FP_EXTEND:
7651 case ISD::VP_FP_ROUND:
7652 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7653 case ISD::VP_SINT_TO_FP:
7654 case ISD::VP_UINT_TO_FP:
7655 if (Op.getValueType().isVector() &&
7656 ((Op.getValueType().getScalarType() == MVT::f16 &&
7657 (Subtarget.hasVInstructionsF16Minimal() &&
7658 !Subtarget.hasVInstructionsF16())) ||
7659 Op.getValueType().getScalarType() == MVT::bf16)) {
7660 if (isPromotedOpNeedingSplit(Op, Subtarget))
7661 return SplitVectorOp(Op, DAG);
7662 // int -> f32
7663 SDLoc DL(Op);
7664 MVT NVT =
7665 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7666 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7667 // f32 -> [b]f16
7668 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7669 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7670 }
7671 [[fallthrough]];
7672 case ISD::VP_FP_TO_SINT:
7673 case ISD::VP_FP_TO_UINT:
7674 if (SDValue Op1 = Op.getOperand(0);
7675 Op1.getValueType().isVector() &&
7676 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7677 (Subtarget.hasVInstructionsF16Minimal() &&
7678 !Subtarget.hasVInstructionsF16())) ||
7679 Op1.getValueType().getScalarType() == MVT::bf16)) {
7680 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7681 return SplitVectorOp(Op, DAG);
7682 // [b]f16 -> f32
7683 SDLoc DL(Op);
7684 MVT NVT = MVT::getVectorVT(MVT::f32,
7685 Op1.getValueType().getVectorElementCount());
7686 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7687 // f32 -> int
7688 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7689 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7690 }
7691 return lowerVPFPIntConvOp(Op, DAG);
7692 case ISD::VP_SETCC:
7693 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7694 return SplitVPOp(Op, DAG);
7695 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7696 return lowerVPSetCCMaskOp(Op, DAG);
7697 [[fallthrough]];
7698 case ISD::VP_SMIN:
7699 case ISD::VP_SMAX:
7700 case ISD::VP_UMIN:
7701 case ISD::VP_UMAX:
7702 case ISD::VP_BITREVERSE:
7703 case ISD::VP_BSWAP:
7704 return lowerVPOp(Op, DAG);
7705 case ISD::VP_CTLZ:
7706 case ISD::VP_CTLZ_ZERO_UNDEF:
7707 if (Subtarget.hasStdExtZvbb())
7708 return lowerVPOp(Op, DAG);
7709 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7710 case ISD::VP_CTTZ:
7711 case ISD::VP_CTTZ_ZERO_UNDEF:
7712 if (Subtarget.hasStdExtZvbb())
7713 return lowerVPOp(Op, DAG);
7714 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7715 case ISD::VP_CTPOP:
7716 return lowerVPOp(Op, DAG);
7717 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7718 return lowerVPStridedLoad(Op, DAG);
7719 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7720 return lowerVPStridedStore(Op, DAG);
7721 case ISD::VP_FCEIL:
7722 case ISD::VP_FFLOOR:
7723 case ISD::VP_FRINT:
7724 case ISD::VP_FNEARBYINT:
7725 case ISD::VP_FROUND:
7726 case ISD::VP_FROUNDEVEN:
7727 case ISD::VP_FROUNDTOZERO:
7728 if (isPromotedOpNeedingSplit(Op, Subtarget))
7729 return SplitVPOp(Op, DAG);
7730 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7731 case ISD::VP_FMAXIMUM:
7732 case ISD::VP_FMINIMUM:
7733 if (isPromotedOpNeedingSplit(Op, Subtarget))
7734 return SplitVPOp(Op, DAG);
7735 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7736 case ISD::EXPERIMENTAL_VP_SPLICE:
7737 return lowerVPSpliceExperimental(Op, DAG);
7738 case ISD::EXPERIMENTAL_VP_REVERSE:
7739 return lowerVPReverseExperimental(Op, DAG);
7740 case ISD::EXPERIMENTAL_VP_SPLAT:
7741 return lowerVPSplatExperimental(Op, DAG);
7742 case ISD::CLEAR_CACHE: {
7743 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7744 "llvm.clear_cache only needs custom lower on Linux targets");
7745 SDLoc DL(Op);
7746 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7747 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7748 Op.getOperand(2), Flags, DL);
7749 }
7751 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7753 return lowerINIT_TRAMPOLINE(Op, DAG);
7755 return lowerADJUST_TRAMPOLINE(Op, DAG);
7756 }
7757}
7758
7759SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7760 SDValue Start, SDValue End,
7761 SDValue Flags, SDLoc DL) const {
7762 MakeLibCallOptions CallOptions;
7763 std::pair<SDValue, SDValue> CallResult =
7764 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7765 {Start, End, Flags}, CallOptions, DL, InChain);
7766
7767 // This function returns void so only the out chain matters.
7768 return CallResult.second;
7769}
7770
// Lower ISD::INIT_TRAMPOLINE (RV64 only): write a 32-byte trampoline into the
// caller-provided buffer. The trampoline loads the static chain into t2 and
// the nested function's address into t0, then jumps to it. Returns the chain
// after the six stores and the icache flush over the written code.
SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  if (!Subtarget.is64Bit())
    llvm::report_fatal_error("Trampolines only implemented for RV64");

  // Create an MCCodeEmitter to encode instructions.
  // NOTE(review): the declaration of TLO (the target object-file lowering)
  // appears truncated in this listing — confirm against the full source.
  assert(TLO);
  MCContext &MCCtx = TLO->getContext();

  std::unique_ptr<MCCodeEmitter> CodeEmitter(
      createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));

  SDValue Root = Op.getOperand(0); // Incoming chain.
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDLoc dl(Op);

  // IR-level pointer value backing the trampoline; used for alias info on the
  // stores below.
  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();

  // We store in the trampoline buffer the following instructions and data.
  // Offset:
  // 0: auipc t2, 0
  // 4: ld t0, 24(t2)
  // 8: ld t2, 16(t2)
  // 12: jalr t0
  // 16: <StaticChainOffset>
  // 24: <FunctionAddressOffset>
  // 32:

  constexpr unsigned StaticChainOffset = 16;
  constexpr unsigned FunctionAddressOffset = 24;

  // NOTE(review): the declaration of STI (MCSubtargetInfo) and the lambda's
  // local buffers (CB, Fixups) appear truncated in this listing.
  assert(STI);
  // Encode one MCInst and return its 4-byte little-endian binary encoding.
  auto GetEncoding = [&](const MCInst &MC) {
    CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
    uint32_t Encoding = support::endian::read32le(CB.data());
    return Encoding;
  };

  // Chains for the six stores below: 4 instruction words + 2 data doublewords.
  SDValue OutChains[6];

  uint32_t Encodings[] = {
      // auipc t2, 0
      // Loads the current PC into t2.
      GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
      // ld t0, 24(t2)
      // Loads the function address into t0. Note that we are using offsets
      // pc-relative to the first instruction of the trampoline.
      GetEncoding(
          MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
              FunctionAddressOffset)),
      // ld t2, 16(t2)
      // Load the value of the static chain.
      GetEncoding(
          MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
              StaticChainOffset)),
      // jalr t0
      // Jump to the function.
      GetEncoding(MCInstBuilder(RISCV::JALR)
                      .addReg(RISCV::X0)
                      .addReg(RISCV::X5)
                      .addImm(0))};

  // Store encoded instructions.
  for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
    // First word is stored at Trmp itself; subsequent words at 4-byte strides.
    SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                                         DAG.getConstant(Idx * 4, dl, MVT::i64))
                           : Trmp;
    // Truncating i64 store of a 32-bit instruction word.
    OutChains[Idx] = DAG.getTruncStore(
        Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
        MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32);
  }

  // Now store the variable part of the trampoline.
  SDValue FunctionAddress = Op.getOperand(2);
  SDValue StaticChain = Op.getOperand(3);

  // Store the given static chain and function pointer in the trampoline buffer.
  struct OffsetValuePair {
    const unsigned Offset;
    const SDValue Value;
    SDValue Addr = SDValue(); // Used to cache the address.
  } OffsetValues[] = {
      {StaticChainOffset, StaticChain},
      {FunctionAddressOffset, FunctionAddress},
  };
  for (auto [Idx, OffsetValue] : llvm::enumerate(OffsetValues)) {
    SDValue Addr =
        DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                    DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
    OffsetValue.Addr = Addr;
    // Slots 4 and 5 follow the four instruction-word stores above.
    OutChains[Idx + 4] =
        DAG.getStore(Root, dl, OffsetValue.Value, Addr,
                     MachinePointerInfo(TrmpAddr, OffsetValue.Offset));
  }

  SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);

  // The end of instructions of trampoline is the same as the static chain
  // address that we computed earlier.
  SDValue EndOfTrmp = OffsetValues[0].Addr;

  // Call clear cache on the trampoline instructions.
  SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
                              Trmp, EndOfTrmp);

  return Chain;
}
7882
7883SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
7884 SelectionDAG &DAG) const {
7885 if (!Subtarget.is64Bit())
7886 llvm::report_fatal_error("Trampolines only implemented for RV64");
7887
7888 return Op.getOperand(0);
7889}
7890
                             SelectionDAG &DAG, unsigned Flags) {
  // Target-node factory for globals: wrap the global in a TargetGlobalAddress
  // carrying the requested relocation flags (offset is always 0 here).
  // NOTE(review): the first line of this signature appears truncated in this
  // listing — confirm against the full source.
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}
7895
                             SelectionDAG &DAG, unsigned Flags) {
  // Target-node factory for block addresses; preserves the node's own offset.
  // NOTE(review): the first line of this signature appears truncated in this
  // listing — confirm against the full source.
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}
7901
                             SelectionDAG &DAG, unsigned Flags) {
  // Target-node factory for constant-pool entries; keeps the entry's
  // alignment and offset.
  // NOTE(review): the first line of this signature appears truncated in this
  // listing — confirm against the full source.
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}
7907
                             SelectionDAG &DAG, unsigned Flags) {
  // Target-node factory for jump tables: only the table index is needed.
  // NOTE(review): the first line of this signature appears truncated in this
  // listing — confirm against the full source.
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
7912
                                 EVT Ty, SelectionDAG &DAG) {
  // Large code model: place the symbol's address in the constant pool and
  // load it through a PC-relative (LLA-addressed) constant-pool slot.
  // NOTE(review): the signature line, the CPV creation, and the final getLoad
  // argument appear truncated in this listing — confirm against full source.
  SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
  SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
  return DAG.getLoad(
      Ty, DL, DAG.getEntryNode(), LC,
}
7922
                                 EVT Ty, SelectionDAG &DAG) {
  // Large code model access to an external symbol: create a constant-pool
  // entry for the symbol and load its address PC-relatively.
  // NOTE(review): the signature line, the start of the CPV declaration, and
  // the final getLoad argument appear truncated in this listing.
      RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
  SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
  SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
  return DAG.getLoad(
      Ty, DL, DAG.getEntryNode(), LC,
}
7933
// Lower an address-producing node (global, block address, constant pool,
// jump table, ...) to a DAG sequence appropriate for the current relocation
// model and code model.
// \p IsLocal      - the symbol is known dso_local (PC-relative reachable).
// \p IsExternWeak - the symbol may be undefined (address 0), so in the medium
//                   code model it must be reached through the GOT.
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal, bool IsExternWeak) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  // When HWASAN is used and tagging of global variables is enabled
  // they should be accessed via the GOT, since the tagged address of a global
  // is incompatible with existing code models. This also applies to non-pic
  // mode.
  if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal && !Subtarget.allowTaggedGlobals())
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLGA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
    // NOTE(review): the MachineMemOperand (MemOp) construction preceding this
    // continuation appears truncated in this listing — confirm against the
    // full source.
            LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
    return Load;
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
    return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
  }
  case CodeModel::Medium: {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsExternWeak) {
      // An extern weak symbol may be undefined, i.e. have value 0, which may
      // not be within 2GiB of PC, so use GOT-indirect addressing to access the
      // symbol. This generates the pattern (PseudoLGA sym), which expands to
      // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
      SDValue Load =
          SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
      // NOTE(review): MachineMemOperand construction truncated here as well.
              LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
      DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
      return Load;
    }

    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
  }
  case CodeModel::Large: {
    // Globals go through a constant-pool load in the large code model.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
      return getLargeGlobalAddress(G, DL, Ty, DAG);

    // Using pc-relative mode for other node type.
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
  }
  }
}
8012
8013SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8014 SelectionDAG &DAG) const {
8015 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8016 assert(N->getOffset() == 0 && "unexpected offset in global node");
8017 const GlobalValue *GV = N->getGlobal();
8018 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8019}
8020
8021SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8022 SelectionDAG &DAG) const {
8023 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8024
8025 return getAddr(N, DAG);
8026}
8027
8028SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8029 SelectionDAG &DAG) const {
8030 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8031
8032 return getAddr(N, DAG);
8033}
8034
8035SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8036 SelectionDAG &DAG) const {
8037 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8038
8039 return getAddr(N, DAG);
8040}
8041
// Lower a TLS global for the static models. \p UseGOT selects Initial-Exec
// style lowering (TP-relative offset loaded from the GOT) versus Local-Exec
// style lowering (link-time %tprel relocations).
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
    // NOTE(review): the MachineMemOperand (MemOp) construction preceding this
    // continuation appears truncated in this listing — confirm against the
    // full source.
            LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  // NOTE(review): the getTargetGlobalAddress initializers of the three
  // SDValues below appear truncated in this listing.
  SDValue AddrHi =
  SDValue AddrAdd =
  SDValue AddrLo =

  SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd =
      DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
  return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
}
8088
// Lower a TLS global for the General-Dynamic model: materialize the GD GOT
// entry address with PseudoLA_TLS_GD, then call __tls_get_addr to resolve the
// address at run time.
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // __tls_get_addr takes and returns a pointer-sized integer.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  // NOTE(review): the declarations of Args and CLI appear truncated in this
  // listing — confirm against the full source.
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  // Only the call's return value (the resolved TLS address) is needed.
  return LowerCallTo(CLI).first;
}
8120
8121SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8122 SelectionDAG &DAG) const {
8123 SDLoc DL(N);
8124 EVT Ty = getPointerTy(DAG.getDataLayout());
8125 const GlobalValue *GV = N->getGlobal();
8126
8127 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8128 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8129 //
8130 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
8131 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8132 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
8133 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
8134 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8135 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
8136}
8137
// Dispatch TLS address lowering according to the TLS model chosen for the
// global: emulated TLS, the static models, or the dynamic models.
SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  // Emulated TLS is lowered to runtime helper calls by the generic code.
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(N, DAG);

  // NOTE(review): the TLS-model query and the GHC calling-convention check
  // guarding this fatal error appear truncated in this listing — confirm
  // against the full source.
    report_fatal_error("In GHC calling convention TLS is not supported");

  SDValue Addr;
  // NOTE(review): the case labels of this switch appear truncated in this
  // listing; the arms below correspond to a non-GOT static lowering
  // (UseGOT=false), a GOT-based static lowering (UseGOT=true), and the
  // dynamic models (TLSDESC or __tls_get_addr) respectively.
  switch (Model) {
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
    Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
                                        : getDynamicTLSAddr(N, DAG);
    break;
  }

  return Addr;
}
8169
// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
                                      ISD::CondCode CC, SDValue Val) {
  assert(Val->getOpcode() == ISD::SETCC);
  SDValue LHS2 = Val.getOperand(0);
  SDValue RHS2 = Val.getOperand(1);
  ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();

  // Operands match in the same order: compare condition codes directly.
  if (LHS == LHS2 && RHS == RHS2) {
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  } else if (LHS == RHS2 && RHS == LHS2) {
    // Operands appear swapped in Val.
    // NOTE(review): a statement normalizing CC2 for the swapped operand
    // order appears truncated from this listing at this point — confirm
    // against the full source.
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  }

  // Neither provably equal nor provably inverse.
  return std::nullopt;
}
8195
                                    const RISCVSubtarget &Subtarget) {
  // Try to replace (select CondV, TrueV, FalseV) with cheap bitwise
  // arithmetic on the i1 condition. Returns SDValue() if no fold applies.
  // NOTE(review): the first line of this signature appears truncated in this
  // listing — confirm against the full source.
  SDValue CondV = N->getOperand(0);
  SDValue TrueV = N->getOperand(1);
  SDValue FalseV = N->getOperand(2);
  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  // With conditional-move fusion the select is already cheap, so the
  // mask-based folds below are only applied without it.
  if (!Subtarget.hasConditionalMoveFusion()) {
    // (select c, -1, y) -> -c | y
    if (isAllOnesConstant(TrueV)) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
    }
    // (select c, y, -1) -> (c-1) | y
    if (isAllOnesConstant(FalseV)) {
      SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
    }

    // (select c, 0, y) -> (c-1) & y
    if (isNullConstant(TrueV)) {
      SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
    }
    // (select c, y, 0) -> -c & y
    if (isNullConstant(FalseV)) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
    }
  }

  // select c, ~x, x --> xor -c, x
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (~TrueVal == FalseVal) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
    }
  }

  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
  // when both truev and falsev are also setcc.
  if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
      FalseV.getOpcode() == ISD::SETCC) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    // (select x, x, y) -> x | y
    // (select !x, x, y) -> x & y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
      return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
                         DAG.getFreeze(FalseV));
    }
    // (select x, y, x) -> x & y
    // (select !x, y, x) -> x | y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
                         DAG.getFreeze(TrueV), FalseV);
    }
  }

  // No profitable fold found; let the generic lowering handle it.
  return SDValue();
}
8264
// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
// being `0` or `-1`. In such cases we can replace `select` with `and`.
// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
// than `c0`?
// NOTE(review): the signature line naming this function's parameters appears
// truncated in this listing — confirm against the full source.
static SDValue
                                const RISCVSubtarget &Subtarget) {
  // With short forward branches the original select is already cheap.
  if (Subtarget.hasShortForwardBranchOpt())
    return SDValue();

  // Locate the select operand of the binary op; it may be either operand.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  // Identify which select arm is the constant (c0) and which is the
  // non-constant value (x).
  unsigned ConstSelOpNo = 1;
  unsigned OtherSelOpNo = 2;
  if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
    ConstSelOpNo = 2;
    OtherSelOpNo = 1;
  }
  SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
  ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
  if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
    return SDValue();

  // The binary op's other operand must also be a (non-opaque) constant c1.
  SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
  ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
  if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
    return SDValue();

  SDLoc DL(Sel);
  EVT VT = BO->getValueType(0);

  // Constant-fold binOp(c0, c1); keep operand order correct for
  // non-commutative ops (swap if the select was BO's second operand).
  SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewConstOps[0], NewConstOps[1]);

  SDValue NewConstOp =
      DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
  if (!NewConstOp)
    return SDValue();

  // Only profitable when the folded constant is 0 or -1, per the comment at
  // the top of this function.
  const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
  if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
    return SDValue();

  // Build binOp(x, c1) for the non-constant arm, preserving operand order.
  SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
  SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewNonConstOps[0], NewNonConstOps[1]);
  SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);

  // Reassemble the select with the transformed arms in their original slots.
  SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
  SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
}
8329
// Lower an ISD::SELECT node. Vector selects become VSELECT with a splatted
// condition; scalar-integer selects prefer Zicond/XVentanaCondOps
// conditional-zero sequences when available; otherwise we fall back to a
// branch-based RISCVISD::SELECT_CC node.
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // Lower vector SELECTs to VSELECTs by splatting the condition.
  if (VT.isVector()) {
    MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
    SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
    return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
  }

  // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
  // nodes to implement the SELECT. Performing the lowering here allows for
  // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
  // sequence or RISCVISD::SELECT_CC node (branch-based select).
  if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
      VT.isScalarInteger()) {
    // (select c, t, 0) -> (czero_eqz t, c)
    if (isNullConstant(FalseV))
      return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
    // (select c, 0, f) -> (czero_nez f, c)
    if (isNullConstant(TrueV))
      return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);

    // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
    if (TrueV.getOpcode() == ISD::AND &&
        (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
      return DAG.getNode(
          ISD::OR, DL, VT, TrueV,
          DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
    // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
    if (FalseV.getOpcode() == ISD::AND &&
        (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
      return DAG.getNode(
          ISD::OR, DL, VT, FalseV,
          DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));

    // Try some other optimizations before falling back to generic lowering.
    if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
      return V;

    // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
    // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
    if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
      const APInt &TrueVal = TrueV->getAsAPIntVal();
      const APInt &FalseVal = FalseV->getAsAPIntVal();
      // Materialize whichever constant is cheaper and synthesize the other
      // one by adding a conditionally-zeroed difference.
      const int TrueValCost = RISCVMatInt::getIntMatCost(
          TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
      const int FalseValCost = RISCVMatInt::getIntMatCost(
          FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
      bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
      SDValue LHSVal = DAG.getConstant(
          IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
      SDValue RHSVal =
          DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
      SDValue CMOV =
      // NOTE(review): the line supplying the CZERO_NEZ/CZERO_EQZ opcode to
      // this getNode call appears to be missing from this extract — verify
      // against upstream.
          DL, VT, LHSVal, CondV);
      return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
    }

    // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
    // Unless we have the short forward branch optimization.
    if (!Subtarget.hasConditionalMoveFusion())
      return DAG.getNode(
          ISD::OR, DL, VT,
          DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
          DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
  }

  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  // If the select's only user is a speculatable binop, try folding that
  // binop into both arms of the select before lowering.
  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->user_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
                                                          DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(NewSel, DAG);
        return NewSel;
      }
    }
  }

  // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
  // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
  const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
  const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
  if (FPTV && FPFV) {
    if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
      return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
    if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
      SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
                                DAG.getConstant(1, DL, XLenVT));
      return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
    }
  }

  // If the condition is not an integer SETCC which operates on XLenVT, we need
  // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != XLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, XLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
  // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
  // advantage of the integer compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
}
8507
8508SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
8509 SDValue CondV = Op.getOperand(1);
8510 SDLoc DL(Op);
8511 MVT XLenVT = Subtarget.getXLenVT();
8512
8513 if (CondV.getOpcode() == ISD::SETCC &&
8514 CondV.getOperand(0).getValueType() == XLenVT) {
8515 SDValue LHS = CondV.getOperand(0);
8516 SDValue RHS = CondV.getOperand(1);
8517 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8518
8519 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8520
8521 SDValue TargetCC = DAG.getCondCode(CCVal);
8522 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8523 LHS, RHS, TargetCC, Op.getOperand(2));
8524 }
8525
8526 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8527 CondV, DAG.getConstant(0, DL, XLenVT),
8528 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
8529}
8530
// Lower ISD::VASTART: store the address of the varargs save area (the
// VarArgsFrameIndex slot) into the pointer operand of the va_start call.
SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  // NOTE(review): the lines obtaining the MachineFunction and its
  // RISCVMachineFunctionInfo (FuncInfo, used below) appear to be missing
  // from this extract — verify against upstream.

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
  // NOTE(review): the pointer-type argument of getFrameIndex appears to be
  // missing from this extract — verify against upstream.

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
8545
// Lower ISD::FRAMEADDR. Depth 0 yields the frame-pointer register; each
// additional level of depth chases the previous frame pointer saved in the
// current frame.
SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  // NOTE(review): the line obtaining the MachineFunction (MF, used below)
  // appears to be missing from this extract — verify against upstream.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  Register FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  // Walk up one saved frame pointer per requested depth level; the saved FP
  // lives at -2*XLEN bytes relative to the current frame pointer.
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(
        ISD::ADD, DL, VT, FrameAddr,
    // NOTE(review): the offset-constant operand line of this getNode call
    // appears to be missing from this extract — verify against upstream.
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}
8569
// Lower ISD::RETURNADDR. Depth 0 reads the return-address register as an
// implicit live-in; deeper frames load the saved return address at -XLEN
// bytes within the frame located via lowerFRAMEADDR.
SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  // NOTE(review): the line obtaining the MachineFunction (MF, used below)
  // appears to be missing from this extract — verify against upstream.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  // NOTE(review): the guard condition preceding this early return (likely a
  // verification of the depth argument) appears to be missing from this
  // extract — verify against upstream.
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = Op.getConstantOperandVal(0);
  if (Depth) {
    // The return address is saved at -XLEN bytes within the target frame.
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
    // NOTE(review): the MachinePointerInfo argument line of this getLoad
    // call appears to be missing from this extract — verify against
    // upstream.
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
8599
8600SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
8601 SelectionDAG &DAG) const {
8602 SDLoc DL(Op);
8603 SDValue Lo = Op.getOperand(0);
8604 SDValue Hi = Op.getOperand(1);
8605 SDValue Shamt = Op.getOperand(2);
8606 EVT VT = Lo.getValueType();
8607
8608 // if Shamt-XLEN < 0: // Shamt < XLEN
8609 // Lo = Lo << Shamt
8610 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
8611 // else:
8612 // Lo = 0
8613 // Hi = Lo << (Shamt-XLEN)
8614
8615 SDValue Zero = DAG.getConstant(0, DL, VT);
8616 SDValue One = DAG.getConstant(1, DL, VT);
8617 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8618 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8619 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8620 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8621
8622 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
8623 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
8624 SDValue ShiftRightLo =
8625 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
8626 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
8627 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
8628 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
8629
8630 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8631
8632 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
8633 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8634
8635 SDValue Parts[2] = {Lo, Hi};
8636 return DAG.getMergeValues(Parts, DL);
8637}
8638
8639SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8640 bool IsSRA) const {
8641 SDLoc DL(Op);
8642 SDValue Lo = Op.getOperand(0);
8643 SDValue Hi = Op.getOperand(1);
8644 SDValue Shamt = Op.getOperand(2);
8645 EVT VT = Lo.getValueType();
8646
8647 // SRA expansion:
8648 // if Shamt-XLEN < 0: // Shamt < XLEN
8649 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8650 // Hi = Hi >>s Shamt
8651 // else:
8652 // Lo = Hi >>s (Shamt-XLEN);
8653 // Hi = Hi >>s (XLEN-1)
8654 //
8655 // SRL expansion:
8656 // if Shamt-XLEN < 0: // Shamt < XLEN
8657 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8658 // Hi = Hi >>u Shamt
8659 // else:
8660 // Lo = Hi >>u (Shamt-XLEN);
8661 // Hi = 0;
8662
8663 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8664
8665 SDValue Zero = DAG.getConstant(0, DL, VT);
8666 SDValue One = DAG.getConstant(1, DL, VT);
8667 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8668 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8669 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8670 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8671
8672 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8673 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8674 SDValue ShiftLeftHi =
8675 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8676 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8677 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8678 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8679 SDValue HiFalse =
8680 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8681
8682 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8683
8684 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8685 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8686
8687 SDValue Parts[2] = {Lo, Hi};
8688 return DAG.getMergeValues(Parts, DL);
8689}
8690
8691// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8692// legal equivalently-sized i8 type, so we can use that as a go-between.
8693SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8694 SelectionDAG &DAG) const {
8695 SDLoc DL(Op);
8696 MVT VT = Op.getSimpleValueType();
8697 SDValue SplatVal = Op.getOperand(0);
8698 // All-zeros or all-ones splats are handled specially.
8699 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8700 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8701 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8702 }
8703 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8704 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8705 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8706 }
8707 MVT InterVT = VT.changeVectorElementType(MVT::i8);
8708 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8709 DAG.getConstant(1, DL, SplatVal.getValueType()));
8710 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8711 SDValue Zero = DAG.getConstant(0, DL, InterVT);
8712 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8713}
8714
8715// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8716// illegal (currently only vXi64 RV32).
8717// FIXME: We could also catch non-constant sign-extended i32 values and lower
8718// them to VMV_V_X_VL.
8719SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8720 SelectionDAG &DAG) const {
8721 SDLoc DL(Op);
8722 MVT VecVT = Op.getSimpleValueType();
8723 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8724 "Unexpected SPLAT_VECTOR_PARTS lowering");
8725
8726 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8727 SDValue Lo = Op.getOperand(0);
8728 SDValue Hi = Op.getOperand(1);
8729
8730 MVT ContainerVT = VecVT;
8731 if (VecVT.isFixedLengthVector())
8732 ContainerVT = getContainerForFixedLengthVector(VecVT);
8733
8734 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8735
8736 SDValue Res =
8737 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8738
8739 if (VecVT.isFixedLengthVector())
8740 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8741
8742 return Res;
8743}
8744
8745// Custom-lower extensions from mask vectors by using a vselect either with 1
8746// for zero/any-extension or -1 for sign-extension:
8747// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8748// Note that any-extension is lowered identically to zero-extension.
8749SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8750 int64_t ExtTrueVal) const {
8751 SDLoc DL(Op);
8752 MVT VecVT = Op.getSimpleValueType();
8753 SDValue Src = Op.getOperand(0);
8754 // Only custom-lower extensions from mask types
8755 assert(Src.getValueType().isVector() &&
8756 Src.getValueType().getVectorElementType() == MVT::i1);
8757
8758 if (VecVT.isScalableVector()) {
8759 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8760 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
8761 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8762 }
8763
8764 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8765 MVT I1ContainerVT =
8766 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8767
8768 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8769
8770 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8771
8772 MVT XLenVT = Subtarget.getXLenVT();
8773 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8774 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
8775
8776 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8777 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8778 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8779 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8780 SDValue Select =
8781 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8782 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8783
8784 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8785}
8786
8787SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8788 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8789 MVT ExtVT = Op.getSimpleValueType();
8790 // Only custom-lower extensions from fixed-length vector types.
8791 if (!ExtVT.isFixedLengthVector())
8792 return Op;
8793 MVT VT = Op.getOperand(0).getSimpleValueType();
8794 // Grab the canonical container type for the extended type. Infer the smaller
8795 // type from that to ensure the same number of vector elements, as we know
8796 // the LMUL will be sufficient to hold the smaller type.
8797 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8798 // Get the extended container type manually to ensure the same number of
8799 // vector elements between source and dest.
8800 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8801 ContainerExtVT.getVectorElementCount());
8802
8803 SDValue Op1 =
8804 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8805
8806 SDLoc DL(Op);
8807 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8808
8809 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8810
8811 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8812}
8813
8814// Custom-lower truncations from vectors to mask vectors by using a mask and a
8815// setcc operation:
8816// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8817SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8818 SelectionDAG &DAG) const {
8819 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8820 SDLoc DL(Op);
8821 EVT MaskVT = Op.getValueType();
8822 // Only expect to custom-lower truncations to mask types
8823 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8824 "Unexpected type for vector mask lowering");
8825 SDValue Src = Op.getOperand(0);
8826 MVT VecVT = Src.getSimpleValueType();
8827 SDValue Mask, VL;
8828 if (IsVPTrunc) {
8829 Mask = Op.getOperand(1);
8830 VL = Op.getOperand(2);
8831 }
8832 // If this is a fixed vector, we need to convert it to a scalable vector.
8833 MVT ContainerVT = VecVT;
8834
8835 if (VecVT.isFixedLengthVector()) {
8836 ContainerVT = getContainerForFixedLengthVector(VecVT);
8837 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8838 if (IsVPTrunc) {
8839 MVT MaskContainerVT =
8840 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8841 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8842 }
8843 }
8844
8845 if (!IsVPTrunc) {
8846 std::tie(Mask, VL) =
8847 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8848 }
8849
8850 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8851 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8852
8853 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8854 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8855 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8856 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8857
8858 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8859 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8860 DAG.getUNDEF(ContainerVT), Mask, VL);
8861 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8862 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8863 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8864 if (MaskVT.isFixedLengthVector())
8865 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8866 return Trunc;
8867}
8868
// Lower a vector truncate (ISD::TRUNCATE, VP_TRUNCATE, or a saturating
// TRUNCATE_SSAT_S/TRUNCATE_USAT_U). RVV can only narrow SEW*2->SEW, so wide
// truncates are emitted as a chain of halving steps.
SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
                                                  SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
  SDLoc DL(Op);

  MVT VT = Op.getSimpleValueType();
  // Only custom-lower vector truncates
  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  // Truncates to mask types are handled differently
  if (VT.getVectorElementType() == MVT::i1)
    return lowerVectorMaskTruncLike(Op, DAG);

  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
  // truncate by one power of two at a time.
  MVT DstEltVT = VT.getVectorElementType();

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT SrcEltVT = SrcVT.getVectorElementType();

  assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
         isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
         "Unexpected vector truncate lowering");

  MVT ContainerVT = SrcVT;
  SDValue Mask, VL;
  if (IsVPTrunc) {
    // VP operations carry explicit mask and VL operands.
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (SrcVT.isFixedLengthVector()) {
    // Fixed vectors are processed in their scalable container type.
    ContainerVT = getContainerForFixedLengthVector(SrcVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  SDValue Result = Src;
  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  }

  // Select the (possibly saturating) narrowing node kind.
  unsigned NewOpc;
  if (Opc == ISD::TRUNCATE_SSAT_S)
  // NOTE(review): the NewOpc assignment lines (the TRUNCATE_VECTOR_VL
  // opcode variants) appear to be missing from this extract — verify
  // against upstream.
  else if (Opc == ISD::TRUNCATE_USAT_U)
  else

  // Halve the element width once per iteration until the destination
  // element type is reached.
  do {
    SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
    MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
    Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
  } while (SrcEltVT != DstEltVT);

  if (SrcVT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return Result;
}
8936
// Lower STRICT_FP_EXTEND / STRICT_FP_ROUND on vectors. RVV only converts
// between adjacent FP sizes, so f16/bf16 <-> f64 goes through an f32
// intermediate (round-to-odd in the narrowing direction), threading the
// strict-FP chain through both conversions.
SDValue
RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Src = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    // Work in the scalable container: element type from VT, shape from the
    // source container.
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  // RVV can only widen/truncate fp to types double/half the size as the source.
  if ((VT.getVectorElementType() == MVT::f64 &&
       (SrcVT.getVectorElementType() == MVT::f16 ||
        SrcVT.getVectorElementType() == MVT::bf16)) ||
      ((VT.getVectorElementType() == MVT::f16 ||
        VT.getVectorElementType() == MVT::bf16) &&
       SrcVT.getVectorElementType() == MVT::f64)) {
    // For double rounding, the intermediate rounding should be round-to-odd.
    unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
    // NOTE(review): the ternary arms selecting the strict intermediate
    // conversion opcode appear to be missing from this extract — verify
    // against upstream.
    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
                      Chain, Src, Mask, VL);
    Chain = Src.getValue(1);
  }

  unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
  // NOTE(review): the ternary arms selecting the final strict conversion
  // opcode appear to be missing from this extract — verify against upstream.
  SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
                            Chain, Src, Mask, VL);
  if (VT.isFixedLengthVector()) {
    // StrictFP operations have two result values. Their lowered result should
    // have same result count.
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}
8985
// Lower (VP_)FP_EXTEND / (VP_)FP_ROUND on vectors. Conversions that skip a
// size (f16/bf16 <-> f64) are split through an f32 intermediate.
SDValue
RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  bool IsVP =
      Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
  bool IsExtend =
      Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
  // RVV can only do truncate fp to types half the size as the source. We
  // custom-lower f64->f16 rounds via RVV's round-to-odd float
  // conversion instruction.
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();

  // A conversion is "direct" when it does not need an f32 go-between, i.e.
  // anything except f16/bf16 <-> f64.
  bool IsDirectExtend =
      IsExtend && (VT.getVectorElementType() != MVT::f64 ||
                   (SrcVT.getVectorElementType() != MVT::f16 &&
                    SrcVT.getVectorElementType() != MVT::bf16));
  bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
                                      VT.getVectorElementType() != MVT::bf16) ||
                                     SrcVT.getVectorElementType() != MVT::f64);

  bool IsDirectConv = IsDirectExtend || IsDirectTrunc;

  // Prepare any fixed-length vector operands.
  MVT ContainerVT = VT;
  SDValue Mask, VL;
  if (IsVP) {
    // VP operations carry explicit mask and VL operands.
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    if (IsVP) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!IsVP)
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;

  if (IsDirectConv) {
    Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
    if (VT.isFixedLengthVector())
      Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
    return Src;
  }

  // Indirect case: convert through an f32 intermediate.
  unsigned InterConvOpc =
  // NOTE(review): the expression selecting the intermediate conversion
  // opcode appears to be missing from this extract — verify against
  // upstream.

  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
  SDValue IntermediateConv =
      DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
  SDValue Result =
      DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
  if (VT.isFixedLengthVector())
    return convertFromScalableVector(VT, Result, DAG, Subtarget);
  return Result;
}
9057
// Given a scalable vector type and an index into it, returns the type for the
// smallest subvector that the index fits in. This can be used to reduce LMUL
// for operations like vslidedown.
//
// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
static std::optional<MVT>
getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
                      const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector());
  // MinVLMAX is the number of elements guaranteed to fit in one LMUL1
  // register given the minimum supported VLEN; an index below k*MinVLMAX is
  // therefore guaranteed to land within the first k LMUL1 registers.
  const unsigned EltSize = VecVT.getScalarSizeInBits();
  const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
  const unsigned MinVLMAX = VectorBitsMin / EltSize;
  MVT SmallerVT;
  if (MaxIdx < MinVLMAX)
    SmallerVT = getLMUL1VT(VecVT);
  else if (MaxIdx < MinVLMAX * 2)
    SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
  else if (MaxIdx < MinVLMAX * 4)
    SmallerVT = getLMUL1VT(VecVT)
  // NOTE(review): the continuation of this expression (presumably doubling
  // the element count twice) appears to be missing from this extract —
  // verify against upstream.
  // Only shrink when the candidate type is valid and strictly smaller than
  // the input vector type.
  if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
    return std::nullopt;
  return SmallerVT;
}
9083
9084// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
9085// first position of a vector, and that vector is slid up to the insert index.
9086// By limiting the active vector length to index+1 and merging with the
9087// original vector (with an undisturbed tail policy for elements >= VL), we
9088// achieve the desired result of leaving all elements untouched except the one
9089// at VL-1, which is replaced with the desired value.
9090SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
9091 SelectionDAG &DAG) const {
9092 SDLoc DL(Op);
9093 MVT VecVT = Op.getSimpleValueType();
9094 MVT XLenVT = Subtarget.getXLenVT();
9095 SDValue Vec = Op.getOperand(0);
9096 SDValue Val = Op.getOperand(1);
9097 MVT ValVT = Val.getSimpleValueType();
9098 SDValue Idx = Op.getOperand(2);
9099
9100 if (VecVT.getVectorElementType() == MVT::i1) {
9101 // FIXME: For now we just promote to an i8 vector and insert into that,
9102 // but this is probably not optimal.
9103 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9104 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9105 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
9106 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
9107 }
9108
9109 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9110 ValVT == MVT::bf16) {
9111 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
9112 MVT IntVT = VecVT.changeTypeToInteger();
9113 SDValue IntInsert = DAG.getNode(
9114 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
9115 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
9116 return DAG.getBitcast(VecVT, IntInsert);
9117 }
9118
9119 MVT ContainerVT = VecVT;
9120 // If the operand is a fixed-length vector, convert to a scalable one.
9121 if (VecVT.isFixedLengthVector()) {
9122 ContainerVT = getContainerForFixedLengthVector(VecVT);
9123 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9124 }
9125
9126 // If we know the index we're going to insert at, we can shrink Vec so that
9127 // we're performing the scalar inserts and slideup on a smaller LMUL.
9128 MVT OrigContainerVT = ContainerVT;
9129 SDValue OrigVec = Vec;
9130 SDValue AlignedIdx;
9131 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
9132 const unsigned OrigIdx = IdxC->getZExtValue();
9133 // Do we know an upper bound on LMUL?
9134 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
9135 DL, DAG, Subtarget)) {
9136 ContainerVT = *ShrunkVT;
9137 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
9138 }
9139
9140 // If we're compiling for an exact VLEN value, we can always perform
9141 // the insert in m1 as we can determine the register corresponding to
9142 // the index in the register group.
9143 const MVT M1VT = getLMUL1VT(ContainerVT);
9144 if (auto VLEN = Subtarget.getRealVLen();
9145 VLEN && ContainerVT.bitsGT(M1VT)) {
9146 EVT ElemVT = VecVT.getVectorElementType();
9147 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
9148 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9149 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9150 unsigned ExtractIdx =
9151 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9152 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
9153 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9154 ContainerVT = M1VT;
9155 }
9156
9157 if (AlignedIdx)
9158 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9159 AlignedIdx);
9160 }
9161
9162 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
9163 // Even i64-element vectors on RV32 can be lowered without scalar
9164 // legalization if the most-significant 32 bits of the value are not affected
9165 // by the sign-extension of the lower 32 bits.
9166 // TODO: We could also catch sign extensions of a 32-bit value.
9167 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
9168 const auto *CVal = cast<ConstantSDNode>(Val);
9169 if (isInt<32>(CVal->getSExtValue())) {
9170 IsLegalInsert = true;
9171 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
9172 }
9173 }
9174
9175 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9176
9177 SDValue ValInVec;
9178
9179 if (IsLegalInsert) {
9180 unsigned Opc =
9182 if (isNullConstant(Idx)) {
9183 if (!VecVT.isFloatingPoint())
9184 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
9185 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
9186
9187 if (AlignedIdx)
9188 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9189 Vec, AlignedIdx);
9190 if (!VecVT.isFixedLengthVector())
9191 return Vec;
9192 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
9193 }
9194 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
9195 } else {
9196 // On RV32, i64-element vectors must be specially handled to place the
9197 // value at element 0, by using two vslide1down instructions in sequence on
9198 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
9199 // this.
9200 SDValue ValLo, ValHi;
9201 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
9202 MVT I32ContainerVT =
9203 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
9204 SDValue I32Mask =
9205 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
9206 // Limit the active VL to two.
9207 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
9208 // If the Idx is 0 we can insert directly into the vector.
9209 if (isNullConstant(Idx)) {
9210 // First slide in the lo value, then the hi in above it. We use slide1down
9211 // to avoid the register group overlap constraint of vslide1up.
9212 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9213 Vec, Vec, ValLo, I32Mask, InsertI64VL);
9214 // If the source vector is undef don't pass along the tail elements from
9215 // the previous slide1down.
9216 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
9217 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9218 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
9219 // Bitcast back to the right container type.
9220 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9221
9222 if (AlignedIdx)
9223 ValInVec =
9224 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9225 ValInVec, AlignedIdx);
9226 if (!VecVT.isFixedLengthVector())
9227 return ValInVec;
9228 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
9229 }
9230
9231 // First slide in the lo value, then the hi in above it. We use slide1down
9232 // to avoid the register group overlap constraint of vslide1up.
9233 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9234 DAG.getUNDEF(I32ContainerVT),
9235 DAG.getUNDEF(I32ContainerVT), ValLo,
9236 I32Mask, InsertI64VL);
9237 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9238 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
9239 I32Mask, InsertI64VL);
9240 // Bitcast back to the right container type.
9241 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9242 }
9243
9244 // Now that the value is in a vector, slide it into position.
9245 SDValue InsertVL =
9246 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
9247
9248 // Use tail agnostic policy if Idx is the last index of Vec.
9250 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
9251 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
9252 Policy = RISCVII::TAIL_AGNOSTIC;
9253 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
9254 Idx, Mask, InsertVL, Policy);
9255
9256 if (AlignedIdx)
9257 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9258 Slideup, AlignedIdx);
9259 if (!VecVT.isFixedLengthVector())
9260 return Slideup;
9261 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
9262}
9263
9264// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
9265// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
9266// types this is done using VMV_X_S to allow us to glean information about the
9267// sign bits of the result.
// Returns SDValue() to request generic (stack-based) expansion when the
// required slide would run at more than LMUL2 on a fixed-length vector; see
// the explanatory comment ahead of that bail-out below.
9268SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
9269 SelectionDAG &DAG) const {
9270 SDLoc DL(Op);
9271 SDValue Idx = Op.getOperand(1);
9272 SDValue Vec = Op.getOperand(0);
9273 EVT EltVT = Op.getValueType();
9274 MVT VecVT = Vec.getSimpleValueType();
9275 MVT XLenVT = Subtarget.getXLenVT();
9276
 // Mask (i1) vectors have no direct element-extract instruction; use vfirst
 // for index 0, or bit manipulation on a wider-element bitcast otherwise.
9277 if (VecVT.getVectorElementType() == MVT::i1) {
9278 // Use vfirst.m to extract the first bit.
9279 if (isNullConstant(Idx)) {
9280 MVT ContainerVT = VecVT;
9281 if (VecVT.isFixedLengthVector()) {
9282 ContainerVT = getContainerForFixedLengthVector(VecVT);
9283 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9284 }
9285 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
 // vfirst yields the index of the first set bit (or -1 if none), so
 // "vfirst == 0" is exactly "element 0 is set".
9286 SDValue Vfirst =
9287 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
9288 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
9289 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9290 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9291 }
9292 if (VecVT.isFixedLengthVector()) {
9293 unsigned NumElts = VecVT.getVectorNumElements();
9294 if (NumElts >= 8) {
 // Bitcast the mask to a vector of wide integer elements, extract the
 // element containing our bit into a GPR, then shift/mask it out.
9295 MVT WideEltVT;
9296 unsigned WidenVecLen;
9297 SDValue ExtractElementIdx;
9298 SDValue ExtractBitIdx;
9299 unsigned MaxEEW = Subtarget.getELen();
9300 MVT LargestEltVT = MVT::getIntegerVT(
9301 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
9302 if (NumElts <= LargestEltVT.getSizeInBits()) {
9303 assert(isPowerOf2_32(NumElts) &&
9304 "the number of elements should be power of 2");
9305 WideEltVT = MVT::getIntegerVT(NumElts);
9306 WidenVecLen = 1;
9307 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
9308 ExtractBitIdx = Idx;
9309 } else {
9310 WideEltVT = LargestEltVT;
9311 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
9312 // extract element index = index / element width
9313 ExtractElementIdx = DAG.getNode(
9314 ISD::SRL, DL, XLenVT, Idx,
9315 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
9316 // mask bit index = index % element width
9317 ExtractBitIdx = DAG.getNode(
9318 ISD::AND, DL, XLenVT, Idx,
9319 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
9320 }
9321 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
9322 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
9323 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
9324 Vec, ExtractElementIdx);
9325 // Extract the bit from GPR.
9326 SDValue ShiftRight =
9327 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
9328 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
9329 DAG.getConstant(1, DL, XLenVT));
9330 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9331 }
9332 }
9333 // Otherwise, promote to an i8 vector and extract from that.
9334 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9335 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9336 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
9337 }
9338
9339 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9340 EltVT == MVT::bf16) {
9341 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
9342 MVT IntVT = VecVT.changeTypeToInteger();
9343 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
9344 SDValue IntExtract =
9345 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
9346 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
9347 }
9348
9349 // If this is a fixed vector, we need to convert it to a scalable vector.
9350 MVT ContainerVT = VecVT;
9351 if (VecVT.isFixedLengthVector()) {
9352 ContainerVT = getContainerForFixedLengthVector(VecVT);
9353 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9354 }
9355
9356 // If we're compiling for an exact VLEN value and we have a known
9357 // constant index, we can always perform the extract in m1 (or
9358 // smaller) as we can determine the register corresponding to
9359 // the index in the register group.
9360 const auto VLen = Subtarget.getRealVLen();
9361 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
9362 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
9363 MVT M1VT = getLMUL1VT(ContainerVT);
9364 unsigned OrigIdx = IdxC->getZExtValue();
9365 EVT ElemVT = VecVT.getVectorElementType();
9366 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
9367 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9368 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9369 unsigned ExtractIdx =
9370 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9371 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
9372 DAG.getVectorIdxConstant(ExtractIdx, DL));
9373 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9374 ContainerVT = M1VT;
9375 }
9376
9377 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
9378 // contains our index.
9379 std::optional<uint64_t> MaxIdx;
9380 if (VecVT.isFixedLengthVector())
9381 MaxIdx = VecVT.getVectorNumElements() - 1;
9382 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
9383 MaxIdx = IdxC->getZExtValue();
9384 if (MaxIdx) {
9385 if (auto SmallerVT =
9386 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
9387 ContainerVT = *SmallerVT;
9388 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9389 DAG.getConstant(0, DL, XLenVT));
9390 }
9391 }
9392
9393 // If after narrowing, the required slide is still greater than LMUL2,
9394 // fallback to generic expansion and go through the stack. This is done
9395 // for a subtle reason: extracting *all* elements out of a vector is
9396 // widely expected to be linear in vector size, but because vslidedown
9397 // is linear in LMUL, performing N extracts using vslidedown becomes
9398 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
9399 // seems to have the same problem (the store is linear in LMUL), but the
9400 // generic expansion *memoizes* the store, and thus for many extracts of
9401 // the same vector we end up with one store and a bunch of loads.
9402 // TODO: We don't have the same code for insert_vector_elt because we
9403 // have BUILD_VECTOR and handle the degenerate case there. Should we
9404 // consider adding an inverse BUILD_VECTOR node?
9405 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
9406 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
9407 return SDValue();
9408
9409 // If the index is 0, the vector is already in the right position.
9410 if (!isNullConstant(Idx)) {
9411 // Use a VL of 1 to avoid processing more elements than we need.
9412 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
9413 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9414 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
9415 }
9416
9417 if (!EltVT.isInteger()) {
9418 // Floating-point extracts are handled in TableGen.
9419 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
9420 DAG.getVectorIdxConstant(0, DL));
9421 }
9422
 // Integer result: read element 0 with vmv.x.s and truncate to EltVT.
9423 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
9424 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
9425}
9426
9427// Some RVV intrinsics may claim that they want an integer operand to be
9428// promoted or expanded.
// Legalizes the scalar ("splat") operand of an RVV intrinsic: operands
// narrower than XLEN are extended to XLenVT; an i64 scalar on RV32 (SEW=64,
// XLEN=32) is either truncated (if already sign-extended), special-cased for
// vslide1up/vslide1down via two SEW=32 slides, or turned into a splat vector.
// Returns SDValue() when no adjustment is needed.
// NOTE(review): the first line of this function's signature (presumably
// "static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,")
// was dropped by the HTML extraction; restore it from upstream.
9430 const RISCVSubtarget &Subtarget) {
9431 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
9432 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
9433 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
9434 "Unexpected opcode");
9435
9436 if (!Subtarget.hasVInstructions())
9437 return SDValue();
9438
 // Chained intrinsics carry the chain in operand 0, shifting the ID by one.
9439 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9440 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9441 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9442
9443 SDLoc DL(Op);
9444
 // NOTE(review): a line was dropped here by the extraction; it declared `II`
 // (presumably "const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =").
9446 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9447 if (!II || !II->hasScalarOperand())
9448 return SDValue();
9449
9450 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
9451 assert(SplatOp < Op.getNumOperands());
9452
 // NOTE(review): a line was dropped here by the extraction; it built the
 // local `Operands` copy of Op's operand list that is mutated below.
9454 SDValue &ScalarOp = Operands[SplatOp];
9455 MVT OpVT = ScalarOp.getSimpleValueType();
9456 MVT XLenVT = Subtarget.getXLenVT();
9457
9458 // If this isn't a scalar, or its type is XLenVT we're done.
9459 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9460 return SDValue();
9461
9462 // Simplest case is that the operand needs to be promoted to XLenVT.
9463 if (OpVT.bitsLT(XLenVT)) {
9464 // If the operand is a constant, sign extend to increase our chances
9465 // of being able to use a .vi instruction. ANY_EXTEND would become a
9466 // a zero extend and the simm5 check in isel would fail.
9467 // FIXME: Should we ignore the upper bits in isel instead?
9468 unsigned ExtOpc =
9469 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9470 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9471 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9472 }
9473
9474 // Use the previous operand to get the vXi64 VT. The result might be a mask
9475 // VT for compares. Using the previous operand assumes that the previous
9476 // operand will never have a smaller element size than a scalar operand and
9477 // that a widening operation never uses SEW=64.
9478 // NOTE: If this fails the below assert, we can probably just find the
9479 // element count from any operand or result and use it to construct the VT.
9480 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
9481 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
9482
9483 // The more complex case is when the scalar is larger than XLenVT.
9484 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
9485 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
9486
9487 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
9488 // instruction to sign-extend since SEW>XLEN.
9489 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
9490 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
9491 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9492 }
9493
9494 switch (IntNo) {
9495 case Intrinsic::riscv_vslide1up:
9496 case Intrinsic::riscv_vslide1down:
9497 case Intrinsic::riscv_vslide1up_mask:
9498 case Intrinsic::riscv_vslide1down_mask: {
9499 // We need to special case these when the scalar is larger than XLen.
9500 unsigned NumOps = Op.getNumOperands();
9501 bool IsMasked = NumOps == 7;
9502
9503 // Convert the vector source to the equivalent nxvXi32 vector.
9504 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
9505 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
9506 SDValue ScalarLo, ScalarHi;
9507 std::tie(ScalarLo, ScalarHi) =
9508 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
9509
9510 // Double the VL since we halved SEW.
9511 SDValue AVL = getVLOperand(Op);
9512 SDValue I32VL;
9513
9514 // Optimize for constant AVL
9515 if (isa<ConstantSDNode>(AVL)) {
9516 const auto [MinVLMAX, MaxVLMAX] =
 // NOTE(review): the initializer line for [MinVLMAX, MaxVLMAX] was dropped
 // by the extraction; restore from upstream.
9518
9519 uint64_t AVLInt = AVL->getAsZExtVal();
9520 if (AVLInt <= MinVLMAX) {
9521 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
9522 } else if (AVLInt >= 2 * MaxVLMAX) {
9523 // Just set vl to VLMAX in this situation
9524 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
9525 } else {
9526 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
9527 // is related to the hardware implementation.
9528 // So let the following code handle
9529 }
9530 }
9531 if (!I32VL) {
 // NOTE(review): the line computing `Lmul` (the LMUL operand for the
 // vsetvli below) was dropped by the extraction; restore from upstream.
9533 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
9534 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
9535 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
9536 SDValue SETVL =
9537 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
9538 // Using vsetvli instruction to get actually used length which related to
9539 // the hardware implementation
9540 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
9541 SEW, LMUL);
9542 I32VL =
9543 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
9544 }
9545
9546 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
9547
9548 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
9549 // instructions.
9550 SDValue Passthru;
9551 if (IsMasked)
9552 Passthru = DAG.getUNDEF(I32VT);
9553 else
9554 Passthru = DAG.getBitcast(I32VT, Operands[1]);
9555
9556 if (IntNo == Intrinsic::riscv_vslide1up ||
9557 IntNo == Intrinsic::riscv_vslide1up_mask) {
9558 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9559 ScalarHi, I32Mask, I32VL);
9560 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9561 ScalarLo, I32Mask, I32VL);
9562 } else {
9563 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9564 ScalarLo, I32Mask, I32VL);
9565 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9566 ScalarHi, I32Mask, I32VL);
9567 }
9568
9569 // Convert back to nxvXi64.
9570 Vec = DAG.getBitcast(VT, Vec);
9571
9572 if (!IsMasked)
9573 return Vec;
9574 // Apply mask after the operation.
9575 SDValue Mask = Operands[NumOps - 3];
9576 SDValue MaskedOff = Operands[1];
9577 // Assume Policy operand is the last operand.
9578 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
9579 // We don't need to select maskedoff if it's undef.
9580 if (MaskedOff.isUndef())
9581 return Vec;
9582 // TAMU
9583 if (Policy == RISCVII::TAIL_AGNOSTIC)
9584 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9585 DAG.getUNDEF(VT), AVL);
9586 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
9587 // It's fine because vmerge does not care mask policy.
9588 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9589 MaskedOff, AVL);
9590 }
9591 }
9592
9593 // We need to convert the scalar to a splat vector.
9594 SDValue VL = getVLOperand(Op);
9595 assert(VL.getValueType() == XLenVT);
9596 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
9597 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9598}
9599
9600// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
9601// scalable vector llvm.get.vector.length for now.
9602//
9603// We need to convert from a scalable VF to a vsetvli with VLMax equal to
9604// (vscale * VF). The vscale and VF are independent of element width. We use
9605// SEW=8 for the vsetvli because it is the only element width that supports all
9606// fractional LMULs. The LMUL is choosen so that with SEW=8 the VLMax is
9607// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
9608// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
9609// SEW and LMUL are better for the surrounding vector instructions.
// NOTE(review): the signature line (presumably "static SDValue
// lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,") was dropped by the
// HTML extraction; restore it from upstream.
9611 const RISCVSubtarget &Subtarget) {
9612 MVT XLenVT = Subtarget.getXLenVT();
9613
9614 // The smallest LMUL is only valid for the smallest element width.
9615 const unsigned ElementWidth = 8;
9616
9617 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
9618 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
9619 // We don't support VF==1 with ELEN==32.
9620 [[maybe_unused]] unsigned MinVF =
9621 RISCV::RVVBitsPerBlock / Subtarget.getELen();
9622
9623 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
9624 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
9625 "Unexpected VF");
9626
 // Pick the (possibly fractional) LMUL whose VLMax at SEW=8 is vscale * VF.
9627 bool Fractional = VF < LMul1VF;
9628 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9629 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
9630 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
9631
9632 SDLoc DL(N);
9633
9634 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
9635 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
9636
 // The requested AVL (operand 1) is zero-extended to XLenVT for vsetvli.
9637 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
9638
9639 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
9640 SDValue Res =
9641 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
9642 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
9643}
9644
// Lowers llvm.experimental.cttz.elts on a mask vector via vfirst.m: vfirst
// yields the index of the first set bit, or -1 when no bit is set. When
// operand 2 is the constant 1 that -1 result is returned as-is; otherwise the
// negative result is mapped to the element count (VL) instead.
// NOTE(review): the signature line (presumably "static SDValue
// lowerCttzElts(SDNode *N, SelectionDAG &DAG,") was dropped by the HTML
// extraction; restore it from upstream.
9646 const RISCVSubtarget &Subtarget) {
9647 SDValue Op0 = N->getOperand(1);
9648 MVT OpVT = Op0.getSimpleValueType();
9649 MVT ContainerVT = OpVT;
9650 if (OpVT.isFixedLengthVector()) {
9651 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
9652 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
9653 }
9654 MVT XLenVT = Subtarget.getXLenVT();
9655 SDLoc DL(N);
9656 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
9657 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
9658 if (isOneConstant(N->getOperand(2)))
9659 return Res;
9660
9661 // Convert -1 to VL.
9662 SDValue Setcc =
9663 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9664 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9665 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9666}
9667
// Promotes the scalar operand (if any) of a VCIX intrinsic to XLenVT in
// place, mirroring the sub-XLEN promotion path of lowerVectorIntrinsicScalars.
// NOTE(review): the second parameter line (presumably
// "SmallVectorImpl<SDValue> &Operands,") was dropped by the HTML extraction.
9668static inline void promoteVCIXScalar(const SDValue &Op,
9670 SelectionDAG &DAG) {
9671 const RISCVSubtarget &Subtarget =
 // NOTE(review): the initializer line (presumably
 // "DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();") was dropped
 // by the HTML extraction.
9673
9674 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9675 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9676 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9677 SDLoc DL(Op);
9678
 // NOTE(review): a line declaring `II` (the intrinsics-table lookup result)
 // was dropped here by the HTML extraction.
9680 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9681 if (!II || !II->hasScalarOperand())
9682 return;
9683
9684 unsigned SplatOp = II->ScalarOperand + 1;
9685 assert(SplatOp < Op.getNumOperands());
9686
9687 SDValue &ScalarOp = Operands[SplatOp];
9688 MVT OpVT = ScalarOp.getSimpleValueType();
9689 MVT XLenVT = Subtarget.getXLenVT();
9690
9691 // The code below is partially copied from lowerVectorIntrinsicScalars.
9692 // If this isn't a scalar, or its type is XLenVT we're done.
9693 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9694 return;
9695
9696 // Manually emit promote operation for scalar operation.
9697 if (OpVT.bitsLT(XLenVT)) {
 // Constants are sign-extended (not any-extended) so immediate forms can
 // still match in isel; see the same choice in lowerVectorIntrinsicScalars.
9698 unsigned ExtOpc =
9699 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9700 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9701 }
9702}
9703
// Normalizes VCIX intrinsic operands: promotes the scalar operand, bitcasts
// floating-point vector operands to same-sized integer vectors, and converts
// fixed-length vector operands to their scalable container types.
// NOTE(review): the second parameter line (presumably
// "SmallVectorImpl<SDValue> &Operands,") was dropped by the HTML extraction.
9704static void processVCIXOperands(SDValue &OrigOp,
9706 SelectionDAG &DAG) {
9707 promoteVCIXScalar(OrigOp, Operands, DAG);
9708 const RISCVSubtarget &Subtarget =
 // NOTE(review): the initializer line (presumably
 // "DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();") was dropped
 // by the HTML extraction.
9710 for (SDValue &V : Operands) {
9711 EVT ValType = V.getValueType();
 // FP vectors are handled as integer payloads by the VCIX lowering.
9712 if (ValType.isVector() && ValType.isFloatingPoint()) {
9713 MVT InterimIVT =
9714 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9715 ValType.getVectorElementCount());
9716 V = DAG.getBitcast(InterimIVT, V);
9717 }
9718 if (ValType.isFixedLengthVector()) {
9719 MVT OpContainerVT = getContainerForFixedLengthVector(
9720 DAG, V.getSimpleValueType(), Subtarget);
9721 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9722 }
9723 }
9724}
9725
9726// LMUL * VLEN should be greater than or equal to EGS * SEW
9727static inline bool isValidEGW(int EGS, EVT VT,
9728 const RISCVSubtarget &Subtarget) {
9729 return (Subtarget.getRealMinVLen() *
9731 EGS * VT.getScalarSizeInBits();
9732}
9733
// Custom lowering for chainless intrinsics. Scalar bitmanip/crypto intrinsics
// map 1:1 onto RISCVISD nodes; vector-crypto cases only validate EGW
// constraints and return Op unchanged; SiFive VCIX cases are rebuilt with
// legalized operands; everything else falls through to
// lowerVectorIntrinsicScalars for scalar-operand legalization.
9734SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9735 SelectionDAG &DAG) const {
9736 unsigned IntNo = Op.getConstantOperandVal(0);
9737 SDLoc DL(Op);
9738 MVT XLenVT = Subtarget.getXLenVT();
9739
9740 switch (IntNo) {
9741 default:
9742 break; // Don't custom lower most intrinsics.
9743 case Intrinsic::riscv_tuple_insert: {
9744 SDValue Vec = Op.getOperand(1);
9745 SDValue SubVec = Op.getOperand(2);
9746 SDValue Index = Op.getOperand(3);
9747
9748 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
9749 SubVec, Index);
9750 }
9751 case Intrinsic::riscv_tuple_extract: {
9752 SDValue Vec = Op.getOperand(1);
9753 SDValue Index = Op.getOperand(2);
9754
9755 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
9756 Index);
9757 }
 // The thread pointer lives in x4 (tp) per the RISC-V psABI.
9758 case Intrinsic::thread_pointer: {
9759 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9760 return DAG.getRegister(RISCV::X4, PtrVT);
9761 }
 // Unary scalar bitmanip/crypto intrinsics: direct opcode translation.
9762 case Intrinsic::riscv_orc_b:
9763 case Intrinsic::riscv_brev8:
9764 case Intrinsic::riscv_sha256sig0:
9765 case Intrinsic::riscv_sha256sig1:
9766 case Intrinsic::riscv_sha256sum0:
9767 case Intrinsic::riscv_sha256sum1:
9768 case Intrinsic::riscv_sm3p0:
9769 case Intrinsic::riscv_sm3p1: {
9770 unsigned Opc;
9771 switch (IntNo) {
9772 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
9773 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
9774 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9775 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9776 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9777 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9778 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
9779 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
9780 }
9781
9782 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9783 }
9784 case Intrinsic::riscv_sm4ks:
9785 case Intrinsic::riscv_sm4ed: {
9786 unsigned Opc =
9787 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9788
9789 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9790 Op.getOperand(3));
9791 }
9792 case Intrinsic::riscv_zip:
9793 case Intrinsic::riscv_unzip: {
9794 unsigned Opc =
9795 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9796 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9797 }
9798 case Intrinsic::riscv_mopr:
9799 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9800 Op.getOperand(2));
9801
9802 case Intrinsic::riscv_moprr: {
9803 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9804 Op.getOperand(2), Op.getOperand(3));
9805 }
9806 case Intrinsic::riscv_clmul:
9807 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9808 Op.getOperand(2));
9809 case Intrinsic::riscv_clmulh:
9810 case Intrinsic::riscv_clmulr: {
9811 unsigned Opc =
9812 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9813 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9814 }
9815 case Intrinsic::experimental_get_vector_length:
9816 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9817 case Intrinsic::experimental_cttz_elts:
9818 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9819 case Intrinsic::riscv_vmv_x_s: {
9820 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9821 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9822 }
9823 case Intrinsic::riscv_vfmv_f_s:
9824 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9825 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9826 case Intrinsic::riscv_vmv_v_x:
9827 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9828 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9829 Subtarget);
9830 case Intrinsic::riscv_vfmv_v_f:
9831 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9832 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9833 case Intrinsic::riscv_vmv_s_x: {
9834 SDValue Scalar = Op.getOperand(2);
9835
9836 if (Scalar.getValueType().bitsLE(XLenVT)) {
9837 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9838 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9839 Op.getOperand(1), Scalar, Op.getOperand(3));
9840 }
9841
9842 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9843
9844 // This is an i64 value that lives in two scalar registers. We have to
9845 // insert this in a convoluted way. First we build vXi64 splat containing
9846 // the two values that we assemble using some bit math. Next we'll use
9847 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9848 // to merge element 0 from our splat into the source vector.
9849 // FIXME: This is probably not the best way to do this, but it is
9850 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9851 // point.
9852 // sw lo, (a0)
9853 // sw hi, 4(a0)
9854 // vlse vX, (a0)
9855 //
9856 // vid.v vVid
9857 // vmseq.vx mMask, vVid, 0
9858 // vmerge.vvm vDest, vSrc, vVal, mMask
9859 MVT VT = Op.getSimpleValueType();
9860 SDValue Vec = Op.getOperand(1);
9861 SDValue VL = getVLOperand(Op);
9862
9863 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9864 if (Op.getOperand(1).isUndef())
9865 return SplattedVal;
9866 SDValue SplattedIdx =
9867 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9868 DAG.getConstant(0, DL, MVT::i32), VL);
9869
9870 MVT MaskVT = getMaskTypeFor(VT);
9871 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9872 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9873 SDValue SelectCond =
9874 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9875 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9876 DAG.getUNDEF(MaskVT), Mask, VL});
9877 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9878 Vec, DAG.getUNDEF(VT), VL);
9879 }
9880 case Intrinsic::riscv_vfmv_s_f:
9881 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9882 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9883 // EGS * EEW >= 128 bits
9884 case Intrinsic::riscv_vaesdf_vv:
9885 case Intrinsic::riscv_vaesdf_vs:
9886 case Intrinsic::riscv_vaesdm_vv:
9887 case Intrinsic::riscv_vaesdm_vs:
9888 case Intrinsic::riscv_vaesef_vv:
9889 case Intrinsic::riscv_vaesef_vs:
9890 case Intrinsic::riscv_vaesem_vv:
9891 case Intrinsic::riscv_vaesem_vs:
9892 case Intrinsic::riscv_vaeskf1:
9893 case Intrinsic::riscv_vaeskf2:
9894 case Intrinsic::riscv_vaesz_vs:
9895 case Intrinsic::riscv_vsm4k:
9896 case Intrinsic::riscv_vsm4r_vv:
9897 case Intrinsic::riscv_vsm4r_vs: {
9898 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9899 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9900 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9901 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9902 return Op;
9903 }
9904 // EGS * EEW >= 256 bits
9905 case Intrinsic::riscv_vsm3c:
9906 case Intrinsic::riscv_vsm3me: {
9907 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9908 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9909 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9910 return Op;
9911 }
9912 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9913 case Intrinsic::riscv_vsha2ch:
9914 case Intrinsic::riscv_vsha2cl:
9915 case Intrinsic::riscv_vsha2ms: {
9916 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9917 !Subtarget.hasStdExtZvknhb())
9918 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9919 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9920 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9921 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9922 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9923 return Op;
9924 }
9925 case Intrinsic::riscv_sf_vc_v_x:
9926 case Intrinsic::riscv_sf_vc_v_i:
9927 case Intrinsic::riscv_sf_vc_v_xv:
9928 case Intrinsic::riscv_sf_vc_v_iv:
9929 case Intrinsic::riscv_sf_vc_v_vv:
9930 case Intrinsic::riscv_sf_vc_v_fv:
9931 case Intrinsic::riscv_sf_vc_v_xvv:
9932 case Intrinsic::riscv_sf_vc_v_ivv:
9933 case Intrinsic::riscv_sf_vc_v_vvv:
9934 case Intrinsic::riscv_sf_vc_v_fvv:
9935 case Intrinsic::riscv_sf_vc_v_xvw:
9936 case Intrinsic::riscv_sf_vc_v_ivw:
9937 case Intrinsic::riscv_sf_vc_v_vvw:
9938 case Intrinsic::riscv_sf_vc_v_fvw: {
9939 MVT VT = Op.getSimpleValueType();
9940
9941 SmallVector<SDValue> Operands{Op->op_values()};
 // NOTE(review): a line was dropped here by the HTML extraction (in
 // upstream it calls processVCIXOperands(Op, Operands, DAG)); verify.
9943
9944 MVT RetVT = VT;
9945 if (VT.isFixedLengthVector())
 // NOTE(review): the bodies of these two branches, which compute RetVT
 // (container type / integer vector type), were dropped by the HTML
 // extraction; restore from upstream.
9947 else if (VT.isFloatingPoint())
9950
9951 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9952
 // Undo the operand conversions on the result.
9953 if (VT.isFixedLengthVector())
9954 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9955 else if (VT.isFloatingPoint())
9956 NewNode = DAG.getBitcast(VT, NewNode);
9957
9958 if (Op == NewNode)
9959 break;
9960
9961 return NewNode;
9962 }
9963 }
9964
9965 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9966}
9967
9969 unsigned Type) {
9970 SDLoc DL(Op);
9971 SmallVector<SDValue> Operands{Op->op_values()};
9972 Operands.erase(Operands.begin() + 1);
9973
9974 const RISCVSubtarget &Subtarget =
9976 MVT VT = Op.getSimpleValueType();
9977 MVT RetVT = VT;
9978 MVT FloatVT = VT;
9979
9980 if (VT.isFloatingPoint()) {
9981 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9983 FloatVT = RetVT;
9984 }
9985 if (VT.isFixedLengthVector())
9987 Subtarget);
9988
9990
9991 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9992 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9993 SDValue Chain = NewNode.getValue(1);
9994
9995 if (VT.isFixedLengthVector())
9996 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9997 if (VT.isFloatingPoint())
9998 NewNode = DAG.getBitcast(VT, NewNode);
9999
10000 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
10001
10002 return NewNode;
10003}
10004
10006 unsigned Type) {
10007 SmallVector<SDValue> Operands{Op->op_values()};
10008 Operands.erase(Operands.begin() + 1);
10010
10011 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
10012}
10013
// Custom-lower ISD::INTRINSIC_W_CHAIN nodes: fixed-length segment loads are
// expanded to the native vlsegN intrinsics on RVV tuple types, and the
// SiFive VCIX *_se intrinsics are dispatched to their VCIX node builders.
// Anything unhandled falls through to lowerVectorIntrinsicScalars.
10014SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
10015 SelectionDAG &DAG) const {
10016 unsigned IntNo = Op.getConstantOperandVal(1);
10017 switch (IntNo) {
10018 default:
10019 break;
10020 case Intrinsic::riscv_seg2_load:
10021 case Intrinsic::riscv_seg3_load:
10022 case Intrinsic::riscv_seg4_load:
10023 case Intrinsic::riscv_seg5_load:
10024 case Intrinsic::riscv_seg6_load:
10025 case Intrinsic::riscv_seg7_load:
10026 case Intrinsic::riscv_seg8_load: {
10027 SDLoc DL(Op);
10028 static const Intrinsic::ID VlsegInts[7] = {
10029 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
10030 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
10031 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
10032 Intrinsic::riscv_vlseg8};
// NF = number of fields: one result value per segment plus the chain.
10033 unsigned NF = Op->getNumValues() - 1;
10034 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10035 MVT XLenVT = Subtarget.getXLenVT();
10036 MVT VT = Op->getSimpleValueType(0);
10037 MVT ContainerVT = getContainerForFixedLengthVector(VT);
// Size in bits of the whole tuple: NF scalable containers.
10038 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10039 ContainerVT.getScalarSizeInBits();
10040 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10041
// AVL is the fixed element count of the original fixed-length type.
10042 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10043 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
10044 auto *Load = cast<MemIntrinsicSDNode>(Op);
10045
10046 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
10047 SDValue Ops[] = {
10048 Load->getChain(),
10049 IntID,
10050 DAG.getUNDEF(VecTupTy),
10051 Op.getOperand(2),
10052 VL,
10053 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
// NOTE(review): the extraction dropped embedded lines 10055 (the
// DAG.getMemIntrinsicNode(...) call that initializes Result) and 10057
// (the declaration of the Results vector) — restore from upstream.
10054 SDValue Result =
10056 Load->getMemoryVT(), Load->getMemOperand());
10058 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
// Pull each field out of the tuple and shrink it back to the
// original fixed-length type.
10059 SDValue SubVec =
10060 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
10061 Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL));
10062 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
10063 }
10064 Results.push_back(Result.getValue(1));
10065 return DAG.getMergeValues(Results, DL);
10066 }
// NOTE(review): each sf_vc_v_* case below originally had a return statement
// (embedded lines 10068..10094 are missing, every other line); presumably
// calls into the VCIX W_CHAIN node builder — verify against upstream.
10067 case Intrinsic::riscv_sf_vc_v_x_se:
10069 case Intrinsic::riscv_sf_vc_v_i_se:
10071 case Intrinsic::riscv_sf_vc_v_xv_se:
10073 case Intrinsic::riscv_sf_vc_v_iv_se:
10075 case Intrinsic::riscv_sf_vc_v_vv_se:
10077 case Intrinsic::riscv_sf_vc_v_fv_se:
10079 case Intrinsic::riscv_sf_vc_v_xvv_se:
10081 case Intrinsic::riscv_sf_vc_v_ivv_se:
10083 case Intrinsic::riscv_sf_vc_v_vvv_se:
10085 case Intrinsic::riscv_sf_vc_v_fvv_se:
10087 case Intrinsic::riscv_sf_vc_v_xvw_se:
10089 case Intrinsic::riscv_sf_vc_v_ivw_se:
10091 case Intrinsic::riscv_sf_vc_v_vvw_se:
10093 case Intrinsic::riscv_sf_vc_v_fvw_se:
10095 }
10096
10097 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10098}
10099
// Custom-lower ISD::INTRINSIC_VOID nodes: fixed-length segment stores are
// expanded to the native vssegN intrinsics over an RVV tuple value, and the
// SiFive VCIX *_se side-effecting intrinsics are dispatched to their node
// builders. Anything unhandled falls through to lowerVectorIntrinsicScalars.
10100SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10101 SelectionDAG &DAG) const {
10102 unsigned IntNo = Op.getConstantOperandVal(1);
10103 switch (IntNo) {
10104 default:
10105 break;
10106 case Intrinsic::riscv_seg2_store:
10107 case Intrinsic::riscv_seg3_store:
10108 case Intrinsic::riscv_seg4_store:
10109 case Intrinsic::riscv_seg5_store:
10110 case Intrinsic::riscv_seg6_store:
10111 case Intrinsic::riscv_seg7_store:
10112 case Intrinsic::riscv_seg8_store: {
10113 SDLoc DL(Op);
10114 static const Intrinsic::ID VssegInts[] = {
10115 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
10116 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
10117 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
10118 Intrinsic::riscv_vsseg8};
10119 // Operands are (chain, int_id, vec*, ptr, vl)
10120 unsigned NF = Op->getNumOperands() - 4;
10121 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10122 MVT XLenVT = Subtarget.getXLenVT();
10123 MVT VT = Op->getOperand(2).getSimpleValueType();
10124 MVT ContainerVT = getContainerForFixedLengthVector(VT);
// Size in bits of the whole tuple: NF scalable containers.
10125 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10126 ContainerVT.getScalarSizeInBits();
10127 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10128
10129 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10130 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
10131 SDValue Ptr = Op->getOperand(NF + 2);
10132
10133 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
10134
// Build the tuple to store by inserting each (scalable-converted) source
// vector into its field slot.
// NOTE(review): embedded line 10139 (the convertToScalableVector( call head
// inside the TUPLE_INSERT operand list) was dropped by the extraction.
10135 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
10136 for (unsigned i = 0; i < NF; i++)
10137 StoredVal = DAG.getNode(
10138 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
10140 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
10141 DAG.getVectorIdxConstant(i, DL));
10142
10143 SDValue Ops[] = {
10144 FixedIntrinsic->getChain(),
10145 IntID,
10146 StoredVal,
10147 Ptr,
10148 VL,
10149 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10150
10151 return DAG.getMemIntrinsicNode(
10152 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
10153 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
10154 }
// NOTE(review): each sf_vc_* case below originally had a return statement
// (embedded lines 10156..10178 are missing, every other line); presumably
// calls into the VCIX VOID node builder — verify against upstream.
10155 case Intrinsic::riscv_sf_vc_xv_se:
10157 case Intrinsic::riscv_sf_vc_iv_se:
10159 case Intrinsic::riscv_sf_vc_vv_se:
10161 case Intrinsic::riscv_sf_vc_fv_se:
10163 case Intrinsic::riscv_sf_vc_xvv_se:
10165 case Intrinsic::riscv_sf_vc_ivv_se:
10167 case Intrinsic::riscv_sf_vc_vvv_se:
10169 case Intrinsic::riscv_sf_vc_fvv_se:
10171 case Intrinsic::riscv_sf_vc_xvw_se:
10173 case Intrinsic::riscv_sf_vc_ivw_se:
10175 case Intrinsic::riscv_sf_vc_vvw_se:
10177 case Intrinsic::riscv_sf_vc_fvw_se:
10179 }
10180
10181 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10182}
10183
// Map a vector-reduction ISD opcode (both the plain VECREDUCE_* form and the
// VL-predicated VP_REDUCE_* form) onto the corresponding RISCVISD reduction
// node. Unhandled opcodes are a programming error (llvm_unreachable).
// NOTE(review): the `return RISCVISD::...;` statement for each case group is
// missing from this extraction (the embedded line numbers jump, e.g.
// 10189 -> 10191); restore from upstream before building.
10184static unsigned getRVVReductionOp(unsigned ISDOpcode) {
10185 switch (ISDOpcode) {
10186 default:
10187 llvm_unreachable("Unhandled reduction");
10188 case ISD::VP_REDUCE_ADD:
10189 case ISD::VECREDUCE_ADD:
10191 case ISD::VP_REDUCE_UMAX:
10194 case ISD::VP_REDUCE_SMAX:
10197 case ISD::VP_REDUCE_UMIN:
10200 case ISD::VP_REDUCE_SMIN:
10203 case ISD::VP_REDUCE_AND:
10204 case ISD::VECREDUCE_AND:
10206 case ISD::VP_REDUCE_OR:
10207 case ISD::VECREDUCE_OR:
10209 case ISD::VP_REDUCE_XOR:
10210 case ISD::VECREDUCE_XOR:
10212 case ISD::VP_REDUCE_FADD:
10214 case ISD::VP_REDUCE_SEQ_FADD:
10216 case ISD::VP_REDUCE_FMAX:
10217 case ISD::VP_REDUCE_FMAXIMUM:
10219 case ISD::VP_REDUCE_FMIN:
10220 case ISD::VP_REDUCE_FMINIMUM:
10222 }
10223
10224}
10225
// Lower AND/OR/XOR reductions over i1 mask vectors (and their VP forms) to a
// vcpop.m-based sequence: count the set bits (inverting first for AND) and
// compare the count against zero.
10226SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
10227 SelectionDAG &DAG,
10228 bool IsVP) const {
10229 SDLoc DL(Op);
// VP reductions carry the start value at operand 0; the vector follows.
10230 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
10231 MVT VecVT = Vec.getSimpleValueType();
10232 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
10233 Op.getOpcode() == ISD::VECREDUCE_OR ||
10234 Op.getOpcode() == ISD::VECREDUCE_XOR ||
10235 Op.getOpcode() == ISD::VP_REDUCE_AND ||
10236 Op.getOpcode() == ISD::VP_REDUCE_OR ||
10237 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
10238 "Unexpected reduction lowering");
10239
10240 MVT XLenVT = Subtarget.getXLenVT();
10241
10242 MVT ContainerVT = VecVT;
10243 if (VecVT.isFixedLengthVector()) {
10244 ContainerVT = getContainerForFixedLengthVector(VecVT);
10245 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10246 }
10247
// VP ops supply their own mask/VL; otherwise use the defaults for the type.
10248 SDValue Mask, VL;
10249 if (IsVP) {
10250 Mask = Op.getOperand(2);
10251 VL = Op.getOperand(3);
10252 } else {
10253 std::tie(Mask, VL) =
10254 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10255 }
10256
// NOTE(review): embedded line 10257 (the declaration of the condition code
// variable CC assigned in the cases below) was dropped by the extraction.
10258 switch (Op.getOpcode()) {
10259 default:
10260 llvm_unreachable("Unhandled reduction");
10261 case ISD::VECREDUCE_AND:
10262 case ISD::VP_REDUCE_AND: {
10263 // vcpop ~x == 0
10264 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10265 if (IsVP || VecVT.isFixedLengthVector())
10266 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
10267 else
10268 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
10269 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10270 CC = ISD::SETEQ;
10271 break;
10272 }
10273 case ISD::VECREDUCE_OR:
10274 case ISD::VP_REDUCE_OR:
10275 // vcpop x != 0
10276 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10277 CC = ISD::SETNE;
10278 break;
10279 case ISD::VECREDUCE_XOR:
10280 case ISD::VP_REDUCE_XOR: {
10281 // ((vcpop x) & 1) != 0
10282 SDValue One = DAG.getConstant(1, DL, XLenVT);
10283 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10284 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
10285 CC = ISD::SETNE;
10286 break;
10287 }
10288 }
10289
// Compare the (possibly masked) popcount against zero and truncate the XLen
// result down to the i1 result type.
10290 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10291 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
10292 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
10293
10294 if (!IsVP)
10295 return SetCC;
10296
10297 // Now include the start value in the operation.
10298 // Note that we must return the start value when no elements are operated
10299 // upon. The vcpop instructions we've emitted in each case above will return
10300 // 0 for an inactive vector, and so we've already received the neutral value:
10301 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
10302 // can simply include the start value.
10303 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10304 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
10305}
10306
10307static bool isNonZeroAVL(SDValue AVL) {
10308 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
10309 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
10310 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
10311 (ImmAVL && ImmAVL->getZExtValue() >= 1);
10312}
10313
10314/// Helper to lower a reduction sequence of the form:
10315/// scalar = reduce_op vec, scalar_start
10316static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
10317 SDValue StartValue, SDValue Vec, SDValue Mask,
10318 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
10319 const RISCVSubtarget &Subtarget) {
10320 const MVT VecVT = Vec.getSimpleValueType();
10321 const MVT M1VT = getLMUL1VT(VecVT);
10322 const MVT XLenVT = Subtarget.getXLenVT();
10323 const bool NonZeroAVL = isNonZeroAVL(VL);
10324
10325 // The reduction needs an LMUL1 input; do the splat at either LMUL1
10326 // or the original VT if fractional.
10327 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
10328 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
10329 // prove it is non-zero. For the AVL=0 case, we need the scalar to
10330 // be the result of the reduction operation.
10331 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
10332 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
10333 DAG, Subtarget);
10334 if (M1VT != InnerVT)
10335 InitialValue =
10336 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
10337 InitialValue, DAG.getVectorIdxConstant(0, DL));
10338 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
10339 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10340 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
10341 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
10342 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
10343 DAG.getVectorIdxConstant(0, DL));
10344}
10345
// Lower an integer VECREDUCE_* over a vector type to the RVV reduction
// sequence, manually splitting illegal (too-wide) types first.
10346SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
10347 SelectionDAG &DAG) const {
10348 SDLoc DL(Op);
10349 SDValue Vec = Op.getOperand(0);
10350 EVT VecEVT = Vec.getValueType();
10351
10352 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10353
10354 // Due to ordering in legalize types we may have a vector type that needs to
10355 // be split. Do that manually so we can get down to a legal type.
// NOTE(review): embedded line 10357 (the right-hand side of the comparison,
// presumably TargetLowering::TypeSplitVector) was dropped by the extraction.
10356 while (getTypeAction(*DAG.getContext(), VecEVT) ==
10358 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
10359 VecEVT = Lo.getValueType();
// Combine the halves with the scalar base op; reduction of the combined
// half-width vector is equivalent for these associative ops.
10360 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
10361 }
10362
10363 // TODO: The type may need to be widened rather than split. Or widened before
10364 // it can be split.
10365 if (!isTypeLegal(VecEVT))
10366 return SDValue();
10367
10368 MVT VecVT = VecEVT.getSimpleVT();
10369 MVT VecEltVT = VecVT.getVectorElementType();
10370 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
10371
10372 MVT ContainerVT = VecVT;
10373 if (VecVT.isFixedLengthVector()) {
10374 ContainerVT = getContainerForFixedLengthVector(VecVT);
10375 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10376 }
10377
10378 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10379
// Start from the op's neutral element; for min/max and and/or (where a
// neutral constant is awkward or unavailable) seed with element 0 instead.
10380 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
10381 switch (BaseOpc) {
10382 case ISD::AND:
10383 case ISD::OR:
10384 case ISD::UMAX:
10385 case ISD::UMIN:
10386 case ISD::SMAX:
10387 case ISD::SMIN:
10388 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
10389 DAG.getVectorIdxConstant(0, DL));
10390 }
10391 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
10392 Mask, VL, DL, DAG, Subtarget);
10393}
10394
10395// Given a reduction op, this function returns the matching reduction opcode,
10396// the vector SDValue and the scalar SDValue required to lower this to a
10397// RISCVISD node.
// NOTE(review): embedded line 10399 — the remainder of this signature with
// the function name and the (Op, DAG, EltVT) parameters — was dropped by the
// extraction, as were lines 10413 (the VECREDUCE_SEQ_FADD case label),
// 10416-10418 (the FMIN/FMAX/FMINIMUM/FMAXIMUM case labels) and 10425-10426
// (the two RISCVISD operands of the ?: below). Restore from upstream.
10398static std::tuple<unsigned, SDValue, SDValue>
10400 const RISCVSubtarget &Subtarget) {
10401 SDLoc DL(Op);
10402 auto Flags = Op->getFlags();
10403 unsigned Opcode = Op.getOpcode();
10404 switch (Opcode) {
10405 default:
10406 llvm_unreachable("Unhandled reduction");
10407 case ISD::VECREDUCE_FADD: {
10408 // Use positive zero if we can. It is cheaper to materialize.
10409 SDValue Zero =
10410 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
10411 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
10412 }
// Ordered (sequential) FADD: the start value is operand 0, vector operand 1.
10414 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
10415 Op.getOperand(0));
10419 case ISD::VECREDUCE_FMAX: {
// Seed min/max reductions with element 0 of the input vector.
10420 SDValue Front =
10421 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
10422 DAG.getVectorIdxConstant(0, DL));
10423 unsigned RVVOpc =
10424 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
10427 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
10428 }
10429 }
10430}
10431
10432SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
10433 SelectionDAG &DAG) const {
10434 SDLoc DL(Op);
10435 MVT VecEltVT = Op.getSimpleValueType();
10436
10437 unsigned RVVOpcode;
10438 SDValue VectorVal, ScalarVal;
10439 std::tie(RVVOpcode, VectorVal, ScalarVal) =
10440 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
10441 MVT VecVT = VectorVal.getSimpleValueType();
10442
10443 MVT ContainerVT = VecVT;
10444 if (VecVT.isFixedLengthVector()) {
10445 ContainerVT = getContainerForFixedLengthVector(VecVT);
10446 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
10447 }
10448
10449 MVT ResVT = Op.getSimpleValueType();
10450 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10451 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
10452 VL, DL, DAG, Subtarget);
10453 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
10454 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
10455 return Res;
10456
10457 if (Op->getFlags().hasNoNaNs())
10458 return Res;
10459
10460 // Force output to NaN if any element is Nan.
10461 SDValue IsNan =
10462 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
10463 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
10464 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
10465 MVT XLenVT = Subtarget.getXLenVT();
10466 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
10467 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
10468 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10469 return DAG.getSelect(
10470 DL, ResVT, NoNaNs, Res,
10471 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10472}
10473
10474SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
10475 SelectionDAG &DAG) const {
10476 SDLoc DL(Op);
10477 unsigned Opc = Op.getOpcode();
10478 SDValue Start = Op.getOperand(0);
10479 SDValue Vec = Op.getOperand(1);
10480 EVT VecEVT = Vec.getValueType();
10481 MVT XLenVT = Subtarget.getXLenVT();
10482
10483 // TODO: The type may need to be widened rather than split. Or widened before
10484 // it can be split.
10485 if (!isTypeLegal(VecEVT))
10486 return SDValue();
10487
10488 MVT VecVT = VecEVT.getSimpleVT();
10489 unsigned RVVOpcode = getRVVReductionOp(Opc);
10490
10491 if (VecVT.isFixedLengthVector()) {
10492 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
10493 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10494 }
10495
10496 SDValue VL = Op.getOperand(3);
10497 SDValue Mask = Op.getOperand(2);
10498 SDValue Res =
10499 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
10500 Vec, Mask, VL, DL, DAG, Subtarget);
10501 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
10502 Op->getFlags().hasNoNaNs())
10503 return Res;
10504
10505 // Propagate NaNs.
10506 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
10507 // Check if any of the elements in Vec is NaN.
10508 SDValue IsNaN = DAG.getNode(
10509 RISCVISD::SETCC_VL, DL, PredVT,
10510 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
10511 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
10512 // Check if the start value is NaN.
10513 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
10514 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
10515 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10516 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10517 MVT ResVT = Res.getSimpleValueType();
10518 return DAG.getSelect(
10519 DL, ResVT, NoNaNs, Res,
10520 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10521}
10522
// Lower ISD::INSERT_SUBVECTOR. Mask (i1) vectors are first re-expressed as i8
// vectors (or extended/truncated when that is impossible); then the insert is
// done either via subregister-aligned INSERT_SUBVECTOR or via a vslideup into
// the nearest LMUL=1 register, depending on alignment and VLEN knowledge.
10523SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
10524 SelectionDAG &DAG) const {
10525 SDValue Vec = Op.getOperand(0);
10526 SDValue SubVec = Op.getOperand(1);
10527 MVT VecVT = Vec.getSimpleValueType();
10528 MVT SubVecVT = SubVec.getSimpleValueType();
10529
10530 SDLoc DL(Op);
10531 MVT XLenVT = Subtarget.getXLenVT();
10532 unsigned OrigIdx = Op.getConstantOperandVal(2);
10533 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10534
// Inserting at index 0 into undef is a no-op.
10535 if (OrigIdx == 0 && Vec.isUndef())
10536 return Op;
10537
10538 // We don't have the ability to slide mask vectors up indexed by their i1
10539 // elements; the smallest we can do is i8. Often we are able to bitcast to
10540 // equivalent i8 vectors. Note that when inserting a fixed-length vector
10541 // into a scalable one, we might not necessarily have enough scalable
10542 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
10543 if (SubVecVT.getVectorElementType() == MVT::i1) {
10544 if (VecVT.getVectorMinNumElements() >= 8 &&
10545 SubVecVT.getVectorMinNumElements() >= 8) {
10546 assert(OrigIdx % 8 == 0 && "Invalid index");
10547 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10548 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10549 "Unexpected mask vector lowering");
10550 OrigIdx /= 8;
10551 SubVecVT =
10552 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10553 SubVecVT.isScalableVector());
10554 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10555 VecVT.isScalableVector());
10556 Vec = DAG.getBitcast(VecVT, Vec);
10557 SubVec = DAG.getBitcast(SubVecVT, SubVec);
10558 } else {
10559 // We can't slide this mask vector up indexed by its i1 elements.
10560 // This poses a problem when we wish to insert a scalable vector which
10561 // can't be re-expressed as a larger type. Just choose the slow path and
10562 // extend to a larger type, then truncate back down.
10563 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10564 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10565 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10566 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
10567 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
10568 Op.getOperand(2));
10569 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
10570 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
10571 }
10572 }
10573
10574 // If the subvector vector is a fixed-length type and we don't know VLEN
10575 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10576 // don't know which register of a LMUL group contains the specific subvector
10577 // as we only know the minimum register size. Therefore we must slide the
10578 // vector group up the full amount.
10579 const auto VLen = Subtarget.getRealVLen();
10580 if (SubVecVT.isFixedLengthVector() && !VLen) {
10581 MVT ContainerVT = VecVT;
10582 if (VecVT.isFixedLengthVector()) {
10583 ContainerVT = getContainerForFixedLengthVector(VecVT);
10584 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10585 }
10586
// Widen the subvector into the container type at index 0 so it can be slid.
10587 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10588 DAG.getUNDEF(ContainerVT), SubVec,
10589 DAG.getVectorIdxConstant(0, DL));
10590
10591 SDValue Mask =
10592 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10593 // Set the vector length to only the number of elements we care about. Note
10594 // that for slideup this includes the offset.
10595 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10596 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
10597
10598 // Use tail agnostic policy if we're inserting over Vec's tail.
// NOTE(review): embedded line 10599 (the declaration/initialization of the
// Policy variable, presumably defaulting to a tail-undisturbed policy) was
// dropped by the extraction — restore from upstream.
10600 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10601 Policy = RISCVII::TAIL_AGNOSTIC;
10602
10603 // If we're inserting into the lowest elements, use a tail undisturbed
10604 // vmv.v.v.
10605 if (OrigIdx == 0) {
10606 SubVec =
10607 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
10608 } else {
10609 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10610 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
10611 SlideupAmt, Mask, VL, Policy);
10612 }
10613
10614 if (VecVT.isFixedLengthVector())
10615 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10616 return DAG.getBitcast(Op.getValueType(), SubVec);
10617 }
10618
10619 MVT ContainerVecVT = VecVT;
10620 if (VecVT.isFixedLengthVector()) {
10621 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10622 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10623 }
10624
10625 MVT ContainerSubVecVT = SubVecVT;
10626 if (SubVecVT.isFixedLengthVector()) {
10627 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10628 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10629 }
10630
10631 unsigned SubRegIdx;
10632 ElementCount RemIdx;
10633 // insert_subvector scales the index by vscale if the subvector is scalable,
10634 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10635 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
// NOTE(review): embedded lines 10640 and 10647 (the callee of each
// `auto Decompose =` below — the decomposeSubvectorInsertExtractToSubRegs
// call mentioned in the comment above) were dropped by the extraction.
10636 if (SubVecVT.isFixedLengthVector()) {
10637 assert(VLen);
10638 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10639 auto Decompose =
10641 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10642 SubRegIdx = Decompose.first;
10643 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10644 (OrigIdx % Vscale));
10645 } else {
10646 auto Decompose =
10648 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10649 SubRegIdx = Decompose.first;
10650 RemIdx = ElementCount::getScalable(Decompose.second);
10651 }
10652
// NOTE(review): embedded lines 10653-10654 (the declaration of VecRegSize
// used by expandVScale below) were dropped by the extraction.
10655 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10656 bool ExactlyVecRegSized =
10657 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10658 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10659
10660 // 1. If the Idx has been completely eliminated and this subvector's size is
10661 // a vector register or a multiple thereof, or the surrounding elements are
10662 // undef, then this is a subvector insert which naturally aligns to a vector
10663 // register. These can easily be handled using subregister manipulation.
10664 // 2. If the subvector isn't an exact multiple of a valid register group size,
10665 // then the insertion must preserve the undisturbed elements of the register.
10666 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10667 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10668 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10669 // of that LMUL=1 type back into the larger vector (resolving to another
10670 // subregister operation). See below for how our VSLIDEUP works. We go via a
10671 // LMUL=1 type to avoid allocating a large register group to hold our
10672 // subvector.
10673 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10674 if (SubVecVT.isFixedLengthVector()) {
10675 // We may get NoSubRegister if inserting at index 0 and the subvec
10676 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10677 if (SubRegIdx == RISCV::NoSubRegister) {
10678 assert(OrigIdx == 0);
10679 return Op;
10680 }
10681
10682 // Use a insert_subvector that will resolve to an insert subreg.
10683 assert(VLen);
10684 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10685 SDValue Insert =
10686 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10687 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10688 if (VecVT.isFixedLengthVector())
10689 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10690 return Insert;
10691 }
10692 return Op;
10693 }
10694
10695 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
10696 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10697 // (in our case undisturbed). This means we can set up a subvector insertion
10698 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10699 // size of the subvector.
10700 MVT InterSubVT = ContainerVecVT;
10701 SDValue AlignedExtract = Vec;
10702 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10703 if (SubVecVT.isFixedLengthVector()) {
10704 assert(VLen);
10705 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10706 }
10707 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10708 InterSubVT = getLMUL1VT(ContainerVecVT);
10709 // Extract a subvector equal to the nearest full vector register type. This
10710 // should resolve to a EXTRACT_SUBREG instruction.
10711 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10712 DAG.getVectorIdxConstant(AlignedIdx, DL));
10713 }
10714
10715 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10716 DAG.getUNDEF(InterSubVT), SubVec,
10717 DAG.getVectorIdxConstant(0, DL));
10718
10719 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10720
10721 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10722 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10723
10724 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
// NOTE(review): embedded line 10725 (the declaration/initialization of the
// Policy variable used below) was dropped by the extraction.
10726 if (Subtarget.expandVScale(EndIndex) ==
10727 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10728 Policy = RISCVII::TAIL_AGNOSTIC;
10729
10730 // If we're inserting into the lowest elements, use a tail undisturbed
10731 // vmv.v.v.
10732 if (RemIdx.isZero()) {
10733 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10734 SubVec, VL);
10735 } else {
10736 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10737
10738 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10739 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10740
10741 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10742 SlideupAmt, Mask, VL, Policy);
10743 }
10744
10745 // If required, insert this subvector back into the correct vector register.
10746 // This should resolve to an INSERT_SUBREG instruction.
10747 if (ContainerVecVT.bitsGT(InterSubVT))
10748 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10749 DAG.getVectorIdxConstant(AlignedIdx, DL));
10750
10751 if (VecVT.isFixedLengthVector())
10752 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10753
10754 // We might have bitcast from a mask type: cast back to the original type if
10755 // required.
10756 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10757}
10758
// Lower ISD::EXTRACT_SUBVECTOR for RVV. Strategy, in order of preference:
//  * an index-0 extract is a cast-like operation handled by subregisters;
//  * i1 mask sources are recast as i8 vectors (or zero-extended to i8 and
//    compared back down) since masks cannot be slid by their i1 elements;
//  * a fixed-length result with unknown exact VLEN must be slid down by the
//    full element amount;
//  * otherwise the index is decomposed into a subregister plus a remainder,
//    and any non-zero remainder is handled with a vslidedown on a piece no
//    larger than LMUL1.
10759SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10760                                                    SelectionDAG &DAG) const {
10761  SDValue Vec = Op.getOperand(0);
10762  MVT SubVecVT = Op.getSimpleValueType();
10763  MVT VecVT = Vec.getSimpleValueType();
10764
10765  SDLoc DL(Op);
10766  MVT XLenVT = Subtarget.getXLenVT();
10767  unsigned OrigIdx = Op.getConstantOperandVal(1);
10768  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10769
10770  // With an index of 0 this is a cast-like subvector, which can be performed
10771  // with subregister operations.
10772  if (OrigIdx == 0)
10773    return Op;
10774
10775  // We don't have the ability to slide mask vectors down indexed by their i1
10776  // elements; the smallest we can do is i8. Often we are able to bitcast to
10777  // equivalent i8 vectors. Note that when extracting a fixed-length vector
10778  // from a scalable one, we might not necessarily have enough scalable
10779  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10780  if (SubVecVT.getVectorElementType() == MVT::i1) {
10781    if (VecVT.getVectorMinNumElements() >= 8 &&
10782        SubVecVT.getVectorMinNumElements() >= 8) {
10783      assert(OrigIdx % 8 == 0 && "Invalid index");
10784      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10785             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10786             "Unexpected mask vector lowering");
        // Reinterpret both vectors as i8 vectors 1/8th the size; the index
        // scales down by the same factor.
10787      OrigIdx /= 8;
10788      SubVecVT =
10789          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10790                           SubVecVT.isScalableVector());
10791      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10792                               VecVT.isScalableVector());
10793      Vec = DAG.getBitcast(VecVT, Vec);
10794    } else {
10795      // We can't slide this mask vector down, indexed by its i1 elements.
10796      // This poses a problem when we wish to extract a scalable vector which
10797      // can't be re-expressed as a larger type. Just choose the slow path and
10798      // extend to a larger type, then truncate back down.
10799      // TODO: We could probably improve this when extracting certain fixed
10800      // from fixed, where we can extract as i8 and shift the correct element
10801      // right to reach the desired subvector?
10802      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10803      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10804      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10805      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10806                        Op.getOperand(1));
      // Compare the widened extract against zero to recover the i1 result.
10807      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10808      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10809    }
10810  }
10811
10812  const auto VLen = Subtarget.getRealVLen();
10813
10814  // If the subvector vector is a fixed-length type and we don't know VLEN
10815  // exactly, we cannot use subregister manipulation to simplify the codegen; we
10816  // don't know which register of a LMUL group contains the specific subvector
10817  // as we only know the minimum register size. Therefore we must slide the
10818  // vector group down the full amount.
10819  if (SubVecVT.isFixedLengthVector() && !VLen) {
10820    MVT ContainerVT = VecVT;
10821    if (VecVT.isFixedLengthVector()) {
10822      ContainerVT = getContainerForFixedLengthVector(VecVT);
10823      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10824    }
10825
10826    // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10827    unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10828    if (auto ShrunkVT =
10829            getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10830      ContainerVT = *ShrunkVT;
10831      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10832                        DAG.getVectorIdxConstant(0, DL));
10833    }
10834
10835    SDValue Mask =
10836        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10837    // Set the vector length to only the number of elements we care about. This
10838    // avoids sliding down elements we're going to discard straight away.
10839    SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10840    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10841    SDValue Slidedown =
10842        getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10843                      DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10844    // Now we can use a cast-like subvector extract to get the result.
10845    Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10846                            DAG.getVectorIdxConstant(0, DL));
10847    return DAG.getBitcast(Op.getValueType(), Slidedown);
10848  }
10849
10850  if (VecVT.isFixedLengthVector()) {
10851    VecVT = getContainerForFixedLengthVector(VecVT);
10852    Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10853  }
10854
10855  MVT ContainerSubVecVT = SubVecVT;
10856  if (SubVecVT.isFixedLengthVector())
10857    ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10858
10859  unsigned SubRegIdx;
10860  ElementCount RemIdx;
10861  // extract_subvector scales the index by vscale if the subvector is scalable,
10862  // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10863  // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10864  if (SubVecVT.isFixedLengthVector()) {
10865    assert(VLen);
10866    unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10867    auto Decompose =
10869            VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10870    SubRegIdx = Decompose.first;
    // Re-scale the leftover element count back up to fixed elements, adding
    // back the part of the index below a whole vscale multiple.
10871    RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10872                                    (OrigIdx % Vscale));
10873  } else {
10874    auto Decompose =
10876            VecVT, ContainerSubVecVT, OrigIdx, TRI);
10877    SubRegIdx = Decompose.first;
10878    RemIdx = ElementCount::getScalable(Decompose.second);
10879  }
10880
10881  // If the Idx has been completely eliminated then this is a subvector extract
10882  // which naturally aligns to a vector register. These can easily be handled
10883  // using subregister manipulation. We use an extract_subvector that will
10884  // resolve to an extract subreg.
10885  if (RemIdx.isZero()) {
10886    if (SubVecVT.isFixedLengthVector()) {
10887      assert(VLen);
10888      unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10889      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec,
10890                        DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10891      return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10892    }
10893    return Op;
10894  }
10895
10896  // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10897  // was > M1 then the index would need to be a multiple of VLMAX, and so would
10898  // divide exactly.
10899  assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10900         getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10901
10902  // If the vector type is an LMUL-group type, extract a subvector equal to the
10903  // nearest full vector register type.
10904  MVT InterSubVT = VecVT;
10905  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10906    // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10907    // we should have successfully decomposed the extract into a subregister.
10908    // We use an extract_subvector that will resolve to a subreg extract.
10909    assert(SubRegIdx != RISCV::NoSubRegister);
10910    (void)SubRegIdx;
10911    unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
10912    if (SubVecVT.isFixedLengthVector()) {
10913      assert(VLen);
10914      Idx /= *VLen / RISCV::RVVBitsPerBlock;
10915    }
10916    InterSubVT = getLMUL1VT(VecVT);
10917    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10918                      DAG.getConstant(Idx, DL, XLenVT));
10919  }
10920
10921  // Slide this vector register down by the desired number of elements in order
10922  // to place the desired subvector starting at element 0.
10923  SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10924  auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10925  if (SubVecVT.isFixedLengthVector())
10926    VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10927  SDValue Slidedown =
10928      getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10929                    Vec, SlidedownAmt, Mask, VL);
10930
10931  // Now the vector is in the right position, extract our final subvector. This
10932  // should resolve to a COPY.
10933  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10934                          DAG.getVectorIdxConstant(0, DL));
10935
10936  // We might have bitcast from a mask type: cast back to the original type if
10937  // required.
10938  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10939}
10940
10941// Widen a vector's operands to i8, then truncate its results back to the
10942// original type, typically i1. All operand and result types must be the same.
// Each operand is zero-extended to the i8 form of VT, the operation is redone
// at i8, and every result is compared against zero (SETNE) to recover the
// original element type. Multiple results are re-merged with getMergeValues.
10944                              SelectionDAG &DAG) {
10945  MVT VT = N.getSimpleValueType();
10946  MVT WideVT = VT.changeVectorElementType(MVT::i8);
10948  for (SDValue Op : N->ops()) {
10949    assert(Op.getSimpleValueType() == VT &&
10950           "Operands and result must be same type");
10951    WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10952  }
10953
10954  unsigned NumVals = N->getNumValues();
10955
10957      NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10958  SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
  // Truncate each widened result back down by testing it against zero.
10959  SmallVector<SDValue, 4> TruncVals;
10960  for (unsigned I = 0; I < NumVals; I++) {
10961    TruncVals.push_back(
10962        DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10963                     DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10964  }
10965
10966  if (TruncVals.size() > 1)
10967    return DAG.getMergeValues(TruncVals, DL);
10968  return TruncVals.front();
10969}
10970
// Lower VECTOR_DEINTERLEAVE (two results: the even-indexed and odd-indexed
// elements of the pair of input vectors) for scalable vectors. i1 inputs are
// widened to i8; LMUL=8 inputs are split in half and reassembled; when SEW is
// below ELEN a vnsrl-based shift/trunc is used; otherwise the inputs are
// concatenated and vcompress'd with alternating 0b01/0b10 element masks.
10971SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10972                                                      SelectionDAG &DAG) const {
10973  SDLoc DL(Op);
10974  MVT VecVT = Op.getSimpleValueType();
10975
10976  assert(VecVT.isScalableVector() &&
10977         "vector_interleave on non-scalable vector!");
10978
10979  // 1 bit element vectors need to be widened to e8
10980  if (VecVT.getVectorElementType() == MVT::i1)
10981    return widenVectorOpsToi8(Op, DL, DAG);
10982
10983  // If the VT is LMUL=8, we need to split and reassemble.
10984  if (VecVT.getSizeInBits().getKnownMinValue() ==
10985      (8 * RISCV::RVVBitsPerBlock)) {
10986    auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10987    auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10988    EVT SplitVT = Op0Lo.getValueType();
10989
    // Deinterleave each half independently, then concatenate the matching
    // even/odd results from the two halves.
10991                               DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10993                               DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10994
10995    SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10996                               ResLo.getValue(0), ResHi.getValue(0));
10997    SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10998                              ResHi.getValue(1));
10999    return DAG.getMergeValues({Even, Odd}, DL);
11000  }
11001
11002  // Concatenate the two vectors as one vector to deinterleave
11003  MVT ConcatVT =
11006  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11007                               Op.getOperand(0), Op.getOperand(1));
11008
11009  // We can deinterleave through vnsrl.wi if the element type is smaller than
11010  // ELEN
11011  if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11012    SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
11013    SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
11014    return DAG.getMergeValues({Even, Odd}, DL);
11015  }
11016
11017  // For the indices, use the vmv.v.x of an i8 constant to fill the largest
11018  // possibly mask vector, then extract the required subvector. Doing this
11019  // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
11020  // creation to be rematerialized during register allocation to reduce
11021  // register pressure if needed.
11022
11023  MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
11024
  // 0b01010101 splatted across i8 lanes = mask selecting even elements.
11025  SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
11026  EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
11027  SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, EvenSplat,
11028                                 DAG.getVectorIdxConstant(0, DL));
11029
  // 0b10101010 splatted across i8 lanes = mask selecting odd elements.
11030  SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
11031  OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
11032  SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, OddSplat,
11033                                DAG.getVectorIdxConstant(0, DL));
11034
11035  // vcompress the even and odd elements into two separate vectors
11036  SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11037                                 EvenMask, DAG.getUNDEF(ConcatVT));
11038  SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11039                                OddMask, DAG.getUNDEF(ConcatVT));
11040
11041  // Extract the result half of the gather for even and odd
11042  SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
11043                             DAG.getVectorIdxConstant(0, DL));
11044  SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
11045                            DAG.getVectorIdxConstant(0, DL));
11046
11047  return DAG.getMergeValues({Even, Odd}, DL);
11048}
11049
// Lower VECTOR_INTERLEAVE (two results: low and high halves of the
// interleaved pair) for scalable vectors. i1 inputs are widened to i8;
// LMUL=8 inputs are split and reassembled; when SEW is below ELEN a widening
// interleave (vwaddu.vv/vwmaccu.vx) is used; otherwise the pair is
// concatenated and permuted with vrgatherei16.vv using computed indices.
11050SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
11051                                                    SelectionDAG &DAG) const {
11052  SDLoc DL(Op);
11053  MVT VecVT = Op.getSimpleValueType();
11054
11055  assert(VecVT.isScalableVector() &&
11056         "vector_interleave on non-scalable vector!");
11057
11058  // i1 vectors need to be widened to i8
11059  if (VecVT.getVectorElementType() == MVT::i1)
11060    return widenVectorOpsToi8(Op, DL, DAG);
11061
11062  MVT XLenVT = Subtarget.getXLenVT();
  // X0 as VL means VLMAX.
11063  SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
11064
11065  // If the VT is LMUL=8, we need to split and reassemble.
11066  if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
11067    auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11068    auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
11069    EVT SplitVT = Op0Lo.getValueType();
11070
    // Interleave the low halves of both operands, then the high halves.
11072                               DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
11074                               DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
11075
11076    SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11077                             ResLo.getValue(0), ResLo.getValue(1));
11078    SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11079                             ResHi.getValue(0), ResHi.getValue(1));
11080    return DAG.getMergeValues({Lo, Hi}, DL);
11081  }
11082
11083  SDValue Interleaved;
11084
11085  // If the element type is smaller than ELEN, then we can interleave with
11086  // vwaddu.vv and vwmaccu.vx
11087  if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11088    Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
11089                                        DAG, Subtarget);
11090  } else {
11091    // Otherwise, fallback to using vrgathere16.vv
11092    MVT ConcatVT =
11095    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11096                                 Op.getOperand(0), Op.getOperand(1));
11097
11098    MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
11099
11100    // 0 1 2 3 4 5 6 7 ...
11101    SDValue StepVec = DAG.getStepVector(DL, IdxVT);
11102
11103    // 1 1 1 1 1 1 1 1 ...
11104    SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
11105
11106    // 1 0 1 0 1 0 1 0 ...
11107    SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
11108    OddMask = DAG.getSetCC(
11109        DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
11110        DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
11112
11113    SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
11114
11115    // Build up the index vector for interleaving the concatenated vector
11116    // 0 0 1 1 2 2 3 3 ...
11117    SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
11118    // 0 n 1 n+1 2 n+2 3 n+3 ...
    // Masked add: only odd lanes get VLMAX added, pointing them at the
    // second source vector within the concatenation.
11119    Idx =
11120        DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
11121
11122    // Then perform the interleave
11123    // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
11124    SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
11125    Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
11126                              Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
11127  }
11128
11129  // Extract the two halves from the interleaved result
11130  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11131                           DAG.getVectorIdxConstant(0, DL));
11132  SDValue Hi = DAG.getNode(
11133      ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11135
11136  return DAG.getMergeValues({Lo, Hi}, DL);
11137}
11138
11139// Lower step_vector to the vid instruction. Any non-identity step value must
11140// be accounted for my manual expansion.
11141SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
11142 SelectionDAG &DAG) const {
11143 SDLoc DL(Op);
11144 MVT VT = Op.getSimpleValueType();
11145 assert(VT.isScalableVector() && "Expected scalable vector");
11146 MVT XLenVT = Subtarget.getXLenVT();
11147 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
11148 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11149 uint64_t StepValImm = Op.getConstantOperandVal(0);
11150 if (StepValImm != 1) {
11151 if (isPowerOf2_64(StepValImm)) {
11152 SDValue StepVal =
11153 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11154 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
11155 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
11156 } else {
11157 SDValue StepVal = lowerScalarSplat(
11158 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
11159 VL, VT, DL, DAG, Subtarget);
11160 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
11161 }
11162 }
11163 return StepVec;
11164}
11165
11166// Implement vector_reverse using vrgather.vv with indices determined by
11167// subtracting the id of each element from (VLMAX-1). This will convert
11168// the indices like so:
11169// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
11170// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11171SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
11172                                                 SelectionDAG &DAG) const {
11173  SDLoc DL(Op);
11174  MVT VecVT = Op.getSimpleValueType();
  // i1 masks: widen to i8, reverse, and compare back down against zero.
11175  if (VecVT.getVectorElementType() == MVT::i1) {
11176    MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11177    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
11178    SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
11179    return DAG.getSetCC(DL, VecVT, Op2,
11180                        DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
11181  }
11182
11183  MVT ContainerVT = VecVT;
11184  SDValue Vec = Op.getOperand(0);
11185  if (VecVT.isFixedLengthVector()) {
11186    ContainerVT = getContainerForFixedLengthVector(VecVT);
11187    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11188  }
11189
11190  MVT XLenVT = Subtarget.getXLenVT();
11191  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11192
11193  // On some uarchs vrgather.vv will read from every input register for each
11194  // output register, regardless of the indices. However to reverse a vector
11195  // each output register only needs to read from one register. So decompose it
11196  // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
11197  // O(LMUL^2).
11198  //
11199  // vsetvli a1, zero, e64, m4, ta, ma
11200  // vrgatherei16.vv v12, v8, v16
11201  // ->
11202  // vsetvli a1, zero, e64, m1, ta, ma
11203  // vrgather.vv v15, v8, v16
11204  // vrgather.vv v14, v9, v16
11205  // vrgather.vv v13, v10, v16
11206  // vrgather.vv v12, v11, v16
11207  if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
11208      ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
    // Recursively reverse each half, then concatenate them swapped.
11209    auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11210    Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
11211    Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
11212    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
11213
11214    // Fixed length vectors might not fit exactly into their container, and so
11215    // leave a gap in the front of the vector after being reversed. Slide this
11216    // away.
11217    //
11218    // x x x x 3 2 1 0 <- v4i16 @ vlen=128
11219    // 0 1 2 3 x x x x <- reverse
11220    // x x x x 0 1 2 3 <- vslidedown.vx
11221    if (VecVT.isFixedLengthVector()) {
11222      SDValue Offset = DAG.getNode(
11223          ISD::SUB, DL, XLenVT,
11224          DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
11225          DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
11226      Concat =
11227          getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11228                        DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
11229      Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
11230    }
11231    return Concat;
11232  }
11233
11234  unsigned EltSize = ContainerVT.getScalarSizeInBits();
11235  unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
11236  unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11237  unsigned MaxVLMAX =
11238      VecVT.isFixedLengthVector()
11239          ? VecVT.getVectorNumElements()
11240          : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11241
11242  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11243  MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
11244
11245  // If this is SEW=8 and VLMAX is potentially more than 256, we need
11246  // to use vrgatherei16.vv.
11247  if (MaxVLMAX > 256 && EltSize == 8) {
11248    // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
11249    // Reverse each half, then reassemble them in reverse order.
11250    // NOTE: It's also possible that after splitting that VLMAX no longer
11251    // requires vrgatherei16.vv.
11252    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11253      auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11254      auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
11255      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11256      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11257      // Reassemble the low and high pieces reversed.
11258      // FIXME: This is a CONCAT_VECTORS.
11259      SDValue Res =
11260          DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
11261                      DAG.getVectorIdxConstant(0, DL));
11262      return DAG.getNode(
11263          ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
11264          DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
11265    }
11266
11267    // Just promote the int type to i16 which will double the LMUL.
11268    IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
11269    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11270  }
11271
11272  // At LMUL > 1, do the index computation in 16 bits to reduce register
11273  // pressure.
11274  if (IntVT.getScalarType().bitsGT(MVT::i16) &&
11275      IntVT.bitsGT(getLMUL1VT(IntVT))) {
11276    assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
11277    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11278    IntVT = IntVT.changeVectorElementType(MVT::i16);
11279  }
11280
11281  // Calculate VLMAX-1 for the desired SEW.
11282  SDValue VLMinus1 = DAG.getNode(
11283      ISD::SUB, DL, XLenVT,
11284      DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
11285      DAG.getConstant(1, DL, XLenVT));
11286
11287  // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
11288  bool IsRV32E64 =
11289      !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
11290  SDValue SplatVL;
11291  if (!IsRV32E64)
11292    SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
11293  else
11294    SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
11295                          VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
11296
  // Indices = (VLMAX-1) - vid, i.e. the reversed element positions.
11297  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
11298  SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
11299                                DAG.getUNDEF(IntVT), Mask, VL);
11300
11301  SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
11302                               DAG.getUNDEF(ContainerVT), Mask, VL);
11303  if (VecVT.isFixedLengthVector())
11304    Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
11305  return Gather;
11306}
11307
// Lower VECTOR_SPLICE by sliding V1 down by the splice offset and then
// sliding V2 up into the vacated tail. Operand 2 is a TargetConstant splice
// immediate: non-negative means an offset from the start of V1, negative
// means a count of trailing elements of V1.
11308SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
11309                                                SelectionDAG &DAG) const {
11310  SDLoc DL(Op);
11311  SDValue V1 = Op.getOperand(0);
11312  SDValue V2 = Op.getOperand(1);
11313  MVT XLenVT = Subtarget.getXLenVT();
11314  MVT VecVT = Op.getSimpleValueType();
11315
11316  SDValue VLMax = computeVLMax(VecVT, DL, DAG);
11317
11318  int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
  // DownOffset: elements to slide V1 down; UpOffset: slot where V2 starts.
  // They always sum to VLMAX.
11319  SDValue DownOffset, UpOffset;
11320  if (ImmValue >= 0) {
11321    // The operand is a TargetConstant, we need to rebuild it as a regular
11322    // constant.
11323    DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11324    UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
11325  } else {
11326    // The operand is a TargetConstant, we need to rebuild it as a regular
11327    // constant rather than negating the original operand.
11328    UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11329    DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
11330  }
11331
11332  SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
11333
  // Slide V1 down with VL=UpOffset so only the surviving elements are moved,
  // then slide V2 up to fill the remainder (VL=VLMAX via X0).
11334  SDValue SlideDown =
11335      getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
11336                    DownOffset, TrueMask, UpOffset);
11337  return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
11338                     TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
11340}
11341
// Lower a fixed-length vector load to an RVV load: either a plain whole
// register load (when the exact VLEN is known and the fixed vector fills its
// container) or a vle/vlm intrinsic with VL set to the element count.
// Returns the merged {result, chain} pair.
11342SDValue
11343RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
11344                                                     SelectionDAG &DAG) const {
11345  SDLoc DL(Op);
11346  auto *Load = cast<LoadSDNode>(Op);
11347
11349                                        Load->getMemoryVT(),
11350                                        *Load->getMemOperand()) &&
11351         "Expecting a correctly-aligned load");
11352
11353  MVT VT = Op.getSimpleValueType();
11354  MVT XLenVT = Subtarget.getXLenVT();
11355  MVT ContainerVT = getContainerForFixedLengthVector(VT);
11356
11357  // If we know the exact VLEN and our fixed length vector completely fills
11358  // the container, use a whole register load instead.
11359  const auto [MinVLMAX, MaxVLMAX] =
11360      RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11361  if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11362      getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11363    MachineMemOperand *MMO = Load->getMemOperand();
11364    SDValue NewLoad =
11365        DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
11366                    MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
11367                    MMO->getAAInfo(), MMO->getRanges());
11368    SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11369    return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11370  }
11371
11372  SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11373
  // Mask loads (i1 elements) use vlm, which takes no passthru operand.
11374  bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11375  SDValue IntID = DAG.getTargetConstant(
11376      IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
11377  SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
11378  if (!IsMaskOp)
11379    Ops.push_back(DAG.getUNDEF(ContainerVT));
11380  Ops.push_back(Load->getBasePtr());
11381  Ops.push_back(VL);
11382  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11383  SDValue NewLoad =
11385                              Load->getMemoryVT(), Load->getMemOperand());
11386
11387  SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11388  return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11389}
11390
// Lower a fixed-length vector store to an RVV store: sub-byte i1 vectors are
// first zero-padded to v8i1; then either a plain whole register store (when
// the exact VLEN is known and the value fills its container) or a vse/vsm
// intrinsic with VL set to the element count.
11391SDValue
11392RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
11393                                                      SelectionDAG &DAG) const {
11394  SDLoc DL(Op);
11395  auto *Store = cast<StoreSDNode>(Op);
11396
11398                                        Store->getMemoryVT(),
11399                                        *Store->getMemOperand()) &&
11400         "Expecting a correctly-aligned store");
11401
11402  SDValue StoreVal = Store->getValue();
11403  MVT VT = StoreVal.getSimpleValueType();
11404  MVT XLenVT = Subtarget.getXLenVT();
11405
11406  // If the size less than a byte, we need to pad with zeros to make a byte.
11407  if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
11408    VT = MVT::v8i1;
11409    StoreVal =
11410        DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
11411                    StoreVal, DAG.getVectorIdxConstant(0, DL));
11412  }
11413
11414  MVT ContainerVT = getContainerForFixedLengthVector(VT);
11415
11416  SDValue NewValue =
11417      convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11418
11419  // If we know the exact VLEN and our fixed length vector completely fills
11420  // the container, use a whole register store instead.
11421  const auto [MinVLMAX, MaxVLMAX] =
11422      RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11423  if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11424      getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11425    MachineMemOperand *MMO = Store->getMemOperand();
11426    return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
11427                        MMO->getPointerInfo(), MMO->getBaseAlign(),
11428                        MMO->getFlags(), MMO->getAAInfo());
11429  }
11430
11431  SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11432
  // Mask stores (i1 elements) use vsm.
11433  bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11434  SDValue IntID = DAG.getTargetConstant(
11435      IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
11436  return DAG.getMemIntrinsicNode(
11437      ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
11438      {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11439      Store->getMemoryVT(), Store->getMemOperand());
11440}
11441
// Lower a masked load (MaskedLoadSDNode or VPLoadSDNode) to a vle/vle_mask
// intrinsic. Expanding loads additionally compute the popcount of the mask
// as the effective VL, load that many contiguous elements, and then scatter
// them to the masked positions via viota + vrgather.
11442SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11443                                             SelectionDAG &DAG) const {
11444  SDLoc DL(Op);
11445  MVT VT = Op.getSimpleValueType();
11446
11447  const auto *MemSD = cast<MemSDNode>(Op);
11448  EVT MemVT = MemSD->getMemoryVT();
11449  MachineMemOperand *MMO = MemSD->getMemOperand();
11450  SDValue Chain = MemSD->getChain();
11451  SDValue BasePtr = MemSD->getBasePtr();
11452
  // VP loads carry an explicit vector length; masked loads carry a passthru
  // and may be expanding.
11453  SDValue Mask, PassThru, VL;
11454  bool IsExpandingLoad = false;
11455  if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
11456    Mask = VPLoad->getMask();
11457    PassThru = DAG.getUNDEF(VT);
11458    VL = VPLoad->getVectorLength();
11459  } else {
11460    const auto *MLoad = cast<MaskedLoadSDNode>(Op);
11461    Mask = MLoad->getMask();
11462    PassThru = MLoad->getPassThru();
11463    IsExpandingLoad = MLoad->isExpandingLoad();
11464  }
11465
11466  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11467
11468  MVT XLenVT = Subtarget.getXLenVT();
11469
11470  MVT ContainerVT = VT;
11471  if (VT.isFixedLengthVector()) {
11472    ContainerVT = getContainerForFixedLengthVector(VT);
11473    PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11474    if (!IsUnmasked) {
11475      MVT MaskVT = getMaskTypeFor(ContainerVT);
11476      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11477    }
11478  }
11479
11480  if (!VL)
11481    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11482
  // For an expanding load the memory access reads only popcount(Mask)
  // contiguous elements; remember the original VL for the expansion step.
11483  SDValue ExpandingVL;
11484  if (!IsUnmasked && IsExpandingLoad) {
11485    ExpandingVL = VL;
11486    VL =
11487        DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11488                    getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11489  }
11490
11491  unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11492                                                 : Intrinsic::riscv_vle_mask;
11493  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11494  if (IntID == Intrinsic::riscv_vle)
11495    Ops.push_back(DAG.getUNDEF(ContainerVT));
11496  else
11497    Ops.push_back(PassThru);
11498  Ops.push_back(BasePtr);
11499  if (IntID == Intrinsic::riscv_vle_mask)
11500    Ops.push_back(Mask);
11501  Ops.push_back(VL);
11502  if (IntID == Intrinsic::riscv_vle_mask)
11503    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11504
11505  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11506
11507  SDValue Result =
11508      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11509  Chain = Result.getValue(1);
11510  if (ExpandingVL) {
    // Scatter the densely-loaded elements to their masked positions:
    // viota produces, for each active lane, the number of active lanes
    // before it, which is exactly the gather index into the dense result.
11511    MVT IndexVT = ContainerVT;
11512    if (ContainerVT.isFloatingPoint())
11513      IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11514
11515    MVT IndexEltVT = IndexVT.getVectorElementType();
11516    bool UseVRGATHEREI16 = false;
11517    // If index vector is an i8 vector and the element count exceeds 256, we
11518    // should change the element type of index vector to i16 to avoid
11519    // overflow.
11520    if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11521      // FIXME: We need to do vector splitting manually for LMUL=8 cases.
11522      assert(getLMUL(IndexVT) != RISCVII::LMUL_8);
11523      IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11524      UseVRGATHEREI16 = true;
11525    }
11526
11527    SDValue Iota =
11528        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11529                    DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
11530                    DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
11531    Result =
11532        DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
11534                    DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
11535  }
11536
11537  if (VT.isFixedLengthVector())
11538    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11539
11540  return DAG.getMergeValues({Result, Chain}, DL);
11541}
11542
// Lower a masked store (MaskedStoreSDNode or VPStoreSDNode) to a
// vse/vse_mask intrinsic. Compressing stores first vcompress the value so
// the active elements are contiguous, then store popcount(Mask) elements
// with an unmasked vse.
11543SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
11544                                              SelectionDAG &DAG) const {
11545  SDLoc DL(Op);
11546
11547  const auto *MemSD = cast<MemSDNode>(Op);
11548  EVT MemVT = MemSD->getMemoryVT();
11549  MachineMemOperand *MMO = MemSD->getMemOperand();
11550  SDValue Chain = MemSD->getChain();
11551  SDValue BasePtr = MemSD->getBasePtr();
11552  SDValue Val, Mask, VL;
11553
  // VP stores carry an explicit vector length; masked stores may be
  // compressing.
11554  bool IsCompressingStore = false;
11555  if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
11556    Val = VPStore->getValue();
11557    Mask = VPStore->getMask();
11558    VL = VPStore->getVectorLength();
11559  } else {
11560    const auto *MStore = cast<MaskedStoreSDNode>(Op);
11561    Val = MStore->getValue();
11562    Mask = MStore->getMask();
11563    IsCompressingStore = MStore->isCompressingStore();
11564  }
11565
  // A compressing store becomes unmasked: the mask is consumed by the
  // vcompress below rather than by the store itself.
11566  bool IsUnmasked =
11567      ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
11568
11569  MVT VT = Val.getSimpleValueType();
11570  MVT XLenVT = Subtarget.getXLenVT();
11571
11572  MVT ContainerVT = VT;
11573  if (VT.isFixedLengthVector()) {
11574    ContainerVT = getContainerForFixedLengthVector(VT);
11575
11576    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11577    if (!IsUnmasked || IsCompressingStore) {
11578      MVT MaskVT = getMaskTypeFor(ContainerVT);
11579      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11580    }
11581  }
11582
11583  if (!VL)
11584    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11585
11586  if (IsCompressingStore) {
11587    Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11588                      DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11589                      DAG.getUNDEF(ContainerVT), Val, Mask, VL);
    // Only popcount(Mask) contiguous elements are actually written.
11590    VL =
11591        DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11592                    getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11593  }
11594
11595  unsigned IntID =
11596      IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11597  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11598  Ops.push_back(Val);
11599  Ops.push_back(BasePtr);
11600  if (!IsUnmasked)
11601    Ops.push_back(Mask);
11602  Ops.push_back(VL);
11603
11605                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11606}
11607
11608SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
11609 SelectionDAG &DAG) const {
11610 SDLoc DL(Op);
11611 SDValue Val = Op.getOperand(0);
11612 SDValue Mask = Op.getOperand(1);
11613 SDValue Passthru = Op.getOperand(2);
11614
11615 MVT VT = Val.getSimpleValueType();
11616 MVT XLenVT = Subtarget.getXLenVT();
11617 MVT ContainerVT = VT;
11618 if (VT.isFixedLengthVector()) {
11619 ContainerVT = getContainerForFixedLengthVector(VT);
11620 MVT MaskVT = getMaskTypeFor(ContainerVT);
11621 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11622 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11623 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
11624 }
11625
11626 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11627 SDValue Res =
11628 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11629 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11630 Passthru, Val, Mask, VL);
11631
11632 if (VT.isFixedLengthVector())
11633 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
11634
11635 return Res;
11636}
11637
11638SDValue
11639RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
11640 SelectionDAG &DAG) const {
11641 MVT InVT = Op.getOperand(0).getSimpleValueType();
11642 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
11643
11644 MVT VT = Op.getSimpleValueType();
11645
11646 SDValue Op1 =
11647 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
11648 SDValue Op2 =
11649 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11650
11651 SDLoc DL(Op);
11652 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
11653 DAG, Subtarget);
11654 MVT MaskVT = getMaskTypeFor(ContainerVT);
11655
11656 SDValue Cmp =
11657 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11658 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11659
11660 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
11661}
11662
11663SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
11664 SelectionDAG &DAG) const {
11665 unsigned Opc = Op.getOpcode();
11666 SDLoc DL(Op);
11667 SDValue Chain = Op.getOperand(0);
11668 SDValue Op1 = Op.getOperand(1);
11669 SDValue Op2 = Op.getOperand(2);
11670 SDValue CC = Op.getOperand(3);
11671 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
11672 MVT VT = Op.getSimpleValueType();
11673 MVT InVT = Op1.getSimpleValueType();
11674
11675 // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE
11676 // condition code.
11677 if (Opc == ISD::STRICT_FSETCCS) {
11678 // Expand strict_fsetccs(x, oeq) to
11679 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
11680 SDVTList VTList = Op->getVTList();
11681 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
11682 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
11683 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11684 Op2, OLECCVal);
11685 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
11686 Op1, OLECCVal);
11687 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11688 Tmp1.getValue(1), Tmp2.getValue(1));
11689 // Tmp1 and Tmp2 might be the same node.
11690 if (Tmp1 != Tmp2)
11691 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
11692 return DAG.getMergeValues({Tmp1, OutChain}, DL);
11693 }
11694
11695 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11696 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
11697 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11698 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11699 Op2, OEQCCVal);
11700 SDValue Res = DAG.getNOT(DL, OEQ, VT);
11701 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
11702 }
11703 }
11704
11705 MVT ContainerInVT = InVT;
11706 if (InVT.isFixedLengthVector()) {
11707 ContainerInVT = getContainerForFixedLengthVector(InVT);
11708 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11709 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11710 }
11711 MVT MaskVT = getMaskTypeFor(ContainerInVT);
11712
11713 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
11714
11715 SDValue Res;
11716 if (Opc == ISD::STRICT_FSETCC &&
11717 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
11718 CCVal == ISD::SETOLE)) {
11719 // VMFLT/VMFLE/VMFGT/VMFGE raise exception for qNan. Generate a mask to only
11720 // active when both input elements are ordered.
11721 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
11722 SDValue OrderMask1 = DAG.getNode(
11723 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11724 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11725 True, VL});
11726 SDValue OrderMask2 = DAG.getNode(
11727 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11728 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11729 True, VL});
11730 Mask =
11731 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
11732 // Use Mask as the passthru operand to let the result be 0 if either of the
11733 // inputs is unordered.
11735 DAG.getVTList(MaskVT, MVT::Other),
11736 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11737 } else {
11738 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
11740 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
11741 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11742 }
11743
11744 if (VT.isFixedLengthVector()) {
11745 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
11746 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
11747 }
11748 return Res;
11749}
11750
11751// Lower vector ABS to smax(X, sub(0, X)).
11752SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
11753 SDLoc DL(Op);
11754 MVT VT = Op.getSimpleValueType();
11755 SDValue X = Op.getOperand(0);
11756
11757 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
11758 "Unexpected type for ISD::ABS");
11759
11760 MVT ContainerVT = VT;
11761 if (VT.isFixedLengthVector()) {
11762 ContainerVT = getContainerForFixedLengthVector(VT);
11763 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
11764 }
11765
11766 SDValue Mask, VL;
11767 if (Op->getOpcode() == ISD::VP_ABS) {
11768 Mask = Op->getOperand(1);
11769 if (VT.isFixedLengthVector())
11770 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11771 Subtarget);
11772 VL = Op->getOperand(2);
11773 } else
11774 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11775
11776 SDValue SplatZero = DAG.getNode(
11777 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11778 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11779 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11780 DAG.getUNDEF(ContainerVT), Mask, VL);
11781 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11782 DAG.getUNDEF(ContainerVT), Mask, VL);
11783
11784 if (VT.isFixedLengthVector())
11785 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11786 return Max;
11787}
11788
11789SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11790 SDValue Op, SelectionDAG &DAG) const {
11791 SDLoc DL(Op);
11792 MVT VT = Op.getSimpleValueType();
11793 SDValue Mag = Op.getOperand(0);
11794 SDValue Sign = Op.getOperand(1);
11795 assert(Mag.getValueType() == Sign.getValueType() &&
11796 "Can only handle COPYSIGN with matching types.");
11797
11798 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11799 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11800 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11801
11802 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11803
11804 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11805 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11806
11807 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11808}
11809
11810SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11811 SDValue Op, SelectionDAG &DAG) const {
11812 MVT VT = Op.getSimpleValueType();
11813 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11814
11815 MVT I1ContainerVT =
11816 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11817
11818 SDValue CC =
11819 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11820 SDValue Op1 =
11821 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11822 SDValue Op2 =
11823 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11824
11825 SDLoc DL(Op);
11826 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11827
11828 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11829 Op2, DAG.getUNDEF(ContainerVT), VL);
11830
11831 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11832}
11833
11834SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11835 SelectionDAG &DAG) const {
11836 unsigned NewOpc = getRISCVVLOp(Op);
11837 bool HasPassthruOp = hasPassthruOp(NewOpc);
11838 bool HasMask = hasMaskOp(NewOpc);
11839
11840 MVT VT = Op.getSimpleValueType();
11841 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11842
11843 // Create list of operands by converting existing ones to scalable types.
11845 for (const SDValue &V : Op->op_values()) {
11846 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11847
11848 // Pass through non-vector operands.
11849 if (!V.getValueType().isVector()) {
11850 Ops.push_back(V);
11851 continue;
11852 }
11853
11854 // "cast" fixed length vector to a scalable vector.
11855 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11856 "Only fixed length vectors are supported!");
11857 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11858 }
11859
11860 SDLoc DL(Op);
11861 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11862 if (HasPassthruOp)
11863 Ops.push_back(DAG.getUNDEF(ContainerVT));
11864 if (HasMask)
11865 Ops.push_back(Mask);
11866 Ops.push_back(VL);
11867
11868 // StrictFP operations have two result values. Their lowered result should
11869 // have same result count.
11870 if (Op->isStrictFPOpcode()) {
11871 SDValue ScalableRes =
11872 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11873 Op->getFlags());
11874 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11875 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11876 }
11877
11878 SDValue ScalableRes =
11879 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11880 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11881}
11882
11883// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11884// * Operands of each node are assumed to be in the same order.
11885// * The EVL operand is promoted from i32 to i64 on RV64.
11886// * Fixed-length vectors are converted to their scalable-vector container
11887// types.
11888SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11889 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11890 bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
11891
11892 SDLoc DL(Op);
11893 MVT VT = Op.getSimpleValueType();
11895
11896 MVT ContainerVT = VT;
11897 if (VT.isFixedLengthVector())
11898 ContainerVT = getContainerForFixedLengthVector(VT);
11899
11900 for (const auto &OpIdx : enumerate(Op->ops())) {
11901 SDValue V = OpIdx.value();
11902 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11903 // Add dummy passthru value before the mask. Or if there isn't a mask,
11904 // before EVL.
11905 if (HasPassthruOp) {
11906 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11907 if (MaskIdx) {
11908 if (*MaskIdx == OpIdx.index())
11909 Ops.push_back(DAG.getUNDEF(ContainerVT));
11910 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11911 OpIdx.index()) {
11912 if (Op.getOpcode() == ISD::VP_MERGE) {
11913 // For VP_MERGE, copy the false operand instead of an undef value.
11914 Ops.push_back(Ops.back());
11915 } else {
11916 assert(Op.getOpcode() == ISD::VP_SELECT);
11917 // For VP_SELECT, add an undef value.
11918 Ops.push_back(DAG.getUNDEF(ContainerVT));
11919 }
11920 }
11921 }
11922 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
11923 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
11924 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
11926 Subtarget.getXLenVT()));
11927 // Pass through operands which aren't fixed-length vectors.
11928 if (!V.getValueType().isFixedLengthVector()) {
11929 Ops.push_back(V);
11930 continue;
11931 }
11932 // "cast" fixed length vector to a scalable vector.
11933 MVT OpVT = V.getSimpleValueType();
11934 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11935 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11936 "Only fixed length vectors are supported!");
11937 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11938 }
11939
11940 if (!VT.isFixedLengthVector())
11941 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11942
11943 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11944
11945 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11946}
11947
11948SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11949 SelectionDAG &DAG) const {
11950 SDLoc DL(Op);
11951 MVT VT = Op.getSimpleValueType();
11952
11953 SDValue Src = Op.getOperand(0);
11954 // NOTE: Mask is dropped.
11955 SDValue VL = Op.getOperand(2);
11956
11957 MVT ContainerVT = VT;
11958 if (VT.isFixedLengthVector()) {
11959 ContainerVT = getContainerForFixedLengthVector(VT);
11960 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11961 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11962 }
11963
11964 MVT XLenVT = Subtarget.getXLenVT();
11965 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11966 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11967 DAG.getUNDEF(ContainerVT), Zero, VL);
11968
11969 SDValue SplatValue = DAG.getSignedConstant(
11970 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11971 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11972 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11973
11974 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11975 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11976 if (!VT.isFixedLengthVector())
11977 return Result;
11978 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11979}
11980
11981SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11982 SelectionDAG &DAG) const {
11983 SDLoc DL(Op);
11984 MVT VT = Op.getSimpleValueType();
11985
11986 SDValue Op1 = Op.getOperand(0);
11987 SDValue Op2 = Op.getOperand(1);
11988 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11989 // NOTE: Mask is dropped.
11990 SDValue VL = Op.getOperand(4);
11991
11992 MVT ContainerVT = VT;
11993 if (VT.isFixedLengthVector()) {
11994 ContainerVT = getContainerForFixedLengthVector(VT);
11995 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11996 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11997 }
11998
12000 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12001
12002 switch (Condition) {
12003 default:
12004 break;
12005 // X != Y --> (X^Y)
12006 case ISD::SETNE:
12007 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
12008 break;
12009 // X == Y --> ~(X^Y)
12010 case ISD::SETEQ: {
12011 SDValue Temp =
12012 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
12013 Result =
12014 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
12015 break;
12016 }
12017 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12018 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12019 case ISD::SETGT:
12020 case ISD::SETULT: {
12021 SDValue Temp =
12022 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12023 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
12024 break;
12025 }
12026 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12027 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12028 case ISD::SETLT:
12029 case ISD::SETUGT: {
12030 SDValue Temp =
12031 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12032 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
12033 break;
12034 }
12035 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12036 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12037 case ISD::SETGE:
12038 case ISD::SETULE: {
12039 SDValue Temp =
12040 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12041 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
12042 break;
12043 }
12044 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12045 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12046 case ISD::SETLE:
12047 case ISD::SETUGE: {
12048 SDValue Temp =
12049 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12050 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
12051 break;
12052 }
12053 }
12054
12055 if (!VT.isFixedLengthVector())
12056 return Result;
12057 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12058}
12059
12060// Lower Floating-Point/Integer Type-Convert VP SDNodes
12061SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
12062 SelectionDAG &DAG) const {
12063 SDLoc DL(Op);
12064
12065 SDValue Src = Op.getOperand(0);
12066 SDValue Mask = Op.getOperand(1);
12067 SDValue VL = Op.getOperand(2);
12068 unsigned RISCVISDOpc = getRISCVVLOp(Op);
12069
12070 MVT DstVT = Op.getSimpleValueType();
12071 MVT SrcVT = Src.getSimpleValueType();
12072 if (DstVT.isFixedLengthVector()) {
12073 DstVT = getContainerForFixedLengthVector(DstVT);
12074 SrcVT = getContainerForFixedLengthVector(SrcVT);
12075 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
12076 MVT MaskVT = getMaskTypeFor(DstVT);
12077 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12078 }
12079
12080 unsigned DstEltSize = DstVT.getScalarSizeInBits();
12081 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
12082
12084 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
12085 if (SrcVT.isInteger()) {
12086 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12087
12088 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
12091
12092 // Do we need to do any pre-widening before converting?
12093 if (SrcEltSize == 1) {
12094 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
12095 MVT XLenVT = Subtarget.getXLenVT();
12096 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
12097 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12098 DAG.getUNDEF(IntVT), Zero, VL);
12099 SDValue One = DAG.getSignedConstant(
12100 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
12101 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12102 DAG.getUNDEF(IntVT), One, VL);
12103 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
12104 ZeroSplat, DAG.getUNDEF(IntVT), VL);
12105 } else if (DstEltSize > (2 * SrcEltSize)) {
12106 // Widen before converting.
12107 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
12108 DstVT.getVectorElementCount());
12109 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
12110 }
12111
12112 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12113 } else {
12114 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12115 "Wrong input/output vector types");
12116
12117 // Convert f16 to f32 then convert f32 to i64.
12118 if (DstEltSize > (2 * SrcEltSize)) {
12119 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12120 MVT InterimFVT =
12121 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12122 Src =
12123 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
12124 }
12125
12126 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12127 }
12128 } else { // Narrowing + Conversion
12129 if (SrcVT.isInteger()) {
12130 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12131 // First do a narrowing convert to an FP type half the size, then round
12132 // the FP type to a small FP type if needed.
12133
12134 MVT InterimFVT = DstVT;
12135 if (SrcEltSize > (2 * DstEltSize)) {
12136 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
12137 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12138 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12139 }
12140
12141 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
12142
12143 if (InterimFVT != DstVT) {
12144 Src = Result;
12145 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
12146 }
12147 } else {
12148 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12149 "Wrong input/output vector types");
12150 // First do a narrowing conversion to an integer half the size, then
12151 // truncate if needed.
12152
12153 if (DstEltSize == 1) {
12154 // First convert to the same size integer, then convert to mask using
12155 // setcc.
12156 assert(SrcEltSize >= 16 && "Unexpected FP type!");
12157 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
12158 DstVT.getVectorElementCount());
12159 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12160
12161 // Compare the integer result to 0. The integer should be 0 or 1/-1,
12162 // otherwise the conversion was undefined.
12163 MVT XLenVT = Subtarget.getXLenVT();
12164 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
12165 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
12166 DAG.getUNDEF(InterimIVT), SplatZero, VL);
12167 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
12168 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
12169 DAG.getUNDEF(DstVT), Mask, VL});
12170 } else {
12171 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12172 DstVT.getVectorElementCount());
12173
12174 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12175
12176 while (InterimIVT != DstVT) {
12177 SrcEltSize /= 2;
12178 Src = Result;
12179 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12180 DstVT.getVectorElementCount());
12181 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
12182 Src, Mask, VL);
12183 }
12184 }
12185 }
12186 }
12187
12188 MVT VT = Op.getSimpleValueType();
12189 if (!VT.isFixedLengthVector())
12190 return Result;
12191 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12192}
12193
12194SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
12195 SelectionDAG &DAG) const {
12196 SDLoc DL(Op);
12197 MVT VT = Op.getSimpleValueType();
12198 MVT XLenVT = Subtarget.getXLenVT();
12199
12200 SDValue Mask = Op.getOperand(0);
12201 SDValue TrueVal = Op.getOperand(1);
12202 SDValue FalseVal = Op.getOperand(2);
12203 SDValue VL = Op.getOperand(3);
12204
12205 // Use default legalization if a vector of EVL type would be legal.
12206 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
12208 if (isTypeLegal(EVLVecVT))
12209 return SDValue();
12210
12211 MVT ContainerVT = VT;
12212 if (VT.isFixedLengthVector()) {
12213 ContainerVT = getContainerForFixedLengthVector(VT);
12214 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
12215 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
12216 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
12217 }
12218
12219 // Promote to a vector of i8.
12220 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
12221
12222 // Promote TrueVal and FalseVal using VLMax.
12223 // FIXME: Is there a better way to do this?
12224 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
12225 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12226 DAG.getUNDEF(PromotedVT),
12227 DAG.getConstant(1, DL, XLenVT), VLMax);
12228 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12229 DAG.getUNDEF(PromotedVT),
12230 DAG.getConstant(0, DL, XLenVT), VLMax);
12231 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
12232 SplatZero, DAG.getUNDEF(PromotedVT), VL);
12233 // Any element past VL uses FalseVal, so use VLMax
12234 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
12235 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
12236
12237 // VP_MERGE the two promoted values.
12238 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
12239 TrueVal, FalseVal, FalseVal, VL);
12240
12241 // Convert back to mask.
12242 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12243 SDValue Result = DAG.getNode(
12244 RISCVISD::SETCC_VL, DL, ContainerVT,
12245 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
12246 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
12247
12248 if (VT.isFixedLengthVector())
12249 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12250 return Result;
12251}
12252
12253SDValue
12254RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
12255 SelectionDAG &DAG) const {
12256 SDLoc DL(Op);
12257
12258 SDValue Op1 = Op.getOperand(0);
12259 SDValue Op2 = Op.getOperand(1);
12260 SDValue Offset = Op.getOperand(2);
12261 SDValue Mask = Op.getOperand(3);
12262 SDValue EVL1 = Op.getOperand(4);
12263 SDValue EVL2 = Op.getOperand(5);
12264
12265 const MVT XLenVT = Subtarget.getXLenVT();
12266 MVT VT = Op.getSimpleValueType();
12267 MVT ContainerVT = VT;
12268 if (VT.isFixedLengthVector()) {
12269 ContainerVT = getContainerForFixedLengthVector(VT);
12270 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12271 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12272 MVT MaskVT = getMaskTypeFor(ContainerVT);
12273 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12274 }
12275
12276 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
12277 if (IsMaskVector) {
12278 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
12279
12280 // Expand input operands
12281 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12282 DAG.getUNDEF(ContainerVT),
12283 DAG.getConstant(1, DL, XLenVT), EVL1);
12284 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12285 DAG.getUNDEF(ContainerVT),
12286 DAG.getConstant(0, DL, XLenVT), EVL1);
12287 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
12288 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
12289
12290 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12291 DAG.getUNDEF(ContainerVT),
12292 DAG.getConstant(1, DL, XLenVT), EVL2);
12293 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12294 DAG.getUNDEF(ContainerVT),
12295 DAG.getConstant(0, DL, XLenVT), EVL2);
12296 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
12297 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
12298 }
12299
12300 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
12301 SDValue DownOffset, UpOffset;
12302 if (ImmValue >= 0) {
12303 // The operand is a TargetConstant, we need to rebuild it as a regular
12304 // constant.
12305 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12306 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
12307 } else {
12308 // The operand is a TargetConstant, we need to rebuild it as a regular
12309 // constant rather than negating the original operand.
12310 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12311 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
12312 }
12313
12314 SDValue SlideDown =
12315 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
12316 Op1, DownOffset, Mask, UpOffset);
12317 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
12318 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
12319
12320 if (IsMaskVector) {
12321 // Truncate Result back to a mask vector (Result has same EVL as Op2)
12322 Result = DAG.getNode(
12323 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
12324 {Result, DAG.getConstant(0, DL, ContainerVT),
12325 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12326 Mask, EVL2});
12327 }
12328
12329 if (!VT.isFixedLengthVector())
12330 return Result;
12331 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12332}
12333
12334SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
12335 SelectionDAG &DAG) const {
12336 SDLoc DL(Op);
12337 SDValue Val = Op.getOperand(0);
12338 SDValue Mask = Op.getOperand(1);
12339 SDValue VL = Op.getOperand(2);
12340 MVT VT = Op.getSimpleValueType();
12341
12342 MVT ContainerVT = VT;
12343 if (VT.isFixedLengthVector()) {
12344 ContainerVT = getContainerForFixedLengthVector(VT);
12345 MVT MaskVT = getMaskTypeFor(ContainerVT);
12346 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12347 }
12348
12349 SDValue Result =
12350 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12351
12352 if (!VT.isFixedLengthVector())
12353 return Result;
12354 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12355}
12356
12357SDValue
12358RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
12359 SelectionDAG &DAG) const {
12360 SDLoc DL(Op);
12361 MVT VT = Op.getSimpleValueType();
12362 MVT XLenVT = Subtarget.getXLenVT();
12363
12364 SDValue Op1 = Op.getOperand(0);
12365 SDValue Mask = Op.getOperand(1);
12366 SDValue EVL = Op.getOperand(2);
12367
12368 MVT ContainerVT = VT;
12369 if (VT.isFixedLengthVector()) {
12370 ContainerVT = getContainerForFixedLengthVector(VT);
12371 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12372 MVT MaskVT = getMaskTypeFor(ContainerVT);
12373 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12374 }
12375
12376 MVT GatherVT = ContainerVT;
12377 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
12378 // Check if we are working with mask vectors
12379 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
12380 if (IsMaskVector) {
12381 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
12382
12383 // Expand input operand
12384 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12385 DAG.getUNDEF(IndicesVT),
12386 DAG.getConstant(1, DL, XLenVT), EVL);
12387 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12388 DAG.getUNDEF(IndicesVT),
12389 DAG.getConstant(0, DL, XLenVT), EVL);
12390 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
12391 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
12392 }
12393
12394 unsigned EltSize = GatherVT.getScalarSizeInBits();
12395 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
12396 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12397 unsigned MaxVLMAX =
12398 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12399
12400 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12401 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
12402 // to use vrgatherei16.vv.
12403 // TODO: It's also possible to use vrgatherei16.vv for other types to
12404 // decrease register width for the index calculation.
12405 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12406 if (MaxVLMAX > 256 && EltSize == 8) {
12407 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
12408 // Split the vector in half and reverse each half using a full register
12409 // reverse.
12410 // Swap the halves and concatenate them.
12411 // Slide the concatenated result by (VLMax - VL).
12412 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12413 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
12414 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
12415
12416 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12417 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12418
12419 // Reassemble the low and high pieces reversed.
12420 // NOTE: this Result is unmasked (because we do not need masks for
12421 // shuffles). If in the future this has to change, we can use a SELECT_VL
12422 // between Result and UNDEF using the mask originally passed to VP_REVERSE
12423 SDValue Result =
12424 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
12425
12426 // Slide off any elements from past EVL that were reversed into the low
12427 // elements.
12428 unsigned MinElts = GatherVT.getVectorMinNumElements();
12429 SDValue VLMax =
12430 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
12431 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
12432
12433 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
12434 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12435
12436 if (IsMaskVector) {
12437 // Truncate Result back to a mask vector
12438 Result =
12439 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
12440 {Result, DAG.getConstant(0, DL, GatherVT),
12442 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12443 }
12444
12445 if (!VT.isFixedLengthVector())
12446 return Result;
12447 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12448 }
12449
12450 // Just promote the int type to i16 which will double the LMUL.
12451 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
12452 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12453 }
12454
12455 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
12456 SDValue VecLen =
12457 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
12458 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12459 DAG.getUNDEF(IndicesVT), VecLen, EVL);
12460 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
12461 DAG.getUNDEF(IndicesVT), Mask, EVL);
12462 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
12463 DAG.getUNDEF(GatherVT), Mask, EVL);
12464
12465 if (IsMaskVector) {
12466 // Truncate Result back to a mask vector
12467 Result = DAG.getNode(
12468 RISCVISD::SETCC_VL, DL, ContainerVT,
12469 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
12470 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12471 }
12472
12473 if (!VT.isFixedLengthVector())
12474 return Result;
12475 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12476}
12477
12478SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
12479 SelectionDAG &DAG) const {
12480 MVT VT = Op.getSimpleValueType();
12481 if (VT.getVectorElementType() != MVT::i1)
12482 return lowerVPOp(Op, DAG);
12483
12484 // It is safe to drop mask parameter as masked-off elements are undef.
12485 SDValue Op1 = Op->getOperand(0);
12486 SDValue Op2 = Op->getOperand(1);
12487 SDValue VL = Op->getOperand(3);
12488
12489 MVT ContainerVT = VT;
12490 const bool IsFixed = VT.isFixedLengthVector();
12491 if (IsFixed) {
12492 ContainerVT = getContainerForFixedLengthVector(VT);
12493 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12494 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12495 }
12496
12497 SDLoc DL(Op);
12498 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
12499 if (!IsFixed)
12500 return Val;
12501 return convertFromScalableVector(VT, Val, DAG, Subtarget);
12502}
12503
12504SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
12505 SelectionDAG &DAG) const {
12506 SDLoc DL(Op);
12507 MVT XLenVT = Subtarget.getXLenVT();
12508 MVT VT = Op.getSimpleValueType();
12509 MVT ContainerVT = VT;
12510 if (VT.isFixedLengthVector())
12511 ContainerVT = getContainerForFixedLengthVector(VT);
12512
12513 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12514
12515 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
12516 // Check if the mask is known to be all ones
12517 SDValue Mask = VPNode->getMask();
12518 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12519
12520 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
12521 : Intrinsic::riscv_vlse_mask,
12522 DL, XLenVT);
12523 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
12524 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
12525 VPNode->getStride()};
12526 if (!IsUnmasked) {
12527 if (VT.isFixedLengthVector()) {
12528 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12529 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12530 }
12531 Ops.push_back(Mask);
12532 }
12533 Ops.push_back(VPNode->getVectorLength());
12534 if (!IsUnmasked) {
12535 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
12536 Ops.push_back(Policy);
12537 }
12538
12539 SDValue Result =
12541 VPNode->getMemoryVT(), VPNode->getMemOperand());
12542 SDValue Chain = Result.getValue(1);
12543
12544 if (VT.isFixedLengthVector())
12545 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12546
12547 return DAG.getMergeValues({Result, Chain}, DL);
12548}
12549
12550SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
12551 SelectionDAG &DAG) const {
12552 SDLoc DL(Op);
12553 MVT XLenVT = Subtarget.getXLenVT();
12554
12555 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
12556 SDValue StoreVal = VPNode->getValue();
12557 MVT VT = StoreVal.getSimpleValueType();
12558 MVT ContainerVT = VT;
12559 if (VT.isFixedLengthVector()) {
12560 ContainerVT = getContainerForFixedLengthVector(VT);
12561 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12562 }
12563
12564 // Check if the mask is known to be all ones
12565 SDValue Mask = VPNode->getMask();
12566 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12567
12568 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
12569 : Intrinsic::riscv_vsse_mask,
12570 DL, XLenVT);
12571 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
12572 VPNode->getBasePtr(), VPNode->getStride()};
12573 if (!IsUnmasked) {
12574 if (VT.isFixedLengthVector()) {
12575 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12576 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12577 }
12578 Ops.push_back(Mask);
12579 }
12580 Ops.push_back(VPNode->getVectorLength());
12581
12582 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
12583 Ops, VPNode->getMemoryVT(),
12584 VPNode->getMemOperand());
12585}
12586
12587// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
12588// matched to a RVV indexed load. The RVV indexed load instructions only
12589// support the "unsigned unscaled" addressing mode; indices are implicitly
12590// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12591// signed or scaled indexing is extended to the XLEN value type and scaled
12592// accordingly.
12593SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
12594 SelectionDAG &DAG) const {
12595 SDLoc DL(Op);
12596 MVT VT = Op.getSimpleValueType();
12597
12598 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12599 EVT MemVT = MemSD->getMemoryVT();
12600 MachineMemOperand *MMO = MemSD->getMemOperand();
12601 SDValue Chain = MemSD->getChain();
12602 SDValue BasePtr = MemSD->getBasePtr();
12603
12604 [[maybe_unused]] ISD::LoadExtType LoadExtType;
12605 SDValue Index, Mask, PassThru, VL;
12606
12607 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
12608 Index = VPGN->getIndex();
12609 Mask = VPGN->getMask();
12610 PassThru = DAG.getUNDEF(VT);
12611 VL = VPGN->getVectorLength();
12612 // VP doesn't support extending loads.
12614 } else {
12615 // Else it must be a MGATHER.
12616 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
12617 Index = MGN->getIndex();
12618 Mask = MGN->getMask();
12619 PassThru = MGN->getPassThru();
12620 LoadExtType = MGN->getExtensionType();
12621 }
12622
12623 MVT IndexVT = Index.getSimpleValueType();
12624 MVT XLenVT = Subtarget.getXLenVT();
12625
12627 "Unexpected VTs!");
12628 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12629 // Targets have to explicitly opt-in for extending vector loads.
12630 assert(LoadExtType == ISD::NON_EXTLOAD &&
12631 "Unexpected extending MGATHER/VP_GATHER");
12632
12633 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12634 // the selection of the masked intrinsics doesn't do this for us.
12635 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12636
12637 MVT ContainerVT = VT;
12638 if (VT.isFixedLengthVector()) {
12639 ContainerVT = getContainerForFixedLengthVector(VT);
12640 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12641 ContainerVT.getVectorElementCount());
12642
12643 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12644
12645 if (!IsUnmasked) {
12646 MVT MaskVT = getMaskTypeFor(ContainerVT);
12647 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12648 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12649 }
12650 }
12651
12652 if (!VL)
12653 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12654
12655 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12656 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12657 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12658 }
12659
12660 unsigned IntID =
12661 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12662 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12663 if (IsUnmasked)
12664 Ops.push_back(DAG.getUNDEF(ContainerVT));
12665 else
12666 Ops.push_back(PassThru);
12667 Ops.push_back(BasePtr);
12668 Ops.push_back(Index);
12669 if (!IsUnmasked)
12670 Ops.push_back(Mask);
12671 Ops.push_back(VL);
12672 if (!IsUnmasked)
12674
12675 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12676 SDValue Result =
12677 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12678 Chain = Result.getValue(1);
12679
12680 if (VT.isFixedLengthVector())
12681 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12682
12683 return DAG.getMergeValues({Result, Chain}, DL);
12684}
12685
12686// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
12687// matched to a RVV indexed store. The RVV indexed store instructions only
12688// support the "unsigned unscaled" addressing mode; indices are implicitly
12689// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12690// signed or scaled indexing is extended to the XLEN value type and scaled
12691// accordingly.
12692SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
12693 SelectionDAG &DAG) const {
12694 SDLoc DL(Op);
12695 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12696 EVT MemVT = MemSD->getMemoryVT();
12697 MachineMemOperand *MMO = MemSD->getMemOperand();
12698 SDValue Chain = MemSD->getChain();
12699 SDValue BasePtr = MemSD->getBasePtr();
12700
12701 [[maybe_unused]] bool IsTruncatingStore = false;
12702 SDValue Index, Mask, Val, VL;
12703
12704 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
12705 Index = VPSN->getIndex();
12706 Mask = VPSN->getMask();
12707 Val = VPSN->getValue();
12708 VL = VPSN->getVectorLength();
12709 // VP doesn't support truncating stores.
12710 IsTruncatingStore = false;
12711 } else {
12712 // Else it must be a MSCATTER.
12713 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
12714 Index = MSN->getIndex();
12715 Mask = MSN->getMask();
12716 Val = MSN->getValue();
12717 IsTruncatingStore = MSN->isTruncatingStore();
12718 }
12719
12720 MVT VT = Val.getSimpleValueType();
12721 MVT IndexVT = Index.getSimpleValueType();
12722 MVT XLenVT = Subtarget.getXLenVT();
12723
12725 "Unexpected VTs!");
12726 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12727 // Targets have to explicitly opt-in for extending vector loads and
12728 // truncating vector stores.
12729 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
12730
12731 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12732 // the selection of the masked intrinsics doesn't do this for us.
12733 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12734
12735 MVT ContainerVT = VT;
12736 if (VT.isFixedLengthVector()) {
12737 ContainerVT = getContainerForFixedLengthVector(VT);
12738 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12739 ContainerVT.getVectorElementCount());
12740
12741 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12742 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12743
12744 if (!IsUnmasked) {
12745 MVT MaskVT = getMaskTypeFor(ContainerVT);
12746 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12747 }
12748 }
12749
12750 if (!VL)
12751 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12752
12753 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12754 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12755 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12756 }
12757
12758 unsigned IntID =
12759 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12760 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12761 Ops.push_back(Val);
12762 Ops.push_back(BasePtr);
12763 Ops.push_back(Index);
12764 if (!IsUnmasked)
12765 Ops.push_back(Mask);
12766 Ops.push_back(VL);
12767
12769 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12770}
12771
12772SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
12773 SelectionDAG &DAG) const {
12774 const MVT XLenVT = Subtarget.getXLenVT();
12775 SDLoc DL(Op);
12776 SDValue Chain = Op->getOperand(0);
12777 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12778 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
12779 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
12780
12781 // Encoding used for rounding mode in RISC-V differs from that used in
12782 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
12783 // table, which consists of a sequence of 4-bit fields, each representing
12784 // corresponding FLT_ROUNDS mode.
12785 static const int Table =
12791
12792 SDValue Shift =
12793 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
12794 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12795 DAG.getConstant(Table, DL, XLenVT), Shift);
12796 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12797 DAG.getConstant(7, DL, XLenVT));
12798
12799 return DAG.getMergeValues({Masked, Chain}, DL);
12800}
12801
12802SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
12803 SelectionDAG &DAG) const {
12804 const MVT XLenVT = Subtarget.getXLenVT();
12805 SDLoc DL(Op);
12806 SDValue Chain = Op->getOperand(0);
12807 SDValue RMValue = Op->getOperand(1);
12808 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12809
12810 // Encoding used for rounding mode in RISC-V differs from that used in
12811 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
12812 // a table, which consists of a sequence of 4-bit fields, each representing
12813 // corresponding RISC-V mode.
12814 static const unsigned Table =
12820
12821 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
12822
12823 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12824 DAG.getConstant(2, DL, XLenVT));
12825 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12826 DAG.getConstant(Table, DL, XLenVT), Shift);
12827 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12828 DAG.getConstant(0x7, DL, XLenVT));
12829 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12830 RMValue);
12831}
12832
12833SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12834 SelectionDAG &DAG) const {
12836
12837 bool isRISCV64 = Subtarget.is64Bit();
12838 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12839
12840 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12841 return DAG.getFrameIndex(FI, PtrVT);
12842}
12843
12844// Returns the opcode of the target-specific SDNode that implements the 32-bit
12845// form of the given Opcode.
12846static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12847 switch (Opcode) {
12848 default:
12849 llvm_unreachable("Unexpected opcode");
12850 case ISD::SHL:
12851 return RISCVISD::SLLW;
12852 case ISD::SRA:
12853 return RISCVISD::SRAW;
12854 case ISD::SRL:
12855 return RISCVISD::SRLW;
12856 case ISD::SDIV:
12857 return RISCVISD::DIVW;
12858 case ISD::UDIV:
12859 return RISCVISD::DIVUW;
12860 case ISD::UREM:
12861 return RISCVISD::REMUW;
12862 case ISD::ROTL:
12863 return RISCVISD::ROLW;
12864 case ISD::ROTR:
12865 return RISCVISD::RORW;
12866 }
12867}
12868
12869// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12870// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12871// otherwise be promoted to i64, making it difficult to select the
12872// SLLW/DIVUW/.../*W later one because the fact the operation was originally of
12873// type i8/i16/i32 is lost.
12875 unsigned ExtOpc = ISD::ANY_EXTEND) {
12876 SDLoc DL(N);
12877 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12878 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12879 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12880 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12881 // ReplaceNodeResults requires we maintain the same type for the return value.
12882 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12883}
12884
12885// Converts the given 32-bit operation to a i64 operation with signed extension
12886// semantic to reduce the signed extension instructions.
12888 SDLoc DL(N);
12889 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12890 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12891 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12892 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12893 DAG.getValueType(MVT::i32));
12894 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12895}
12896
12899 SelectionDAG &DAG) const {
12900 SDLoc DL(N);
12901 switch (N->getOpcode()) {
12902 default:
12903 llvm_unreachable("Don't know how to custom type legalize this operation!");
12906 case ISD::FP_TO_SINT:
12907 case ISD::FP_TO_UINT: {
12908 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12909 "Unexpected custom legalisation");
12910 bool IsStrict = N->isStrictFPOpcode();
12911 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12912 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12913 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12914 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12916 if (!isTypeLegal(Op0.getValueType()))
12917 return;
12918 if (IsStrict) {
12919 SDValue Chain = N->getOperand(0);
12920 // In absense of Zfh, promote f16 to f32, then convert.
12921 if (Op0.getValueType() == MVT::f16 &&
12922 !Subtarget.hasStdExtZfhOrZhinx()) {
12923 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12924 {Chain, Op0});
12925 Chain = Op0.getValue(1);
12926 }
12927 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12929 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12930 SDValue Res = DAG.getNode(
12931 Opc, DL, VTs, Chain, Op0,
12932 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12933 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12934 Results.push_back(Res.getValue(1));
12935 return;
12936 }
12937 // For bf16, or f16 in absense of Zfh, promote [b]f16 to f32 and then
12938 // convert.
12939 if ((Op0.getValueType() == MVT::f16 &&
12940 !Subtarget.hasStdExtZfhOrZhinx()) ||
12941 Op0.getValueType() == MVT::bf16)
12942 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12943
12944 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12945 SDValue Res =
12946 DAG.getNode(Opc, DL, MVT::i64, Op0,
12947 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12948 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12949 return;
12950 }
12951 // If the FP type needs to be softened, emit a library call using the 'si'
12952 // version. If we left it to default legalization we'd end up with 'di'. If
12953 // the FP type doesn't need to be softened just let generic type
12954 // legalization promote the result type.
12955 RTLIB::Libcall LC;
12956 if (IsSigned)
12957 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12958 else
12959 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12960 MakeLibCallOptions CallOptions;
12961 EVT OpVT = Op0.getValueType();
12962 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12963 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12964 SDValue Result;
12965 std::tie(Result, Chain) =
12966 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12967 Results.push_back(Result);
12968 if (IsStrict)
12969 Results.push_back(Chain);
12970 break;
12971 }
12972 case ISD::LROUND: {
12973 SDValue Op0 = N->getOperand(0);
12974 EVT Op0VT = Op0.getValueType();
12975 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12977 if (!isTypeLegal(Op0VT))
12978 return;
12979
12980 // In absense of Zfh, promote f16 to f32, then convert.
12981 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12982 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12983
12984 SDValue Res =
12985 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12986 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12987 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12988 return;
12989 }
12990 // If the FP type needs to be softened, emit a library call to lround. We'll
12991 // need to truncate the result. We assume any value that doesn't fit in i32
12992 // is allowed to return an unspecified value.
12993 RTLIB::Libcall LC =
12994 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12995 MakeLibCallOptions CallOptions;
12996 EVT OpVT = Op0.getValueType();
12997 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12998 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12999 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
13000 Results.push_back(Result);
13001 break;
13002 }
13005 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
13006 "has custom type legalization on riscv32");
13007
13008 SDValue LoCounter, HiCounter;
13009 MVT XLenVT = Subtarget.getXLenVT();
13010 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
13011 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
13012 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
13013 } else {
13014 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
13015 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
13016 }
13017 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
13019 N->getOperand(0), LoCounter, HiCounter);
13020
13021 Results.push_back(
13022 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
13023 Results.push_back(RCW.getValue(2));
13024 break;
13025 }
13026 case ISD::LOAD: {
13027 if (!ISD::isNON_EXTLoad(N))
13028 return;
13029
13030 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
13031 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
13032 LoadSDNode *Ld = cast<LoadSDNode>(N);
13033
13034 SDLoc dl(N);
13035 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
13036 Ld->getBasePtr(), Ld->getMemoryVT(),
13037 Ld->getMemOperand());
13038 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
13039 Results.push_back(Res.getValue(1));
13040 return;
13041 }
13042 case ISD::MUL: {
13043 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
13044 unsigned XLen = Subtarget.getXLen();
13045 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
13046 if (Size > XLen) {
13047 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
13048 SDValue LHS = N->getOperand(0);
13049 SDValue RHS = N->getOperand(1);
13050 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
13051
13052 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
13053 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
13054 // We need exactly one side to be unsigned.
13055 if (LHSIsU == RHSIsU)
13056 return;
13057
13058 auto MakeMULPair = [&](SDValue S, SDValue U) {
13059 MVT XLenVT = Subtarget.getXLenVT();
13060 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
13061 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
13062 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
13063 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
13064 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
13065 };
13066
13067 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
13068 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
13069
13070 // The other operand should be signed, but still prefer MULH when
13071 // possible.
13072 if (RHSIsU && LHSIsS && !RHSIsS)
13073 Results.push_back(MakeMULPair(LHS, RHS));
13074 else if (LHSIsU && RHSIsS && !LHSIsS)
13075 Results.push_back(MakeMULPair(RHS, LHS));
13076
13077 return;
13078 }
13079 [[fallthrough]];
13080 }
13081 case ISD::ADD:
13082 case ISD::SUB:
13083 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13084 "Unexpected custom legalisation");
13085 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
13086 break;
13087 case ISD::SHL:
13088 case ISD::SRA:
13089 case ISD::SRL:
13090 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13091 "Unexpected custom legalisation");
13092 if (N->getOperand(1).getOpcode() != ISD::Constant) {
13093 // If we can use a BSET instruction, allow default promotion to apply.
13094 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
13095 isOneConstant(N->getOperand(0)))
13096 break;
13097 Results.push_back(customLegalizeToWOp(N, DAG));
13098 break;
13099 }
13100
13101 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
13102 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
13103 // shift amount.
13104 if (N->getOpcode() == ISD::SHL) {
13105 SDLoc DL(N);
13106 SDValue NewOp0 =
13107 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13108 SDValue NewOp1 =
13109 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
13110 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
13111 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
13112 DAG.getValueType(MVT::i32));
13113 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13114 }
13115
13116 break;
13117 case ISD::ROTL:
13118 case ISD::ROTR:
13119 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13120 "Unexpected custom legalisation");
13121 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
13122 Subtarget.hasVendorXTHeadBb()) &&
13123 "Unexpected custom legalization");
13124 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
13125 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
13126 return;
13127 Results.push_back(customLegalizeToWOp(N, DAG));
13128 break;
13129 case ISD::CTTZ:
13131 case ISD::CTLZ:
13132 case ISD::CTLZ_ZERO_UNDEF: {
13133 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13134 "Unexpected custom legalisation");
13135
13136 SDValue NewOp0 =
13137 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13138 bool IsCTZ =
13139 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
13140 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
13141 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
13142 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13143 return;
13144 }
13145 case ISD::SDIV:
13146 case ISD::UDIV:
13147 case ISD::UREM: {
13148 MVT VT = N->getSimpleValueType(0);
13149 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
13150 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
13151 "Unexpected custom legalisation");
13152 // Don't promote division/remainder by constant since we should expand those
13153 // to multiply by magic constant.
13155 if (N->getOperand(1).getOpcode() == ISD::Constant &&
13156 !isIntDivCheap(N->getValueType(0), Attr))
13157 return;
13158
13159 // If the input is i32, use ANY_EXTEND since the W instructions don't read
13160 // the upper 32 bits. For other types we need to sign or zero extend
13161 // based on the opcode.
13162 unsigned ExtOpc = ISD::ANY_EXTEND;
13163 if (VT != MVT::i32)
13164 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
13166
13167 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
13168 break;
13169 }
13170 case ISD::SADDO: {
13171 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13172 "Unexpected custom legalisation");
13173
13174 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
13175 // use the default legalization.
13176 if (!isa<ConstantSDNode>(N->getOperand(1)))
13177 return;
13178
13179 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13180 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
13181 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
13182 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13183 DAG.getValueType(MVT::i32));
13184
13185 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
13186
13187 // For an addition, the result should be less than one of the operands (LHS)
13188 // if and only if the other operand (RHS) is negative, otherwise there will
13189 // be overflow.
13190 // For a subtraction, the result should be less than one of the operands
13191 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
13192 // otherwise there will be overflow.
13193 EVT OType = N->getValueType(1);
13194 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
13195 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
13196
13197 SDValue Overflow =
13198 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
13199 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13200 Results.push_back(Overflow);
13201 return;
13202 }
13203 case ISD::UADDO:
13204 case ISD::USUBO: {
13205 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13206 "Unexpected custom legalisation");
13207 bool IsAdd = N->getOpcode() == ISD::UADDO;
13208 // Create an ADDW or SUBW.
13209 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13210 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13211 SDValue Res =
13212 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
13213 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13214 DAG.getValueType(MVT::i32));
13215
13216 SDValue Overflow;
13217 if (IsAdd && isOneConstant(RHS)) {
13218 // Special case uaddo X, 1 overflowed if the addition result is 0.
13219 // The general case (X + C) < C is not necessarily beneficial. Although we
13220 // reduce the live range of X, we may introduce the materialization of
13221 // constant C, especially when the setcc result is used by branch. We have
13222 // no compare with constant and branch instructions.
13223 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
13224 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
13225 } else if (IsAdd && isAllOnesConstant(RHS)) {
13226 // Special case uaddo X, -1 overflowed if X != 0.
13227 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
13228 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
13229 } else {
13230 // Sign extend the LHS and perform an unsigned compare with the ADDW
13231 // result. Since the inputs are sign extended from i32, this is equivalent
13232 // to comparing the lower 32 bits.
13233 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13234 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
13235 IsAdd ? ISD::SETULT : ISD::SETUGT);
13236 }
13237
13238 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13239 Results.push_back(Overflow);
13240 return;
13241 }
13242 case ISD::UADDSAT:
13243 case ISD::USUBSAT: {
13244 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13245 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
13246 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
13247 // promotion for UADDO/USUBO.
13248 Results.push_back(expandAddSubSat(N, DAG));
13249 return;
13250 }
13251 case ISD::SADDSAT:
13252 case ISD::SSUBSAT: {
13253 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13254 "Unexpected custom legalisation");
13255 Results.push_back(expandAddSubSat(N, DAG));
13256 return;
13257 }
13258 case ISD::ABS: {
13259 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13260 "Unexpected custom legalisation");
13261
13262 if (Subtarget.hasStdExtZbb()) {
13263 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
13264 // This allows us to remember that the result is sign extended. Expanding
13265 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
13266 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
13267 N->getOperand(0));
13268 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
13269 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
13270 return;
13271 }
13272
13273 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
13274 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13275
13276 // Freeze the source so we can increase it's use count.
13277 Src = DAG.getFreeze(Src);
13278
13279 // Copy sign bit to all bits using the sraiw pattern.
13280 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
13281 DAG.getValueType(MVT::i32));
13282 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
13283 DAG.getConstant(31, DL, MVT::i64));
13284
13285 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
13286 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
13287
13288 // NOTE: The result is only required to be anyextended, but sext is
13289 // consistent with type legalization of sub.
13290 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
13291 DAG.getValueType(MVT::i32));
13292 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13293 return;
13294 }
13295 case ISD::BITCAST: {
13296 EVT VT = N->getValueType(0);
13297 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
13298 SDValue Op0 = N->getOperand(0);
13299 EVT Op0VT = Op0.getValueType();
13300 MVT XLenVT = Subtarget.getXLenVT();
13301 if (VT == MVT::i16 &&
13302 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
13303 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
13304 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
13305 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
13306 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
13307 Subtarget.hasStdExtFOrZfinx()) {
13308 SDValue FPConv =
13309 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
13310 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
13311 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
13312 Subtarget.hasStdExtDOrZdinx()) {
13313 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
13314 DAG.getVTList(MVT::i32, MVT::i32), Op0);
13315 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
13316 NewReg.getValue(0), NewReg.getValue(1));
13317 Results.push_back(RetReg);
13318 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
13319 isTypeLegal(Op0VT)) {
13320 // Custom-legalize bitcasts from fixed-length vector types to illegal
13321 // scalar types in order to improve codegen. Bitcast the vector to a
13322 // one-element vector type whose element type is the same as the result
13323 // type, and extract the first element.
13324 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
13325 if (isTypeLegal(BVT)) {
13326 SDValue BVec = DAG.getBitcast(BVT, Op0);
13327 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
13328 DAG.getVectorIdxConstant(0, DL)));
13329 }
13330 }
13331 break;
13332 }
13333 case RISCVISD::BREV8:
13334 case RISCVISD::ORC_B: {
13335 MVT VT = N->getSimpleValueType(0);
13336 MVT XLenVT = Subtarget.getXLenVT();
13337 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
13338 "Unexpected custom legalisation");
13339 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
13340 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
13341 "Unexpected extension");
13342 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
13343 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
13344 // ReplaceNodeResults requires we maintain the same type for the return
13345 // value.
13346 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
13347 break;
13348 }
13350 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
13351 // type is illegal (currently only vXi64 RV32).
13352 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
13353 // transferred to the destination register. We issue two of these from the
13354 // upper- and lower- halves of the SEW-bit vector element, slid down to the
13355 // first element.
13356 SDValue Vec = N->getOperand(0);
13357 SDValue Idx = N->getOperand(1);
13358
13359 // The vector type hasn't been legalized yet so we can't issue target
13360 // specific nodes if it needs legalization.
13361 // FIXME: We would manually legalize if it's important.
13362 if (!isTypeLegal(Vec.getValueType()))
13363 return;
13364
13365 MVT VecVT = Vec.getSimpleValueType();
13366
13367 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
13368 VecVT.getVectorElementType() == MVT::i64 &&
13369 "Unexpected EXTRACT_VECTOR_ELT legalization");
13370
13371 // If this is a fixed vector, we need to convert it to a scalable vector.
13372 MVT ContainerVT = VecVT;
13373 if (VecVT.isFixedLengthVector()) {
13374 ContainerVT = getContainerForFixedLengthVector(VecVT);
13375 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
13376 }
13377
13378 MVT XLenVT = Subtarget.getXLenVT();
13379
13380 // Use a VL of 1 to avoid processing more elements than we need.
13381 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
13382
13383 // Unless the index is known to be 0, we must slide the vector down to get
13384 // the desired element into index 0.
13385 if (!isNullConstant(Idx)) {
13386 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13387 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
13388 }
13389
13390 // Extract the lower XLEN bits of the correct vector element.
13391 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13392
13393 // To extract the upper XLEN bits of the vector element, shift the first
13394 // element right by 32 bits and re-extract the lower XLEN bits.
13395 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13396 DAG.getUNDEF(ContainerVT),
13397 DAG.getConstant(32, DL, XLenVT), VL);
13398 SDValue LShr32 =
13399 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
13400 DAG.getUNDEF(ContainerVT), Mask, VL);
13401
13402 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13403
13404 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13405 break;
13406 }
13408 unsigned IntNo = N->getConstantOperandVal(0);
13409 switch (IntNo) {
13410 default:
13412 "Don't know how to custom type legalize this intrinsic!");
13413 case Intrinsic::experimental_get_vector_length: {
13414 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
13415 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13416 return;
13417 }
13418 case Intrinsic::experimental_cttz_elts: {
13419 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
13420 Results.push_back(
13421 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
13422 return;
13423 }
13424 case Intrinsic::riscv_orc_b:
13425 case Intrinsic::riscv_brev8:
13426 case Intrinsic::riscv_sha256sig0:
13427 case Intrinsic::riscv_sha256sig1:
13428 case Intrinsic::riscv_sha256sum0:
13429 case Intrinsic::riscv_sha256sum1:
13430 case Intrinsic::riscv_sm3p0:
13431 case Intrinsic::riscv_sm3p1: {
13432 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13433 return;
13434 unsigned Opc;
13435 switch (IntNo) {
13436 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
13437 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
13438 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
13439 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
13440 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
13441 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
13442 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
13443 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
13444 }
13445
13446 SDValue NewOp =
13447 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13448 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
13449 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13450 return;
13451 }
13452 case Intrinsic::riscv_sm4ks:
13453 case Intrinsic::riscv_sm4ed: {
13454 unsigned Opc =
13455 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
13456 SDValue NewOp0 =
13457 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13458 SDValue NewOp1 =
13459 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13460 SDValue Res =
13461 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
13462 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13463 return;
13464 }
13465 case Intrinsic::riscv_mopr: {
13466 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13467 return;
13468 SDValue NewOp =
13469 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13470 SDValue Res = DAG.getNode(
13471 RISCVISD::MOPR, DL, MVT::i64, NewOp,
13472 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
13473 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13474 return;
13475 }
13476 case Intrinsic::riscv_moprr: {
13477 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13478 return;
13479 SDValue NewOp0 =
13480 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13481 SDValue NewOp1 =
13482 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13483 SDValue Res = DAG.getNode(
13484 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
13485 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
13486 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13487 return;
13488 }
13489 case Intrinsic::riscv_clmul: {
13490 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13491 return;
13492
13493 SDValue NewOp0 =
13494 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13495 SDValue NewOp1 =
13496 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13497 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
13498 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13499 return;
13500 }
13501 case Intrinsic::riscv_clmulh:
13502 case Intrinsic::riscv_clmulr: {
13503 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13504 return;
13505
13506 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
13507 // to the full 128-bit clmul result of multiplying two xlen values.
13508 // Perform clmulr or clmulh on the shifted values. Finally, extract the
13509 // upper 32 bits.
13510 //
13511 // The alternative is to mask the inputs to 32 bits and use clmul, but
13512 // that requires two shifts to mask each input without zext.w.
13513 // FIXME: If the inputs are known zero extended or could be freely
13514 // zero extended, the mask form would be better.
13515 SDValue NewOp0 =
13516 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13517 SDValue NewOp1 =
13518 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13519 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
13520 DAG.getConstant(32, DL, MVT::i64));
13521 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
13522 DAG.getConstant(32, DL, MVT::i64));
13523 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
13525 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
13526 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
13527 DAG.getConstant(32, DL, MVT::i64));
13528 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13529 return;
13530 }
13531 case Intrinsic::riscv_vmv_x_s: {
13532 EVT VT = N->getValueType(0);
13533 MVT XLenVT = Subtarget.getXLenVT();
13534 if (VT.bitsLT(XLenVT)) {
13535 // Simple case just extract using vmv.x.s and truncate.
13536 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
13537 Subtarget.getXLenVT(), N->getOperand(1));
13538 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
13539 return;
13540 }
13541
13542 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
13543 "Unexpected custom legalization");
13544
13545 // We need to do the move in two steps.
13546 SDValue Vec = N->getOperand(1);
13547 MVT VecVT = Vec.getSimpleValueType();
13548
13549 // First extract the lower XLEN bits of the element.
13550 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13551
13552 // To extract the upper XLEN bits of the vector element, shift the first
13553 // element right by 32 bits and re-extract the lower XLEN bits.
13554 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
13555
13556 SDValue ThirtyTwoV =
13557 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
13558 DAG.getConstant(32, DL, XLenVT), VL);
13559 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
13560 DAG.getUNDEF(VecVT), Mask, VL);
13561 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13562
13563 Results.push_back(
13564 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13565 break;
13566 }
13567 }
13568 break;
13569 }
13570 case ISD::VECREDUCE_ADD:
13571 case ISD::VECREDUCE_AND:
13572 case ISD::VECREDUCE_OR:
13573 case ISD::VECREDUCE_XOR:
13578 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
13579 Results.push_back(V);
13580 break;
13581 case ISD::VP_REDUCE_ADD:
13582 case ISD::VP_REDUCE_AND:
13583 case ISD::VP_REDUCE_OR:
13584 case ISD::VP_REDUCE_XOR:
13585 case ISD::VP_REDUCE_SMAX:
13586 case ISD::VP_REDUCE_UMAX:
13587 case ISD::VP_REDUCE_SMIN:
13588 case ISD::VP_REDUCE_UMIN:
13589 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
13590 Results.push_back(V);
13591 break;
13592 case ISD::GET_ROUNDING: {
13593 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
13594 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
13595 Results.push_back(Res.getValue(0));
13596 Results.push_back(Res.getValue(1));
13597 break;
13598 }
13599 }
13600}
13601
13602/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
13603/// which corresponds to it.
13604static unsigned getVecReduceOpcode(unsigned Opc) {
13605 switch (Opc) {
13606 default:
13607 llvm_unreachable("Unhandled binary to transfrom reduction");
13608 case ISD::ADD:
13609 return ISD::VECREDUCE_ADD;
13610 case ISD::UMAX:
13611 return ISD::VECREDUCE_UMAX;
13612 case ISD::SMAX:
13613 return ISD::VECREDUCE_SMAX;
13614 case ISD::UMIN:
13615 return ISD::VECREDUCE_UMIN;
13616 case ISD::SMIN:
13617 return ISD::VECREDUCE_SMIN;
13618 case ISD::AND:
13619 return ISD::VECREDUCE_AND;
13620 case ISD::OR:
13621 return ISD::VECREDUCE_OR;
13622 case ISD::XOR:
13623 return ISD::VECREDUCE_XOR;
13624 case ISD::FADD:
13625 // Note: This is the associative form of the generic reduction opcode.
13626 return ISD::VECREDUCE_FADD;
13627 }
13628}
13629
13630/// Perform two related transforms whose purpose is to incrementally recognize
13631/// an explode_vector followed by scalar reduction as a vector reduction node.
13632/// This exists to recover from a deficiency in SLP which can't handle
13633/// forests with multiple roots sharing common nodes. In some cases, one
13634/// of the trees will be vectorized, and the other will remain (unprofitably)
13635/// scalarized.
13636static SDValue
13638 const RISCVSubtarget &Subtarget) {
13639
13640 // This transforms need to run before all integer types have been legalized
13641 // to i64 (so that the vector element type matches the add type), and while
13642 // it's safe to introduce odd sized vector types.
13644 return SDValue();
13645
13646 // Without V, this transform isn't useful. We could form the (illegal)
13647 // operations and let them be scalarized again, but there's really no point.
13648 if (!Subtarget.hasVInstructions())
13649 return SDValue();
13650
13651 const SDLoc DL(N);
13652 const EVT VT = N->getValueType(0);
13653 const unsigned Opc = N->getOpcode();
13654
13655 // For FADD, we only handle the case with reassociation allowed. We
13656 // could handle strict reduction order, but at the moment, there's no
13657 // known reason to, and the complexity isn't worth it.
13658 // TODO: Handle fminnum and fmaxnum here
13659 if (!VT.isInteger() &&
13660 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
13661 return SDValue();
13662
13663 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
13664 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
13665 "Inconsistent mappings");
13666 SDValue LHS = N->getOperand(0);
13667 SDValue RHS = N->getOperand(1);
13668
13669 if (!LHS.hasOneUse() || !RHS.hasOneUse())
13670 return SDValue();
13671
13672 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13673 std::swap(LHS, RHS);
13674
13675 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13676 !isa<ConstantSDNode>(RHS.getOperand(1)))
13677 return SDValue();
13678
13679 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
13680 SDValue SrcVec = RHS.getOperand(0);
13681 EVT SrcVecVT = SrcVec.getValueType();
13682 assert(SrcVecVT.getVectorElementType() == VT);
13683 if (SrcVecVT.isScalableVector())
13684 return SDValue();
13685
13686 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
13687 return SDValue();
13688
13689 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
13690 // reduce_op (extract_subvector [2 x VT] from V). This will form the
13691 // root of our reduction tree. TODO: We could extend this to any two
13692 // adjacent aligned constant indices if desired.
13693 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13694 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
13695 uint64_t LHSIdx =
13696 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
13697 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13698 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
13699 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13700 DAG.getVectorIdxConstant(0, DL));
13701 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
13702 }
13703 }
13704
13705 // Match (binop (reduce (extract_subvector V, 0),
13706 // (extract_vector_elt V, sizeof(SubVec))))
13707 // into a reduction of one more element from the original vector V.
13708 if (LHS.getOpcode() != ReduceOpc)
13709 return SDValue();
13710
13711 SDValue ReduceVec = LHS.getOperand(0);
13712 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13713 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
13714 isNullConstant(ReduceVec.getOperand(1)) &&
13715 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
13716 // For illegal types (e.g. 3xi32), most will be combined again into a
13717 // wider (hopefully legal) type. If this is a terminal state, we are
13718 // relying on type legalization here to produce something reasonable
13719 // and this lowering quality could probably be improved. (TODO)
13720 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
13721 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13722 DAG.getVectorIdxConstant(0, DL));
13723 return DAG.getNode(ReduceOpc, DL, VT, Vec,
13724 ReduceVec->getFlags() & N->getFlags());
13725 }
13726
13727 return SDValue();
13728}
13729
13730
13731// Try to fold (<bop> x, (reduction.<bop> vec, start))
13733 const RISCVSubtarget &Subtarget) {
13734 auto BinOpToRVVReduce = [](unsigned Opc) {
13735 switch (Opc) {
13736 default:
13737 llvm_unreachable("Unhandled binary to transfrom reduction");
13738 case ISD::ADD:
13740 case ISD::UMAX:
13742 case ISD::SMAX:
13744 case ISD::UMIN:
13746 case ISD::SMIN:
13748 case ISD::AND:
13750 case ISD::OR:
13752 case ISD::XOR:
13754 case ISD::FADD:
13756 case ISD::FMAXNUM:
13758 case ISD::FMINNUM:
13760 }
13761 };
13762
13763 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
13764 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13765 isNullConstant(V.getOperand(1)) &&
13766 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13767 };
13768
13769 unsigned Opc = N->getOpcode();
13770 unsigned ReduceIdx;
13771 if (IsReduction(N->getOperand(0), Opc))
13772 ReduceIdx = 0;
13773 else if (IsReduction(N->getOperand(1), Opc))
13774 ReduceIdx = 1;
13775 else
13776 return SDValue();
13777
13778 // Skip if FADD disallows reassociation but the combiner needs.
13779 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
13780 return SDValue();
13781
13782 SDValue Extract = N->getOperand(ReduceIdx);
13783 SDValue Reduce = Extract.getOperand(0);
13784 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
13785 return SDValue();
13786
13787 SDValue ScalarV = Reduce.getOperand(2);
13788 EVT ScalarVT = ScalarV.getValueType();
13789 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
13790 ScalarV.getOperand(0)->isUndef() &&
13791 isNullConstant(ScalarV.getOperand(2)))
13792 ScalarV = ScalarV.getOperand(1);
13793
13794 // Make sure that ScalarV is a splat with VL=1.
13795 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
13796 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
13797 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
13798 return SDValue();
13799
13800 if (!isNonZeroAVL(ScalarV.getOperand(2)))
13801 return SDValue();
13802
13803 // Check the scalar of ScalarV is neutral element
13804 // TODO: Deal with value other than neutral element.
13805 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
13806 0))
13807 return SDValue();
13808
13809 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13810 // FIXME: We might be able to improve this if operand 0 is undef.
13811 if (!isNonZeroAVL(Reduce.getOperand(5)))
13812 return SDValue();
13813
13814 SDValue NewStart = N->getOperand(1 - ReduceIdx);
13815
13816 SDLoc DL(N);
13817 SDValue NewScalarV =
13818 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13819 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
13820
13821 // If we looked through an INSERT_SUBVECTOR we need to restore it.
13822 if (ScalarVT != ScalarV.getValueType())
13823 NewScalarV =
13824 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13825 NewScalarV, DAG.getVectorIdxConstant(0, DL));
13826
13827 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13828 NewScalarV, Reduce.getOperand(3),
13829 Reduce.getOperand(4), Reduce.getOperand(5)};
13830 SDValue NewReduce =
13831 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13832 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13833 Extract.getOperand(1));
13834}
13835
13836// Optimize (add (shl x, c0), (shl y, c1)) ->
13837// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
13839 const RISCVSubtarget &Subtarget) {
13840 // Perform this optimization only in the zba extension.
13841 if (!Subtarget.hasStdExtZba())
13842 return SDValue();
13843
13844 // Skip for vector types and larger types.
13845 EVT VT = N->getValueType(0);
13846 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13847 return SDValue();
13848
13849 // The two operand nodes must be SHL and have no other use.
13850 SDValue N0 = N->getOperand(0);
13851 SDValue N1 = N->getOperand(1);
13852 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13853 !N0->hasOneUse() || !N1->hasOneUse())
13854 return SDValue();
13855
13856 // Check c0 and c1.
13857 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13858 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13859 if (!N0C || !N1C)
13860 return SDValue();
13861 int64_t C0 = N0C->getSExtValue();
13862 int64_t C1 = N1C->getSExtValue();
13863 if (C0 <= 0 || C1 <= 0)
13864 return SDValue();
13865
13866 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13867 int64_t Bits = std::min(C0, C1);
13868 int64_t Diff = std::abs(C0 - C1);
13869 if (Diff != 1 && Diff != 2 && Diff != 3)
13870 return SDValue();
13871
13872 // Build nodes.
13873 SDLoc DL(N);
13874 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13875 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13876 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13877 DAG.getConstant(Diff, DL, VT), NS);
13878 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13879}
13880
13881// Combine a constant select operand into its use:
13882//
13883// (and (select cond, -1, c), x)
13884// -> (select cond, x, (and x, c)) [AllOnes=1]
13885// (or (select cond, 0, c), x)
13886// -> (select cond, x, (or x, c)) [AllOnes=0]
13887// (xor (select cond, 0, c), x)
13888// -> (select cond, x, (xor x, c)) [AllOnes=0]
13889// (add (select cond, 0, c), x)
13890// -> (select cond, x, (add x, c)) [AllOnes=0]
13891// (sub x, (select cond, 0, c))
13892// -> (select cond, x, (sub x, c)) [AllOnes=0]
13894 SelectionDAG &DAG, bool AllOnes,
13895 const RISCVSubtarget &Subtarget) {
13896 EVT VT = N->getValueType(0);
13897
13898 // Skip vectors.
13899 if (VT.isVector())
13900 return SDValue();
13901
13902 if (!Subtarget.hasConditionalMoveFusion()) {
13903 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13904 if ((!Subtarget.hasStdExtZicond() &&
13905 !Subtarget.hasVendorXVentanaCondOps()) ||
13906 N->getOpcode() != ISD::AND)
13907 return SDValue();
13908
13909 // Maybe harmful when condition code has multiple use.
13910 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13911 return SDValue();
13912
13913 // Maybe harmful when VT is wider than XLen.
13914 if (VT.getSizeInBits() > Subtarget.getXLen())
13915 return SDValue();
13916 }
13917
13918 if ((Slct.getOpcode() != ISD::SELECT &&
13919 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13920 !Slct.hasOneUse())
13921 return SDValue();
13922
13923 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13925 };
13926
13927 bool SwapSelectOps;
13928 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13929 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13930 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13931 SDValue NonConstantVal;
13932 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13933 SwapSelectOps = false;
13934 NonConstantVal = FalseVal;
13935 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13936 SwapSelectOps = true;
13937 NonConstantVal = TrueVal;
13938 } else
13939 return SDValue();
13940
13941 // Slct is now know to be the desired identity constant when CC is true.
13942 TrueVal = OtherOp;
13943 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13944 // Unless SwapSelectOps says the condition should be false.
13945 if (SwapSelectOps)
13946 std::swap(TrueVal, FalseVal);
13947
13948 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13949 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13950 {Slct.getOperand(0), Slct.getOperand(1),
13951 Slct.getOperand(2), TrueVal, FalseVal});
13952
13953 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13954 {Slct.getOperand(0), TrueVal, FalseVal});
13955}
13956
13957// Attempt combineSelectAndUse on each operand of a commutative operator N.
13959 bool AllOnes,
13960 const RISCVSubtarget &Subtarget) {
13961 SDValue N0 = N->getOperand(0);
13962 SDValue N1 = N->getOperand(1);
13963 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13964 return Result;
13965 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13966 return Result;
13967 return SDValue();
13968}
13969
13970// Transform (add (mul x, c0), c1) ->
 13971// (add (mul (add x, c1/c0), c0), c1%c0).
 13972// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
 13973// that should be excluded is when c0*(c1/c0) is simm12, which will lead
 13974// to an infinite loop in DAGCombine if transformed.
 13975// Or transform (add (mul x, c0), c1) ->
 13976// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
 13977// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
 13978// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
 13979// lead to an infinite loop in DAGCombine if transformed.
 13980// Or transform (add (mul x, c0), c1) ->
 13981// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
 13982// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
 13983// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
 13984// lead to an infinite loop in DAGCombine if transformed.
 13985// Or transform (add (mul x, c0), c1) ->
 13986// (mul (add x, c1/c0), c0).
 13987// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
// NOTE(review): the declaration line (13988) was dropped during extraction —
// presumably `static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG
// &DAG,` — confirm against upstream LLVM.
 13989 const RISCVSubtarget &Subtarget) {
 13990 // Skip for vector types and larger types.
 13991 EVT VT = N->getValueType(0);
 13992 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
 13993 return SDValue();
 13994 // The first operand node must be a MUL and has no other use.
 13995 SDValue N0 = N->getOperand(0);
 13996 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
 13997 return SDValue();
 13998 // Check if c0 and c1 match above conditions.
 13999 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
 14000 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
 14001 if (!N0C || !N1C)
 14002 return SDValue();
 14003 // If N0C has multiple uses it's possible one of the cases in
 14004 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
 14005 // in an infinite loop.
 14006 if (!N0C->hasOneUse())
 14007 return SDValue();
 14008 int64_t C0 = N0C->getSExtValue();
 14009 int64_t C1 = N1C->getSExtValue();
 14010 int64_t CA, CB;
// Bail out on degenerate multipliers (-1/0/1) and on C1 that already fits in
// a 12-bit immediate — there is nothing to gain in those cases.
 14011 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
 14012 return SDValue();
 14013 // Search for proper CA (non-zero) and CB that both are simm12.
 14014 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
 14015 !isInt<12>(C0 * (C1 / C0))) {
 14016 CA = C1 / C0;
 14017 CB = C1 % C0;
 14018 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
 14019 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
 14020 CA = C1 / C0 + 1;
 14021 CB = C1 % C0 - C0;
 14022 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
 14023 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
 14024 CA = C1 / C0 - 1;
 14025 CB = C1 % C0 + C0;
 14026 } else
 14027 return SDValue();
 14028 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
 14029 SDLoc DL(N);
 14030 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
 14031 DAG.getSignedConstant(CA, DL, VT));
 14032 SDValue New1 =
 14033 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
 14034 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
 14035}
14036
14037// add (zext, zext) -> zext (add (zext, zext))
 14038// sub (zext, zext) -> sext (sub (zext, zext))
 14039// mul (zext, zext) -> zext (mul (zext, zext))
 14040// sdiv (zext, zext) -> zext (sdiv (zext, zext))
 14041// udiv (zext, zext) -> zext (udiv (zext, zext))
 14042// srem (zext, zext) -> zext (srem (zext, zext))
 14043// urem (zext, zext) -> zext (urem (zext, zext))
 14044//
 14045// where the sum of the extend widths match, and the range of the bin op
 14046// fits inside the width of the narrower bin op. (For profitability on rvv, we
 14047// use a power of two for both inner and outer extend.)
// NOTE(review): the declaration line (14048) was dropped during extraction —
// presumably `static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
// {` — the bare line number below is extraction residue; confirm upstream.
 14049
 14050 EVT VT = N->getValueType(0);
 14051 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
 14052 return SDValue();
 14053
 14054 SDValue N0 = N->getOperand(0);
 14055 SDValue N1 = N->getOperand(1);
// NOTE(review): line 14056 was dropped during extraction — presumably the
// check that both N0 and N1 have opcode ISD::ZERO_EXTEND — confirm upstream.
 14057 return SDValue();
 14058 if (!N0.hasOneUse() || !N1.hasOneUse())
 14059 return SDValue();
 14060
 14061 SDValue Src0 = N0.getOperand(0);
 14062 SDValue Src1 = N1.getOperand(0);
 14063 EVT SrcVT = Src0.getValueType();
// Sources must share a legal type, be at least i8 element width, and leave
// room so the narrowed op still has headroom (half the result width).
 14064 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
 14065 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
 14066 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
 14067 return SDValue();
 14068
 14069 LLVMContext &C = *DAG.getContext();
// NOTE(review): line 14070 was dropped during extraction — presumably the
// declaration of ElemVT (half-width integer element type) — confirm upstream.
 14071 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
 14072
 14073 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
 14074 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
 14075
 14076 // Src0 and Src1 are zero extended, so they're always positive if signed.
 14077 //
 14078 // sub can produce a negative from two positive operands, so it needs sign
 14079 // extended. Other nodes produce a positive from two positive operands, so
 14080 // zero extend instead.
 14081 unsigned OuterExtend =
 14082 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
 14083
 14084 return DAG.getNode(
 14085 OuterExtend, SDLoc(N), VT,
 14086 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
 14087}
14088
14089// Try to turn (add (xor bool, 1) -1) into (neg bool).
// NOTE(review): the declaration line (14090) was dropped during extraction —
// presumably `static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG
// &DAG) {` — confirm against upstream LLVM.
 14091 SDValue N0 = N->getOperand(0);
 14092 SDValue N1 = N->getOperand(1);
 14093 EVT VT = N->getValueType(0);
 14094 SDLoc DL(N);
 14095
 14096 // RHS should be -1.
 14097 if (!isAllOnesConstant(N1))
 14098 return SDValue();
 14099
 14100 // Look for (xor X, 1).
 14101 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
 14102 return SDValue();
 14103
 14104 // First xor input should be 0 or 1.
// NOTE(review): line 14105 was dropped during extraction — presumably the
// declaration of Mask covering all bits above bit 0 — confirm upstream.
 14106 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
 14107 return SDValue();
 14108
 14109 // Emit a negate of the setcc.
 14110 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
 14111 N0.getOperand(0));
 14112}
14113
// DAG combine for ISD::ADD: tries a sequence of target-specific folds and
// falls back to the commutative select fold.
// NOTE(review): the declaration lines (14114-14115) were dropped during
// extraction — presumably `static SDValue performADDCombine(SDNode *N,
// TargetLowering::DAGCombinerInfo &DCI,` — confirm against upstream LLVM.
 14116 const RISCVSubtarget &Subtarget) {
 14117 SelectionDAG &DAG = DCI.DAG;
 14118 if (SDValue V = combineAddOfBooleanXor(N, DAG))
 14119 return V;
 14120 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
 14121 return V;
// transformAddShlImm is only applied after legalization (and not while the
// legalizer itself is calling the combiner).
 14122 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
 14123 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
 14124 return V;
 14125 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
 14126 return V;
 14127 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
 14128 return V;
 14129 if (SDValue V = combineBinOpOfZExt(N, DAG))
 14130 return V;
 14131
 14132 // fold (add (select lhs, rhs, cc, 0, y), x) ->
 14133 // (select lhs, rhs, cc, x, (add x, y))
 14134 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
 14135}
14136
14137// Try to turn a sub boolean RHS and constant LHS into an addi.
// NOTE(review): the declaration line (14138) was dropped during extraction —
// presumably `static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG
// &DAG) {` — confirm against upstream LLVM.
 14139 SDValue N0 = N->getOperand(0);
 14140 SDValue N1 = N->getOperand(1);
 14141 EVT VT = N->getValueType(0);
 14142 SDLoc DL(N);
 14143
 14144 // Require a constant LHS.
 14145 auto *N0C = dyn_cast<ConstantSDNode>(N0);
 14146 if (!N0C)
 14147 return SDValue();
 14148
 14149 // All our optimizations involve subtracting 1 from the immediate and forming
 14150 // an ADDI. Make sure the new immediate is valid for an ADDI.
 14151 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
 14152 if (!ImmValMinus1.isSignedIntN(12))
 14153 return SDValue();
 14154
 14155 SDValue NewLHS;
 14156 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
 14157 // (sub constant, (setcc x, y, eq/neq)) ->
 14158 // (add (setcc x, y, neq/eq), constant - 1)
 14159 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
 14160 EVT SetCCOpVT = N1.getOperand(0).getValueType();
 14161 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
 14162 return SDValue();
 14163 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
 14164 NewLHS =
 14165 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
 14166 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
 14167 N1.getOperand(0).getOpcode() == ISD::SETCC) {
 14168 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
 14169 // Since setcc returns a bool the xor is equivalent to 1-setcc.
 14170 NewLHS = N1.getOperand(0);
 14171 } else
 14172 return SDValue();
 14173
 14174 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
 14175 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
 14176}
14177
14178// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
 14179// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
 14180// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
 14181// valid with Y=3, while 0b0000_1000_0000_0100 is not.
// NOTE(review): the declaration line (14182) was dropped during extraction —
// presumably `static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG
// &DAG,` — confirm against upstream LLVM.
 14183 const RISCVSubtarget &Subtarget) {
// orc.b is provided by the Zbb extension.
 14184 if (!Subtarget.hasStdExtZbb())
 14185 return SDValue();
 14186
 14187 EVT VT = N->getValueType(0);
 14188
 14189 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
 14190 return SDValue();
 14191
 14192 SDValue N0 = N->getOperand(0);
 14193 SDValue N1 = N->getOperand(1);
 14194
 14195 if (N0->getOpcode() != ISD::SHL)
 14196 return SDValue();
 14197
 14198 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
 14199 if (!ShAmtCLeft)
 14200 return SDValue();
 14201 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
 14202
// Unsigned wrap here also rejects left-shift amounts greater than 8.
 14203 if (ShiftedAmount >= 8)
 14204 return SDValue();
 14205
 14206 SDValue LeftShiftOperand = N0->getOperand(0);
 14207 SDValue RightShiftOperand = N1;
 14208
 14209 if (ShiftedAmount != 0) { // Right operand must be a right shift.
 14210 if (N1->getOpcode() != ISD::SRL)
 14211 return SDValue();
 14212 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
 14213 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
 14214 return SDValue();
 14215 RightShiftOperand = N1.getOperand(0);
 14216 }
 14217
 14218 // At least one shift should have a single use.
 14219 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
 14220 return SDValue();
 14221
 14222 if (LeftShiftOperand != RightShiftOperand)
 14223 return SDValue();
 14224
// Build a mask with bit Y set in every byte, then verify X has no bits set
// outside that mask.
 14225 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
 14226 Mask <<= ShiftedAmount;
 14227 // Check that X has indeed the right shape (only the Y-th bit can be set in
 14228 // every byte).
 14229 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
 14230 return SDValue();
 14231
 14232 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
 14233}
14234
// DAG combine for ISD::SUB: boolean folds, setcc-to-sra, zext narrowing,
// orc.b recognition, then the select fold.
// NOTE(review): the declaration line (14235) was dropped during extraction —
// presumably `static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,`
// — confirm against upstream LLVM.
 14236 const RISCVSubtarget &Subtarget) {
 14237 if (SDValue V = combineSubOfBoolean(N, DAG))
 14238 return V;
 14239
 14240 EVT VT = N->getValueType(0);
 14241 SDValue N0 = N->getOperand(0);
 14242 SDValue N1 = N->getOperand(1);
 14243 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
 14244 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
 14245 isNullConstant(N1.getOperand(1))) {
 14246 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
 14247 if (CCVal == ISD::SETLT) {
 14248 SDLoc DL(N);
 14249 unsigned ShAmt = N0.getValueSizeInBits() - 1;
 14250 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
 14251 DAG.getConstant(ShAmt, DL, VT));
 14252 }
 14253 }
 14254
 14255 if (SDValue V = combineBinOpOfZExt(N, DAG))
 14256 return V;
 14257 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
 14258 return V;
 14259
 14260 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
 14261 // (select lhs, rhs, cc, x, (sub x, y))
 14262 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
 14263}
14264
14265// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
 14266// Legalizing setcc can introduce xors like this. Doing this transform reduces
 14267// the number of xors and may allow the xor to fold into a branch condition.
// NOTE(review): the declaration line (14268) was dropped during extraction —
// presumably `static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG
// &DAG) {` — confirm against upstream LLVM.
 14269 SDValue N0 = N->getOperand(0);
 14270 SDValue N1 = N->getOperand(1);
 14271 bool IsAnd = N->getOpcode() == ISD::AND;
 14272
 14273 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
 14274 return SDValue();
 14275
 14276 if (!N0.hasOneUse() || !N1.hasOneUse())
 14277 return SDValue();
 14278
 14279 SDValue N01 = N0.getOperand(1);
 14280 SDValue N11 = N1.getOperand(1);
 14281
 14282 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
 14283 // (xor X, -1) based on the upper bits of the other operand being 0. If the
 14284 // operation is And, allow one of the Xors to use -1.
 14285 if (isOneConstant(N01)) {
 14286 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
 14287 return SDValue();
 14288 } else if (isOneConstant(N11)) {
 14289 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
 14290 if (!(IsAnd && isAllOnesConstant(N01)))
 14291 return SDValue();
 14292 } else
 14293 return SDValue();
 14294
 14295 EVT VT = N->getValueType(0);
 14296
 14297 SDValue N00 = N0.getOperand(0);
 14298 SDValue N10 = N1.getOperand(0);
 14299
 14300 // The LHS of the xors needs to be 0/1.
// NOTE(review): line 14301 was dropped during extraction — presumably the
// declaration of Mask covering all bits above bit 0 — confirm upstream.
 14302 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
 14303 return SDValue();
 14304
 14305 // Invert the opcode and insert a new xor.
 14306 SDLoc DL(N);
 14307 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
 14308 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
 14309 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
 14310}
14311
14312// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
 14313// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
 14314// value to an unsigned value. This will be lowered to vmax and series of
 14315// vnclipu instructions later. This can be extended to other truncated types
 14316// other than i8 by replacing 256 and 255 with the equivalent constants for the
 14317// type.
// NOTE(review): the declaration line (14318) was dropped during extraction —
// presumably `static SDValue combineTruncSelectToSMaxUSat(SDNode *N,
// SelectionDAG &DAG) {` — confirm against upstream LLVM.
 14319 EVT VT = N->getValueType(0);
 14320 SDValue N0 = N->getOperand(0);
 14321 EVT SrcVT = N0.getValueType();
 14322
 14323 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 14324 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
 14325 return SDValue();
 14326
 14327 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
 14328 return SDValue();
 14329
 14330 SDValue Cond = N0.getOperand(0);
 14331 SDValue True = N0.getOperand(1);
 14332 SDValue False = N0.getOperand(2);
 14333
 14334 if (Cond.getOpcode() != ISD::SETCC)
 14335 return SDValue();
 14336
 14337 // FIXME: Support the version of this pattern with the select operands
 14338 // swapped.
 14339 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
 14340 if (CCVal != ISD::SETULT)
 14341 return SDValue();
 14342
 14343 SDValue CondLHS = Cond.getOperand(0);
 14344 SDValue CondRHS = Cond.getOperand(1);
 14345
 14346 if (CondLHS != True)
 14347 return SDValue();
 14348
 14349 unsigned ScalarBits = VT.getScalarSizeInBits();
 14350
 14351 // FIXME: Support other constants.
 14352 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
 14353 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
 14354 return SDValue();
 14355
 14356 if (False.getOpcode() != ISD::SIGN_EXTEND)
 14357 return SDValue();
 14358
 14359 False = False.getOperand(0);
 14360
 14361 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
 14362 return SDValue();
 14363
 14364 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
 14365 if (!FalseRHSC || !FalseRHSC->isZero())
 14366 return SDValue();
 14367
 14368 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
 14369 if (CCVal2 != ISD::SETGT)
 14370 return SDValue();
 14371
 14372 // Emit the signed to unsigned saturation pattern.
 14373 SDLoc DL(N);
 14374 SDValue Max =
 14375 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
 14376 SDValue Min =
 14377 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
 14378 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
 14379 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
 14380}
14381
// DAG combine for ISD::TRUNCATE: pre-promote i1-of-srl for Zbs BEXT, then try
// the saturating-truncate fold.
// NOTE(review): the declaration line (14382) was dropped during extraction —
// presumably `static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG
// &DAG,` — confirm against upstream LLVM.
 14383 const RISCVSubtarget &Subtarget) {
 14384 SDValue N0 = N->getOperand(0);
 14385 EVT VT = N->getValueType(0);
 14386
 14387 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
 14388 // extending X. This is safe since we only need the LSB after the shift and
 14389 // shift amounts larger than 31 would produce poison. If we wait until
 14390 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
 14391 // to use a BEXT instruction.
 14392 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
 14393 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
 14394 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
 14395 SDLoc DL(N0);
 14396 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
 14397 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
 14398 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
 14399 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
 14400 }
 14401
 14402 return combineTruncSelectToSMaxUSat(N, DAG);
 14403}
14404
14405// Combines two comparison operations and a logic operation into one selection
 14406// operation (min, max) and logic operation. Returns the new constructed node
 14407// if the conditions for optimization are satisfied.
// NOTE(review): the declaration lines (14408-14409) were dropped during
// extraction — presumably `static SDValue performANDCombine(SDNode *N,
// TargetLowering::DAGCombinerInfo &DCI,` — confirm against upstream LLVM.
 14410 const RISCVSubtarget &Subtarget) {
 14411 SelectionDAG &DAG = DCI.DAG;
 14412
 14413 SDValue N0 = N->getOperand(0);
 14414 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
 14415 // extending X. This is safe since we only need the LSB after the shift and
 14416 // shift amounts larger than 31 would produce poison. If we wait until
 14417 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
 14418 // to use a BEXT instruction.
 14419 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
 14420 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
 14421 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
 14422 N0.hasOneUse()) {
 14423 SDLoc DL(N);
 14424 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
 14425 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
 14426 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
 14427 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
 14428 DAG.getConstant(1, DL, MVT::i64));
 14429 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
 14430 }
 14431
 14432 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
 14433 return V;
 14434 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
 14435 return V;
 14436
// DeMorgan rewrite is only valid once the DAG has been fully legalized.
 14437 if (DCI.isAfterLegalizeDAG())
 14438 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
 14439 return V;
 14440
 14441 // fold (and (select lhs, rhs, cc, -1, y), x) ->
 14442 // (select lhs, rhs, cc, x, (and x, y))
 14443 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
 14444}
14445
14446// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
 14447// FIXME: Generalize to other binary operators with same operand.
// NOTE(review): the declaration line (14448) was dropped during extraction —
// presumably `static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue
// N1,` — confirm against upstream LLVM.
 14449 SelectionDAG &DAG) {
 14450 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
 14451
// NOTE(review): line 14453 was dropped during extraction — presumably the
// check that N1's opcode is RISCVISD::CZERO_NEZ — confirm upstream.
 14452 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
 14454 !N0.hasOneUse() || !N1.hasOneUse())
 14455 return SDValue();
 14456
 14457 // Should have the same condition.
 14458 SDValue Cond = N0.getOperand(1);
 14459 if (Cond != N1.getOperand(1))
 14460 return SDValue();
 14461
 14462 SDValue TrueV = N0.getOperand(0);
 14463 SDValue FalseV = N1.getOperand(0);
 14464
 14465 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
 14466 TrueV.getOperand(1) != FalseV.getOperand(1) ||
 14467 !isOneConstant(TrueV.getOperand(1)) ||
 14468 !TrueV.hasOneUse() || !FalseV.hasOneUse())
 14469 return SDValue();
 14470
 14471 EVT VT = N->getValueType(0);
 14472 SDLoc DL(N);
 14473
// Rebuild the czero pair on the xor inputs, then apply the common xor once
// after the OR.
 14474 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
 14475 Cond);
 14476 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
 14477 Cond);
 14478 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
 14479 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
 14480}
14481
// DAG combine for ISD::OR: reduce folds, DeMorgan, the czero select idiom,
// then the commutative select fold.
// NOTE(review): the declaration line (14482) was dropped during extraction —
// presumably `static SDValue performORCombine(SDNode *N,
// TargetLowering::DAGCombinerInfo &DCI,` — confirm against upstream LLVM.
 14483 const RISCVSubtarget &Subtarget) {
 14484 SelectionDAG &DAG = DCI.DAG;
 14485
 14486 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
 14487 return V;
 14488 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
 14489 return V;
 14490
 14491 if (DCI.isAfterLegalizeDAG())
 14492 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
 14493 return V;
 14494
 14495 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
 14496 // We may be able to pull a common operation out of the true and false value.
 14497 SDValue N0 = N->getOperand(0);
 14498 SDValue N1 = N->getOperand(1);
 14499 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
 14500 return V;
 14501 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
 14502 return V;
 14503
 14504 // fold (or (select cond, 0, y), x) ->
 14505 // (select cond, x, (or x, y))
 14506 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
 14507}
14508
// DAG combine for ISD::XOR: BSET/ROLW pre-promotions, setcc immediate
// adjustment, reduce folds, then the commutative select fold.
// NOTE(review): the declaration line (14509) was dropped during extraction —
// presumably `static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,`
// — confirm against upstream LLVM.
 14510 const RISCVSubtarget &Subtarget) {
 14511 SDValue N0 = N->getOperand(0);
 14512 SDValue N1 = N->getOperand(1);
 14513
 14514 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
 14515 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
 14516 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
 14517 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
 14518 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
 14519 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
 14520 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
 14521 SDLoc DL(N);
 14522 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
 14523 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
 14524 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
 14525 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
 14526 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
 14527 }
 14528
 14529 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
 14530 // NOTE: Assumes ROL being legal means ROLW is legal.
 14531 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// NOTE(review): line 14533 was dropped during extraction — presumably the
// checks that N1 is all-ones and N0's first operand is the constant 1 —
// confirm upstream.
 14532 if (N0.getOpcode() == RISCVISD::SLLW &&
 14534 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
 14535 SDLoc DL(N);
 14536 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
 14537 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
 14538 }
 14539
 14540 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
 14541 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
 14542 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
 14543 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
 14544 if (ConstN00 && CC == ISD::SETLT) {
 14545 EVT VT = N0.getValueType();
 14546 SDLoc DL(N0);
 14547 const APInt &Imm = ConstN00->getAPIntValue();
// Only fold when the incremented immediate still fits in a simm12.
 14548 if ((Imm + 1).isSignedIntN(12))
 14549 return DAG.getSetCC(DL, VT, N0.getOperand(1),
 14550 DAG.getConstant(Imm + 1, DL, VT), CC);
 14551 }
 14552 }
 14553
 14554 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
 14555 return V;
 14556 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
 14557 return V;
 14558
 14559 // fold (xor (select cond, 0, y), x) ->
 14560 // (select cond, x, (xor x, y))
 14561 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
 14562}
14563
14564// Try to expand a scalar multiply to a faster sequence.
// NOTE(review): the declaration lines (14565-14566) were dropped during
// extraction — presumably `static SDValue expandMul(SDNode *N, SelectionDAG
// &DAG, TargetLowering::DAGCombinerInfo &DCI,` — confirm against upstream.
 14567 const RISCVSubtarget &Subtarget) {
 14568
 14569 EVT VT = N->getValueType(0);
 14570
 14571 // LI + MUL is usually smaller than the alternative sequence.
// NOTE(review): line 14572 was dropped during extraction — presumably a
// minsize check on the current function — confirm upstream.
 14573 return SDValue();
 14574
 14575 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
 14576 return SDValue();
 14577
 14578 if (VT != Subtarget.getXLenVT())
 14579 return SDValue();
 14580
 14581 const bool HasShlAdd =
 14582 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
 14583
 14584 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
 14585 if (!CNode)
 14586 return SDValue();
 14587 uint64_t MulAmt = CNode->getZExtValue();
 14588
 14589 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
 14590 // We're adding additional uses of X here, and in principle, we should be freezing
 14591 // X before doing so. However, adding freeze here causes real regressions, and no
 14592 // other target properly freezes X in these cases either.
 14593 SDValue X = N->getOperand(0);
 14594
 14595 if (HasShlAdd) {
 14596 for (uint64_t Divisor : {3, 5, 9}) {
 14597 if (MulAmt % Divisor != 0)
 14598 continue;
 14599 uint64_t MulAmt2 = MulAmt / Divisor;
 14600 // 3/5/9 * 2^N -> shl (shXadd X, X), N
 14601 if (isPowerOf2_64(MulAmt2)) {
 14602 SDLoc DL(N);
// NOTE(review): this local X shadows the outer X declared above; both refer
// to N->getOperand(0), so behavior is unchanged.
 14603 SDValue X = N->getOperand(0);
 14604 // Put the shift first if we can fold a zext into the
 14605 // shift forming a slli.uw.
 14606 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
 14607 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
 14608 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
 14609 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
 14610 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
 14611 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
 14612 Shl);
 14613 }
 14614 // Otherwise, put the shl second so that it can fold with following
 14615 // instructions (e.g. sext or add).
 14616 SDValue Mul359 =
 14617 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
 14618 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
 14619 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
 14620 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
 14621 }
 14622
 14623 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
 14624 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
 14625 SDLoc DL(N);
 14626 SDValue Mul359 =
 14627 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
 14628 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
 14629 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
 14630 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
 14631 Mul359);
 14632 }
 14633 }
 14634
 14635 // If this is a power 2 + 2/4/8, we can use a shift followed by a single
 14636 // shXadd. First check if this a sum of two power of 2s because that's
 14637 // easy. Then count how many zeros are up to the first bit.
 14638 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
 14639 unsigned ScaleShift = llvm::countr_zero(MulAmt);
 14640 if (ScaleShift >= 1 && ScaleShift < 4) {
 14641 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
 14642 SDLoc DL(N);
 14643 SDValue Shift1 =
 14644 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
 14645 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
 14646 DAG.getConstant(ScaleShift, DL, VT), Shift1);
 14647 }
 14648 }
 14649
 14650 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
 14651 // This is the two instruction form, there are also three instruction
 14652 // variants we could implement. e.g.
 14653 // (2^(1,2,3) * 3,5,9 + 1) << C2
 14654 // 2^(C1>3) * 3,5,9 +/- 1
 14655 for (uint64_t Divisor : {3, 5, 9}) {
 14656 uint64_t C = MulAmt - 1;
 14657 if (C <= Divisor)
 14658 continue;
 14659 unsigned TZ = llvm::countr_zero(C);
 14660 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
 14661 SDLoc DL(N);
 14662 SDValue Mul359 =
 14663 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
 14664 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
 14665 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
 14666 DAG.getConstant(TZ, DL, VT), X);
 14667 }
 14668 }
 14669
 14670 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
 14671 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
 14672 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
 14673 if (ScaleShift >= 1 && ScaleShift < 4) {
 14674 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
 14675 SDLoc DL(N);
 14676 SDValue Shift1 =
 14677 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
 14678 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
 14679 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
 14680 DAG.getConstant(ScaleShift, DL, VT), X));
 14681 }
 14682 }
 14683
 14684 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
 14685 for (uint64_t Offset : {3, 5, 9}) {
 14686 if (isPowerOf2_64(MulAmt + Offset)) {
 14687 SDLoc DL(N);
 14688 SDValue Shift1 =
 14689 DAG.getNode(ISD::SHL, DL, VT, X,
 14690 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
 14691 SDValue Mul359 =
 14692 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
 14693 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
 14694 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
 14695 }
 14696 }
 14697 }
 14698
 14699 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
 14700 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
 14701 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
 14702 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
 14703 SDLoc DL(N);
 14704 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
 14705 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
 14706 SDValue Shift2 =
 14707 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
 14708 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
 14709 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
 14710 }
 14711
 14712 if (HasShlAdd) {
 14713 for (uint64_t Divisor : {3, 5, 9}) {
 14714 if (MulAmt % Divisor != 0)
 14715 continue;
 14716 uint64_t MulAmt2 = MulAmt / Divisor;
 14717 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
 14718 // of 25 which happen to be quite common.
 14719 for (uint64_t Divisor2 : {3, 5, 9}) {
 14720 if (MulAmt2 % Divisor2 != 0)
 14721 continue;
 14722 uint64_t MulAmt3 = MulAmt2 / Divisor2;
 14723 if (isPowerOf2_64(MulAmt3)) {
 14724 SDLoc DL(N);
 14725 SDValue Mul359A =
 14726 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
 14727 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
 14728 SDValue Mul359B = DAG.getNode(
 14729 RISCVISD::SHL_ADD, DL, VT, Mul359A,
 14730 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
 14731 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
 14732 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
 14733 }
 14734 }
 14735 }
 14736 }
 14737
 14738 return SDValue();
 14739}
14740
14741// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
 14742// (bitcast (sra (v2Xi16 (bitcast X)), 15))
 14743// Same for other equivalent types with other equivalent constants.
// NOTE(review): the declaration line (14744) was dropped during extraction —
// presumably `static SDValue combineVectorMulToSraBitcast(SDNode *N,
// SelectionDAG &DAG) {` — confirm against upstream LLVM.
 14745 EVT VT = N->getValueType(0);
 14746 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 14747
 14748 // Do this for legal vectors unless they are i1 or i8 vectors.
 14749 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
 14750 return SDValue();
 14751
 14752 if (N->getOperand(0).getOpcode() != ISD::AND ||
 14753 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
 14754 return SDValue();
 14755
 14756 SDValue And = N->getOperand(0);
 14757 SDValue Srl = And.getOperand(0);
 14758
// V1 = mul constant, V2 = and mask, V3 = shift amount.
 14759 APInt V1, V2, V3;
 14760 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
 14761 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
// NOTE(review): line 14762 was dropped during extraction — presumably the
// splat-constant extraction of the srl shift amount into V3 — confirm
// upstream.
 14763 return SDValue();
 14764
 14765 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
 14766 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
 14767 V3 != (HalfSize - 1))
 14768 return SDValue();
 14769
 14770 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
 14771 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
 14772 VT.getVectorElementCount() * 2);
 14773 SDLoc DL(N);
 14774 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
 14775 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
 14776 DAG.getConstant(HalfSize - 1, DL, HalfVT));
 14777 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
 14778}
14779
// DAG combine for ISD::MUL: scalars go to expandMul; vectors try the
// vmadd/vnmsub forms, zext narrowing, and the sra-bitcast fold.
// NOTE(review): the declaration lines (14780-14781) were dropped during
// extraction — presumably `static SDValue performMULCombine(SDNode *N,
// SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI,` — confirm
// against upstream LLVM.
 14782 const RISCVSubtarget &Subtarget) {
 14783 EVT VT = N->getValueType(0);
 14784 if (!VT.isVector())
 14785 return expandMul(N, DAG, DCI, Subtarget);
 14786
 14787 SDLoc DL(N);
 14788 SDValue N0 = N->getOperand(0);
 14789 SDValue N1 = N->getOperand(1);
 14790 SDValue MulOper;
 14791 unsigned AddSubOpc;
 14792
 14793 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
 14794 // (mul x, add (y, 1)) -> (add x, (mul x, y))
 14795 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
 14796 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
// Match an add/sub-with-1 operand; on success AddSubOpc and MulOper are set.
 14797 auto IsAddSubWith1 = [&](SDValue V) -> bool {
 14798 AddSubOpc = V->getOpcode();
 14799 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
 14800 SDValue Opnd = V->getOperand(1);
 14801 MulOper = V->getOperand(0);
 14802 if (AddSubOpc == ISD::SUB)
 14803 std::swap(Opnd, MulOper);
 14804 if (isOneOrOneSplat(Opnd))
 14805 return true;
 14806 }
 14807 return false;
 14808 };
 14809
 14810 if (IsAddSubWith1(N0)) {
 14811 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
 14812 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
 14813 }
 14814
 14815 if (IsAddSubWith1(N1)) {
 14816 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
 14817 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
 14818 }
 14819
 14820 if (SDValue V = combineBinOpOfZExt(N, DAG))
 14821 return V;
 14822
// NOTE(review): line 14823 was dropped during extraction — presumably an
// `if (SDValue V = ...)` call into the mul-to-sra-bitcast combine above —
// confirm upstream.
 14824 return V;
 14825
 14826 return SDValue();
 14827}
14828
14829/// According to the property that indexed load/store instructions zero-extend
 14830/// their indices, try to narrow the type of index operand.
/// \param N         [in/out] the index operand; replaced with a narrower
///                  equivalent on success.
/// \param IndexType signedness of the index interpretation; signed indices
///                  are not narrowable here.
/// \return true if N was replaced with a narrower index.
 14831static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
 14832 if (isIndexTypeSigned(IndexType))
 14833 return false;
 14834
 14835 if (!N->hasOneUse())
 14836 return false;
 14837
 14838 EVT VT = N.getValueType();
 14839 SDLoc DL(N);
 14840
 14841 // In general, what we're doing here is seeing if we can sink a truncate to
 14842 // a smaller element type into the expression tree building our index.
 14843 // TODO: We can generalize this and handle a bunch more cases if useful.
 14844
 14845 // Narrow a buildvector to the narrowest element type. This requires less
 14846 // work and less register pressure at high LMUL, and creates smaller constants
 14847 // which may be cheaper to materialize.
 14848 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
 14849 KnownBits Known = DAG.computeKnownBits(N);
 14850 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
 14851 LLVMContext &C = *DAG.getContext();
 14852 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
 14853 if (ResultVT.bitsLT(VT.getVectorElementType())) {
 14854 N = DAG.getNode(ISD::TRUNCATE, DL,
 14855 VT.changeVectorElementType(ResultVT), N);
 14856 return true;
 14857 }
 14858 }
 14859
 14860 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
 14861 if (N.getOpcode() != ISD::SHL)
 14862 return false;
 14863
 14864 SDValue N0 = N.getOperand(0);
 14865 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
// NOTE(review): line 14866 was dropped during extraction — presumably the
// second opcode alternative of the zero-extend check — confirm upstream.
 14867 return false;
 14868 if (!N0->hasOneUse())
 14869 return false;
 14870
 14871 APInt ShAmt;
 14872 SDValue N1 = N.getOperand(1);
 14873 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
 14874 return false;
 14875
 14876 SDValue Src = N0.getOperand(0);
 14877 EVT SrcVT = Src.getValueType();
 14878 unsigned SrcElen = SrcVT.getScalarSizeInBits();
 14879 unsigned ShAmtV = ShAmt.getZExtValue();
// Round the required width up to a power of two, with a floor of 8 bits.
 14880 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
 14881 NewElen = std::max(NewElen, 8U);
 14882
 14883 // Skip if NewElen is not narrower than the original extended type.
 14884 if (NewElen >= N0.getValueType().getScalarSizeInBits())
 14885 return false;
 14886
 14887 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
 14888 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
 14889
 14890 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
 14891 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
 14892 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
 14893 return true;
 14894}
14895
14896// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14897// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14898// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14899// can become a sext.w instead of a shift pair.
14901 const RISCVSubtarget &Subtarget) {
14902 SDValue N0 = N->getOperand(0);
14903 SDValue N1 = N->getOperand(1);
14904 EVT VT = N->getValueType(0);
14905 EVT OpVT = N0.getValueType();
14906
14907 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14908 return SDValue();
14909
14910 // RHS needs to be a constant.
14911 auto *N1C = dyn_cast<ConstantSDNode>(N1);
14912 if (!N1C)
14913 return SDValue();
14914
14915 // LHS needs to be (and X, 0xffffffff).
14916 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14917 !isa<ConstantSDNode>(N0.getOperand(1)) ||
14918 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14919 return SDValue();
14920
14921 // Looking for an equality compare.
14922 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14923 if (!isIntEqualitySetCC(Cond))
14924 return SDValue();
14925
14926 // Don't do this if the sign bit is provably zero, it will be turned back into
14927 // an AND.
14928 APInt SignMask = APInt::getOneBitSet(64, 31);
14929 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14930 return SDValue();
14931
14932 const APInt &C1 = N1C->getAPIntValue();
14933
14934 SDLoc dl(N);
14935 // If the constant is larger than 2^32 - 1 it is impossible for both sides
14936 // to be equal.
14937 if (C1.getActiveBits() > 32)
14938 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14939
14940 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
14941 N0.getOperand(0), DAG.getValueType(MVT::i32));
14942 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14943 dl, OpVT), Cond);
14944}
14945
14946static SDValue
14948 const RISCVSubtarget &Subtarget) {
14949 SDValue Src = N->getOperand(0);
14950 EVT VT = N->getValueType(0);
14951 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14952 unsigned Opc = Src.getOpcode();
14953
14954 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14955 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
14956 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
14957 Subtarget.hasStdExtZfhmin())
14958 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14959 Src.getOperand(0));
14960
14961 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
14962 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
14963 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
14964 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
14965 return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0),
14966 Src.getOperand(1));
14967
14968 return SDValue();
14969}
14970
14971namespace {
14972// Forward declaration of the structure holding the necessary information to
14973// apply a combine.
14974struct CombineResult;
14975
// Bitmask of extension kinds an operand supports / a combine allows; the
// values are power-of-two so they can be OR'd together and tested with `&`
// (see the AllowExtMask parameter of canFoldToVWWithSameExtensionImpl).
enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14977/// Helper class for folding sign/zero extensions.
14978/// In particular, this class is used for the following combines:
14979/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14980/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14981/// mul | mul_vl -> vwmul(u) | vwmul_su
14982/// shl | shl_vl -> vwsll
14983/// fadd -> vfwadd | vfwadd_w
14984/// fsub -> vfwsub | vfwsub_w
14985/// fmul -> vfwmul
14986/// An object of this class represents an operand of the operation we want to
14987/// combine.
14988/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14989/// NodeExtensionHelper for `a` and one for `b`.
14990///
14991/// This class abstracts away how the extension is materialized and
14992/// how its number of users affect the combines.
14993///
14994/// In particular:
14995/// - VWADD_W is conceptually == add(op0, sext(op1))
14996/// - VWADDU_W == add(op0, zext(op1))
14997/// - VWSUB_W == sub(op0, sext(op1))
14998/// - VWSUBU_W == sub(op0, zext(op1))
14999/// - VFWADD_W == fadd(op0, fpext(op1))
15000/// - VFWSUB_W == fsub(op0, fpext(op1))
15001/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
15002/// zext|sext(smaller_value).
struct NodeExtensionHelper {
  /// Records if this operand is like being zero extended.
  bool SupportsZExt;
  /// Records if this operand is like being sign extended.
  /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
  /// instance, a splat constant (e.g., 3), would support being both sign and
  /// zero extended.
  bool SupportsSExt;
  /// Records if this operand is like being floating-point extended.
  bool SupportsFPExt;
  /// This boolean captures whether we care if this operand would still be
  /// around after the folding happens.
  bool EnforceOneUse;
  /// Original value that this NodeExtensionHelper represents.
  SDValue OrigOperand;

  /// Get the value feeding the extension or the value itself.
  /// E.g., for zext(a), this would return a.
  SDValue getSource() const {
    switch (OrigOperand.getOpcode()) {
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND:
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
      return OrigOperand.getOperand(0);
    default:
      return OrigOperand;
    }
  }

  /// Check if this instance represents a splat.
  bool isSplat() const {
    return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
           OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
  }

  /// Get the extended opcode.
  unsigned getExtOpc(ExtKind SupportsExt) const {
    switch (SupportsExt) {
    case ExtKind::SExt:
      return RISCVISD::VSEXT_VL;
    case ExtKind::ZExt:
      return RISCVISD::VZEXT_VL;
    case ExtKind::FPExt:
      // NOTE(review): no return is visible for FPExt in this view, so FPExt
      // falls into llvm_unreachable below — the extraction appears to have
      // dropped a line here; confirm against upstream.
    }
    llvm_unreachable("Unknown ExtKind enum");
  }

  /// Get or create a value that can feed \p Root with the given extension \p
  /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
  /// operand. \see ::getSource().
  SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget,
                                std::optional<ExtKind> SupportsExt) const {
    if (!SupportsExt.has_value())
      return OrigOperand;

    MVT NarrowVT = getNarrowType(Root, *SupportsExt);

    SDValue Source = getSource();
    assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
    // Already the narrow type: nothing to create.
    if (Source.getValueType() == NarrowVT)
      return Source;

    // vfmadd_vl -> vfwmadd_vl can take bf16 operands
    if (Source.getValueType().getVectorElementType() == MVT::bf16) {
      assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 &&
             Root->getOpcode() == RISCVISD::VFMADD_VL);
      return Source;
    }

    unsigned ExtOpc = getExtOpc(*SupportsExt);

    // If we need an extension, we should be changing the type.
    SDLoc DL(OrigOperand);
    auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
    switch (OrigOperand.getOpcode()) {
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND:
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
      return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
    case ISD::SPLAT_VECTOR:
      return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
      // NOTE(review): the case labels for the VMV_V_X_VL and VFMV_V_F_VL
      // arms below appear dropped in this view — confirm against upstream.
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
                         DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
      Source = Source.getOperand(1);
      assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
      Source = Source.getOperand(0);
      assert(Source.getValueType() == NarrowVT.getVectorElementType());
      return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
                         DAG.getUNDEF(NarrowVT), Source, VL);
    default:
      // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
      // and that operand should already have the right NarrowVT so no
      // extension should be required at this point.
      llvm_unreachable("Unsupported opcode");
    }
  }

  /// Helper function to get the narrow type for \p Root.
  /// The narrow type is the type of \p Root where we divided the size of each
  /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
  /// \pre Both the narrow type and the original type should be legal.
  static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
    MVT VT = Root->getSimpleValueType(0);

    // Determine the narrow size.
    unsigned NarrowSize = VT.getScalarSizeInBits() / 2;

    MVT EltVT = SupportsExt == ExtKind::FPExt
                    ? MVT::getFloatingPointVT(NarrowSize)
                    : MVT::getIntegerVT(NarrowSize);

    assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
           "Trying to extend something we can't represent");
    MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
    return NarrowVT;
  }

  /// Get the opcode to materialize:
  /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
  static unsigned getSExtOpcode(unsigned Opcode) {
    switch (Opcode) {
    case ISD::ADD:
    case RISCVISD::ADD_VL:
    case ISD::OR:
      return RISCVISD::VWADD_VL;
    case ISD::SUB:
    case RISCVISD::SUB_VL:
      return RISCVISD::VWSUB_VL;
    case ISD::MUL:
    case RISCVISD::MUL_VL:
      return RISCVISD::VWMUL_VL;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Get the opcode to materialize:
  /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
  static unsigned getZExtOpcode(unsigned Opcode) {
    switch (Opcode) {
    case ISD::ADD:
    case RISCVISD::ADD_VL:
    case ISD::OR:
      return RISCVISD::VWADDU_VL;
    case ISD::SUB:
    case RISCVISD::SUB_VL:
      return RISCVISD::VWSUBU_VL;
    case ISD::MUL:
    case RISCVISD::MUL_VL:
      return RISCVISD::VWMULU_VL;
    case ISD::SHL:
    case RISCVISD::SHL_VL:
      return RISCVISD::VWSLL_VL;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Get the opcode to materialize:
  /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
  static unsigned getFPExtOpcode(unsigned Opcode) {
    switch (Opcode) {
    case RISCVISD::FADD_VL:
      return RISCVISD::VFWADD_VL;
    case RISCVISD::FSUB_VL:
      return RISCVISD::VFWSUB_VL;
    case RISCVISD::FMUL_VL:
      return RISCVISD::VFWMUL_VL;
      // NOTE(review): the case labels guarding the four FMA returns below
      // (VFMADD/VFMSUB/VFNMADD/VFNMSUB) appear dropped in this view.
      return RISCVISD::VFWMADD_VL;
      return RISCVISD::VFWMSUB_VL;
      return RISCVISD::VFWNMADD_VL;
      return RISCVISD::VFWNMSUB_VL;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
  /// newOpcode(a, b).
  static unsigned getSUOpcode(unsigned Opcode) {
    assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
           "SU is only supported for MUL");
    return RISCVISD::VWMULSU_VL;
  }

  /// Get the opcode to materialize
  /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
  static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
    switch (Opcode) {
    case ISD::ADD:
    case RISCVISD::ADD_VL:
    case ISD::OR:
      return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
    case ISD::SUB:
    case RISCVISD::SUB_VL:
      return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
    case RISCVISD::FADD_VL:
      return RISCVISD::VFWADD_W_VL;
    case RISCVISD::FSUB_VL:
      return RISCVISD::VFWSUB_W_VL;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Callback type implemented by the canFoldToVW* strategy functions
  /// returned from getSupportedFoldings.
  using CombineToTry = std::function<std::optional<CombineResult>(
      SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
      const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
      const RISCVSubtarget &)>;

  /// Check if this node needs to be fully folded or extended for all users.
  bool needToPromoteOtherUsers() const { return EnforceOneUse; }

  /// Compute the Supports* flags for a splat operand (SPLAT_VECTOR or
  /// VMV_V_X_VL) by looking at the known bits of the splatted scalar.
  void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
    unsigned Opc = OrigOperand.getOpcode();
    MVT VT = OrigOperand.getSimpleValueType();

    assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
           "Unexpected Opcode");

    // The passthru must be undef for tail agnostic.
    if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
      return;

    // Get the scalar value.
    SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
                                          : OrigOperand.getOperand(1);

    // See if we have enough sign bits or zero bits in the scalar to use a
    // widening opcode by splatting to smaller element size.
    unsigned EltBits = VT.getScalarSizeInBits();
    unsigned ScalarBits = Op.getValueSizeInBits();
    // If we're not getting all bits from the element, we need special handling.
    if (ScalarBits < EltBits) {
      // This should only occur on RV32.
      assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
             !Subtarget.is64Bit() && "Unexpected splat");
      // vmv.v.x sign extends narrow inputs.
      SupportsSExt = true;

      // If the input is positive, then sign extend is also zero extend.
      if (DAG.SignBitIsZero(Op))
        SupportsZExt = true;

      EnforceOneUse = false;
      return;
    }

    unsigned NarrowSize = EltBits / 2;
    // If the narrow type cannot be expressed with a legal VMV,
    // this is not a valid candidate.
    if (NarrowSize < 8)
      return;

    if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
      SupportsSExt = true;

    if (DAG.MaskedValueIsZero(Op,
                              APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
      SupportsZExt = true;

    EnforceOneUse = false;
  }

  /// Whether an FP widening from \p NarrowEltVT is legal for \p Root given
  /// the available subtarget extensions.
  bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
                           const RISCVSubtarget &Subtarget) {
    // Any f16 extension will need zvfh
    if (NarrowEltVT == MVT::f16 && !Subtarget.hasVInstructionsF16())
      return false;
    // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
    // zvfbfwma
    if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||
                                     Root->getOpcode() != RISCVISD::VFMADD_VL))
      return false;
    return true;
  }

  /// Helper method to set the various fields of this struct based on the
  /// type of \p Root.
  void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
                              const RISCVSubtarget &Subtarget) {
    SupportsZExt = false;
    SupportsSExt = false;
    SupportsFPExt = false;
    EnforceOneUse = true;
    unsigned Opc = OrigOperand.getOpcode();
    // For the nodes we handle below, we end up using their inputs directly: see
    // getSource(). However since they either don't have a passthru or we check
    // that their passthru is undef, we can safely ignore their mask and VL.
    switch (Opc) {
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      MVT VT = OrigOperand.getSimpleValueType();
      if (!VT.isVector())
        break;

      SDValue NarrowElt = OrigOperand.getOperand(0);
      MVT NarrowVT = NarrowElt.getSimpleValueType();
      // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
      if (NarrowVT.getVectorElementType() == MVT::i1)
        break;

      SupportsZExt = Opc == ISD::ZERO_EXTEND;
      SupportsSExt = Opc == ISD::SIGN_EXTEND;
      break;
    }
    case RISCVISD::VZEXT_VL:
      SupportsZExt = true;
      break;
    case RISCVISD::VSEXT_VL:
      SupportsSExt = true;
      break;
      // NOTE(review): the FP_EXTEND_VL case label and the initializer of
      // NarrowEltVT appear truncated in this view — confirm upstream.
      MVT NarrowEltVT =
      if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
        break;
      SupportsFPExt = true;
      break;
    }
    case ISD::SPLAT_VECTOR:
      fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
      break;
    case RISCVISD::VFMV_V_F_VL: {
      MVT VT = OrigOperand.getSimpleValueType();

      if (!OrigOperand.getOperand(0).isUndef())
        break;

      SDValue Op = OrigOperand.getOperand(1);
      if (Op.getOpcode() != ISD::FP_EXTEND)
        break;

      if (!isSupportedFPExtend(Root, Op.getOperand(0).getSimpleValueType(),
                               Subtarget))
        break;

      unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
      unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
      if (NarrowSize != ScalarBits)
        break;

      SupportsFPExt = true;
      break;
    }
    default:
      break;
    }
  }

  /// Check if \p Root supports any extension folding combines.
  static bool isSupportedRoot(const SDNode *Root,
                              const RISCVSubtarget &Subtarget) {
    switch (Root->getOpcode()) {
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL: {
      return Root->getValueType(0).isScalableVector();
    }
    case ISD::OR: {
      return Root->getValueType(0).isScalableVector() &&
             Root->getFlags().hasDisjoint();
    }
    // Vector Widening Integer Add/Sub/Mul Instructions
    case RISCVISD::ADD_VL:
    case RISCVISD::MUL_VL:
    case RISCVISD::SUB_VL:
    // Vector Widening Floating-Point Add/Sub/Mul Instructions
    case RISCVISD::FADD_VL:
    case RISCVISD::FSUB_VL:
    case RISCVISD::FMUL_VL:
      return true;
    case ISD::SHL:
      return Root->getValueType(0).isScalableVector() &&
             Subtarget.hasStdExtZvbb();
    case RISCVISD::SHL_VL:
      return Subtarget.hasStdExtZvbb();
      // NOTE(review): additional case labels (the VW*_W_VL / VFW*_W_VL and
      // FMA roots) appear dropped in this view before this return.
      return true;
    default:
      return false;
    }
  }

  /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
  NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
                      const RISCVSubtarget &Subtarget) {
    assert(isSupportedRoot(Root, Subtarget) &&
           "Trying to build an helper with an "
           "unsupported root");
    assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
    OrigOperand = Root->getOperand(OperandIdx);

    unsigned Opc = Root->getOpcode();
    switch (Opc) {
    // We consider
    // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
    // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
    // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
      if (OperandIdx == 1) {
        // NOTE(review): the case labels above and the right-hand sides of the
        // three assignments below (opcode comparisons against the widening
        // *_W_VL opcodes) appear truncated in this view — confirm upstream.
        SupportsZExt =
        SupportsSExt =
        SupportsFPExt =
        // There's no existing extension here, so we don't have to worry about
        // making sure it gets removed.
        EnforceOneUse = false;
        break;
      }
      [[fallthrough]];
    default:
      fillUpExtensionSupport(Root, DAG, Subtarget);
      break;
    }
  }

  /// Helper function to get the Mask and VL from \p Root.
  static std::pair<SDValue, SDValue>
  getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
               const RISCVSubtarget &Subtarget) {
    assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
    switch (Root->getOpcode()) {
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::OR:
    case ISD::SHL: {
      // Generic ISD nodes carry no mask/VL: synthesize the defaults.
      SDLoc DL(Root);
      MVT VT = Root->getSimpleValueType(0);
      return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
    }
    default:
      // For the *_VL roots, mask and VL are operands 3 and 4.
      return std::make_pair(Root->getOperand(3), Root->getOperand(4));
    }
  }

  /// Helper function to check if \p N is commutative with respect to the
  /// foldings that are supported by this class.
  static bool isCommutative(const SDNode *N) {
    switch (N->getOpcode()) {
    case ISD::ADD:
    case ISD::MUL:
    case ISD::OR:
    case RISCVISD::ADD_VL:
    case RISCVISD::MUL_VL:
    case RISCVISD::FADD_VL:
    case RISCVISD::FMUL_VL:
      return true;
    case ISD::SUB:
    case RISCVISD::SUB_VL:
    case RISCVISD::FSUB_VL:
    case ISD::SHL:
    case RISCVISD::SHL_VL:
      return false;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }

  /// Get a list of combine to try for folding extensions in \p Root.
  /// Note that each returned CombineToTry function doesn't actually modify
  /// anything. Instead they produce an optional CombineResult that if not None,
  /// need to be materialized for the combine to be applied.
  /// \see CombineResult::materialize.
  /// If the related CombineToTry function returns std::nullopt, that means the
  /// combine didn't match.
  static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
};
15525
15526/// Helper structure that holds all the necessary information to materialize a
15527/// combine that does some extension folding.
15528struct CombineResult {
15529 /// Opcode to be generated when materializing the combine.
15530 unsigned TargetOpcode;
15531 // No value means no extension is needed.
15532 std::optional<ExtKind> LHSExt;
15533 std::optional<ExtKind> RHSExt;
15534 /// Root of the combine.
15535 SDNode *Root;
15536 /// LHS of the TargetOpcode.
15537 NodeExtensionHelper LHS;
15538 /// RHS of the TargetOpcode.
15539 NodeExtensionHelper RHS;
15540
15541 CombineResult(unsigned TargetOpcode, SDNode *Root,
15542 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
15543 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
15544 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15545 LHS(LHS), RHS(RHS) {}
15546
15547 /// Return a value that uses TargetOpcode and that can be used to replace
15548 /// Root.
15549 /// The actual replacement is *not* done in that method.
15550 SDValue materialize(SelectionDAG &DAG,
15551 const RISCVSubtarget &Subtarget) const {
15552 SDValue Mask, VL, Passthru;
15553 std::tie(Mask, VL) =
15554 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15555 switch (Root->getOpcode()) {
15556 default:
15557 Passthru = Root->getOperand(2);
15558 break;
15559 case ISD::ADD:
15560 case ISD::SUB:
15561 case ISD::MUL:
15562 case ISD::OR:
15563 case ISD::SHL:
15564 Passthru = DAG.getUNDEF(Root->getValueType(0));
15565 break;
15566 }
15567 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
15568 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15569 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15570 Passthru, Mask, VL);
15571 }
15572};
15573
15574/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15575/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15576/// are zext) and LHS and RHS can be folded into Root.
15577/// AllowExtMask define which form `ext` can take in this pattern.
15578///
15579/// \note If the pattern can match with both zext and sext, the returned
15580/// CombineResult will feature the zext result.
15581///
15582/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15583/// can be used to apply the pattern.
15584static std::optional<CombineResult>
15585canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
15586 const NodeExtensionHelper &RHS,
15587 uint8_t AllowExtMask, SelectionDAG &DAG,
15588 const RISCVSubtarget &Subtarget) {
15589 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
15590 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
15591 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
15592 /*RHSExt=*/{ExtKind::ZExt});
15593 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
15594 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
15595 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15596 /*RHSExt=*/{ExtKind::SExt});
15597 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
15598 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
15599 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
15600 /*RHSExt=*/{ExtKind::FPExt});
15601 return std::nullopt;
15602}
15603
15604/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15605/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15606/// are zext) and LHS and RHS can be folded into Root.
15607///
15608/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15609/// can be used to apply the pattern.
15610static std::optional<CombineResult>
15611canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
15612 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15613 const RISCVSubtarget &Subtarget) {
15614 return canFoldToVWWithSameExtensionImpl(
15615 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15616 Subtarget);
15617}
15618
15619/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
15620///
15621/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15622/// can be used to apply the pattern.
15623static std::optional<CombineResult>
15624canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
15625 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15626 const RISCVSubtarget &Subtarget) {
15627 if (RHS.SupportsFPExt)
15628 return CombineResult(
15629 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
15630 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
15631
15632 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
15633 // sext/zext?
15634 // Control this behavior behind an option (AllowSplatInVW_W) for testing
15635 // purposes.
15636 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
15637 return CombineResult(
15638 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
15639 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
15640 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
15641 return CombineResult(
15642 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
15643 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
15644 return std::nullopt;
15645}
15646
15647/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
15648///
15649/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15650/// can be used to apply the pattern.
15651static std::optional<CombineResult>
15652canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15653 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15654 const RISCVSubtarget &Subtarget) {
15655 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15656 Subtarget);
15657}
15658
15659/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
15660///
15661/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15662/// can be used to apply the pattern.
15663static std::optional<CombineResult>
15664canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15665 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15666 const RISCVSubtarget &Subtarget) {
15667 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15668 Subtarget);
15669}
15670
15671/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
15672///
15673/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15674/// can be used to apply the pattern.
15675static std::optional<CombineResult>
15676canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15677 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15678 const RISCVSubtarget &Subtarget) {
15679 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15680 Subtarget);
15681}
15682
15683/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
15684///
15685/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15686/// can be used to apply the pattern.
15687static std::optional<CombineResult>
15688canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
15689 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15690 const RISCVSubtarget &Subtarget) {
15691
15692 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
15693 return std::nullopt;
15694 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
15695 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15696 /*RHSExt=*/{ExtKind::ZExt});
15697}
15698
15700NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
15701 SmallVector<CombineToTry> Strategies;
15702 switch (Root->getOpcode()) {
15703 case ISD::ADD:
15704 case ISD::SUB:
15705 case ISD::OR:
15706 case RISCVISD::ADD_VL:
15707 case RISCVISD::SUB_VL:
15708 case RISCVISD::FADD_VL:
15709 case RISCVISD::FSUB_VL:
15710 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
15711 Strategies.push_back(canFoldToVWWithSameExtension);
15712 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
15713 Strategies.push_back(canFoldToVW_W);
15714 break;
15715 case RISCVISD::FMUL_VL:
15720 Strategies.push_back(canFoldToVWWithSameExtension);
15721 break;
15722 case ISD::MUL:
15723 case RISCVISD::MUL_VL:
15724 // mul -> vwmul(u)
15725 Strategies.push_back(canFoldToVWWithSameExtension);
15726 // mul -> vwmulsu
15727 Strategies.push_back(canFoldToVW_SU);
15728 break;
15729 case ISD::SHL:
15730 case RISCVISD::SHL_VL:
15731 // shl -> vwsll
15732 Strategies.push_back(canFoldToVWWithZEXT);
15733 break;
15736 // vwadd_w|vwsub_w -> vwadd|vwsub
15737 Strategies.push_back(canFoldToVWWithSEXT);
15738 break;
15741 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
15742 Strategies.push_back(canFoldToVWWithZEXT);
15743 break;
15746 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
15747 Strategies.push_back(canFoldToVWWithFPEXT);
15748 break;
15749 default:
15750 llvm_unreachable("Unexpected opcode");
15751 }
15752 return Strategies;
15753}
15754} // End anonymous namespace.
15755
15756/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
15757/// The supported combines are:
15758/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
15759/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
15760/// mul | mul_vl -> vwmul(u) | vwmul_su
15761/// shl | shl_vl -> vwsll
15762/// fadd_vl -> vfwadd | vfwadd_w
15763/// fsub_vl -> vfwsub | vfwsub_w
15764/// fmul_vl -> vfwmul
15765/// vwadd(u)_w -> vwadd(u)
15766/// vwsub(u)_w -> vwsub(u)
15767/// vfwadd_w -> vfwadd
15768/// vfwsub_w -> vfwsub
// Implementation of the widening combine described above: walk the web of
// candidate nodes rooted at N, collect matching CombineResults, and only
// materialize them if every root in the web matched (all-or-nothing).
15771                                           const RISCVSubtarget &Subtarget) {
15772  SelectionDAG &DAG = DCI.DAG;
15773  if (DCI.isBeforeLegalize())
15774    return SDValue();
15775
15776  if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
15777    return SDValue();
15778
15779  SmallVector<SDNode *> Worklist;
15780  SmallSet<SDNode *, 8> Inserted;
15781  Worklist.push_back(N);
15782  Inserted.insert(N);
15783  SmallVector<CombineResult> CombinesToApply;
15784
15785  while (!Worklist.empty()) {
15786    SDNode *Root = Worklist.pop_back_val();
15787
15788    NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
15789    NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
    // Queue every other user of an extended operand so the whole web is
    // rewritten consistently; fail if any user cannot participate.
15790    auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
15791                                &Inserted](const NodeExtensionHelper &Op) {
15792      if (Op.needToPromoteOtherUsers()) {
15793        for (SDUse &Use : Op.OrigOperand->uses()) {
15794          SDNode *TheUser = Use.getUser();
15795          if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
15796            return false;
15797          // We only support the first 2 operands of FMA.
15798          if (Use.getOperandNo() >= 2)
15799            return false;
15800          if (Inserted.insert(TheUser).second)
15801            Worklist.push_back(TheUser);
15802        }
15803      }
15804      return true;
15805    };
15806
15807    // Control the compile time by limiting the number of node we look at in
15808    // total.
15809    if (Inserted.size() > ExtensionMaxWebSize)
15810      return SDValue();
15811
    // Strategies applicable to this root's opcode.
15813        NodeExtensionHelper::getSupportedFoldings(Root);
15814
15815    assert(!FoldingStrategies.empty() && "Nothing to be folded");
15816    bool Matched = false;
    // For commutative roots, a second attempt is made with the operands
    // swapped (see std::swap below).
15817    for (int Attempt = 0;
15818         (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15819         ++Attempt) {
15820
15821      for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15822           FoldingStrategies) {
15823        std::optional<CombineResult> Res =
15824            FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
15825        if (Res) {
15826          Matched = true;
15827          CombinesToApply.push_back(*Res);
15828          // All the inputs that are extended need to be folded, otherwise
15829          // we would be leaving the old input (since it is may still be used),
15830          // and the new one.
15831          if (Res->LHSExt.has_value())
15832            if (!AppendUsersIfNeeded(LHS))
15833              return SDValue();
15834          if (Res->RHSExt.has_value())
15835            if (!AppendUsersIfNeeded(RHS))
15836              return SDValue();
15837          break;
15838        }
15839      }
15840      std::swap(LHS, RHS);
15841    }
15842    // Right now we do an all or nothing approach.
15843    if (!Matched)
15844      return SDValue();
15845  }
15846  // Store the value for the replacement of the input node separately.
15847  SDValue InputRootReplacement;
15848  // We do the RAUW after we materialize all the combines, because some replaced
15849  // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
15850  // some of these nodes may appear in the NodeExtensionHelpers of some of the
15851  // yet-to-be-visited CombinesToApply roots.
15853  ValuesToReplace.reserve(CombinesToApply.size());
15854  for (CombineResult Res : CombinesToApply) {
15855    SDValue NewValue = Res.materialize(DAG, Subtarget);
15856    if (!InputRootReplacement) {
15857      assert(Res.Root == N &&
15858             "First element is expected to be the current node");
15859      InputRootReplacement = NewValue;
15860    } else {
15861      ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
15862    }
15863  }
15864  for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15865    DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
15866    DCI.AddToWorklist(OldNewValues.second.getNode());
15867  }
15868  return InputRootReplacement;
15869}
15870
15871// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
15872// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
15873// y will be the Passthru and cond will be the Mask.
// Body of the vwadd(u).wv/vwsub(u).wv + vmerge fold described above: when the
// narrow operand is a select of X against all-zeros, the zero lanes can be
// expressed through the instruction's own mask instead.
15875  unsigned Opc = N->getOpcode();
15878
15879  SDValue Y = N->getOperand(0);
15880  SDValue MergeOp = N->getOperand(1);
15881  unsigned MergeOpc = MergeOp.getOpcode();
15882
15883  if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
15884    return SDValue();
15885
15886  SDValue X = MergeOp->getOperand(1);
15887
  // The merge must die here; otherwise we would keep both forms alive.
15888  if (!MergeOp.hasOneUse())
15889    return SDValue();
15890
15891  // Passthru should be undef
15892  SDValue Passthru = N->getOperand(2);
15893  if (!Passthru.isUndef())
15894    return SDValue();
15895
15896  // Mask should be all ones
15897  SDValue Mask = N->getOperand(3);
15898  if (Mask.getOpcode() != RISCVISD::VMSET_VL)
15899    return SDValue();
15900
15901  // False value of MergeOp should be all zeros
15902  SDValue Z = MergeOp->getOperand(2);
15903
  // Peel off an insert_subvector into zeros/undef to reach the zero splat.
15904  if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
15905      (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
15906    Z = Z.getOperand(1);
15907
15908  if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
15909    return SDValue();
15910
  // Rebuild the node with Y as passthru and the merge condition as the mask.
15911  return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
15912                     {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15913                     N->getFlags());
15914}
15915
// DAG combine entry point for vwadd(u).wv / vwsub(u).wv nodes: first try to
// fold operand extends via combineOp_VLToVWOp_VL, then fall back to the
// vmerge-of-zeros fold in combineVWADDSUBWSelect.
15918                                              const RISCVSubtarget &Subtarget) {
15919  [[maybe_unused]] unsigned Opc = N->getOpcode();
15922
15923  if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
15924    return V;
15925
15926  return combineVWADDSUBWSelect(N, DCI.DAG);
15927}
15928
15929// Helper function for performMemPairCombine.
15930// Try to combine the memory loads/stores LSNode1 and LSNode2
15931// into a single memory pair operation.
// Materialize the paired memory operation for two compatible loads/stores.
// Emits TH_LWUD/TH_LWD/TH_LDD (loads) or TH_SWD/TH_SDD (stores) and replaces
// all uses of LSNode2; the caller replaces LSNode1 with the returned value.
15933                                 LSBaseSDNode *LSNode2, SDValue BasePtr,
15934                                 uint64_t Imm) {
15936  SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
15937
  // Pairing would create a cycle if either node is a predecessor of the
  // other, so bail out in that case.
15938  if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
15939      SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
15940    return SDValue();
15941
15943  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15944
15945  // The new operation has twice the width.
15946  MVT XLenVT = Subtarget.getXLenVT();
15947  EVT MemVT = LSNode1->getMemoryVT();
15948  EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15949  MachineMemOperand *MMO = LSNode1->getMemOperand();
15951      MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
15952
15953  if (LSNode1->getOpcode() == ISD::LOAD) {
15954    auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15955    unsigned Opcode;
    // i32 pairs: TH_LWUD zero-extends, TH_LWD sign-extends; i64 pairs use
    // TH_LDD.
15956    if (MemVT == MVT::i32)
15957      Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
15958    else
15959      Opcode = RISCVISD::TH_LDD;
15960
15961    SDValue Res = DAG.getMemIntrinsicNode(
15962        Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
15963        {LSNode1->getChain(), BasePtr,
15964         DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15965        NewMemVT, NewMMO);
15966
    // Re-bundle each loaded value with the shared output chain so the two
    // original loads can be replaced independently.
15967    SDValue Node1 =
15968        DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
15969    SDValue Node2 =
15970        DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
15971
15972    DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15973    return Node1;
15974  } else {
15975    unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
15976
15977    SDValue Res = DAG.getMemIntrinsicNode(
15978        Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15979        {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15980         BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15981        NewMemVT, NewMMO);
15982
15983    DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15984    return Res;
15985  }
15986}
15987
15988// Try to combine two adjacent loads/stores to a single pair instruction from
15989// the XTHeadMemPair vendor extension.
// Scan the chain's users for a second load/store adjacent to N and, if the
// pair satisfies the XTHeadMemPair encoding constraints, merge them via
// tryMemPairCombine.
15992  SelectionDAG &DAG = DCI.DAG;
15994  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15995
15996  // Target does not support load/store pair.
15997  if (!Subtarget.hasVendorXTHeadMemPair())
15998    return SDValue();
15999
16000  LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
16001  EVT MemVT = LSNode1->getMemoryVT();
  // Pointer operand index: operand 1 for loads, operand 2 for stores.
16002  unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
16003
16004  // No volatile, indexed or atomic loads/stores.
16005  if (!LSNode1->isSimple() || LSNode1->isIndexed())
16006    return SDValue();
16007
16008  // Function to get a base + constant representation from a memory value.
16009  auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
16010    if (Ptr->getOpcode() == ISD::ADD)
16011      if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
16012        return {Ptr->getOperand(0), C1->getZExtValue()};
16013    return {Ptr, 0};
16014  };
16015
16016  auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
16017
  // Candidate partners are other users of the same input chain with the same
  // opcode.
16018  SDValue Chain = N->getOperand(0);
16019  for (SDUse &Use : Chain->uses()) {
16020    if (Use.getUser() != N && Use.getResNo() == 0 &&
16021        Use.getUser()->getOpcode() == N->getOpcode()) {
16022      LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
16023
16024      // No volatile, indexed or atomic loads/stores.
16025      if (!LSNode2->isSimple() || LSNode2->isIndexed())
16026        continue;
16027
16028      // Check if LSNode1 and LSNode2 have the same type and extension.
16029      if (LSNode1->getOpcode() == ISD::LOAD)
16030        if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
16031            cast<LoadSDNode>(LSNode1)->getExtensionType())
16032          continue;
16033
16034      if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
16035        continue;
16036
16037      auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
16038
16039      // Check if the base pointer is the same for both instruction.
16040      if (Base1 != Base2)
16041        continue;
16042
16043      // Check if the offsets match the XTHeadMemPair encoding constraints.
16044      bool Valid = false;
16045      if (MemVT == MVT::i32) {
16046        // Check for adjacent i32 values and a 2-bit index.
16047        if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
16048          Valid = true;
16049      } else if (MemVT == MVT::i64) {
16050        // Check for adjacent i64 values and a 2-bit index.
16051        if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
16052          Valid = true;
16053      }
16054
16055      if (!Valid)
16056        continue;
16057
16058      // Try to combine.
16059      if (SDValue Res =
16060              tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
16061        return Res;
16062    }
16063  }
16064
16065  return SDValue();
16066}
16067
16068// Fold
16069// (fp_to_int (froundeven X)) -> fcvt X, rne
16070// (fp_to_int (ftrunc X)) -> fcvt X, rtz
16071// (fp_to_int (ffloor X)) -> fcvt X, rdn
16072// (fp_to_int (fceil X)) -> fcvt X, rup
16073// (fp_to_int (fround X)) -> fcvt X, rmm
16074// (fp_to_int (frint X)) -> fcvt X
16077 const RISCVSubtarget &Subtarget) {
16078 SelectionDAG &DAG = DCI.DAG;
16079 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16080 MVT XLenVT = Subtarget.getXLenVT();
16081
16082 SDValue Src = N->getOperand(0);
16083
16084 // Don't do this for strict-fp Src.
16085 if (Src->isStrictFPOpcode())
16086 return SDValue();
16087
16088 // Ensure the FP type is legal.
16089 if (!TLI.isTypeLegal(Src.getValueType()))
16090 return SDValue();
16091
16092 // Don't do this for f16 with Zfhmin and not Zfh.
16093 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16094 return SDValue();
16095
16096 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16097 // If the result is invalid, we didn't find a foldable instruction.
16098 if (FRM == RISCVFPRndMode::Invalid)
16099 return SDValue();
16100
16101 SDLoc DL(N);
16102 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
16103 EVT VT = N->getValueType(0);
16104
16105 if (VT.isVector() && TLI.isTypeLegal(VT)) {
16106 MVT SrcVT = Src.getSimpleValueType();
16107 MVT SrcContainerVT = SrcVT;
16108 MVT ContainerVT = VT.getSimpleVT();
16109 SDValue XVal = Src.getOperand(0);
16110
16111 // For widening and narrowing conversions we just combine it into a
16112 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
16113 // end up getting lowered to their appropriate pseudo instructions based on
16114 // their operand types
16115 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
16116 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
16117 return SDValue();
16118
16119 // Make fixed-length vectors scalable first
16120 if (SrcVT.isFixedLengthVector()) {
16121 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
16122 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
16123 ContainerVT =
16124 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
16125 }
16126
16127 auto [Mask, VL] =
16128 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
16129
16130 SDValue FpToInt;
16131 if (FRM == RISCVFPRndMode::RTZ) {
16132 // Use the dedicated trunc static rounding mode if we're truncating so we
16133 // don't need to generate calls to fsrmi/fsrm
16134 unsigned Opc =
16136 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
16137 } else {
16138 unsigned Opc =
16140 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
16141 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
16142 }
16143
16144 // If converted from fixed-length to scalable, convert back
16145 if (VT.isFixedLengthVector())
16146 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
16147
16148 return FpToInt;
16149 }
16150
16151 // Only handle XLen or i32 types. Other types narrower than XLen will
16152 // eventually be legalized to XLenVT.
16153 if (VT != MVT::i32 && VT != XLenVT)
16154 return SDValue();
16155
16156 unsigned Opc;
16157 if (VT == XLenVT)
16158 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16159 else
16161
16162 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
16163 DAG.getTargetConstant(FRM, DL, XLenVT));
16164 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
16165}
16166
16167// Fold
16168// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
16169// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
16170// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
16171// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
16172// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
16173// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
16176 const RISCVSubtarget &Subtarget) {
16177 SelectionDAG &DAG = DCI.DAG;
16178 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16179 MVT XLenVT = Subtarget.getXLenVT();
16180
16181 // Only handle XLen types. Other types narrower than XLen will eventually be
16182 // legalized to XLenVT.
16183 EVT DstVT = N->getValueType(0);
16184 if (DstVT != XLenVT)
16185 return SDValue();
16186
16187 SDValue Src = N->getOperand(0);
16188
16189 // Don't do this for strict-fp Src.
16190 if (Src->isStrictFPOpcode())
16191 return SDValue();
16192
16193 // Ensure the FP type is also legal.
16194 if (!TLI.isTypeLegal(Src.getValueType()))
16195 return SDValue();
16196
16197 // Don't do this for f16 with Zfhmin and not Zfh.
16198 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16199 return SDValue();
16200
16201 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16202
16203 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16204 if (FRM == RISCVFPRndMode::Invalid)
16205 return SDValue();
16206
16207 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
16208
16209 unsigned Opc;
16210 if (SatVT == DstVT)
16211 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16212 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
16214 else
16215 return SDValue();
16216 // FIXME: Support other SatVTs by clamping before or after the conversion.
16217
16218 Src = Src.getOperand(0);
16219
16220 SDLoc DL(N);
16221 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
16222 DAG.getTargetConstant(FRM, DL, XLenVT));
16223
16224 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
16225 // extend.
16226 if (Opc == RISCVISD::FCVT_WU_RV64)
16227 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
16228
16229 // RISC-V FP-to-int conversions saturate to the destination register size, but
16230 // don't produce 0 for nan.
16231 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
16232 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
16233}
16234
16235// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
16236// smaller than XLenVT.
// Body of the (bitreverse (bswap X)) -> BREV8 fold documented above. Only
// fires for power-of-two scalar types strictly narrower than XLen.
16238                                          const RISCVSubtarget &Subtarget) {
16239  assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
16240
16241  SDValue Src = N->getOperand(0);
16242  if (Src.getOpcode() != ISD::BSWAP)
16243    return SDValue();
16244
16245  EVT VT = N->getValueType(0);
16246  if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
16247      !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
16248    return SDValue();
16249
16250  SDLoc DL(N);
16251  return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
16252}
16253
// Fold vp.reverse(vp.load(...)) into a negatively-strided vp.strided.load
// that starts at the last element, removing the explicit reverse.
16255                                          const RISCVSubtarget &Subtarget) {
16256  // Fold:
16257  //   vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
16258
16259  // Check if its first operand is a vp.load.
16260  auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
16261  if (!VPLoad)
16262    return SDValue();
16263
16264  EVT LoadVT = VPLoad->getValueType(0);
16265  // We do not have a strided_load version for masks, and the evl of vp.reverse
16266  // and vp.load should always be the same.
16267  if (!LoadVT.getVectorElementType().isByteSized() ||
16268      N->getOperand(2) != VPLoad->getVectorLength() ||
16269      !N->getOperand(0).hasOneUse())
16270    return SDValue();
16271
16272  // Check if the mask of outer vp.reverse are all 1's.
16273  if (!isOneOrOneSplat(N->getOperand(1)))
16274    return SDValue();
16275
16276  SDValue LoadMask = VPLoad->getMask();
16277  // If Mask is all ones, then load is unmasked and can be reversed.
16278  if (!isOneOrOneSplat(LoadMask)) {
16279    // If the mask is not all ones, we can reverse the load if the mask was also
16280    // reversed by an unmasked vp.reverse with the same EVL.
16281    if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16282        !isOneOrOneSplat(LoadMask.getOperand(1)) ||
16283        LoadMask.getOperand(2) != VPLoad->getVectorLength())
16284      return SDValue();
    // Use the un-reversed mask with the strided (reversing) load.
16285    LoadMask = LoadMask.getOperand(0);
16286  }
16287
16288  // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
16289  SDLoc DL(N);
16290  MVT XLenVT = Subtarget.getXLenVT();
16291  SDValue NumElem = VPLoad->getVectorLength();
16292  uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
16293
16294  SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
16295                              DAG.getConstant(1, DL, XLenVT));
16296  SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
16297                              DAG.getConstant(ElemWidthByte, DL, XLenVT));
16298  SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
  // Negative stride walks the elements back-to-front.
16299  SDValue Stride = DAG.getConstant(-ElemWidthByte, DL, XLenVT);
16300
16302  MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
16304      PtrInfo, VPLoad->getMemOperand()->getFlags(),
16305      LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
16306
16307  SDValue Ret = DAG.getStridedLoadVP(
16308      LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
16309      VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
16310
  // Preserve the original load's chain users.
16311  DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
16312
16313  return Ret;
16314}
16315
// Fold vp.store(vp.reverse(VAL), ...) into a negatively-strided
// vp.strided.store of VAL starting at the last element's address.
16317                                        const RISCVSubtarget &Subtarget) {
16318  // Fold:
16319  //   vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
16320  //   -1, MASK)
16321  auto *VPStore = cast<VPStoreSDNode>(N);
16322
16323  if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
16324    return SDValue();
16325
16326  SDValue VPReverse = VPStore->getValue();
16327  EVT ReverseVT = VPReverse->getValueType(0);
16328
16329  // We do not have a strided_store version for masks, and the evl of vp.reverse
16330  // and vp.store should always be the same.
16331  if (!ReverseVT.getVectorElementType().isByteSized() ||
16332      VPStore->getVectorLength() != VPReverse.getOperand(2) ||
16333      !VPReverse.hasOneUse())
16334    return SDValue();
16335
16336  SDValue StoreMask = VPStore->getMask();
16337  // If Mask is all ones, then the store is unmasked and can be reversed.
16338  if (!isOneOrOneSplat(StoreMask)) {
16339    // If the mask is not all ones, we can reverse the store if the mask was
16340    // also reversed by an unmasked vp.reverse with the same EVL.
16341    if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16342        !isOneOrOneSplat(StoreMask.getOperand(1)) ||
16343        StoreMask.getOperand(2) != VPStore->getVectorLength())
16344      return SDValue();
    // Use the un-reversed mask with the strided (reversing) store.
16345    StoreMask = StoreMask.getOperand(0);
16346  }
16347
16348  // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
16349  SDLoc DL(N);
16350  MVT XLenVT = Subtarget.getXLenVT();
16351  SDValue NumElem = VPStore->getVectorLength();
16352  uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
16353
16354  SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
16355                              DAG.getConstant(1, DL, XLenVT));
16356  SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
16357                              DAG.getConstant(ElemWidthByte, DL, XLenVT));
16358  SDValue Base =
16359      DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
  // Negative stride walks the elements back-to-front.
16360  SDValue Stride = DAG.getConstant(-ElemWidthByte, DL, XLenVT);
16361
16363  MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
16365      PtrInfo, VPStore->getMemOperand()->getFlags(),
16366      LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
16367
  // Store the pre-reverse value (operand 0 of the vp.reverse).
16368  return DAG.getStridedStoreVP(
16369      VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
16370      VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
16371      VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
16372      VPStore->isTruncatingStore(), VPStore->isCompressingStore());
16373}
16374
16375// Peephole avgceil pattern.
16376// %1 = zext <N x i8> %a to <N x i32>
16377// %2 = zext <N x i8> %b to <N x i32>
16378// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
16379// %4 = add nuw nsw <N x i32> %3, %2
16380// %5 = lshr <N x i32> %4, splat (i32 1)
16381// %6 = trunc <N x i32> %5 to <N x i8>
// Body of the avgceil peephole documented above: match
// vp.trunc(vp.lshr(vp.add(vp.add(zext a, zext b), 1), 1)) and emit
// AVGCEILU_VL (selected as vaaddu with RNU rounding).
16383                                        const RISCVSubtarget &Subtarget) {
16384  EVT VT = N->getValueType(0);
16385
16386  // Ignore fixed vectors.
16387  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16388  if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
16389    return SDValue();
16390
16391  SDValue In = N->getOperand(0);
16392  SDValue Mask = N->getOperand(1);
16393  SDValue VL = N->getOperand(2);
16394
16395  // Input should be a vp_srl with same mask and VL.
16396  if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
16397      In.getOperand(3) != VL)
16398    return SDValue();
16399
16400  // Shift amount should be 1.
16401  if (!isOneOrOneSplat(In.getOperand(1)))
16402    return SDValue();
16403
16404  // Shifted value should be a vp_add with same mask and VL.
16405  SDValue LHS = In.getOperand(0);
16406  if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
16407      LHS.getOperand(3) != VL)
16408    return SDValue();
16409
16410  SDValue Operands[3];
16411
16412  // Matches another VP_ADD with same VL and Mask.
16413  auto FindAdd = [&](SDValue V, SDValue Other) {
16414    if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
16415        V.getOperand(3) != VL)
16416      return false;
16417
16418    Operands[0] = Other;
16419    Operands[1] = V.getOperand(1);
16420    Operands[2] = V.getOperand(0);
16421    return true;
16422  };
16423
16424  // We need to find another VP_ADD in one of the operands.
16425  SDValue LHS0 = LHS.getOperand(0);
16426  SDValue LHS1 = LHS.getOperand(1);
16427  if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
16428    return SDValue();
16429
16430  // Now we have three operands of two additions. Check that one of them is a
16431  // constant vector with ones.
16432  auto I = llvm::find_if(Operands,
16433                         [](const SDValue &Op) { return isOneOrOneSplat(Op); });
16434  if (I == std::end(Operands))
16435    return SDValue();
16436  // We found a vector with ones, move if it to the end of the Operands array.
16437  std::swap(*I, Operands[2]);
16438
16439  // Make sure the other 2 operands can be promoted from the result type.
16440  for (SDValue Op : drop_end(Operands)) {
16441    if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
16442        Op.getOperand(2) != VL)
16443      return SDValue();
16444    // Input must be the same size or smaller than our result.
16445    if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
16446      return SDValue();
16447  }
16448
16449  // Pattern is detected.
16450  // Rebuild the zero extends in case the inputs are smaller than our result.
16451  SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
16452                               Operands[0].getOperand(0), Mask, VL);
16453  SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
16454                               Operands[1].getOperand(0), Mask, VL);
16455  // Build a AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
16456  // mode.
16457  SDLoc DL(N);
16458  return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
16459                     {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
16460}
16461
16462// Convert from one FMA opcode to another based on whether we are negating the
16463// multiply result and/or the accumulator.
16464// NOTE: Only supports RVV operations with VL.
// Convert from one FMA opcode to another based on whether we are negating the
// multiply result and/or the accumulator.
// NOTE: Only supports RVV operations with VL.
16465static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
16466  // Negating the multiply result changes ADD<->SUB and toggles 'N'.
16467  if (NegMul) {
16468    // clang-format off
16469    switch (Opcode) {
16470    default: llvm_unreachable("Unexpected opcode");
16471    case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16472    case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16473    case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16474    case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16479    }
16480    // clang-format on
16481  }
16482
16483  // Negating the accumulator changes ADD<->SUB.
16484  if (NegAcc) {
16485    // clang-format off
16486    switch (Opcode) {
16487    default: llvm_unreachable("Unexpected opcode");
16488    case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16489    case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16490    case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16491    case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16496    }
16497    // clang-format on
16498  }
16499
16500  return Opcode;
16501}
16502
16504  // Fold FNEG_VL into FMA opcodes.
16505  // The first operand of strict-fp is chain.
16506  bool IsStrict =
16507      DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
16508  unsigned Offset = IsStrict ? 1 : 0;
16509  SDValue A = N->getOperand(0 + Offset);
16510  SDValue B = N->getOperand(1 + Offset);
16511  SDValue C = N->getOperand(2 + Offset);
16512  SDValue Mask = N->getOperand(3 + Offset);
16513  SDValue VL = N->getOperand(4 + Offset);
16514
  // Strip an FNEG_VL only when it shares this node's mask and VL; otherwise
  // the negation applies to different lanes and cannot be folded.
16515  auto invertIfNegative = [&Mask, &VL](SDValue &V) {
16516    if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
16517        V.getOperand(2) == VL) {
16518      // Return the negated input.
16519      V = V.getOperand(0);
16520      return true;
16521    }
16522
16523    return false;
16524  };
16525
16526  bool NegA = invertIfNegative(A);
16527  bool NegB = invertIfNegative(B);
16528  bool NegC = invertIfNegative(C);
16529
16530  // If no operands are negated, we're done.
16531  if (!NegA && !NegB && !NegC)
16532    return SDValue();
16533
  // Negating either multiplicand negates the product (NegA != NegB); a double
  // negation (NegA && NegB) cancels out.
16534  unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
16535  if (IsStrict)
16536    return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
16537                       {N->getOperand(0), A, B, C, Mask, VL});
16538  return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
16539                     VL);
16540}
16541
// DAG combine for RVV FMA nodes: fold FNEG_VL operands first, then (for
// non-strict opcodes) try the widening combine.
16544                                       const RISCVSubtarget &Subtarget) {
16545  SelectionDAG &DAG = DCI.DAG;
16546
16548    return V;
16549
16550  // FIXME: Ignore strict opcodes for now.
16551  if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
16552    return SDValue();
16553
16554  return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16555}
16556
// DAG combine for ISD::SRA: see the two folds documented in the body. Both
// rewrite shift-extend patterns into SLLI+SRAI / sext_inreg-based forms.
16558                                 const RISCVSubtarget &Subtarget) {
16559  assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
16560
16561  EVT VT = N->getValueType(0);
16562
16563  if (VT != Subtarget.getXLenVT())
16564    return SDValue();
16565
16566  if (!isa<ConstantSDNode>(N->getOperand(1)))
16567    return SDValue();
16568  uint64_t ShAmt = N->getConstantOperandVal(1);
16569
16570  SDValue N0 = N->getOperand(0);
16571
16572  // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
16573  // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
16574  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
16575    unsigned ExtSize =
16576        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
16577    if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
16578        N0.getOperand(0).hasOneUse() &&
16579        isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
16580      uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
16581      if (LShAmt < ExtSize) {
16582        unsigned Size = VT.getSizeInBits();
16583        SDLoc ShlDL(N0.getOperand(0));
16584        SDValue Shl =
16585            DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
16586                        DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
16587        SDLoc DL(N);
16588        return DAG.getNode(ISD::SRA, DL, VT, Shl,
16589                           DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
16590      }
16591    }
16592  }
16593
  // The remaining fold is RV64-only and needs a shift amount of at most 32.
16594  if (ShAmt > 32 || VT != MVT::i64)
16595    return SDValue();
16596
16597  // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
16598  // FIXME: Should this be a generic combine? There's a similar combine on X86.
16599  //
16600  // Also try these folds where an add or sub is in the middle.
16601  // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
16602  // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
16603  SDValue Shl;
16604  ConstantSDNode *AddC = nullptr;
16605
16606  // We might have an ADD or SUB between the SRA and SHL.
16607  bool IsAdd = N0.getOpcode() == ISD::ADD;
16608  if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
16609    // Other operand needs to be a constant we can modify.
16610    AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
16611    if (!AddC)
16612      return SDValue();
16613
16614    // AddC needs to have at least 32 trailing zeros.
16615    if (llvm::countr_zero(AddC->getZExtValue()) < 32)
16616      return SDValue();
16617
16618    // All users should be a shift by constant less than or equal to 32. This
16619    // ensures we'll do this optimization for each of them to produce an
16620    // add/sub+sext_inreg they can all share.
16621    for (SDNode *U : N0->users()) {
16622      if (U->getOpcode() != ISD::SRA ||
16623          !isa<ConstantSDNode>(U->getOperand(1)) ||
16624          U->getConstantOperandVal(1) > 32)
16625        return SDValue();
16626    }
16627
16628    Shl = N0.getOperand(IsAdd ? 0 : 1);
16629  } else {
16630    // Not an ADD or SUB.
16631    Shl = N0;
16632  }
16633
16634  // Look for a shift left by 32.
16635  if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
16636      Shl.getConstantOperandVal(1) != 32)
16637    return SDValue();
16638
16639  // If we didn't look through an add/sub, then the shl should have one use.
16640  // If we did look through an add/sub, the sext_inreg we create is free so
16641  // we're only creating 2 new instructions. It's enough to only remove the
16642  // original sra+add/sub.
16643  if (!AddC && !Shl.hasOneUse())
16644    return SDValue();
16645
16646  SDLoc DL(N);
16647  SDValue In = Shl.getOperand(0);
16648
16649  // If we looked through an ADD or SUB, we need to rebuild it with the shifted
16650  // constant.
16651  if (AddC) {
16652    SDValue ShiftedAddC =
16653        DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
16654    if (IsAdd)
16655      In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
16656    else
16657      In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
16658  }
16659
16660  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
16661                             DAG.getValueType(MVT::i32));
16662  if (ShAmt == 32)
16663    return SExt;
16664
16665  return DAG.getNode(
16666      ISD::SHL, DL, MVT::i64, SExt,
16667      DAG.getConstant(32 - ShAmt, DL, MVT::i64));
16668}
16669
// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
// the result is used as the condition of a br_cc or select_cc we can invert,
// inverting the setcc is free, and Z is 0/1. Caller will invert the
// br_cc/select_cc.
// NOTE(review): the function signature line appears to have been dropped by
// this listing (call site suggests it takes the condition SDValue and a
// SelectionDAG & and returns SDValue) -- confirm against upstream.
  bool IsAnd = Cond.getOpcode() == ISD::AND;
  if (!IsAnd && Cond.getOpcode() != ISD::OR)
    return SDValue();

  // The and/or must have a single user so the caller can safely replace it.
  if (!Cond.hasOneUse())
    return SDValue();

  SDValue Setcc = Cond.getOperand(0);
  SDValue Xor = Cond.getOperand(1);
  // Canonicalize setcc to LHS.
  if (Setcc.getOpcode() != ISD::SETCC)
    std::swap(Setcc, Xor);
  // LHS should be a setcc and RHS should be an xor.
  if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
      Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
    return SDValue();

  // If the condition is an And, SimplifyDemandedBits may have changed
  // (xor Z, 1) to (not Z).
  SDValue Xor1 = Xor.getOperand(1);
  if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
    return SDValue();

  EVT VT = Cond.getValueType();
  SDValue Xor0 = Xor.getOperand(0);

  // The LHS of the xor needs to be 0/1.
  // NOTE(review): the line computing the demanded-bits Mask used below
  // appears dropped by this listing -- confirm against upstream.
  if (!DAG.MaskedValueIsZero(Xor0, Mask))
    return SDValue();

  // We can only invert integer setccs.
  EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
  if (!SetCCOpVT.isScalarInteger())
    return SDValue();

  ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
  if (ISD::isIntEqualitySetCC(CCVal)) {
    // Equality compares always invert freely.
    CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
                         Setcc.getOperand(1), CCVal);
  } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
    // Invert (setlt 0, X) by converting to (setlt X, 1).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
                         DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
  } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
    // (setlt X, 1) by converting to (setlt 0, X).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
                         DAG.getConstant(0, SDLoc(Setcc), VT),
                         Setcc.getOperand(0), CCVal);
  } else
    return SDValue();

  // De Morgan: with the setcc and the xor both inverted, swap AND<->OR.
  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
  return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
}
16731
// Perform common combines for BR_CC and SELECT_CC conditions.
// LHS/RHS/CC are in-out parameters: on success they are rewritten to an
// equivalent, cheaper comparison and true is returned so the caller can
// rebuild the br_cc/select_cc node.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // As far as arithmetic right shift always saves the sign,
  // shift can be omitted.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
      LHS.getOpcode() == ISD::SRA) {
    LHS = LHS.getOperand(0);
    return true;
  }

  // The remaining folds only apply to integer eq/ne comparisons.
  if (!ISD::isIntEqualitySetCC(CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
      LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
    bool Invert = CCVal == ISD::SETEQ;
    CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
    if (Invert)
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    CC = DAG.getCondCode(CCVal);
    return true;
  }

  // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
  if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    return true;
  }

  // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
  if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue LHS0 = LHS.getOperand(0);
    if (LHS0.getOpcode() == ISD::AND &&
        LHS0.getOperand(1).getOpcode() == ISD::Constant) {
      uint64_t Mask = LHS0.getConstantOperandVal(1);
      uint64_t ShAmt = LHS.getConstantOperandVal(1);
      if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
        // A single-bit test: move the bit to the sign position and compare
        // against zero with ge/lt instead.
        CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        CC = DAG.getCondCode(CCVal);

        ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
        LHS = LHS0.getOperand(0);
        if (ShAmt != 0)
          LHS =
              DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
        return true;
      }
    }
  }

  // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1.
  // This can occur when legalizing some floating point comparisons.
  APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
  if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
    CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
    CC = DAG.getCondCode(CCVal);
    RHS = DAG.getConstant(0, DL, LHS.getValueType());
    return true;
  }

  // Try to de-morgan an and/or of setccs feeding the compare-with-zero;
  // if it succeeds the condition must be inverted to compensate.
  if (isNullConstant(RHS)) {
    if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      CC = DAG.getCondCode(CCVal);
      LHS = NewCond;
      return true;
    }
  }

  return false;
}
16820
// Fold
// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
// Swapped indicates the arms were given in (FalseVal, TrueVal) order, i.e.
// the caller is matching (select C, Y, (binop Y, X)).
// NOTE(review): the first line of the signature appears dropped by this
// listing (expected to take SDNode *N and SelectionDAG &DAG first) --
// confirm against upstream.
                                   SDValue TrueVal, SDValue FalseVal,
                                   bool Swapped) {
  // Shifts and sub are not commutative: only their first operand may be
  // matched against FalseVal.
  bool Commutative = true;
  unsigned Opc = TrueVal.getOpcode();
  switch (Opc) {
  default:
    return SDValue();
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::SUB:
    Commutative = false;
    break;
  case ISD::ADD:
  case ISD::OR:
  case ISD::XOR:
    break;
  }

  // A constant FalseVal is better served by other select folds.
  if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
    return SDValue();

  // Find which operand of the binop equals the other select arm.
  unsigned OpToFold;
  if (FalseVal == TrueVal.getOperand(0))
    OpToFold = 0;
  else if (Commutative && FalseVal == TrueVal.getOperand(1))
    OpToFold = 1;
  else
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
  EVT OtherOpVT = OtherOp.getValueType();
  // Use the binop's neutral element for the untaken arm; for the
  // non-commutative ops above, zero is the correct RHS identity.
  SDValue IdentityOperand =
      DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
  if (!Commutative)
    IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
  assert(IdentityOperand && "No identity operand!");

  if (Swapped)
    std::swap(OtherOp, IdentityOperand);
  SDValue NewSel =
      DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
  return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
}
16873
// This tries to get rid of `select` and `icmp` that are being used to handle
// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
// Folds (select (setcc X, 0, eq/ne), 0, (cttz/ctlz X)) into
// (and (cttz/ctlz X), BitWidth-1): the count at zero equals BitWidth, and
// masking with BitWidth-1 maps that to 0, matching the select's zero arm.
// NOTE(review): the function signature line appears dropped by this listing
// (expected: SDNode *N and SelectionDAG &DAG, returning SDValue) -- confirm
// against upstream.
  SDValue Cond = N->getOperand(0);

  // This represents either CTTZ or CTLZ instruction.
  SDValue CountZeroes;

  // The select arm chosen when the input is zero.
  SDValue ValOnZero;

  if (Cond.getOpcode() != ISD::SETCC)
    return SDValue();

  if (!isNullConstant(Cond->getOperand(1)))
    return SDValue();

  // Pick the arms according to the compare polarity.
  ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
  if (CCVal == ISD::CondCode::SETEQ) {
    CountZeroes = N->getOperand(2);
    ValOnZero = N->getOperand(1);
  } else if (CCVal == ISD::CondCode::SETNE) {
    CountZeroes = N->getOperand(1);
    ValOnZero = N->getOperand(2);
  } else {
    return SDValue();
  }

  // Look through a truncate or zero-extend of the count.
  if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
      CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
    CountZeroes = CountZeroes.getOperand(0);

  if (CountZeroes.getOpcode() != ISD::CTTZ &&
      CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
      CountZeroes.getOpcode() != ISD::CTLZ &&
      CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
    return SDValue();

  if (!isNullConstant(ValOnZero))
    return SDValue();

  // The compared value must be the count's own operand.
  SDValue CountZeroesArgument = CountZeroes->getOperand(0);
  if (Cond->getOperand(0) != CountZeroesArgument)
    return SDValue();

  // The _ZERO_UNDEF forms are undefined at zero; switch to the defined forms
  // since after this fold the zero input is actually reachable.
  if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
    CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
                              CountZeroes.getValueType(), CountZeroesArgument);
  } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
    CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
                              CountZeroes.getValueType(), CountZeroesArgument);
  }

  unsigned BitWidth = CountZeroes.getValueSizeInBits();
  SDValue BitWidthMinusOne =
      DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());

  auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
                             CountZeroes, BitWidthMinusOne);
  return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
}
16934
// Rewrite (select (setcc eq (and X, C), 0), T, F) as the inverted compare
// with swapped arms so a single-bit test can be selected as BEXTI (Zbs) fed
// into a conditional move (Zicond / XVentanaCondOps).
// NOTE(review): the first signature line appears dropped by this listing
// (expected to take SDNode *N and SelectionDAG &DAG before the parameter
// below) -- confirm against upstream.
                                const RISCVSubtarget &Subtarget) {
  SDValue Cond = N->getOperand(0);
  SDValue True = N->getOperand(1);
  SDValue False = N->getOperand(2);
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return SDValue();

  // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
  // BEXTI, where C is power of 2.
  if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
      (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
    SDValue LHS = Cond.getOperand(0);
    SDValue RHS = Cond.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
        isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
      const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
      // Only profitable when the mask doesn't fit a 12-bit immediate,
      // i.e. the AND can't simply be an ANDI.
      if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
        return DAG.getSelect(DL, VT,
                             DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
                             False, True);
    }
  }
  return SDValue();
}
16965
16966static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
16967 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
16968 return false;
16969
16970 SwapCC = false;
16971 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
16972 std::swap(TrueVal, FalseVal);
16973 SwapCC = true;
16974 }
16975
16976 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
16977 return false;
16978
16979 SDValue A = FalseVal.getOperand(0);
16980 SDValue B = FalseVal.getOperand(1);
16981 // Add is commutative, so check both orders
16982 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
16983 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
16984}
16985
/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
/// This allows us match a vadd.vv fed by a masked vrsub, which reduces
/// register pressure over the add followed by masked vsub sequence.
// NOTE(review): the function signature line appears dropped by this listing
// (expected: SDNode *N and SelectionDAG &DAG, returning SDValue) -- confirm
// against upstream.
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue CC = N->getOperand(0);
  SDValue TrueVal = N->getOperand(1);
  SDValue FalseVal = N->getOperand(2);

  bool SwapCC;
  if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
    return SDValue();

  // After matching, the sub arm supplies the shared operands a and b.
  SDValue Sub = SwapCC ? TrueVal : FalseVal;
  SDValue A = Sub.getOperand(0);
  SDValue B = Sub.getOperand(1);

  // Arrange the select such that we can match a masked
  // vrsub.vi to perform the conditional negate
  SDValue NegB = DAG.getNegative(B, DL, VT);
  if (!SwapCC)
    CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
  SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
  return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
}
17012
// Top-level combine for ISD::SELECT: tries the cttz/ctlz fold, the inverted
// setcc (BEXTI) rewrite, and finally folding the select into one arm's binop.
// NOTE(review): the first signature line appears dropped by this listing
// (expected to take SDNode *N and SelectionDAG &DAG before the parameter
// below) -- confirm against upstream.
                                     const RISCVSubtarget &Subtarget) {
  if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
    return Folded;

  if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
    return V;

  // With conditional-move fusion the select itself is cheap; keep it rather
  // than folding it into the arms.
  if (Subtarget.hasConditionalMoveFusion())
    return SDValue();

  SDValue TrueVal = N->getOperand(1);
  SDValue FalseVal = N->getOperand(2);
  // Try both arm orders; Swapped tells the helper which arm was the binop.
  if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
    return V;
  return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
}
17030
/// If we have a build_vector where each lane is binop X, C, where C
/// is a constant (but not necessarily the same constant on all lanes),
/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
/// We assume that materializing a constant build vector will be no more
/// expensive that performing O(n) binops.
// NOTE(review): the first signature line appears dropped by this listing
// (expected to take SDNode *N and SelectionDAG &DAG before the parameters
// below) -- confirm against upstream.
                                           const RISCVSubtarget &Subtarget,
                                           const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  assert(!VT.isScalableVector() && "unexpected build vector");

  // Nothing to gain for a single-element vector.
  if (VT.getVectorNumElements() == 1)
    return SDValue();

  // The first lane's opcode is the opcode every lane must match.
  const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
  if (!TLI.isBinOp(Opcode))
    return SDValue();

  if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
    return SDValue();

  // This BUILD_VECTOR involves an implicit truncation, and sinking
  // truncates through binops is non-trivial.
  if (N->op_begin()->getValueType() != VT.getVectorElementType())
    return SDValue();

  SmallVector<SDValue> LHSOps;
  SmallVector<SDValue> RHSOps;
  for (SDValue Op : N->ops()) {
    if (Op.isUndef()) {
      // We can't form a divide or remainder from undef.
      if (!DAG.isSafeToSpeculativelyExecute(Opcode))
        return SDValue();

      LHSOps.push_back(Op);
      RHSOps.push_back(Op);
      continue;
    }

    // TODO: We can handle operations which have an neutral rhs value
    // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
    // of profit in a more explicit manner.
    if (Op.getOpcode() != Opcode || !Op.hasOneUse())
      return SDValue();

    LHSOps.push_back(Op.getOperand(0));
    // Every lane's RHS must be a (possibly distinct) constant.
    if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
        !isa<ConstantFPSDNode>(Op.getOperand(1)))
      return SDValue();
    // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
    // have different LHS and RHS types.
    if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
      return SDValue();

    RHSOps.push_back(Op.getOperand(1));
  }

  return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
                     DAG.getBuildVector(VT, DL, RHSOps));
}
17093
// Combine for insert_vector_elt: sink the insert through a matching vector
// binop with constant RHS, or push it into the relevant source operand of a
// concat_vectors.
// NOTE(review): the first signature line appears dropped by this listing
// (expected to take SDNode *N and SelectionDAG &DAG before the parameters
// below) -- confirm against upstream.
                                               const RISCVSubtarget &Subtarget,
                                               const RISCVTargetLowering &TLI) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  EVT VT = InVec.getValueType();
  if (VT.isScalableVector())
    return SDValue();

  if (!InVec.hasOneUse())
    return SDValue();

  // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
  // move the insert_vector_elts into the arms of the binop. Note that
  // the new RHS must be a constant.
  const unsigned InVecOpcode = InVec->getOpcode();
  if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
      InVal.hasOneUse()) {
    SDValue InVecLHS = InVec->getOperand(0);
    SDValue InVecRHS = InVec->getOperand(1);
    SDValue InValLHS = InVal->getOperand(0);
    SDValue InValRHS = InVal->getOperand(1);

    // NOTE(review): a guard line (presumably the constant check on InVecRHS,
    // mirroring the InValRHS check below) appears dropped by this listing --
    // confirm against upstream.
      return SDValue();
    if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
      return SDValue();
    // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
    // have different LHS and RHS types.
    if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
      return SDValue();
    // NOTE(review): the lines creating the two new INSERT_VECTOR_ELT nodes
    // (defining LHS and RHS) appear truncated by this listing; only their
    // argument continuations remain -- confirm against upstream.
                              InVecLHS, InValLHS, EltNo);
                              InVecRHS, InValRHS, EltNo);
    return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
  }

  // Given insert_vector_elt (concat_vectors ...), InVal, Elt
  // move the insert_vector_elt to the source operand of the concat_vector.
  if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
    return SDValue();

  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  if (!IndexC)
    return SDValue();
  unsigned Elt = IndexC->getZExtValue();

  EVT ConcatVT = InVec.getOperand(0).getValueType();
  if (ConcatVT.getVectorElementType() != InVal.getValueType())
    return SDValue();
  unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
  // Position of the element within its sub-vector.
  SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);

  // Which concat operand the element lands in.
  unsigned ConcatOpIdx = Elt / ConcatNumElts;
  SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
  ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
                         ConcatOp, InVal, NewIdx);

  SmallVector<SDValue> ConcatOps;
  ConcatOps.append(InVec->op_begin(), InVec->op_end());
  ConcatOps[ConcatOpIdx] = ConcatOp;
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
}
17161
// If we're concatenating a series of vector loads like
// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
// Then we can turn this into a strided load by widening the vector elements
// vlse32 p, stride=n
// NOTE(review): the first signature line appears dropped by this listing
// (expected to take SDNode *N and SelectionDAG &DAG before the parameters
// below) -- confirm against upstream.
                                    const RISCVSubtarget &Subtarget,
                                    const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only perform this combine on legal MVTs.
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  // TODO: Potentially extend this to scalable vectors
  if (VT.isScalableVector())
    return SDValue();

  // The first operand anchors the match: it must be a plain, simple load
  // whose only use is this concat.
  auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
  if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
      !SDValue(BaseLd, 0).hasOneUse())
    return SDValue();

  EVT BaseLdVT = BaseLd->getValueType(0);

  // Go through the loads and check that they're strided
  // NOTE(review): the declaration of the Lds container (presumably a
  // SmallVector of LoadSDNode*) appears dropped by this listing -- confirm
  // against upstream.
  Lds.push_back(BaseLd);
  Align Align = BaseLd->getAlign();
  for (SDValue Op : N->ops().drop_front()) {
    auto *Ld = dyn_cast<LoadSDNode>(Op);
    if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
        Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
        Ld->getValueType(0) != BaseLdVT)
      return SDValue();

    Lds.push_back(Ld);

    // The common alignment is the most restrictive (smallest) of all the loads
    Align = std::min(Align, Ld->getAlign());
  }

  // A stride is either a known constant byte distance or an SDValue, plus a
  // flag saying whether it must be negated.
  using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
  auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
                           LoadSDNode *Ld2) -> std::optional<PtrDiff> {
    // If the load ptrs can be decomposed into a common (Base + Index) with a
    // common constant stride, then return the constant stride.
    BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
    BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
    if (BIO1.equalBaseIndex(BIO2, DAG))
      return {{BIO2.getOffset() - BIO1.getOffset(), false}};

    // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
    SDValue P1 = Ld1->getBasePtr();
    SDValue P2 = Ld2->getBasePtr();
    if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
      return {{P2.getOperand(1), false}};
    if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
      return {{P1.getOperand(1), true}};

    return std::nullopt;
  };

  // Get the distance between the first and second loads
  auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
  if (!BaseDiff)
    return SDValue();

  // Check all the loads are the same distance apart
  for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
    if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
      return SDValue();

  // TODO: At this point, we've successfully matched a generalized gather
  // load. Maybe we should emit that, and then move the specialized
  // matchers above and below into a DAG combine?

  // Get the widened scalar type, e.g. v4i8 -> i64
  unsigned WideScalarBitWidth =
      BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
  MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);

  // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
  MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
  if (!TLI.isTypeLegal(WideVecVT))
    return SDValue();

  // Check that the operation is legal
  if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
    return SDValue();

  auto [StrideVariant, MustNegateStride] = *BaseDiff;
  SDValue Stride =
      std::holds_alternative<SDValue>(StrideVariant)
          ? std::get<SDValue>(StrideVariant)
          : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
                                  Lds[0]->getOffset().getValueType());
  if (MustNegateStride)
    Stride = DAG.getNegative(Stride, DL, Stride.getValueType());

  SDValue AllOneMask =
      DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
                   DAG.getConstant(1, DL, MVT::i1));

  uint64_t MemSize;
  if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
      ConstStride && ConstStride->getSExtValue() >= 0)
    // total size = (elsize * n) + (stride - elsize) * (n-1)
    //            = elsize + stride * (n-1)
    MemSize = WideScalarVT.getSizeInBits() +
              ConstStride->getSExtValue() * (N->getNumOperands() - 1);
  else
    // If Stride isn't constant, then we can't know how much it will load
    // NOTE(review): the assignment of MemSize (presumably to an unknown-size
    // sentinel) appears dropped by this listing -- confirm against upstream.

  // NOTE(review): the line creating the MachineMemOperand (defining MMO used
  // below) appears truncated by this listing -- confirm against upstream.
      BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
      Align);

  SDValue StridedLoad = DAG.getStridedLoadVP(
      WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
      AllOneMask,
      DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);

  // Preserve the chain ordering of every load we replaced.
  for (SDValue Ld : N->ops())
    DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);

  return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
}
17291
// Combine for fixed-vector vector_shuffle: (1) recognize a select-like mask
// over (add a, b)/(sub a, b) and turn it into add + conditional negate;
// (2) custom-legalize shuffles with elements wider than ELEN by halving the
// element size.
// NOTE(review): the first signature line appears dropped by this listing
// (expected to take SDNode *N and SelectionDAG &DAG before the parameters
// below) -- confirm against upstream.
                                       const RISCVSubtarget &Subtarget,
                                       const RISCVTargetLowering &TLI) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  const unsigned ElementSize = VT.getScalarSizeInBits();
  const unsigned NumElts = VT.getVectorNumElements();
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
  MVT XLenVT = Subtarget.getXLenVT();

  // Recognized a disguised select of add/sub.
  bool SwapCC;
  if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
      matchSelectAddSub(V1, V2, SwapCC)) {
    SDValue Sub = SwapCC ? V1 : V2;
    SDValue A = Sub.getOperand(0);
    SDValue B = Sub.getOperand(1);

    // Build an i1 condition vector: true lanes take from V1.
    SmallVector<SDValue> MaskVals;
    for (int MaskIndex : Mask) {
      bool SelectMaskVal = (MaskIndex < (int)NumElts);
      MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
    }
    assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
    EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
    SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);

    // Arrange the select such that we can match a masked
    // vrsub.vi to perform the conditional negate
    SDValue NegB = DAG.getNegative(B, DL, VT);
    if (!SwapCC)
      CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
    SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
    return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
  }

  // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
  // during the combine phase before type legalization, and relies on
  // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
  // for the source mask.
  if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
      !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
      VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
    return SDValue();

  // Express the shuffle over twice as many half-width elements.
  SmallVector<int, 8> NewMask;
  narrowShuffleMaskElts(2, Mask, NewMask);

  LLVMContext &C = *DAG.getContext();
  EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
  EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
  SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
                                     DAG.getBitcast(NewVT, V2), NewMask);
  return DAG.getBitcast(VT, Res);
}
17349
// Fold (add/add_vl X, (vwmul* Y, Z)) into the corresponding widening
// multiply-accumulate node (vwmacc*), provided passthrus are undef and the
// mask/VL operands agree.
// NOTE(review): the first signature line appears dropped by this listing
// (expected to take SDNode *N and SelectionDAG &DAG before the parameter
// below) -- confirm against upstream.
                               const RISCVSubtarget &Subtarget) {

  assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);

  if (N->getValueType(0).isFixedLengthVector())
    return SDValue();

  SDValue Addend = N->getOperand(0);
  SDValue MulOp = N->getOperand(1);

  if (N->getOpcode() == RISCVISD::ADD_VL) {
    SDValue AddPassthruOp = N->getOperand(2);
    if (!AddPassthruOp.isUndef())
      return SDValue();
  }

  auto IsVWMulOpc = [](unsigned Opc) {
    switch (Opc) {
    case RISCVISD::VWMUL_VL:
    // NOTE(review): the VWMULU_VL / VWMULSU_VL case labels appear dropped by
    // this listing (the static_asserts below reference them) -- confirm
    // against upstream.
      return true;
    default:
      return false;
    }
  };

  // The add is commutative; look for the widening multiply on either side.
  if (!IsVWMulOpc(MulOp.getOpcode()))
    std::swap(Addend, MulOp);

  if (!IsVWMulOpc(MulOp.getOpcode()))
    return SDValue();

  SDValue MulPassthruOp = MulOp.getOperand(2);

  if (!MulPassthruOp.isUndef())
    return SDValue();

  // For a plain ISD::ADD synthesize the default mask/VL; for ADD_VL reuse its
  // explicit mask and VL operands.
  auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
                             const RISCVSubtarget &Subtarget) {
    if (N->getOpcode() == ISD::ADD) {
      SDLoc DL(N);
      return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
                                     Subtarget);
    }
    return std::make_pair(N->getOperand(3), N->getOperand(4));
  }(N, DAG, Subtarget);

  SDValue MulMask = MulOp.getOperand(3);
  SDValue MulVL = MulOp.getOperand(4);

  if (AddMask != MulMask || AddVL != MulVL)
    return SDValue();

  // Map VWMUL*_VL to VWMACC*_VL arithmetically; the static_asserts lock in
  // the required opcode ordering.
  unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
  static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
                "Unexpected opcode after VWMACC_VL");
  static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
                "Unexpected opcode after VWMACC_VL!");
  static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
                "Unexpected opcode after VWMUL_VL!");
  static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
                "Unexpected opcode after VWMUL_VL!");

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
                   AddVL};
  return DAG.getNode(Opc, DL, VT, Ops);
}
17421
// Legalize a gather/scatter index: sign-extend narrow signed indices to
// XLenVT and reclassify them as unsigned-scaled, matching RISC-V's only
// indexed addressing mode. Index and IndexType are in-out; returns true if
// anything changed.
// NOTE(review): this listing appears to have dropped the surrounding
// signature lines (the function-name line before the parameter below, and a
// DAGCombinerInfo &DCI parameter line after it) -- confirm against upstream.
                                           ISD::MemIndexType &IndexType,
  if (!DCI.isBeforeLegalize())
    return false;

  SelectionDAG &DAG = DCI.DAG;
  const MVT XLenVT =
      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();

  const EVT IndexVT = Index.getValueType();

  // RISC-V indexed loads only support the "unsigned unscaled" addressing
  // mode, so anything else must be manually legalized.
  if (!isIndexTypeSigned(IndexType))
    return false;

  if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
    // Any index legalization should first promote to XLenVT, so we don't lose
    // bits when scaling. This may create an illegal index type so we let
    // LLVM's legalization take care of the splitting.
    // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
    Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
                        IndexVT.changeVectorElementType(XLenVT), Index);
  }
  IndexType = ISD::UNSIGNED_SCALED;
  return true;
}
17450
17451/// Match the index vector of a scatter or gather node as the shuffle mask
17452/// which performs the rearrangement if possible. Will only match if
17453/// all lanes are touched, and thus replacing the scatter or gather with
17454/// a unit strided access and shuffle is legal.
17455static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
17456 SmallVector<int> &ShuffleMask) {
17457 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17458 return false;
17459 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17460 return false;
17461
17462 const unsigned ElementSize = VT.getScalarStoreSize();
17463 const unsigned NumElems = VT.getVectorNumElements();
17464
17465 // Create the shuffle mask and check all bits active
17466 assert(ShuffleMask.empty());
17467 BitVector ActiveLanes(NumElems);
17468 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17469 // TODO: We've found an active bit of UB, and could be
17470 // more aggressive here if desired.
17471 if (Index->getOperand(i)->isUndef())
17472 return false;
17473 uint64_t C = Index->getConstantOperandVal(i);
17474 if (C % ElementSize != 0)
17475 return false;
17476 C = C / ElementSize;
17477 if (C >= NumElems)
17478 return false;
17479 ShuffleMask.push_back(C);
17480 ActiveLanes.set(C);
17481 }
17482 return ActiveLanes.all();
17483}
17484
17485/// Match the index of a gather or scatter operation as an operation
17486/// with twice the element width and half the number of elements. This is
17487/// generally profitable (if legal) because these operations are linear
17488/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
17489/// come out ahead.
///
/// \param VT        the vector value type of the gather/scatter result/value.
/// \param Index     the index operand; must be a BUILD_VECTOR of constants.
/// \param Mask      the lane mask; must be an all-ones splat (every lane live).
/// \param BaseAlign alignment of the memory operation's base pointer.
/// \param ST        subtarget, consulted for ELEN and unaligned-access support.
/// \returns true if consecutive index pairs address adjacent, suitably aligned
///          byte ranges, so the op can be re-expressed at 2x element width.
17490static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
17491                                Align BaseAlign, const RISCVSubtarget &ST) {
  // Every lane must be active and every index must be a visible constant;
  // otherwise we cannot reason about the pairwise layout below.
17492  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17493    return false;
17494  if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17495    return false;
17496
17497  // Attempt a doubling. If we can use a element type 4x or 8x in
17498  // size, this will happen via multiply iterations of the transform.
17499  const unsigned NumElems = VT.getVectorNumElements();
17500  if (NumElems % 2 != 0)
17501    return false;
17502
17503  const unsigned ElementSize = VT.getScalarStoreSize();
17504  const unsigned WiderElementSize = ElementSize * 2;
  // The doubled element must still fit in the supported element width;
  // getELen() is in bits, so divide by 8 to compare against a byte size.
17505  if (WiderElementSize > ST.getELen()/8)
17506    return false;
17507
  // Without unaligned vector memory support, the base pointer must be at
  // least as aligned as the wider element.
17508  if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17509    return false;
17510
  // Walk the indices as (even, odd) pairs: the even index must be aligned to
  // the wider element size, and the odd index must point at the bytes
  // immediately following its partner.
17511  for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17512    // TODO: We've found an active bit of UB, and could be
17513    // more aggressive here if desired.
17514    if (Index->getOperand(i)->isUndef())
17515      return false;
17516    // TODO: This offset check is too strict if we support fully
17517    // misaligned memory operations.
17518    uint64_t C = Index->getConstantOperandVal(i);
17519    if (i % 2 == 0) {
17520      if (C % WiderElementSize != 0)
17521        return false;
17522      continue;
17523    }
17524    uint64_t Last = Index->getConstantOperandVal(i-1);
17525    if (C != Last + ElementSize)
17526      return false;
17527  }
17528  return true;
17529}
17530
17531// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
17532// This would be benefit for the cases where X and Y are both the same value
17533// type of low precision vectors. Since the truncate would be lowered into
17534// n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
17535// restriction, such pattern would be expanded into a series of "vsetvli"
17536// and "vnsrl" instructions later to reach this point.
// NOTE(review): the declarator line (doc line 17537) was lost in extraction;
// presumably it reads `static SDValue combineTruncOfSraSext(SDNode *N,
// SelectionDAG &DAG) {` -- confirm against the upstream file before compiling.
17538  SDValue Mask = N->getOperand(1);
17539  SDValue VL = N->getOperand(2);
17540
  // Only fire for whole-register operation: VL is all-ones (or the X0
  // register, meaning VLMAX) and the mask is a VMSET_VL of that same VL.
17541  bool IsVLMAX = isAllOnesConstant(VL) ||
17542                 (isa<RegisterSDNode>(VL) &&
17543                  cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17544  if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
17545      Mask.getOperand(0) != VL)
17546    return SDValue();
17547
  // True for a TRUNCATE_VECTOR_VL node that uses the same mask/VL as N.
17548  auto IsTruncNode = [&](SDValue V) {
17549    return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17550           V.getOperand(1) == Mask && V.getOperand(2) == VL;
17551  };
17552
17553  SDValue Op = N->getOperand(0);
17554
17555  // We need to first find the inner level of TRUNCATE_VECTOR_VL node
17556  // to distinguish such pattern.
17557  while (IsTruncNode(Op)) {
17558    if (!Op.hasOneUse())
17559      return SDValue();
17560    Op = Op.getOperand(0);
17561  }
17562
  // Below the truncate chain we need a single-use (sra (sext X), (zext Y)).
17563  if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
17564    return SDValue();
17565
17566  SDValue N0 = Op.getOperand(0);
17567  SDValue N1 = Op.getOperand(1);
17568  if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
17569      N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
17570    return SDValue();
17571
  // X and Y must both have the narrow vector type this truncate produces.
17572  SDValue N00 = N0.getOperand(0);
17573  SDValue N10 = N1.getOperand(0);
17574  if (!N00.getValueType().isVector() ||
17575      N00.getValueType() != N10.getValueType() ||
17576      N->getValueType(0) != N10.getValueType())
17577    return SDValue();
17578
  // Clamp the shift amount to scalarsize-1 so the narrow sra reproduces the
  // wide sra + truncate for any shift amount Y.
17579  unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
17580  SDValue SMin =
17581      DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
17582                  DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
17583  return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
17584}
17585
17586// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
17587// maximum value for the truncated type.
17588// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
17589// is the signed maximum value for the truncated type and C2 is the signed
17590// minimum value.
// NOTE(review): the declarator line (doc line 17591) was lost in extraction;
// presumably it reads `static SDValue combineTruncToVnclip(SDNode *N,
// SelectionDAG &DAG,` -- confirm against the upstream file.
17592                                     const RISCVSubtarget &Subtarget) {
17593  assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
17594
17595  MVT VT = N->getSimpleValueType(0);
17596
17597  SDValue Mask = N->getOperand(1);
17598  SDValue VL = N->getOperand(2);
17599
  // If V is a min/max (either the generic ISD opcode Opc, or the VL opcode
  // OpcVL with a matching passthru/mask/VL) against a splatted constant,
  // store that constant in SplatVal and return the non-constant operand.
  // Returns an empty SDValue if the pattern does not match.
17600  auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
17601                                  APInt &SplatVal) {
17602    if (V.getOpcode() != Opc &&
17603        !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17604          V.getOperand(3) == Mask && V.getOperand(4) == VL))
17605      return SDValue();
17606
17607    SDValue Op = V.getOperand(1);
17608
17609    // Peek through conversion between fixed and scalable vectors.
17610    if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
17611        isNullConstant(Op.getOperand(2)) &&
17612        Op.getOperand(1).getValueType().isFixedLengthVector() &&
17613        Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17614        Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
17615        isNullConstant(Op.getOperand(1).getOperand(1)))
17616      Op = Op.getOperand(1).getOperand(0);
17617
17618    if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
17619      return V.getOperand(0);
17620
    // Also accept a VL-style splat (VMV_V_X_VL) of a constant scalar.
17621    if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
17622        Op.getOperand(2) == VL) {
17623      if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
17624        SplatVal =
17625            Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
17626        return V.getOperand(0);
17627      }
17628    }
17629
17630    return SDValue();
17631  };
17632
17633  SDLoc DL(N);
17634
  // Detect an unsigned-saturate pattern (clamp to [0, 2^dstbits - 1]) and
  // return the value being clamped, or an empty SDValue.
17635  auto DetectUSatPattern = [&](SDValue V) {
17636    APInt LoC, HiC;
17637
17638    // Simple case, V is a UMIN.
17639    if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
17640      if (HiC.isMask(VT.getScalarSizeInBits()))
17641        return UMinOp;
17642
17643    // If we have an SMAX that removes negative numbers first, then we can match
17644    // SMIN instead of UMIN.
17645    if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17646      if (SDValue SMaxOp =
17647              MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17648        if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
17649          return SMinOp;
17650
17651    // If we have an SMIN before an SMAX and the SMAX constant is less than or
17652    // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
17653    // first.
17654    if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17655      if (SDValue SMinOp =
17656              MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17657        if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
17658            HiC.uge(LoC))
17659          return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
17660                             V.getOperand(1), DAG.getUNDEF(V.getValueType()),
17661                             Mask, VL);
17662
17663    return SDValue();
17664  };
17665
  // Detect a signed-saturate pattern (clamp to the signed range of the
  // destination type) in either smin(smax(...)) or smax(smin(...)) order.
17666  auto DetectSSatPattern = [&](SDValue V) {
17667    unsigned NumDstBits = VT.getScalarSizeInBits();
17668    unsigned NumSrcBits = V.getScalarValueSizeInBits();
17669    APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
17670    APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
17671
17672    APInt HiC, LoC;
17673    if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17674      if (SDValue SMaxOp =
17675              MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17676        if (HiC == SignedMax && LoC == SignedMin)
17677          return SMaxOp;
17678
17679    if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17680      if (SDValue SMinOp =
17681              MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17682        if (HiC == SignedMax && LoC == SignedMin)
17683          return SMinOp;
17684
17685    return SDValue();
17686  };
17687
17688  SDValue Src = N->getOperand(0);
17689
17690  // Look through multiple layers of truncates.
17691  while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17692         Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
17693         Src.hasOneUse())
17694    Src = Src.getOperand(0);
17695
17696  SDValue Val;
17697  unsigned ClipOpc;
17698  if ((Val = DetectUSatPattern(Src)))
// NOTE(review): doc line 17699 was lost in extraction; it presumably assigns
// the unsigned clip opcode here (ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT
// or similar) -- confirm against the upstream file.
17700  else if ((Val = DetectSSatPattern(Src)))
// NOTE(review): doc line 17701 was also lost; presumably the signed clip
// opcode assignment (ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT or similar).
17702  else
17703    return SDValue();
17704
17705  MVT ValVT = Val.getSimpleValueType();
17706
  // Emit one narrowing clip per halving step until we reach the destination
  // type; each iteration halves the element width (SEW*2 -> SEW).
17707  do {
17708    MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
17709    ValVT = ValVT.changeVectorElementType(ValEltVT);
17710    Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
17711  } while (ValVT != VT);
17712
17713  return Val;
17714}
17715
17716// Convert
17717// (iX ctpop (bitcast (vXi1 A)))
17718// ->
17719// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
17720// FIXME: It's complicated to match all the variations of this after type
17721// legalization so we only handle the pre-type legalization pattern, but that
17722// requires the fixed vector type to be legal.
// NOTE(review): the declarator line (doc line 17723) was lost in extraction;
// presumably it reads `static SDValue combineScalarCTPOPToVCPOP(SDNode *N,
// SelectionDAG &DAG,` -- confirm against the upstream file.
17724                                 const RISCVSubtarget &Subtarget) {
  // Only scalar integer ctpop results are handled by this combine.
17725  EVT VT = N->getValueType(0);
17726  if (!VT.isScalarInteger())
17727    return SDValue();
17728
17729  SDValue Src = N->getOperand(0);
17730
17731  // Peek through zero_extend. It doesn't change the count.
17732  if (Src.getOpcode() == ISD::ZERO_EXTEND)
17733    Src = Src.getOperand(0);
17734
17735  if (Src.getOpcode() != ISD::BITCAST)
17736    return SDValue();
17737
17738  Src = Src.getOperand(0);
17739  EVT SrcEVT = Src.getValueType();
17740  if (!SrcEVT.isSimple())
17741    return SDValue();
17742
17743  MVT SrcMVT = SrcEVT.getSimpleVT();
17744  // Make sure the input is an i1 vector.
17745  if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
17746    return SDValue();
17747
  // The fixed-length mask vector must be one we lower via RVV; otherwise we
  // cannot build the scalable container below.
17748  if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
17749    return SDValue();
17750
  // Wrap the fixed i1 vector into its scalable container so vcpop.m applies.
17751  MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
17752  Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
17753
17754  SDLoc DL(N);
17755  auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
17756
  // vcpop.m produces an XLen-sized count; extend/truncate it to the original
  // scalar result type.
17757  MVT XLenVT = Subtarget.getXLenVT();
17758  SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
17759  return DAG.getZExtOrTrunc(Pop, DL, VT);
17760}
17761
17763 DAGCombinerInfo &DCI) const {
17764 SelectionDAG &DAG = DCI.DAG;
17765 const MVT XLenVT = Subtarget.getXLenVT();
17766 SDLoc DL(N);
17767
17768 // Helper to call SimplifyDemandedBits on an operand of N where only some low
17769 // bits are demanded. N will be added to the Worklist if it was not deleted.
17770 // Caller should return SDValue(N, 0) if this returns true.
17771 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
17772 SDValue Op = N->getOperand(OpNo);
17773 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
17774 if (!SimplifyDemandedBits(Op, Mask, DCI))
17775 return false;
17776
17777 if (N->getOpcode() != ISD::DELETED_NODE)
17778 DCI.AddToWorklist(N);
17779 return true;
17780 };
17781
17782 switch (N->getOpcode()) {
17783 default:
17784 break;
17785 case RISCVISD::SplitF64: {
17786 SDValue Op0 = N->getOperand(0);
17787 // If the input to SplitF64 is just BuildPairF64 then the operation is
17788 // redundant. Instead, use BuildPairF64's operands directly.
17789 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
17790 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
17791
17792 if (Op0->isUndef()) {
17793 SDValue Lo = DAG.getUNDEF(MVT::i32);
17794 SDValue Hi = DAG.getUNDEF(MVT::i32);
17795 return DCI.CombineTo(N, Lo, Hi);
17796 }
17797
17798 // It's cheaper to materialise two 32-bit integers than to load a double
17799 // from the constant pool and transfer it to integer registers through the
17800 // stack.
17801 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
17802 APInt V = C->getValueAPF().bitcastToAPInt();
17803 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
17804 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
17805 return DCI.CombineTo(N, Lo, Hi);
17806 }
17807
17808 // This is a target-specific version of a DAGCombine performed in
17809 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17810 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17811 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17812 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17813 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
17814 break;
17815 SDValue NewSplitF64 =
17816 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
17817 Op0.getOperand(0));
17818 SDValue Lo = NewSplitF64.getValue(0);
17819 SDValue Hi = NewSplitF64.getValue(1);
17820 APInt SignBit = APInt::getSignMask(32);
17821 if (Op0.getOpcode() == ISD::FNEG) {
17822 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
17823 DAG.getConstant(SignBit, DL, MVT::i32));
17824 return DCI.CombineTo(N, Lo, NewHi);
17825 }
17826 assert(Op0.getOpcode() == ISD::FABS);
17827 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
17828 DAG.getConstant(~SignBit, DL, MVT::i32));
17829 return DCI.CombineTo(N, Lo, NewHi);
17830 }
17831 case RISCVISD::SLLW:
17832 case RISCVISD::SRAW:
17833 case RISCVISD::SRLW:
17834 case RISCVISD::RORW:
17835 case RISCVISD::ROLW: {
17836 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
17837 if (SimplifyDemandedLowBitsHelper(0, 32) ||
17838 SimplifyDemandedLowBitsHelper(1, 5))
17839 return SDValue(N, 0);
17840
17841 break;
17842 }
17843 case RISCVISD::CLZW:
17844 case RISCVISD::CTZW: {
17845 // Only the lower 32 bits of the first operand are read
17846 if (SimplifyDemandedLowBitsHelper(0, 32))
17847 return SDValue(N, 0);
17848 break;
17849 }
17851 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 the the
17852 // conversion is unnecessary and can be replaced with the
17853 // FMV_X_ANYEXTW_RV64 operand.
17854 SDValue Op0 = N->getOperand(0);
17856 return Op0.getOperand(0);
17857 break;
17858 }
17861 SDLoc DL(N);
17862 SDValue Op0 = N->getOperand(0);
17863 MVT VT = N->getSimpleValueType(0);
17864
17865 // Constant fold.
17866 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17867 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
17868 return DAG.getConstant(Val, DL, VT);
17869 }
17870
17871 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
17872 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
17873 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
17874 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
17875 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
17876 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
17877 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
17878 assert(Op0.getOperand(0).getValueType() == VT &&
17879 "Unexpected value type!");
17880 return Op0.getOperand(0);
17881 }
17882
17883 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
17884 cast<LoadSDNode>(Op0)->isSimple()) {
17886 auto *LN0 = cast<LoadSDNode>(Op0);
17887 SDValue Load =
17888 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
17889 LN0->getBasePtr(), IVT, LN0->getMemOperand());
17890 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
17891 return Load;
17892 }
17893
17894 // This is a target-specific version of a DAGCombine performed in
17895 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17896 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17897 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17898 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17899 !Op0.getNode()->hasOneUse())
17900 break;
17901 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
17902 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
17903 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
17904 if (Op0.getOpcode() == ISD::FNEG)
17905 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
17906 DAG.getConstant(SignBit, DL, VT));
17907
17908 assert(Op0.getOpcode() == ISD::FABS);
17909 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
17910 DAG.getConstant(~SignBit, DL, VT));
17911 }
17912 case ISD::ABS: {
17913 EVT VT = N->getValueType(0);
17914 SDValue N0 = N->getOperand(0);
17915 // abs (sext) -> zext (abs)
17916 // abs (zext) -> zext (handled elsewhere)
17917 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
17918 SDValue Src = N0.getOperand(0);
17919 SDLoc DL(N);
17920 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
17921 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
17922 }
17923 break;
17924 }
17925 case ISD::ADD: {
17926 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17927 return V;
17928 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
17929 return V;
17930 return performADDCombine(N, DCI, Subtarget);
17931 }
17932 case ISD::SUB: {
17933 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17934 return V;
17935 return performSUBCombine(N, DAG, Subtarget);
17936 }
17937 case ISD::AND:
17938 return performANDCombine(N, DCI, Subtarget);
17939 case ISD::OR: {
17940 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17941 return V;
17942 return performORCombine(N, DCI, Subtarget);
17943 }
17944 case ISD::XOR:
17945 return performXORCombine(N, DAG, Subtarget);
17946 case ISD::MUL:
17947 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17948 return V;
17949 return performMULCombine(N, DAG, DCI, Subtarget);
17950 case ISD::SDIV:
17951 case ISD::UDIV:
17952 case ISD::SREM:
17953 case ISD::UREM:
17954 if (SDValue V = combineBinOpOfZExt(N, DAG))
17955 return V;
17956 break;
17957 case ISD::FMUL: {
17958 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
17959 SDValue N0 = N->getOperand(0);
17960 SDValue N1 = N->getOperand(1);
17961 if (N0->getOpcode() != ISD::FCOPYSIGN)
17962 std::swap(N0, N1);
17963 if (N0->getOpcode() != ISD::FCOPYSIGN)
17964 return SDValue();
17965 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
17966 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
17967 return SDValue();
17968 EVT VT = N->getValueType(0);
17969 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
17970 return SDValue();
17971 SDValue Sign = N0->getOperand(1);
17972 if (Sign.getValueType() != VT)
17973 return SDValue();
17974 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
17975 }
17976 case ISD::FADD:
17977 case ISD::UMAX:
17978 case ISD::UMIN:
17979 case ISD::SMAX:
17980 case ISD::SMIN:
17981 case ISD::FMAXNUM:
17982 case ISD::FMINNUM: {
17983 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17984 return V;
17985 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17986 return V;
17987 return SDValue();
17988 }
17989 case ISD::SETCC:
17990 return performSETCCCombine(N, DAG, Subtarget);
17992 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
17993 case ISD::ZERO_EXTEND:
17994 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
17995 // type legalization. This is safe because fp_to_uint produces poison if
17996 // it overflows.
17997 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
17998 SDValue Src = N->getOperand(0);
17999 if (Src.getOpcode() == ISD::FP_TO_UINT &&
18000 isTypeLegal(Src.getOperand(0).getValueType()))
18001 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
18002 Src.getOperand(0));
18003 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
18004 isTypeLegal(Src.getOperand(1).getValueType())) {
18005 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
18006 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
18007 Src.getOperand(0), Src.getOperand(1));
18008 DCI.CombineTo(N, Res);
18009 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
18010 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
18011 return SDValue(N, 0); // Return N so it doesn't get rechecked.
18012 }
18013 }
18014 return SDValue();
18016 if (SDValue V = combineTruncOfSraSext(N, DAG))
18017 return V;
18018 return combineTruncToVnclip(N, DAG, Subtarget);
18019 case ISD::VP_TRUNCATE:
18020 return performVP_TRUNCATECombine(N, DAG, Subtarget);
18021 case ISD::TRUNCATE:
18022 return performTRUNCATECombine(N, DAG, Subtarget);
18023 case ISD::SELECT:
18024 return performSELECTCombine(N, DAG, Subtarget);
18025 case ISD::VSELECT:
18026 return performVSELECTCombine(N, DAG);
18028 case RISCVISD::CZERO_NEZ: {
18029 SDValue Val = N->getOperand(0);
18030 SDValue Cond = N->getOperand(1);
18031
18032 unsigned Opc = N->getOpcode();
18033
18034 // czero_eqz x, x -> x
18035 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
18036 return Val;
18037
18038 unsigned InvOpc =
18040
18041 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
18042 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
18043 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
18044 SDValue NewCond = Cond.getOperand(0);
18045 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
18046 if (DAG.MaskedValueIsZero(NewCond, Mask))
18047 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
18048 }
18049 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
18050 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
18051 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
18052 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
18053 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
18054 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18055 if (ISD::isIntEqualitySetCC(CCVal))
18056 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
18057 N->getValueType(0), Val, Cond.getOperand(0));
18058 }
18059 return SDValue();
18060 }
18061 case RISCVISD::SELECT_CC: {
18062 // Transform
18063 SDValue LHS = N->getOperand(0);
18064 SDValue RHS = N->getOperand(1);
18065 SDValue CC = N->getOperand(2);
18066 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
18067 SDValue TrueV = N->getOperand(3);
18068 SDValue FalseV = N->getOperand(4);
18069 SDLoc DL(N);
18070 EVT VT = N->getValueType(0);
18071
18072 // If the True and False values are the same, we don't need a select_cc.
18073 if (TrueV == FalseV)
18074 return TrueV;
18075
18076 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
18077 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
18078 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
18079 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
18080 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
18081 if (CCVal == ISD::CondCode::SETGE)
18082 std::swap(TrueV, FalseV);
18083
18084 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
18085 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
18086 // Only handle simm12, if it is not in this range, it can be considered as
18087 // register.
18088 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
18089 isInt<12>(TrueSImm - FalseSImm)) {
18090 SDValue SRA =
18091 DAG.getNode(ISD::SRA, DL, VT, LHS,
18092 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
18093 SDValue AND =
18094 DAG.getNode(ISD::AND, DL, VT, SRA,
18095 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
18096 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
18097 }
18098
18099 if (CCVal == ISD::CondCode::SETGE)
18100 std::swap(TrueV, FalseV);
18101 }
18102
18103 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
18104 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
18105 {LHS, RHS, CC, TrueV, FalseV});
18106
18107 if (!Subtarget.hasConditionalMoveFusion()) {
18108 // (select c, -1, y) -> -c | y
18109 if (isAllOnesConstant(TrueV)) {
18110 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
18111 SDValue Neg = DAG.getNegative(C, DL, VT);
18112 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
18113 }
18114 // (select c, y, -1) -> -!c | y
18115 if (isAllOnesConstant(FalseV)) {
18116 SDValue C =
18117 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
18118 SDValue Neg = DAG.getNegative(C, DL, VT);
18119 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
18120 }
18121
18122 // (select c, 0, y) -> -!c & y
18123 if (isNullConstant(TrueV)) {
18124 SDValue C =
18125 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
18126 SDValue Neg = DAG.getNegative(C, DL, VT);
18127 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
18128 }
18129 // (select c, y, 0) -> -c & y
18130 if (isNullConstant(FalseV)) {
18131 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
18132 SDValue Neg = DAG.getNegative(C, DL, VT);
18133 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
18134 }
18135 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
18136 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
18137 if (((isOneConstant(FalseV) && LHS == TrueV &&
18138 CCVal == ISD::CondCode::SETNE) ||
18139 (isOneConstant(TrueV) && LHS == FalseV &&
18140 CCVal == ISD::CondCode::SETEQ)) &&
18142 // freeze it to be safe.
18143 LHS = DAG.getFreeze(LHS);
18145 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
18146 }
18147 }
18148
18149 // If both true/false are an xor with 1, pull through the select.
18150 // This can occur after op legalization if both operands are setccs that
18151 // require an xor to invert.
18152 // FIXME: Generalize to other binary ops with identical operand?
18153 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
18154 TrueV.getOperand(1) == FalseV.getOperand(1) &&
18155 isOneConstant(TrueV.getOperand(1)) &&
18156 TrueV.hasOneUse() && FalseV.hasOneUse()) {
18157 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
18158 TrueV.getOperand(0), FalseV.getOperand(0));
18159 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
18160 }
18161
18162 return SDValue();
18163 }
18164 case RISCVISD::BR_CC: {
18165 SDValue LHS = N->getOperand(1);
18166 SDValue RHS = N->getOperand(2);
18167 SDValue CC = N->getOperand(3);
18168 SDLoc DL(N);
18169
18170 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
18171 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
18172 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
18173
18174 return SDValue();
18175 }
18176 case ISD::BITREVERSE:
18177 return performBITREVERSECombine(N, DAG, Subtarget);
18178 case ISD::FP_TO_SINT:
18179 case ISD::FP_TO_UINT:
18180 return performFP_TO_INTCombine(N, DCI, Subtarget);
18183 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
18184 case ISD::FCOPYSIGN: {
18185 EVT VT = N->getValueType(0);
18186 if (!VT.isVector())
18187 break;
18188 // There is a form of VFSGNJ which injects the negated sign of its second
18189 // operand. Try and bubble any FNEG up after the extend/round to produce
18190 // this optimized pattern. Avoid modifying cases where FP_ROUND and
18191 // TRUNC=1.
18192 SDValue In2 = N->getOperand(1);
18193 // Avoid cases where the extend/round has multiple uses, as duplicating
18194 // those is typically more expensive than removing a fneg.
18195 if (!In2.hasOneUse())
18196 break;
18197 if (In2.getOpcode() != ISD::FP_EXTEND &&
18198 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
18199 break;
18200 In2 = In2.getOperand(0);
18201 if (In2.getOpcode() != ISD::FNEG)
18202 break;
18203 SDLoc DL(N);
18204 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
18205 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
18206 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
18207 }
18208 case ISD::MGATHER: {
18209 const auto *MGN = cast<MaskedGatherSDNode>(N);
18210 const EVT VT = N->getValueType(0);
18211 SDValue Index = MGN->getIndex();
18212 SDValue ScaleOp = MGN->getScale();
18213 ISD::MemIndexType IndexType = MGN->getIndexType();
18214 assert(!MGN->isIndexScaled() &&
18215 "Scaled gather/scatter should not be formed");
18216
18217 SDLoc DL(N);
18218 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18219 return DAG.getMaskedGather(
18220 N->getVTList(), MGN->getMemoryVT(), DL,
18221 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18222 MGN->getBasePtr(), Index, ScaleOp},
18223 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18224
18225 if (narrowIndex(Index, IndexType, DAG))
18226 return DAG.getMaskedGather(
18227 N->getVTList(), MGN->getMemoryVT(), DL,
18228 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18229 MGN->getBasePtr(), Index, ScaleOp},
18230 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18231
18232 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
18233 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
18234 // The sequence will be XLenVT, not the type of Index. Tell
18235 // isSimpleVIDSequence this so we avoid overflow.
18236 if (std::optional<VIDSequence> SimpleVID =
18237 isSimpleVIDSequence(Index, Subtarget.getXLen());
18238 SimpleVID && SimpleVID->StepDenominator == 1) {
18239 const int64_t StepNumerator = SimpleVID->StepNumerator;
18240 const int64_t Addend = SimpleVID->Addend;
18241
18242 // Note: We don't need to check alignment here since (by assumption
18243 // from the existance of the gather), our offsets must be sufficiently
18244 // aligned.
18245
18246 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
18247 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
18248 assert(IndexType == ISD::UNSIGNED_SCALED);
18249 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
18250 DAG.getSignedConstant(Addend, DL, PtrVT));
18251
18252 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
18254 SDValue StridedLoad = DAG.getStridedLoadVP(
18255 VT, DL, MGN->getChain(), BasePtr,
18256 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
18257 EVL, MGN->getMemOperand());
18258 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
18259 StridedLoad, MGN->getPassThru(), EVL);
18260 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
18261 DL);
18262 }
18263 }
18264
18265 SmallVector<int> ShuffleMask;
18266 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
18267 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
18268 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
18269 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
18270 MGN->getMask(), DAG.getUNDEF(VT),
18271 MGN->getMemoryVT(), MGN->getMemOperand(),
18273 SDValue Shuffle =
18274 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
18275 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
18276 }
18277
18278 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
18279 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
18280 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
18281 SmallVector<SDValue> NewIndices;
18282 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
18283 NewIndices.push_back(Index.getOperand(i));
18284 EVT IndexVT = Index.getValueType()
18285 .getHalfNumVectorElementsVT(*DAG.getContext());
18286 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
18287
18288 unsigned ElementSize = VT.getScalarStoreSize();
18289 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
18290 auto EltCnt = VT.getVectorElementCount();
18291 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
18292 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
18293 EltCnt.divideCoefficientBy(2));
18294 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
18295 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
18296 EltCnt.divideCoefficientBy(2));
18297 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
18298
18299 SDValue Gather =
18300 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
18301 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
18302 Index, ScaleOp},
18303 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
18304 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
18305 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
18306 }
18307 break;
18308 }
18309 case ISD::MSCATTER:{
18310 const auto *MSN = cast<MaskedScatterSDNode>(N);
18311 SDValue Index = MSN->getIndex();
18312 SDValue ScaleOp = MSN->getScale();
18313 ISD::MemIndexType IndexType = MSN->getIndexType();
18314 assert(!MSN->isIndexScaled() &&
18315 "Scaled gather/scatter should not be formed");
18316
18317 SDLoc DL(N);
18318 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18319 return DAG.getMaskedScatter(
18320 N->getVTList(), MSN->getMemoryVT(), DL,
18321 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18322 Index, ScaleOp},
18323 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18324
18325 if (narrowIndex(Index, IndexType, DAG))
18326 return DAG.getMaskedScatter(
18327 N->getVTList(), MSN->getMemoryVT(), DL,
18328 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18329 Index, ScaleOp},
18330 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18331
18332 EVT VT = MSN->getValue()->getValueType(0);
18333 SmallVector<int> ShuffleMask;
18334 if (!MSN->isTruncatingStore() &&
18335 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
18336 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
18337 DAG.getUNDEF(VT), ShuffleMask);
18338 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
18339 DAG.getUNDEF(XLenVT), MSN->getMask(),
18340 MSN->getMemoryVT(), MSN->getMemOperand(),
18341 ISD::UNINDEXED, false);
18342 }
18343 break;
18344 }
18345 case ISD::VP_GATHER: {
18346 const auto *VPGN = cast<VPGatherSDNode>(N);
18347 SDValue Index = VPGN->getIndex();
18348 SDValue ScaleOp = VPGN->getScale();
18349 ISD::MemIndexType IndexType = VPGN->getIndexType();
18350 assert(!VPGN->isIndexScaled() &&
18351 "Scaled gather/scatter should not be formed");
18352
18353 SDLoc DL(N);
18354 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18355 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18356 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18357 ScaleOp, VPGN->getMask(),
18358 VPGN->getVectorLength()},
18359 VPGN->getMemOperand(), IndexType);
18360
18361 if (narrowIndex(Index, IndexType, DAG))
18362 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18363 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18364 ScaleOp, VPGN->getMask(),
18365 VPGN->getVectorLength()},
18366 VPGN->getMemOperand(), IndexType);
18367
18368 break;
18369 }
18370 case ISD::VP_SCATTER: {
18371 const auto *VPSN = cast<VPScatterSDNode>(N);
18372 SDValue Index = VPSN->getIndex();
18373 SDValue ScaleOp = VPSN->getScale();
18374 ISD::MemIndexType IndexType = VPSN->getIndexType();
18375 assert(!VPSN->isIndexScaled() &&
18376 "Scaled gather/scatter should not be formed");
18377
18378 SDLoc DL(N);
18379 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18380 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18381 {VPSN->getChain(), VPSN->getValue(),
18382 VPSN->getBasePtr(), Index, ScaleOp,
18383 VPSN->getMask(), VPSN->getVectorLength()},
18384 VPSN->getMemOperand(), IndexType);
18385
18386 if (narrowIndex(Index, IndexType, DAG))
18387 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18388 {VPSN->getChain(), VPSN->getValue(),
18389 VPSN->getBasePtr(), Index, ScaleOp,
18390 VPSN->getMask(), VPSN->getVectorLength()},
18391 VPSN->getMemOperand(), IndexType);
18392 break;
18393 }
18394 case RISCVISD::SHL_VL:
18395 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18396 return V;
18397 [[fallthrough]];
18398 case RISCVISD::SRA_VL:
18399 case RISCVISD::SRL_VL: {
18400 SDValue ShAmt = N->getOperand(1);
18402 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18403 SDLoc DL(N);
18404 SDValue VL = N->getOperand(4);
18405 EVT VT = N->getValueType(0);
18406 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18407 ShAmt.getOperand(1), VL);
18408 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
18409 N->getOperand(2), N->getOperand(3), N->getOperand(4));
18410 }
18411 break;
18412 }
18413 case ISD::SRA:
18414 if (SDValue V = performSRACombine(N, DAG, Subtarget))
18415 return V;
18416 [[fallthrough]];
18417 case ISD::SRL:
18418 case ISD::SHL: {
18419 if (N->getOpcode() == ISD::SHL) {
18420 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18421 return V;
18422 }
18423 SDValue ShAmt = N->getOperand(1);
18425 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18426 SDLoc DL(N);
18427 EVT VT = N->getValueType(0);
18428 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18429 ShAmt.getOperand(1),
18430 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
18431 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
18432 }
18433 break;
18434 }
18435 case RISCVISD::ADD_VL:
18436 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18437 return V;
18438 return combineToVWMACC(N, DAG, Subtarget);
18443 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
18444 case RISCVISD::SUB_VL:
18445 case RISCVISD::MUL_VL:
18446 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18455 return performVFMADD_VLCombine(N, DCI, Subtarget);
18456 case RISCVISD::FADD_VL:
18457 case RISCVISD::FSUB_VL:
18458 case RISCVISD::FMUL_VL:
18461 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18462 case ISD::LOAD:
18463 case ISD::STORE: {
18464 if (DCI.isAfterLegalizeDAG())
18465 if (SDValue V = performMemPairCombine(N, DCI))
18466 return V;
18467
18468 if (N->getOpcode() != ISD::STORE)
18469 break;
18470
18471 auto *Store = cast<StoreSDNode>(N);
18472 SDValue Chain = Store->getChain();
18473 EVT MemVT = Store->getMemoryVT();
18474 SDValue Val = Store->getValue();
18475 SDLoc DL(N);
18476
18477 bool IsScalarizable =
18478 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
18479 Store->isSimple() &&
18480 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
18481 isPowerOf2_64(MemVT.getSizeInBits()) &&
18482 MemVT.getSizeInBits() <= Subtarget.getXLen();
18483
18484 // If sufficiently aligned we can scalarize stores of constant vectors of
18485 // any power-of-two size up to XLen bits, provided that they aren't too
18486 // expensive to materialize.
18487 // vsetivli zero, 2, e8, m1, ta, ma
18488 // vmv.v.i v8, 4
18489 // vse64.v v8, (a0)
18490 // ->
18491 // li a1, 1028
18492 // sh a1, 0(a0)
18493 if (DCI.isBeforeLegalize() && IsScalarizable &&
18495 // Get the constant vector bits
18496 APInt NewC(Val.getValueSizeInBits(), 0);
18497 uint64_t EltSize = Val.getScalarValueSizeInBits();
18498 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
18499 if (Val.getOperand(i).isUndef())
18500 continue;
18501 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
18502 i * EltSize);
18503 }
18504 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18505
18506 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
18507 true) <= 2 &&
18509 NewVT, *Store->getMemOperand())) {
18510 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
18511 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
18512 Store->getPointerInfo(), Store->getOriginalAlign(),
18513 Store->getMemOperand()->getFlags());
18514 }
18515 }
18516
18517 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
18518 // vsetivli zero, 2, e16, m1, ta, ma
18519 // vle16.v v8, (a0)
18520 // vse16.v v8, (a1)
18521 if (auto *L = dyn_cast<LoadSDNode>(Val);
18522 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
18523 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18524 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
18525 L->getMemoryVT() == MemVT) {
18526 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18528 NewVT, *Store->getMemOperand()) &&
18530 NewVT, *L->getMemOperand())) {
18531 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
18532 L->getPointerInfo(), L->getOriginalAlign(),
18533 L->getMemOperand()->getFlags());
18534 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
18535 Store->getPointerInfo(), Store->getOriginalAlign(),
18536 Store->getMemOperand()->getFlags());
18537 }
18538 }
18539
18540 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
18541 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
18542 // any illegal types.
18543 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
18544 (DCI.isAfterLegalizeDAG() &&
18546 isNullConstant(Val.getOperand(1)))) {
18547 SDValue Src = Val.getOperand(0);
18548 MVT VecVT = Src.getSimpleValueType();
18549 // VecVT should be scalable and memory VT should match the element type.
18550 if (!Store->isIndexed() && VecVT.isScalableVector() &&
18551 MemVT == VecVT.getVectorElementType()) {
18552 SDLoc DL(N);
18553 MVT MaskVT = getMaskTypeFor(VecVT);
18554 return DAG.getStoreVP(
18555 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
18556 DAG.getConstant(1, DL, MaskVT),
18557 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
18558 Store->getMemOperand(), Store->getAddressingMode(),
18559 Store->isTruncatingStore(), /*IsCompress*/ false);
18560 }
18561 }
18562
18563 break;
18564 }
18565 case ISD::SPLAT_VECTOR: {
18566 EVT VT = N->getValueType(0);
18567 // Only perform this combine on legal MVT types.
18568 if (!isTypeLegal(VT))
18569 break;
18570 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
18571 DAG, Subtarget))
18572 return Gather;
18573 break;
18574 }
18575 case ISD::BUILD_VECTOR:
18576 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
18577 return V;
18578 break;
18580 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
18581 return V;
18582 break;
18584 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
18585 return V;
18586 break;
18588 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
18589 return V;
18590 break;
18591 case RISCVISD::VFMV_V_F_VL: {
18592 const MVT VT = N->getSimpleValueType(0);
18593 SDValue Passthru = N->getOperand(0);
18594 SDValue Scalar = N->getOperand(1);
18595 SDValue VL = N->getOperand(2);
18596
18597 // If VL is 1, we can use vfmv.s.f.
18598 if (isOneConstant(VL))
18599 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
18600 break;
18601 }
18602 case RISCVISD::VMV_V_X_VL: {
18603 const MVT VT = N->getSimpleValueType(0);
18604 SDValue Passthru = N->getOperand(0);
18605 SDValue Scalar = N->getOperand(1);
18606 SDValue VL = N->getOperand(2);
18607
18608 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
18609 // scalar input.
18610 unsigned ScalarSize = Scalar.getValueSizeInBits();
18611 unsigned EltWidth = VT.getScalarSizeInBits();
18612 if (ScalarSize > EltWidth && Passthru.isUndef())
18613 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
18614 return SDValue(N, 0);
18615
18616 // If VL is 1 and the scalar value won't benefit from immediate, we can
18617 // use vmv.s.x.
18618 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18619 if (isOneConstant(VL) &&
18620 (!Const || Const->isZero() ||
18621 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
18622 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
18623
18624 break;
18625 }
18626 case RISCVISD::VFMV_S_F_VL: {
18627 SDValue Src = N->getOperand(1);
18628 // Try to remove vector->scalar->vector if the scalar->vector is inserting
18629 // into an undef vector.
18630 // TODO: Could use a vslide or vmv.v.v for non-undef.
18631 if (N->getOperand(0).isUndef() &&
18632 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18633 isNullConstant(Src.getOperand(1)) &&
18634 Src.getOperand(0).getValueType().isScalableVector()) {
18635 EVT VT = N->getValueType(0);
18636 EVT SrcVT = Src.getOperand(0).getValueType();
18638 // Widths match, just return the original vector.
18639 if (SrcVT == VT)
18640 return Src.getOperand(0);
18641 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
18642 }
18643 [[fallthrough]];
18644 }
18645 case RISCVISD::VMV_S_X_VL: {
18646 const MVT VT = N->getSimpleValueType(0);
18647 SDValue Passthru = N->getOperand(0);
18648 SDValue Scalar = N->getOperand(1);
18649 SDValue VL = N->getOperand(2);
18650
18651 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
18652 Scalar.getOperand(0).getValueType() == N->getValueType(0))
18653 return Scalar.getOperand(0);
18654
18655 // Use M1 or smaller to avoid over constraining register allocation
18656 const MVT M1VT = getLMUL1VT(VT);
18657 if (M1VT.bitsLT(VT)) {
18658 SDValue M1Passthru =
18659 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
18660 DAG.getVectorIdxConstant(0, DL));
18661 SDValue Result =
18662 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
18663 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
18664 DAG.getVectorIdxConstant(0, DL));
18665 return Result;
18666 }
18667
18668 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
18669 // higher would involve overly constraining the register allocator for
18670 // no purpose.
18671 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18672 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
18673 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
18674 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
18675
18676 break;
18677 }
18678 case RISCVISD::VMV_X_S: {
18679 SDValue Vec = N->getOperand(0);
18680 MVT VecVT = N->getOperand(0).getSimpleValueType();
18681 const MVT M1VT = getLMUL1VT(VecVT);
18682 if (M1VT.bitsLT(VecVT)) {
18683 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
18684 DAG.getVectorIdxConstant(0, DL));
18685 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
18686 }
18687 break;
18688 }
18692 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
18693 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
18694 switch (IntNo) {
18695 // By default we do not combine any intrinsic.
18696 default:
18697 return SDValue();
18698 case Intrinsic::riscv_vcpop:
18699 case Intrinsic::riscv_vcpop_mask:
18700 case Intrinsic::riscv_vfirst:
18701 case Intrinsic::riscv_vfirst_mask: {
18702 SDValue VL = N->getOperand(2);
18703 if (IntNo == Intrinsic::riscv_vcpop_mask ||
18704 IntNo == Intrinsic::riscv_vfirst_mask)
18705 VL = N->getOperand(3);
18706 if (!isNullConstant(VL))
18707 return SDValue();
18708 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
18709 SDLoc DL(N);
18710 EVT VT = N->getValueType(0);
18711 if (IntNo == Intrinsic::riscv_vfirst ||
18712 IntNo == Intrinsic::riscv_vfirst_mask)
18713 return DAG.getAllOnesConstant(DL, VT);
18714 return DAG.getConstant(0, DL, VT);
18715 }
18716 }
18717 }
18718 case ISD::EXPERIMENTAL_VP_REVERSE:
18719 return performVP_REVERSECombine(N, DAG, Subtarget);
18720 case ISD::VP_STORE:
18721 return performVP_STORECombine(N, DAG, Subtarget);
18722 case ISD::BITCAST: {
18724 SDValue N0 = N->getOperand(0);
18725 EVT VT = N->getValueType(0);
18726 EVT SrcVT = N0.getValueType();
18727 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
18728 unsigned NF = VT.getRISCVVectorTupleNumFields();
18729 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
18730 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
18731 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
18732
18733 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
18734
18735 SDValue Result = DAG.getUNDEF(VT);
18736 for (unsigned i = 0; i < NF; ++i)
18737 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
18738 DAG.getVectorIdxConstant(i, DL));
18739 return Result;
18740 }
18741 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
18742 // type, widen both sides to avoid a trip through memory.
18743 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
18744 VT.isScalarInteger()) {
18745 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
18746 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
18747 Ops[0] = N0;
18748 SDLoc DL(N);
18749 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
18750 N0 = DAG.getBitcast(MVT::i8, N0);
18751 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
18752 }
18753
18754 return SDValue();
18755 }
18756 case ISD::CTPOP:
18757 if (SDValue V = combineScalarCTPOPToVCPOP(N, DAG, Subtarget))
18758 return V;
18759 break;
18760 }
18761
18762 return SDValue();
18763}
18764
18766 EVT XVT, unsigned KeptBits) const {
18767 // For vectors, we don't have a preference..
18768 if (XVT.isVector())
18769 return false;
18770
18771 if (XVT != MVT::i32 && XVT != MVT::i64)
18772 return false;
18773
18774 // We can use sext.w for RV64 or an srai 31 on RV32.
18775 if (KeptBits == 32 || KeptBits == 64)
18776 return true;
18777
18778 // With Zbb we can use sext.h/sext.b.
18779 return Subtarget.hasStdExtZbb() &&
18780 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
18781 KeptBits == 16);
18782}
18783
18785 const SDNode *N, CombineLevel Level) const {
18786 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
18787 N->getOpcode() == ISD::SRL) &&
18788 "Expected shift op");
18789
18790 // The following folds are only desirable if `(OP _, c1 << c2)` can be
18791 // materialised in fewer instructions than `(OP _, c1)`:
18792 //
18793 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
18794 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
18795 SDValue N0 = N->getOperand(0);
18796 EVT Ty = N0.getValueType();
18797
18798 // LD/ST will optimize constant Offset extraction, so when AddNode is used by
18799 // LD/ST, it can still complete the folding optimization operation performed
18800 // above.
18801 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
18802 for (SDNode *Use : X->users()) {
18803 // This use is the one we're on right now. Skip it
18804 if (Use == User || Use->getOpcode() == ISD::SELECT)
18805 continue;
18806 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
18807 return false;
18808 }
18809 return true;
18810 };
18811
18812 if (Ty.isScalarInteger() &&
18813 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
18814 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
18815 return isUsedByLdSt(N0.getNode(), N);
18816
18817 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18818 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18819
18820 // Bail if we might break a sh{1,2,3}add pattern.
18821 if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
18822 C2->getZExtValue() <= 3 && N->hasOneUse() &&
18823 N->user_begin()->getOpcode() == ISD::ADD &&
18824 !isUsedByLdSt(*N->user_begin(), nullptr) &&
18825 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
18826 return false;
18827
18828 if (C1 && C2) {
18829 const APInt &C1Int = C1->getAPIntValue();
18830 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
18831
18832 // We can materialise `c1 << c2` into an add immediate, so it's "free",
18833 // and the combine should happen, to potentially allow further combines
18834 // later.
18835 if (ShiftedC1Int.getSignificantBits() <= 64 &&
18836 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
18837 return true;
18838
18839 // We can materialise `c1` in an add immediate, so it's "free", and the
18840 // combine should be prevented.
18841 if (C1Int.getSignificantBits() <= 64 &&
18843 return false;
18844
18845 // Neither constant will fit into an immediate, so find materialisation
18846 // costs.
18847 int C1Cost =
18848 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
18849 /*CompressionCost*/ true);
18850 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
18851 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
18852 /*CompressionCost*/ true);
18853
18854 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
18855 // combine should be prevented.
18856 if (C1Cost < ShiftedC1Cost)
18857 return false;
18858 }
18859 }
18860
18861 if (!N0->hasOneUse())
18862 return false;
18863
18864 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
18865 N0->getOperand(0)->getOpcode() == ISD::ADD &&
18866 !N0->getOperand(0)->hasOneUse())
18867 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
18868
18869 return true;
18870}
18871
18873 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
18874 TargetLoweringOpt &TLO) const {
18875 // Delay this optimization as late as possible.
18876 if (!TLO.LegalOps)
18877 return false;
18878
18879 EVT VT = Op.getValueType();
18880 if (VT.isVector())
18881 return false;
18882
18883 unsigned Opcode = Op.getOpcode();
18884 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
18885 return false;
18886
18887 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
18888 if (!C)
18889 return false;
18890
18891 const APInt &Mask = C->getAPIntValue();
18892
18893 // Clear all non-demanded bits initially.
18894 APInt ShrunkMask = Mask & DemandedBits;
18895
18896 // Try to make a smaller immediate by setting undemanded bits.
18897
18898 APInt ExpandedMask = Mask | ~DemandedBits;
18899
18900 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
18901 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
18902 };
18903 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
18904 if (NewMask == Mask)
18905 return true;
18906 SDLoc DL(Op);
18907 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
18908 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
18909 Op.getOperand(0), NewC);
18910 return TLO.CombineTo(Op, NewOp);
18911 };
18912
18913 // If the shrunk mask fits in sign extended 12 bits, let the target
18914 // independent code apply it.
18915 if (ShrunkMask.isSignedIntN(12))
18916 return false;
18917
18918 // And has a few special cases for zext.
18919 if (Opcode == ISD::AND) {
18920 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
18921 // otherwise use SLLI + SRLI.
18922 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
18923 if (IsLegalMask(NewMask))
18924 return UseMask(NewMask);
18925
18926 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
18927 if (VT == MVT::i64) {
18928 APInt NewMask = APInt(64, 0xffffffff);
18929 if (IsLegalMask(NewMask))
18930 return UseMask(NewMask);
18931 }
18932 }
18933
18934 // For the remaining optimizations, we need to be able to make a negative
18935 // number through a combination of mask and undemanded bits.
18936 if (!ExpandedMask.isNegative())
18937 return false;
18938
18939 // What is the fewest number of bits we need to represent the negative number.
18940 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
18941
18942 // Try to make a 12 bit negative immediate. If that fails try to make a 32
18943 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
18944 // If we can't create a simm12, we shouldn't change opaque constants.
18945 APInt NewMask = ShrunkMask;
18946 if (MinSignedBits <= 12)
18947 NewMask.setBitsFrom(11);
18948 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
18949 NewMask.setBitsFrom(31);
18950 else
18951 return false;
18952
18953 // Check that our new mask is a subset of the demanded mask.
18954 assert(IsLegalMask(NewMask));
18955 return UseMask(NewMask);
18956}
18957
// Emulate one generalized bit-reverse (GREV) or generalized bit-OR-combine
// (GORC) operation on a 64-bit value. Each set bit in \p ShAmt enables one
// butterfly stage: bit 0 swaps adjacent bits, bit 1 swaps 2-bit groups, up
// to bit 5 which swaps the 32-bit halves. For GORC the swapped value is
// OR'd with the original instead of replacing it. A control of 7 matches
// the ratified brev8 (GREV) and orc.b (GORC) instructions.
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
  // Per-stage butterfly masks selecting the "low" half of each group.
  static const uint64_t SwapMasks[6] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};

  for (unsigned Stage = 0; Stage < 6; ++Stage) {
    // Skip stages whose control bit isn't set.
    if (!((ShAmt >> Stage) & 1))
      continue;
    unsigned Shift = 1u << Stage;
    uint64_t Mask = SwapMasks[Stage];
    uint64_t Swapped = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
    // GORC keeps the original bits in addition to the swapped ones.
    x = IsGORC ? (x | Swapped) : Swapped;
  }

  return x;
}
18976
18978 KnownBits &Known,
18979 const APInt &DemandedElts,
18980 const SelectionDAG &DAG,
18981 unsigned Depth) const {
18982 unsigned BitWidth = Known.getBitWidth();
18983 unsigned Opc = Op.getOpcode();
18984 assert((Opc >= ISD::BUILTIN_OP_END ||
18985 Opc == ISD::INTRINSIC_WO_CHAIN ||
18986 Opc == ISD::INTRINSIC_W_CHAIN ||
18987 Opc == ISD::INTRINSIC_VOID) &&
18988 "Should use MaskedValueIsZero if you don't know whether Op"
18989 " is a target node!");
18990
18991 Known.resetAll();
18992 switch (Opc) {
18993 default: break;
18994 case RISCVISD::SELECT_CC: {
18995 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
18996 // If we don't know any bits, early out.
18997 if (Known.isUnknown())
18998 break;
18999 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
19000
19001 // Only known if known in both the LHS and RHS.
19002 Known = Known.intersectWith(Known2);
19003 break;
19004 }
19007 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
19008 // Result is either all zero or operand 0. We can propagate zeros, but not
19009 // ones.
19010 Known.One.clearAllBits();
19011 break;
19012 case RISCVISD::REMUW: {
19013 KnownBits Known2;
19014 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
19015 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
19016 // We only care about the lower 32 bits.
19017 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
19018 // Restore the original width by sign extending.
19019 Known = Known.sext(BitWidth);
19020 break;
19021 }
19022 case RISCVISD::DIVUW: {
19023 KnownBits Known2;
19024 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
19025 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
19026 // We only care about the lower 32 bits.
19027 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
19028 // Restore the original width by sign extending.
19029 Known = Known.sext(BitWidth);
19030 break;
19031 }
19032 case RISCVISD::SLLW: {
19033 KnownBits Known2;
19034 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
19035 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
19036 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
19037 // Restore the original width by sign extending.
19038 Known = Known.sext(BitWidth);
19039 break;
19040 }
19041 case RISCVISD::CTZW: {
19042 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
19043 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
19044 unsigned LowBits = llvm::bit_width(PossibleTZ);
19045 Known.Zero.setBitsFrom(LowBits);
19046 break;
19047 }
19048 case RISCVISD::CLZW: {
19049 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
19050 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
19051 unsigned LowBits = llvm::bit_width(PossibleLZ);
19052 Known.Zero.setBitsFrom(LowBits);
19053 break;
19054 }
19055 case RISCVISD::BREV8:
19056 case RISCVISD::ORC_B: {
19057 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
19058 // control value of 7 is equivalent to brev8 and orc.b.
19059 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
19060 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
19061 // To compute zeros, we need to invert the value and invert it back after.
19062 Known.Zero =
19063 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
19064 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
19065 break;
19066 }
19067 case RISCVISD::READ_VLENB: {
19068 // We can use the minimum and maximum VLEN values to bound VLENB. We
19069 // know VLEN must be a power of two.
19070 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
19071 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
19072 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
19073 Known.Zero.setLowBits(Log2_32(MinVLenB));
19074 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
19075 if (MaxVLenB == MinVLenB)
19076 Known.One.setBit(Log2_32(MinVLenB));
19077 break;
19078 }
19079 case RISCVISD::FCLASS: {
19080 // fclass will only set one of the low 10 bits.
19081 Known.Zero.setBitsFrom(10);
19082 break;
19083 }
19086 unsigned IntNo =
19087 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
19088 switch (IntNo) {
19089 default:
19090 // We can't do anything for most intrinsics.
19091 break;
19092 case Intrinsic::riscv_vsetvli:
19093 case Intrinsic::riscv_vsetvlimax: {
19094 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
19095 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
19096 RISCVII::VLMUL VLMUL =
19097 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
19098 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
19099 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
19100 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
19101 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
19102
19103 // Result of vsetvli must be not larger than AVL.
19104 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
19105 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
19106
19107 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
19108 if (BitWidth > KnownZeroFirstBit)
19109 Known.Zero.setBitsFrom(KnownZeroFirstBit);
19110 break;
19111 }
19112 }
19113 break;
19114 }
19115 }
19116}
19117
19119 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
19120 unsigned Depth) const {
19121 switch (Op.getOpcode()) {
19122 default:
19123 break;
19124 case RISCVISD::SELECT_CC: {
19125 unsigned Tmp =
19126 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
19127 if (Tmp == 1) return 1; // Early out.
19128 unsigned Tmp2 =
19129 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
19130 return std::min(Tmp, Tmp2);
19131 }
19134 // Output is either all zero or operand 0. We can propagate sign bit count
19135 // from operand 0.
19136 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
19137 case RISCVISD::ABSW: {
19138 // We expand this at isel to negw+max. The result will have 33 sign bits
19139 // if the input has at least 33 sign bits.
19140 unsigned Tmp =
19141 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
19142 if (Tmp < 33) return 1;
19143 return 33;
19144 }
19145 case RISCVISD::SLLW:
19146 case RISCVISD::SRAW:
19147 case RISCVISD::SRLW:
19148 case RISCVISD::DIVW:
19149 case RISCVISD::DIVUW:
19150 case RISCVISD::REMUW:
19151 case RISCVISD::ROLW:
19152 case RISCVISD::RORW:
19157 // TODO: As the result is sign-extended, this is conservatively correct. A
19158 // more precise answer could be calculated for SRAW depending on known
19159 // bits in the shift amount.
19160 return 33;
19161 case RISCVISD::VMV_X_S: {
19162 // The number of sign bits of the scalar result is computed by obtaining the
19163 // element type of the input vector operand, subtracting its width from the
19164 // XLEN, and then adding one (sign bit within the element type). If the
19165 // element type is wider than XLen, the least-significant XLEN bits are
19166 // taken.
19167 unsigned XLen = Subtarget.getXLen();
19168 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
19169 if (EltBits <= XLen)
19170 return XLen - EltBits + 1;
19171 break;
19172 }
19174 unsigned IntNo = Op.getConstantOperandVal(1);
19175 switch (IntNo) {
19176 default:
19177 break;
19178 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
19179 case Intrinsic::riscv_masked_atomicrmw_add_i64:
19180 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
19181 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
19182 case Intrinsic::riscv_masked_atomicrmw_max_i64:
19183 case Intrinsic::riscv_masked_atomicrmw_min_i64:
19184 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
19185 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
19186 case Intrinsic::riscv_masked_cmpxchg_i64:
19187 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
19188 // narrow atomic operation. These are implemented using atomic
19189 // operations at the minimum supported atomicrmw/cmpxchg width whose
19190 // result is then sign extended to XLEN. With +A, the minimum width is
19191 // 32 for both 64 and 32.
19192 assert(Subtarget.getXLen() == 64);
19194 assert(Subtarget.hasStdExtA());
19195 return 33;
19196 }
19197 break;
19198 }
19199 }
19200
19201 return 1;
19202}
19203
19205 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
19206 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
19207
19208 // TODO: Add more target nodes.
19209 switch (Op.getOpcode()) {
19211 // Integer select_cc cannot create poison.
19212 // TODO: What are the FP poison semantics?
19213 // TODO: This instruction blocks poison from the unselected operand, can
19214 // we do anything with that?
19215 return !Op.getValueType().isInteger();
19216 }
19218 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
19219}
19220
// Return the llvm::Constant that a load reads, when its address can be proven
// to be a plain (offset-0, untyped-flag) constant-pool entry. Two addressing
// shapes are recognized: a single RISCVISD::LLA of the pool entry, or a
// RISCVISD::HI / RISCVISD::ADD_LO pair whose halves carry MO_HI / MO_LO target
// flags and name the same pool entry. Returns nullptr otherwise.
// NOTE(review): the line carrying the qualified function name (orig 19222)
// was dropped by extraction.
19221const Constant *
19223  assert(Ld && "Unexpected null LoadSDNode");
19224  if (!ISD::isNormalLoad(Ld))
19225    return nullptr;
19226
19227  SDValue Ptr = Ld->getBasePtr();
19228
19229  // Only constant pools with no offset are supported.
19230  auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
19231    auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
19232    if (!CNode || CNode->isMachineConstantPoolEntry() ||
19233        CNode->getOffset() != 0)
19234      return nullptr;
19235
19236    return CNode;
19237  };
19238
19239  // Simple case, LLA.
19240  if (Ptr.getOpcode() == RISCVISD::LLA) {
19241    auto *CNode = GetSupportedConstantPool(Ptr);
19242    if (!CNode || CNode->getTargetFlags() != 0)
19243      return nullptr;
19244
19245    return CNode->getConstVal();
19246  }
19247
19248  // Look for a HI and ADD_LO pair.
19249  if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
19250      Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
19251    return nullptr;
19252
19253  auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
19254  auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
19255
19256  if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
19257      !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
19258    return nullptr;
19259
  // Both halves must address the same underlying constant.
19260  if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
19261    return nullptr;
19262
19263  return CNodeLo->getConstVal();
19264}
19265
// Expand the ReadCounterWide pseudo on RV32: read a 64-bit counter CSR as
// two 32-bit halves, looping until the two high-half reads agree (guards
// against the low half wrapping between reads). Returns the block containing
// the code after the pseudo.
// NOTE(review): extraction dropped the function header (orig 19266,
// static emitReadCounterWidePseudo(MachineInstr &MI, ...)) plus the
// declarations at orig 19284 (insertion iterator), 19295
// (transferSuccessorsAndUpdatePHIs), 19299 (MachineRegisterInfo &RegInfo)
// and 19307 (const TargetInstrInfo *TII) — confirm against the full source.
19267                                            MachineBasicBlock *BB) {
19268  assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
19269
19270  // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
19271  // Should the count have wrapped while it was being read, we need to try
19272  // again.
19273  // For example:
19274  // ```
19275  // read:
19276  // csrrs x3, counterh # load high word of counter
19277  // csrrs x2, counter # load low word of counter
19278  // csrrs x4, counterh # load high word of counter
19279  // bne x3, x4, read # check if high word reads match, otherwise try again
19280  // ```
19281
19282  MachineFunction &MF = *BB->getParent();
19283  const BasicBlock *LLVMBB = BB->getBasicBlock();
19285
19286  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
19287  MF.insert(It, LoopMBB);
19288
19289  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
19290  MF.insert(It, DoneMBB);
19291
19292  // Transfer the remainder of BB and its successor edges to DoneMBB.
19293  DoneMBB->splice(DoneMBB->begin(), BB,
19294                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
19296
19297  BB->addSuccessor(LoopMBB);
19298
19300  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19301  Register LoReg = MI.getOperand(0).getReg();
19302  Register HiReg = MI.getOperand(1).getReg();
19303  int64_t LoCounter = MI.getOperand(2).getImm();
19304  int64_t HiCounter = MI.getOperand(3).getImm();
19305  DebugLoc DL = MI.getDebugLoc();
19306
  // High, low, then high again — the high/high pair detects wrap-around.
19308  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
19309      .addImm(HiCounter)
19310      .addReg(RISCV::X0);
19311  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
19312      .addImm(LoCounter)
19313      .addReg(RISCV::X0);
19314  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
19315      .addImm(HiCounter)
19316      .addReg(RISCV::X0);
19317
  // Retry if the two high-word reads disagree.
19318  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
19319      .addReg(HiReg)
19320      .addReg(ReadAgainReg)
19321      .addMBB(LoopMBB);
19322
19323  LoopMBB->addSuccessor(LoopMBB);
19324  LoopMBB->addSuccessor(DoneMBB);
19325
19326  MI.eraseFromParent();
19327
19328  return DoneMBB;
19329}
19330
// Expand SplitF64Pseudo without D-in-GPR support: spill the FPR64 source to
// a dedicated stack slot, then reload its two 32-bit halves into the Lo/Hi
// GPR results with a pair of LW instructions (offsets 0 and 4).
// NOTE(review): extraction dropped the function header (orig 19331-19332),
// the TII/RI declarations (orig 19338-19339) and the MMOHi machine-memory
// operand construction (orig 19351-19353) — confirm against the full source.
19333                                             const RISCVSubtarget &Subtarget) {
19334  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
19335
19336  MachineFunction &MF = *BB->getParent();
19337  DebugLoc DL = MI.getDebugLoc();
19340  Register LoReg = MI.getOperand(0).getReg();
19341  Register HiReg = MI.getOperand(1).getReg();
19342  Register SrcReg = MI.getOperand(2).getReg();
19343
19344  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  // Reuse one per-function scratch slot for all F64 moves.
19345  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19346
19347  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
19348                          RI, Register());
19350  MachineMemOperand *MMOLo =
19354  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
19355      .addFrameIndex(FI)
19356      .addImm(0)
19357      .addMemOperand(MMOLo);
19358  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
19359      .addFrameIndex(FI)
19360      .addImm(4)
19361      .addMemOperand(MMOHi);
19362  MI.eraseFromParent(); // The pseudo instruction is gone now.
19363  return BB;
19364}
19365
// Expand BuildPairF64Pseudo: store the two 32-bit GPR halves to a stack slot
// with SW (offsets 0 and 4), then reload the combined value into the FPR64
// destination. Mirror image of emitSplitF64Pseudo.
// NOTE(review): extraction dropped the function header (orig 19366-19367),
// the TII/RI declarations (orig 19374-19375) and the MMOLo/MMOHi memory
// operand construction (orig 19383, 19385-19387) — confirm against the full
// source.
19369                                                 const RISCVSubtarget &Subtarget) {
19369  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
19370         "Unexpected instruction");
19371
19372  MachineFunction &MF = *BB->getParent();
19373  DebugLoc DL = MI.getDebugLoc();
19376  Register DstReg = MI.getOperand(0).getReg();
19377  Register LoReg = MI.getOperand(1).getReg();
19378  Register HiReg = MI.getOperand(2).getReg();
19379
19380  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  // Reuse one per-function scratch slot for all F64 moves.
19381  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19382
19384  MachineMemOperand *MMOLo =
19388  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19389      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
19390      .addFrameIndex(FI)
19391      .addImm(0)
19392      .addMemOperand(MMOLo);
19393  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19394      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
19395      .addFrameIndex(FI)
19396      .addImm(4)
19397      .addMemOperand(MMOHi);
19398  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
19399  MI.eraseFromParent(); // The pseudo instruction is gone now.
19400  return BB;
19401}
19402
// Return true if MI is one of the Select_*_Using_CC_* pseudos that
// emitSelectPseudo/EmitLoweredCascadedSelect know how to expand.
// NOTE(review): the function header line (orig 19403, static bool
// isSelectPseudo(MachineInstr &MI)) was dropped by extraction.
19404  switch (MI.getOpcode()) {
19405  default:
19406    return false;
19407  case RISCV::Select_GPR_Using_CC_GPR:
19408  case RISCV::Select_GPR_Using_CC_Imm:
19409  case RISCV::Select_FPR16_Using_CC_GPR:
19410  case RISCV::Select_FPR16INX_Using_CC_GPR:
19411  case RISCV::Select_FPR32_Using_CC_GPR:
19412  case RISCV::Select_FPR32INX_Using_CC_GPR:
19413  case RISCV::Select_FPR64_Using_CC_GPR:
19414  case RISCV::Select_FPR64INX_Using_CC_GPR:
19415  case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19416    return true;
19417  }
19418}
19419
// Expand a PseudoQuietFLE/FLT pseudo: perform the relational compare
// (RelOpcode) "quietly" by saving and restoring FFLAGS around it, then issue
// a dummy FEQ (EqOpcode, result discarded into X0) so signaling NaNs still
// raise the invalid-operation exception.
// NOTE(review): extraction dropped the function header (orig 19420), the
// MRI/TII declarations (orig 19427, 19429) and the flushPendingNops-style
// lines at orig 19437-19438 and 19448-19449 — confirm against the full
// source.
19421                                     unsigned RelOpcode, unsigned EqOpcode,
19422                                     const RISCVSubtarget &Subtarget) {
19423  DebugLoc DL = MI.getDebugLoc();
19424  Register DstReg = MI.getOperand(0).getReg();
19425  Register Src1Reg = MI.getOperand(1).getReg();
19426  Register Src2Reg = MI.getOperand(2).getReg();
19428  Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19430
19431  // Save the current FFLAGS.
19432  BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
19433
19434  auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
19435                 .addReg(Src1Reg)
19436                 .addReg(Src2Reg);
19439
19440  // Restore the FFLAGS.
19441  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19442      .addReg(SavedFFlags, RegState::Kill);
19443
19444  // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
19445  auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
19446                  .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
19447                  .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
19450
19451  // Erase the pseudoinstruction.
19452  MI.eraseFromParent();
19453  return BB;
19454}
19455
// Lower two nested (cascaded) Select_FPRX_ pseudos in one step, producing a
// single three-way PHI instead of two chained diamonds (see the CFG diagrams
// below). First is the inner select feeding operand 5 of Second.
19456static MachineBasicBlock *
19458                          MachineBasicBlock *ThisMBB,
19459                          const RISCVSubtarget &Subtarget) {
19460  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
19461  // Without this, custom-inserter would have generated:
19462  //
19463  //   A
19464  //   | \
19465  //   |  B
19466  //   | /
19467  //   C
19468  //   | \
19469  //   |  D
19470  //   | /
19471  //   E
19472  //
19473  // A: X = ...; Y = ...
19474  // B: empty
19475  // C: Z = PHI [X, A], [Y, B]
19476  // D: empty
19477  // E: PHI [X, C], [Z, D]
19478  //
19479  // If we lower both Select_FPRX_ in a single step, we can instead generate:
19480  //
19481  //   A
19482  //   | \
19483  //   |  C
19484  //   | /|
19485  //   |/ |
19486  //   |  |
19487  //   |  D
19488  //   | /
19489  //   E
19490  //
19491  // A: X = ...; Y = ...
19492  // D: empty
19493  // E: PHI [X, A], [X, C], [Y, D]
19494
19495  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19496  const DebugLoc &DL = First.getDebugLoc();
19497  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
19498  MachineFunction *F = ThisMBB->getParent();
19499  MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
19500  MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
19501  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
19502  MachineFunction::iterator It = ++ThisMBB->getIterator();
19503  F->insert(It, FirstMBB);
19504  F->insert(It, SecondMBB);
19505  F->insert(It, SinkMBB);
19506
19507  // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
19508  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
19510                  ThisMBB->end());
19511  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
19512
19513  // Fallthrough block for ThisMBB.
19514  ThisMBB->addSuccessor(FirstMBB);
19515  // Fallthrough block for FirstMBB.
19516  FirstMBB->addSuccessor(SecondMBB);
19517  ThisMBB->addSuccessor(SinkMBB);
19518  FirstMBB->addSuccessor(SinkMBB);
19519  // This is fallthrough.
19520  SecondMBB->addSuccessor(SinkMBB);
19521
  // Branch for the inner (First) select, emitted in FirstMBB.
19522  auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
19523  Register FLHS = First.getOperand(1).getReg();
19524  Register FRHS = First.getOperand(2).getReg();
19525  // Insert appropriate branch.
19526  BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
19527      .addReg(FLHS)
19528      .addReg(FRHS)
19529      .addMBB(SinkMBB);
19530
19531  Register SLHS = Second.getOperand(1).getReg();
19532  Register SRHS = Second.getOperand(2).getReg();
19533  Register Op1Reg4 = First.getOperand(4).getReg();
19534  Register Op1Reg5 = First.getOperand(5).getReg();
19535
  // Branch for the outer (Second) select, emitted in the original block.
19536  auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
19537  // Insert appropriate branch.
19538  BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
19539      .addReg(SLHS)
19540      .addReg(SRHS)
19541      .addMBB(SinkMBB);
19542
19543  Register DestReg = Second.getOperand(0).getReg();
19544  Register Op2Reg4 = Second.getOperand(4).getReg();
  // Three-way merge: E: PHI [X, A], [X, C], [Y, D] from the diagram above.
19545  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
19546      .addReg(Op2Reg4)
19547      .addMBB(ThisMBB)
19548      .addReg(Op1Reg4)
19549      .addMBB(FirstMBB)
19550      .addReg(Op1Reg5)
19551      .addMBB(SecondMBB);
19552
19553  // Now remove the Select_FPRX_s.
19554  First.eraseFromParent();
19555  Second.eraseFromParent();
19556  return SinkMBB;
19557}
19558
// Expand a Select_*_Using_CC_* pseudo (and any immediately-following selects
// that share the same LHS/RHS/CC) into a branch diamond plus PHIs in the tail
// block. Cascaded FP selects are delegated to EmitLoweredCascadedSelect.
// NOTE(review): extraction dropped the function header (orig 19559-19560)
// and the insertion-iterator declaration at orig 19641 — confirm against the
// full source.
19561                                           const RISCVSubtarget &Subtarget) {
19562  // To "insert" Select_* instructions, we actually have to insert the triangle
19563  // control-flow pattern.  The incoming instructions know the destination vreg
19564  // to set, the condition code register to branch on, the true/false values to
19565  // select between, and the condcode to use to select the appropriate branch.
19566  //
19567  // We produce the following control flow:
19568  //     HeadMBB
19569  //     |  \
19570  //     |  IfFalseMBB
19571  //     | /
19572  //    TailMBB
19573  //
19574  // When we find a sequence of selects we attempt to optimize their emission
19575  // by sharing the control flow. Currently we only handle cases where we have
19576  // multiple selects with the exact same condition (same LHS, RHS and CC).
19577  // The selects may be interleaved with other instructions if the other
19578  // instructions meet some requirements we deem safe:
19579  // - They are not pseudo instructions.
19580  // - They are debug instructions. Otherwise,
19581  // - They do not have side-effects, do not access memory and their inputs do
19582  //   not depend on the results of the select pseudo-instructions.
19583  // The TrueV/FalseV operands of the selects cannot depend on the result of
19584  // previous selects in the sequence.
19585  // These conditions could be further relaxed. See the X86 target for a
19586  // related approach and more information.
19587  //
19588  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19589  // is checked here and handled by a separate function -
19590  // EmitLoweredCascadedSelect.
19591
  // Only FP selects are eligible for the cascaded form; GPR selects are
  // excluded by the first two opcode checks.
19592  auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
19593  if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
19594       MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
19595      Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
19596      Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
19597      Next->getOperand(5).isKill())
19598    return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
19599
19600  Register LHS = MI.getOperand(1).getReg();
19601  Register RHS;
19602  if (MI.getOperand(2).isReg())
19603    RHS = MI.getOperand(2).getReg();
19604  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
19605
19606  SmallVector<MachineInstr *, 4> SelectDebugValues;
19607  SmallSet<Register, 4> SelectDests;
19608  SelectDests.insert(MI.getOperand(0).getReg());
19609
  // Scan forward, accumulating selects with identical conditions; stop at
  // anything unsafe to sink past.
19610  MachineInstr *LastSelectPseudo = &MI;
19611  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
19612       SequenceMBBI != E; ++SequenceMBBI) {
19613    if (SequenceMBBI->isDebugInstr())
19614      continue;
19615    if (isSelectPseudo(*SequenceMBBI)) {
19616      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
19617          !SequenceMBBI->getOperand(2).isReg() ||
19618          SequenceMBBI->getOperand(2).getReg() != RHS ||
19619          SequenceMBBI->getOperand(3).getImm() != CC ||
19620          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
19621          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
19622        break;
19623      LastSelectPseudo = &*SequenceMBBI;
19624      SequenceMBBI->collectDebugValues(SelectDebugValues);
19625      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
19626      continue;
19627    }
19628    if (SequenceMBBI->hasUnmodeledSideEffects() ||
19629        SequenceMBBI->mayLoadOrStore() ||
19630        SequenceMBBI->usesCustomInsertionHook())
19631      break;
19632    if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
19633          return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
19634        }))
19635      break;
19636  }
19637
19638  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19639  const BasicBlock *LLVM_BB = BB->getBasicBlock();
19640  DebugLoc DL = MI.getDebugLoc();
19642
19643  MachineBasicBlock *HeadMBB = BB;
19644  MachineFunction *F = BB->getParent();
19645  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
19646  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
19647
19648  F->insert(I, IfFalseMBB);
19649  F->insert(I, TailMBB);
19650
19651  // Set the call frame size on entry to the new basic blocks.
19652  unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
19653  IfFalseMBB->setCallFrameSize(CallFrameSize);
19654  TailMBB->setCallFrameSize(CallFrameSize);
19655
19656  // Transfer debug instructions associated with the selects to TailMBB.
19657  for (MachineInstr *DebugInstr : SelectDebugValues) {
19658    TailMBB->push_back(DebugInstr->removeFromParent());
19659  }
19660
19661  // Move all instructions after the sequence to TailMBB.
19662  TailMBB->splice(TailMBB->end(), HeadMBB,
19663                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
19664  // Update machine-CFG edges by transferring all successors of the current
19665  // block to the new block which will contain the Phi nodes for the selects.
19666  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
19667  // Set the successors for HeadMBB.
19668  HeadMBB->addSuccessor(IfFalseMBB);
19669  HeadMBB->addSuccessor(TailMBB);
19670
19671  // Insert appropriate branch.
19672  if (MI.getOperand(2).isImm())
19673    BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
19674        .addReg(LHS)
19675        .addImm(MI.getOperand(2).getImm())
19676        .addMBB(TailMBB);
19677  else
19678    BuildMI(HeadMBB, DL, TII.getBrCond(CC))
19679        .addReg(LHS)
19680        .addReg(RHS)
19681        .addMBB(TailMBB);
19682
19683  // IfFalseMBB just falls through to TailMBB.
19684  IfFalseMBB->addSuccessor(TailMBB);
19685
19686  // Create PHIs for all of the select pseudo-instructions.
19687  auto SelectMBBI = MI.getIterator();
19688  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
19689  auto InsertionPoint = TailMBB->begin();
19690  while (SelectMBBI != SelectEnd) {
19691    auto Next = std::next(SelectMBBI);
19692    if (isSelectPseudo(*SelectMBBI)) {
19693      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
19694      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
19695              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
19696          .addReg(SelectMBBI->getOperand(4).getReg())
19697          .addMBB(HeadMBB)
19698          .addReg(SelectMBBI->getOperand(5).getReg())
19699          .addMBB(IfFalseMBB);
19700      SelectMBBI->eraseFromParent();
19701    }
19702    SelectMBBI = Next;
19703  }
19704
  // We introduced PHIs, so the function no longer satisfies NoPHIs.
19705  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
19706  return TailMBB;
19707}
19708
19709// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
// Maps (MC opcode, LMUL, SEW) -> unmasked base pseudo -> its masked variant.
// Both table lookups are expected to succeed for any combination this file
// queries; failures are programmer errors (hence the asserts).
19710static const RISCV::RISCVMaskedPseudoInfo *
19711lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
19713      RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
19714  assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
19716      RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
19717  assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
19718  return Masked;
19719}
19720
// Expand a PseudoVFROUND_NOEXCEPT_V_*_MASK: round each element to integer
// and back (VFCVT_X_F then VFCVT_F_X, both with frm = DYN) while saving and
// restoring FFLAGS so the sequence raises no FP exceptions.
// NOTE(review): extraction dropped the function header (orig 19721-19722)
// and the TII/MRI declarations at orig 19726 and 19728 — confirm against
// the full source.
19723                                          unsigned CVTXOpc) {
19724  DebugLoc DL = MI.getDebugLoc();
19725
19727
19729  Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19730
19731  // Save the old value of FFLAGS.
19732  BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
19733
19734  assert(MI.getNumOperands() == 7);
19735
19736  // Emit a VFCVT_X_F
19737  const TargetRegisterInfo *TRI =
19739  const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
19740  Register Tmp = MRI.createVirtualRegister(RC);
19741  BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
19742      .add(MI.getOperand(1))
19743      .add(MI.getOperand(2))
19744      .add(MI.getOperand(3))
19745      .add(MachineOperand::CreateImm(7)) // frm = DYN
19746      .add(MI.getOperand(4))
19747      .add(MI.getOperand(5))
19748      .add(MI.getOperand(6))
19749      .add(MachineOperand::CreateReg(RISCV::FRM,
19750                                     /*IsDef*/ false,
19751                                     /*IsImp*/ true));
19752
19753  // Emit a VFCVT_F_X
19754  RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
19755  unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
19756  // There is no E8 variant for VFCVT_F_X.
19757  assert(Log2SEW >= 4);
19758  unsigned CVTFOpc =
19759      lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
19760          ->MaskedPseudo;
19761
19762  BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
19763      .add(MI.getOperand(0))
19764      .add(MI.getOperand(1))
19765      .addReg(Tmp)
19766      .add(MI.getOperand(3))
19767      .add(MachineOperand::CreateImm(7)) // frm = DYN
19768      .add(MI.getOperand(4))
19769      .add(MI.getOperand(5))
19770      .add(MI.getOperand(6))
19771      .add(MachineOperand::CreateReg(RISCV::FRM,
19772                                     /*IsDef*/ false,
19773                                     /*IsImp*/ true));
19774
19775  // Restore FFLAGS.
19776  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19777      .addReg(SavedFFLAGS, RegState::Kill);
19778
19779  // Erase the pseudoinstruction.
19780  MI.eraseFromParent();
19781  return BB;
19782}
19783
// Expand a PseudoFROUND_* pseudo: round a scalar FP value to an integral FP
// value using the pseudo's rounding-mode immediate. If |Src| >= the supplied
// max (the smallest value with no fractional bits), the input is already
// integral (or NaN/inf) and is passed through unchanged; otherwise convert
// FP -> int -> FP and restore the original sign with FSGNJ.
// NOTE(review): extraction dropped the function header (orig 19784) and the
// MachineFunction/iterator declarations at orig 19845, 19847, plus several
// flush/MMO lines (orig 19858, 19869, 19878-19879, 19892-19893, 19898-19899)
// — confirm against the full source.
19785                                    const RISCVSubtarget &Subtarget) {
19786  unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
19787  const TargetRegisterClass *RC;
  // Pick the per-type opcodes; D variants need 64-bit GPRs for FCVT_L_D.
19788  switch (MI.getOpcode()) {
19789  default:
19790    llvm_unreachable("Unexpected opcode");
19791  case RISCV::PseudoFROUND_H:
19792    CmpOpc = RISCV::FLT_H;
19793    F2IOpc = RISCV::FCVT_W_H;
19794    I2FOpc = RISCV::FCVT_H_W;
19795    FSGNJOpc = RISCV::FSGNJ_H;
19796    FSGNJXOpc = RISCV::FSGNJX_H;
19797    RC = &RISCV::FPR16RegClass;
19798    break;
19799  case RISCV::PseudoFROUND_H_INX:
19800    CmpOpc = RISCV::FLT_H_INX;
19801    F2IOpc = RISCV::FCVT_W_H_INX;
19802    I2FOpc = RISCV::FCVT_H_W_INX;
19803    FSGNJOpc = RISCV::FSGNJ_H_INX;
19804    FSGNJXOpc = RISCV::FSGNJX_H_INX;
19805    RC = &RISCV::GPRF16RegClass;
19806    break;
19807  case RISCV::PseudoFROUND_S:
19808    CmpOpc = RISCV::FLT_S;
19809    F2IOpc = RISCV::FCVT_W_S;
19810    I2FOpc = RISCV::FCVT_S_W;
19811    FSGNJOpc = RISCV::FSGNJ_S;
19812    FSGNJXOpc = RISCV::FSGNJX_S;
19813    RC = &RISCV::FPR32RegClass;
19814    break;
19815  case RISCV::PseudoFROUND_S_INX:
19816    CmpOpc = RISCV::FLT_S_INX;
19817    F2IOpc = RISCV::FCVT_W_S_INX;
19818    I2FOpc = RISCV::FCVT_S_W_INX;
19819    FSGNJOpc = RISCV::FSGNJ_S_INX;
19820    FSGNJXOpc = RISCV::FSGNJX_S_INX;
19821    RC = &RISCV::GPRF32RegClass;
19822    break;
19823  case RISCV::PseudoFROUND_D:
19824    assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19825    CmpOpc = RISCV::FLT_D;
19826    F2IOpc = RISCV::FCVT_L_D;
19827    I2FOpc = RISCV::FCVT_D_L;
19828    FSGNJOpc = RISCV::FSGNJ_D;
19829    FSGNJXOpc = RISCV::FSGNJX_D;
19830    RC = &RISCV::FPR64RegClass;
19831    break;
19832  case RISCV::PseudoFROUND_D_INX:
19833    assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19834    CmpOpc = RISCV::FLT_D_INX;
19835    F2IOpc = RISCV::FCVT_L_D_INX;
19836    I2FOpc = RISCV::FCVT_D_L_INX;
19837    FSGNJOpc = RISCV::FSGNJ_D_INX;
19838    FSGNJXOpc = RISCV::FSGNJX_D_INX;
19839    RC = &RISCV::GPRRegClass;
19840    break;
19841  }
19842
19843  const BasicBlock *BB = MBB->getBasicBlock();
19844  DebugLoc DL = MI.getDebugLoc();
19846
19848  MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
19849  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
19850
19851  F->insert(I, CvtMBB);
19852  F->insert(I, DoneMBB);
19853  // Move all instructions after the sequence to DoneMBB.
19854  DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
19855                  MBB->end());
19856  // Update machine-CFG edges by transferring all successors of the current
19857  // block to the new block which will contain the Phi nodes for the selects.
19859  // Set the successors for MBB.
19860  MBB->addSuccessor(CvtMBB);
19861  MBB->addSuccessor(DoneMBB);
19862
19863  Register DstReg = MI.getOperand(0).getReg();
19864  Register SrcReg = MI.getOperand(1).getReg();
19865  Register MaxReg = MI.getOperand(2).getReg();
19866  int64_t FRM = MI.getOperand(3).getImm();
19867
19868  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19870
  // |Src| via FSGNJX with itself (fabs).
19871  Register FabsReg = MRI.createVirtualRegister(RC);
19872  BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
19873
19874  // Compare the FP value to the max value.
19875  Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19876  auto MIB =
19877      BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
19880
19881  // Insert branch.
  // CmpReg == 0 means |Src| >= Max (or unordered): skip the conversion.
19882  BuildMI(MBB, DL, TII.get(RISCV::BEQ))
19883      .addReg(CmpReg)
19884      .addReg(RISCV::X0)
19885      .addMBB(DoneMBB);
19886
19887  CvtMBB->addSuccessor(DoneMBB);
19888
19889  // Convert to integer.
19890  Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19891  MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
19894
19895  // Convert back to FP.
19896  Register I2FReg = MRI.createVirtualRegister(RC);
19897  MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
19900
19901  // Restore the sign bit.
  // Needed so e.g. -0.4 rounds to -0.0, not +0.0.
19902  Register CvtReg = MRI.createVirtualRegister(RC);
19903  BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
19904
19905  // Merge the results.
19906  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
19907      .addReg(SrcReg)
19908      .addMBB(MBB)
19909      .addReg(CvtReg)
19910      .addMBB(CvtMBB);
19911
19912  MI.eraseFromParent();
19913  return DoneMBB;
19914}
19915
// Central dispatcher for pseudos marked usesCustomInserter: routes each
// opcode to its dedicated expansion helper defined above.
// NOTE(review): extraction dropped the first signature lines (orig
// 19916-19917, MachineBasicBlock *RISCVTargetLowering::
// EmitInstrWithCustomInserter(MachineInstr &MI, ...)).
19918                                                 MachineBasicBlock *BB) const {
19919  switch (MI.getOpcode()) {
19920  default:
19921    llvm_unreachable("Unexpected instr type to insert");
19922  case RISCV::ReadCounterWide:
19923    assert(!Subtarget.is64Bit() &&
19924           "ReadCounterWide is only to be used on riscv32");
19925    return emitReadCounterWidePseudo(MI, BB);
19926  case RISCV::Select_GPR_Using_CC_GPR:
19927  case RISCV::Select_GPR_Using_CC_Imm:
19928  case RISCV::Select_FPR16_Using_CC_GPR:
19929  case RISCV::Select_FPR16INX_Using_CC_GPR:
19930  case RISCV::Select_FPR32_Using_CC_GPR:
19931  case RISCV::Select_FPR32INX_Using_CC_GPR:
19932  case RISCV::Select_FPR64_Using_CC_GPR:
19933  case RISCV::Select_FPR64INX_Using_CC_GPR:
19934  case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19935    return emitSelectPseudo(MI, BB, Subtarget);
19936  case RISCV::BuildPairF64Pseudo:
19937    return emitBuildPairF64Pseudo(MI, BB, Subtarget);
19938  case RISCV::SplitF64Pseudo:
19939    return emitSplitF64Pseudo(MI, BB, Subtarget);
19940  case RISCV::PseudoQuietFLE_H:
19941    return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
19942  case RISCV::PseudoQuietFLE_H_INX:
19943    return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
19944  case RISCV::PseudoQuietFLT_H:
19945    return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
19946  case RISCV::PseudoQuietFLT_H_INX:
19947    return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
19948  case RISCV::PseudoQuietFLE_S:
19949    return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
19950  case RISCV::PseudoQuietFLE_S_INX:
19951    return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
19952  case RISCV::PseudoQuietFLT_S:
19953    return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
19954  case RISCV::PseudoQuietFLT_S_INX:
19955    return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
19956  case RISCV::PseudoQuietFLE_D:
19957    return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
19958  case RISCV::PseudoQuietFLE_D_INX:
19959    return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
19960  case RISCV::PseudoQuietFLE_D_IN32X:
19961    return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
19962                         Subtarget);
19963  case RISCV::PseudoQuietFLT_D:
19964    return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
19965  case RISCV::PseudoQuietFLT_D_INX:
19966    return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
19967  case RISCV::PseudoQuietFLT_D_IN32X:
19968    return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
19969                         Subtarget);
19970
19971  case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
19972    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
19973  case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
19974    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
19975  case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
19976    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
19977  case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
19978    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
19979  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
19980    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
19981  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
19982    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
19983  case RISCV::PseudoFROUND_H:
19984  case RISCV::PseudoFROUND_H_INX:
19985  case RISCV::PseudoFROUND_S:
19986  case RISCV::PseudoFROUND_S_INX:
19987  case RISCV::PseudoFROUND_D:
19988  case RISCV::PseudoFROUND_D_INX:
19989  case RISCV::PseudoFROUND_D_IN32X:
19990    return emitFROUND(MI, BB, Subtarget);
19991  case RISCV::PROBED_STACKALLOC_DYN:
19992    return emitDynamicProbedAlloc(MI, BB);
19993  case TargetOpcode::STATEPOINT:
19994    // STATEPOINT is a pseudo instruction which has no implicit defs/uses
19995    // while jal call instruction (where statepoint will be lowered at the end)
19996    // has implicit def. This def is early-clobber as it will be set at
19997    // the moment of the call and earlier than any use is read.
19998    // Add this implicit dead def here as a workaround.
19999    MI.addOperand(*MI.getMF(),
20001                      RISCV::X1, /*isDef*/ true,
20002                      /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
20003                      /*isUndef*/ false, /*isEarlyClobber*/ true));
20004    [[fallthrough]];
20005  case TargetOpcode::STACKMAP:
20006  case TargetOpcode::PATCHPOINT:
20007    if (!Subtarget.is64Bit())
20008      report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
20009                         "supported on 64-bit targets");
20010    return emitPatchPoint(MI, BB);
20011  }
20012}
20013
// Post-isel hook: if MI uses the dynamic rounding mode (frm operand equals
// DYN), add an implicit use of the FRM CSR so later passes model the
// dependency. Scalar pseudos expose frm as a named operand; vector pseudos
// locate it via TSFlags.
// NOTE(review): the signature line (orig 20014, void RISCVTargetLowering::
// AdjustInstrPostInstrSelection(MachineInstr &MI, ...)) was dropped by
// extraction.
20015                                                        SDNode *Node) const {
20016  // Add FRM dependency to any instructions with dynamic rounding mode.
20017  int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
20018  if (Idx < 0) {
20019    // Vector pseudos have FRM index indicated by TSFlags.
20020    Idx = RISCVII::getFRMOpNum(MI.getDesc());
20021    if (Idx < 0)
20022      return;
20023  }
20024  if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
20025    return;
20026  // If the instruction already reads FRM, don't add another read.
20027  if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
20028    return;
20029  MI.addOperand(
20030      MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
20031}
20032
// Run the calling-convention assignment function Fn over every incoming
// argument (or, when IsRet, over return values), recording locations in
// CCInfo. An assignment failure is a fatal internal error.
// NOTE(review): the declaration of FType (orig 20038, presumably the
// function type fetched from MF) was dropped by extraction — confirm
// against the full source.
20033void RISCVTargetLowering::analyzeInputArgs(
20034    MachineFunction &MF, CCState &CCInfo,
20035    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
20036    RISCVCCAssignFn Fn) const {
20037  unsigned NumArgs = Ins.size();
20039
20040  for (unsigned i = 0; i != NumArgs; ++i) {
20041    MVT ArgVT = Ins[i].VT;
20042    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
20043
    // Recover the IR type when available; it can affect ABI decisions.
20044    Type *ArgTy = nullptr;
20045    if (IsRet)
20046      ArgTy = FType->getReturnType();
20047    else if (Ins[i].isOrigArg())
20048      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
20049
20050    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
20051           /*IsFixed=*/true, IsRet, ArgTy)) {
20052      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
20053                        << ArgVT << '\n');
20054      llvm_unreachable(nullptr);
20055    }
20056  }
20057}
20058
// Run the calling-convention assignment function Fn over every outgoing
// argument (or, when IsRet, over values being returned), recording locations
// in CCInfo. CLI, when non-null, supplies the original IR argument types.
// An assignment failure is a fatal internal error.
20059void RISCVTargetLowering::analyzeOutputArgs(
20060    MachineFunction &MF, CCState &CCInfo,
20061    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
20062    CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
20063  unsigned NumArgs = Outs.size();
20064
20065  for (unsigned i = 0; i != NumArgs; i++) {
20066    MVT ArgVT = Outs[i].VT;
20067    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20068    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
20069
20070    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
20071           Outs[i].IsFixed, IsRet, OrigTy)) {
20072      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
20073                        << ArgVT << "\n");
20074      llvm_unreachable(nullptr);
20075    }
20076  }
20077}
20078
20079// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
20080// values.
// Handles the RISC-V custom location conventions: f16/bf16 passed in an
// integer register (FMV_H_X), f32 passed in an i64 register on RV64
// (FMV_W_X_RV64), and fixed-length vectors passed as scalable vectors.
// NOTE(review): the condition line at orig 20090 (presumably the
// fixed-length-vector check guarding convertFromScalableVector) was dropped
// by extraction — confirm against the full source.
20081                                   const CCValAssign &VA, const SDLoc &DL,
20082                                   const RISCVSubtarget &Subtarget) {
20083  if (VA.needsCustom()) {
20084    if (VA.getLocVT().isInteger() &&
20085        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
20086      return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
20087    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
20088      return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
20090      return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
20091    llvm_unreachable("Unexpected Custom handling.");
20092  }
20093
20094  switch (VA.getLocInfo()) {
20095  default:
20096    llvm_unreachable("Unexpected CCValAssign::LocInfo");
20097  case CCValAssign::Full:
20098    break;
20099  case CCValAssign::BCvt:
20100    Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
20101    break;
20102  }
20103  return Val;
20104}
20106
20107// The caller is responsible for loading the full value if the argument is
20108// passed with CCValAssign::Indirect.
// Copy an incoming argument out of its assigned physical register into a
// fresh virtual register, note sign-extension facts for the SExtWRemoval
// pass, and convert the location type back to the value type.
// NOTE(review): extraction dropped the function header (orig 20109), the
// MF/RegInfo declarations (orig 20113-20114) and the Indirect early-return
// check at orig 20136 — confirm against the full source.
20110                                const CCValAssign &VA, const SDLoc &DL,
20111                                const ISD::InputArg &In,
20112                                const RISCVTargetLowering &TLI) {
20115  EVT LocVT = VA.getLocVT();
20116  SDValue Val;
20117  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
20118  Register VReg = RegInfo.createVirtualRegister(RC);
20119  RegInfo.addLiveIn(VA.getLocReg(), VReg);
20120  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
20121
20122  // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
20123  if (In.isOrigArg()) {
20124    Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
20125    if (OrigArg->getType()->isIntegerTy()) {
20126      unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
20127      // An input zero extended from i31 can also be considered sign extended.
20128      if ((BitWidth <= 32 && In.Flags.isSExt()) ||
20129          (BitWidth < 32 && In.Flags.isZExt())) {
20131        RVFI->addSExt32Register(VReg);
20132      }
20133    }
20134  }
20135
20137    return Val;
20138
20139  return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
20140}
20141
20143 const CCValAssign &VA, const SDLoc &DL,
20144 const RISCVSubtarget &Subtarget) {
20145 EVT LocVT = VA.getLocVT();
20146
20147 if (VA.needsCustom()) {
20148 if (LocVT.isInteger() &&
20149 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
20150 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
20151 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
20152 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
20153 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
20154 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
20155 llvm_unreachable("Unexpected Custom handling.");
20156 }
20157
20158 switch (VA.getLocInfo()) {
20159 default:
20160 llvm_unreachable("Unexpected CCValAssign::LocInfo");
20161 case CCValAssign::Full:
20162 break;
20163 case CCValAssign::BCvt:
20164 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
20165 break;
20166 }
20167 return Val;
20168}
20169
20170// The caller is responsible for loading the full value if the argument is
20171// passed with CCValAssign::Indirect.
20173 const CCValAssign &VA, const SDLoc &DL) {
20175 MachineFrameInfo &MFI = MF.getFrameInfo();
20176 EVT LocVT = VA.getLocVT();
20177 EVT ValVT = VA.getValVT();
20179 if (VA.getLocInfo() == CCValAssign::Indirect) {
20180 // When the value is a scalable vector, we save the pointer which points to
20181 // the scalable vector value in the stack. The ValVT will be the pointer
20182 // type, instead of the scalable vector type.
20183 ValVT = LocVT;
20184 }
20185 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
20186 /*IsImmutable=*/true);
20187 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20188 SDValue Val;
20189
20191 switch (VA.getLocInfo()) {
20192 default:
20193 llvm_unreachable("Unexpected CCValAssign::LocInfo");
20194 case CCValAssign::Full:
20196 case CCValAssign::BCvt:
20197 break;
20198 }
20199 Val = DAG.getExtLoad(
20200 ExtType, DL, LocVT, Chain, FIN,
20202 return Val;
20203}
20204
20206 const CCValAssign &VA,
20207 const CCValAssign &HiVA,
20208 const SDLoc &DL) {
20209 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
20210 "Unexpected VA");
20212 MachineFrameInfo &MFI = MF.getFrameInfo();
20214
20215 assert(VA.isRegLoc() && "Expected register VA assignment");
20216
20217 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20218 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
20219 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
20220 SDValue Hi;
20221 if (HiVA.isMemLoc()) {
20222 // Second half of f64 is passed on the stack.
20223 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
20224 /*IsImmutable=*/true);
20225 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
20226 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
20228 } else {
20229 // Second half of f64 is passed in another GPR.
20230 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20231 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
20232 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
20233 }
20234 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
20235}
20236
20237// Transform physical registers into virtual registers.
20239 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
20240 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
20241 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
20242
20244
20245 switch (CallConv) {
20246 default:
20247 report_fatal_error("Unsupported calling convention");
20248 case CallingConv::C:
20249 case CallingConv::Fast:
20251 case CallingConv::GRAAL:
20253 break;
20254 case CallingConv::GHC:
20255 if (Subtarget.hasStdExtE())
20256 report_fatal_error("GHC calling convention is not supported on RVE!");
20257 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
20258 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
20259 "(Zdinx/D) instruction set extensions");
20260 }
20261
20262 const Function &Func = MF.getFunction();
20263 if (Func.hasFnAttribute("interrupt")) {
20264 if (!Func.arg_empty())
20266 "Functions with the interrupt attribute cannot have arguments!");
20267
20268 StringRef Kind =
20269 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20270
20271 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
20273 "Function interrupt attribute argument not supported!");
20274 }
20275
20276 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20277 MVT XLenVT = Subtarget.getXLenVT();
20278 unsigned XLenInBytes = Subtarget.getXLen() / 8;
20279 // Used with vargs to acumulate store chains.
20280 std::vector<SDValue> OutChains;
20281
20282 // Assign locations to all of the incoming arguments.
20284 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20285
20286 if (CallConv == CallingConv::GHC)
20288 else
20289 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
20291 : CC_RISCV);
20292
20293 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
20294 CCValAssign &VA = ArgLocs[i];
20295 SDValue ArgValue;
20296 // Passing f64 on RV32D with a soft float ABI must be handled as a special
20297 // case.
20298 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20299 assert(VA.needsCustom());
20300 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
20301 } else if (VA.isRegLoc())
20302 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
20303 else
20304 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
20305
20306 if (VA.getLocInfo() == CCValAssign::Indirect) {
20307 // If the original argument was split and passed by reference (e.g. i128
20308 // on RV32), we need to load all parts of it here (using the same
20309 // address). Vectors may be partly split to registers and partly to the
20310 // stack, in which case the base address is partly offset and subsequent
20311 // stores are relative to that.
20312 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
20314 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
20315 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
20316 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20317 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
20318 CCValAssign &PartVA = ArgLocs[i + 1];
20319 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
20320 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20321 if (PartVA.getValVT().isScalableVector())
20322 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20323 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
20324 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
20326 ++i;
20327 ++InsIdx;
20328 }
20329 continue;
20330 }
20331 InVals.push_back(ArgValue);
20332 }
20333
20334 if (any_of(ArgLocs,
20335 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20336 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20337
20338 if (IsVarArg) {
20339 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
20340 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
20341 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
20342 MachineFrameInfo &MFI = MF.getFrameInfo();
20343 MachineRegisterInfo &RegInfo = MF.getRegInfo();
20345
20346 // Size of the vararg save area. For now, the varargs save area is either
20347 // zero or large enough to hold a0-a7.
20348 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
20349 int FI;
20350
20351 // If all registers are allocated, then all varargs must be passed on the
20352 // stack and we don't need to save any argregs.
20353 if (VarArgsSaveSize == 0) {
20354 int VaArgOffset = CCInfo.getStackSize();
20355 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
20356 } else {
20357 int VaArgOffset = -VarArgsSaveSize;
20358 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
20359
20360 // If saving an odd number of registers then create an extra stack slot to
20361 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
20362 // offsets to even-numbered registered remain 2*XLEN-aligned.
20363 if (Idx % 2) {
20365 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
20366 VarArgsSaveSize += XLenInBytes;
20367 }
20368
20369 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20370
20371 // Copy the integer registers that may have been used for passing varargs
20372 // to the vararg save area.
20373 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
20374 const Register Reg = RegInfo.createVirtualRegister(RC);
20375 RegInfo.addLiveIn(ArgRegs[I], Reg);
20376 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
20377 SDValue Store = DAG.getStore(
20378 Chain, DL, ArgValue, FIN,
20379 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
20380 OutChains.push_back(Store);
20381 FIN =
20382 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
20383 }
20384 }
20385
20386 // Record the frame index of the first variable argument
20387 // which is a value necessary to VASTART.
20388 RVFI->setVarArgsFrameIndex(FI);
20389 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
20390 }
20391
20392 // All stores are grouped in one node to allow the matching between
20393 // the size of Ins and InVals. This only happens for vararg functions.
20394 if (!OutChains.empty()) {
20395 OutChains.push_back(Chain);
20396 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
20397 }
20398
20399 return Chain;
20400}
20401
20402/// isEligibleForTailCallOptimization - Check whether the call is eligible
20403/// for tail call optimization.
20404/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
20405bool RISCVTargetLowering::isEligibleForTailCallOptimization(
20406 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
20407 const SmallVector<CCValAssign, 16> &ArgLocs) const {
20408
20409 auto CalleeCC = CLI.CallConv;
20410 auto &Outs = CLI.Outs;
20411 auto &Caller = MF.getFunction();
20412 auto CallerCC = Caller.getCallingConv();
20413
20414 // Exception-handling functions need a special set of instructions to
20415 // indicate a return to the hardware. Tail-calling another function would
20416 // probably break this.
20417 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
20418 // should be expanded as new function attributes are introduced.
20419 if (Caller.hasFnAttribute("interrupt"))
20420 return false;
20421
20422 // Do not tail call opt if the stack is used to pass parameters.
20423 if (CCInfo.getStackSize() != 0)
20424 return false;
20425
20426 // Do not tail call opt if any parameters need to be passed indirectly.
20427 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
20428 // passed indirectly. So the address of the value will be passed in a
20429 // register, or if not available, then the address is put on the stack. In
20430 // order to pass indirectly, space on the stack often needs to be allocated
20431 // in order to store the value. In this case the CCInfo.getNextStackOffset()
20432 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
20433 // are passed CCValAssign::Indirect.
20434 for (auto &VA : ArgLocs)
20435 if (VA.getLocInfo() == CCValAssign::Indirect)
20436 return false;
20437
20438 // Do not tail call opt if either caller or callee uses struct return
20439 // semantics.
20440 auto IsCallerStructRet = Caller.hasStructRetAttr();
20441 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
20442 if (IsCallerStructRet || IsCalleeStructRet)
20443 return false;
20444
20445 // The callee has to preserve all registers the caller needs to preserve.
20446 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
20447 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
20448 if (CalleeCC != CallerCC) {
20449 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
20450 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20451 return false;
20452 }
20453
20454 // Byval parameters hand the function a pointer directly into the stack area
20455 // we want to reuse during a tail call. Working around this *is* possible
20456 // but less efficient and uglier in LowerCall.
20457 for (auto &Arg : Outs)
20458 if (Arg.Flags.isByVal())
20459 return false;
20460
20461 return true;
20462}
20463
20465 return DAG.getDataLayout().getPrefTypeAlign(
20466 VT.getTypeForEVT(*DAG.getContext()));
20467}
20468
20469// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
20470// and output parameter nodes.
20472 SmallVectorImpl<SDValue> &InVals) const {
20473 SelectionDAG &DAG = CLI.DAG;
20474 SDLoc &DL = CLI.DL;
20476 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
20478 SDValue Chain = CLI.Chain;
20479 SDValue Callee = CLI.Callee;
20480 bool &IsTailCall = CLI.IsTailCall;
20481 CallingConv::ID CallConv = CLI.CallConv;
20482 bool IsVarArg = CLI.IsVarArg;
20483 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20484 MVT XLenVT = Subtarget.getXLenVT();
20485
20487
20488 // Analyze the operands of the call, assigning locations to each operand.
20490 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20491
20492 if (CallConv == CallingConv::GHC) {
20493 if (Subtarget.hasStdExtE())
20494 report_fatal_error("GHC calling convention is not supported on RVE!");
20495 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
20496 } else
20497 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
20499 : CC_RISCV);
20500
20501 // Check if it's really possible to do a tail call.
20502 if (IsTailCall)
20503 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
20504
20505 if (IsTailCall)
20506 ++NumTailCalls;
20507 else if (CLI.CB && CLI.CB->isMustTailCall())
20508 report_fatal_error("failed to perform tail call elimination on a call "
20509 "site marked musttail");
20510
20511 // Get a count of how many bytes are to be pushed on the stack.
20512 unsigned NumBytes = ArgCCInfo.getStackSize();
20513
20514 // Create local copies for byval args
20515 SmallVector<SDValue, 8> ByValArgs;
20516 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20517 ISD::ArgFlagsTy Flags = Outs[i].Flags;
20518 if (!Flags.isByVal())
20519 continue;
20520
20521 SDValue Arg = OutVals[i];
20522 unsigned Size = Flags.getByValSize();
20523 Align Alignment = Flags.getNonZeroByValAlign();
20524
20525 int FI =
20526 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
20527 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
20528 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
20529
20530 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
20531 /*IsVolatile=*/false,
20532 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
20534 ByValArgs.push_back(FIPtr);
20535 }
20536
20537 if (!IsTailCall)
20538 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
20539
20540 // Copy argument values to their designated locations.
20542 SmallVector<SDValue, 8> MemOpChains;
20543 SDValue StackPtr;
20544 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
20545 ++i, ++OutIdx) {
20546 CCValAssign &VA = ArgLocs[i];
20547 SDValue ArgValue = OutVals[OutIdx];
20548 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
20549
20550 // Handle passing f64 on RV32D with a soft float ABI as a special case.
20551 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20552 assert(VA.isRegLoc() && "Expected register VA assignment");
20553 assert(VA.needsCustom());
20554 SDValue SplitF64 = DAG.getNode(
20555 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
20556 SDValue Lo = SplitF64.getValue(0);
20557 SDValue Hi = SplitF64.getValue(1);
20558
20559 Register RegLo = VA.getLocReg();
20560 RegsToPass.push_back(std::make_pair(RegLo, Lo));
20561
20562 // Get the CCValAssign for the Hi part.
20563 CCValAssign &HiVA = ArgLocs[++i];
20564
20565 if (HiVA.isMemLoc()) {
20566 // Second half of f64 is passed on the stack.
20567 if (!StackPtr.getNode())
20568 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20570 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20571 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
20572 // Emit the store.
20573 MemOpChains.push_back(DAG.getStore(
20574 Chain, DL, Hi, Address,
20576 } else {
20577 // Second half of f64 is passed in another GPR.
20578 Register RegHigh = HiVA.getLocReg();
20579 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
20580 }
20581 continue;
20582 }
20583
20584 // Promote the value if needed.
20585 // For now, only handle fully promoted and indirect arguments.
20586 if (VA.getLocInfo() == CCValAssign::Indirect) {
20587 // Store the argument in a stack slot and pass its address.
20588 Align StackAlign =
20589 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
20590 getPrefTypeAlign(ArgValue.getValueType(), DAG));
20591 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
20592 // If the original argument was split (e.g. i128), we need
20593 // to store the required parts of it here (and pass just one address).
20594 // Vectors may be partly split to registers and partly to the stack, in
20595 // which case the base address is partly offset and subsequent stores are
20596 // relative to that.
20597 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
20598 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
20599 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20600 // Calculate the total size to store. We don't have access to what we're
20601 // actually storing other than performing the loop and collecting the
20602 // info.
20604 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
20605 SDValue PartValue = OutVals[OutIdx + 1];
20606 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
20607 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20608 EVT PartVT = PartValue.getValueType();
20609 if (PartVT.isScalableVector())
20610 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20611 StoredSize += PartVT.getStoreSize();
20612 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
20613 Parts.push_back(std::make_pair(PartValue, Offset));
20614 ++i;
20615 ++OutIdx;
20616 }
20617 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
20618 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
20619 MemOpChains.push_back(
20620 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
20622 for (const auto &Part : Parts) {
20623 SDValue PartValue = Part.first;
20624 SDValue PartOffset = Part.second;
20626 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
20627 MemOpChains.push_back(
20628 DAG.getStore(Chain, DL, PartValue, Address,
20630 }
20631 ArgValue = SpillSlot;
20632 } else {
20633 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
20634 }
20635
20636 // Use local copy if it is a byval arg.
20637 if (Flags.isByVal())
20638 ArgValue = ByValArgs[j++];
20639
20640 if (VA.isRegLoc()) {
20641 // Queue up the argument copies and emit them at the end.
20642 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
20643 } else {
20644 assert(VA.isMemLoc() && "Argument not register or memory");
20645 assert(!IsTailCall && "Tail call not allowed if stack is used "
20646 "for passing parameters");
20647
20648 // Work out the address of the stack slot.
20649 if (!StackPtr.getNode())
20650 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20652 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20654
20655 // Emit the store.
20656 MemOpChains.push_back(
20657 DAG.getStore(Chain, DL, ArgValue, Address,
20659 }
20660 }
20661
20662 // Join the stores, which are independent of one another.
20663 if (!MemOpChains.empty())
20664 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
20665
20666 SDValue Glue;
20667
20668 // Build a sequence of copy-to-reg nodes, chained and glued together.
20669 for (auto &Reg : RegsToPass) {
20670 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
20671 Glue = Chain.getValue(1);
20672 }
20673
20674 // Validate that none of the argument registers have been marked as
20675 // reserved, if so report an error. Do the same for the return address if this
20676 // is not a tailcall.
20677 validateCCReservedRegs(RegsToPass, MF);
20678 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
20680 MF.getFunction(),
20681 "Return address register required, but has been reserved."});
20682
20683 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
20684 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
20685 // split it and then direct call can be matched by PseudoCALL.
20686 bool CalleeIsLargeExternalSymbol = false;
20688 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
20689 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
20690 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20691 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
20692 CalleeIsLargeExternalSymbol = true;
20693 }
20694 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
20695 const GlobalValue *GV = S->getGlobal();
20696 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
20697 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20698 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
20699 }
20700
20701 // The first call operand is the chain and the second is the target address.
20703 Ops.push_back(Chain);
20704 Ops.push_back(Callee);
20705
20706 // Add argument registers to the end of the list so that they are
20707 // known live into the call.
20708 for (auto &Reg : RegsToPass)
20709 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
20710
20711 // Add a register mask operand representing the call-preserved registers.
20712 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
20713 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
20714 assert(Mask && "Missing call preserved mask for calling convention");
20715 Ops.push_back(DAG.getRegisterMask(Mask));
20716
20717 // Glue the call to the argument copies, if any.
20718 if (Glue.getNode())
20719 Ops.push_back(Glue);
20720
20721 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
20722 "Unexpected CFI type for a direct call");
20723
20724 // Emit the call.
20725 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20726
20727 // Use software guarded branch for large code model non-indirect calls
20728 // Tail call to external symbol will have a null CLI.CB and we need another
20729 // way to determine the callsite type
20730 bool NeedSWGuarded = false;
20732 Subtarget.hasStdExtZicfilp() &&
20733 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
20734 NeedSWGuarded = true;
20735
20736 if (IsTailCall) {
20738 unsigned CallOpc =
20739 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
20740 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20741 if (CLI.CFIType)
20742 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20743 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
20744 return Ret;
20745 }
20746
20747 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
20748 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20749 if (CLI.CFIType)
20750 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20751 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
20752 Glue = Chain.getValue(1);
20753
20754 // Mark the end of the call, which is glued to the call itself.
20755 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
20756 Glue = Chain.getValue(1);
20757
20758 // Assign locations to each value returned by this call.
20760 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
20761 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
20762
20763 // Copy all of the result registers out of their specified physreg.
20764 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
20765 auto &VA = RVLocs[i];
20766 // Copy the value out
20767 SDValue RetValue =
20768 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
20769 // Glue the RetValue to the end of the call sequence
20770 Chain = RetValue.getValue(1);
20771 Glue = RetValue.getValue(2);
20772
20773 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20774 assert(VA.needsCustom());
20775 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
20776 MVT::i32, Glue);
20777 Chain = RetValue2.getValue(1);
20778 Glue = RetValue2.getValue(2);
20779 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
20780 RetValue2);
20781 } else
20782 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
20783
20784 InVals.push_back(RetValue);
20785 }
20786
20787 return Chain;
20788}
20789
20791 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
20792 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
20793 const Type *RetTy) const {
20795 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20796
20797 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20798 MVT VT = Outs[i].VT;
20799 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20800 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
20801 /*IsFixed=*/true, /*IsRet=*/true, nullptr))
20802 return false;
20803 }
20804 return true;
20805}
20806
20807SDValue
20809 bool IsVarArg,
20811 const SmallVectorImpl<SDValue> &OutVals,
20812 const SDLoc &DL, SelectionDAG &DAG) const {
20814 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20815
20816 // Stores the assignment of the return value to a location.
20818
20819 // Info about the registers and stack slot.
20820 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20821 *DAG.getContext());
20822
20823 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
20824 nullptr, CC_RISCV);
20825
20826 if (CallConv == CallingConv::GHC && !RVLocs.empty())
20827 report_fatal_error("GHC functions return void only");
20828
20829 SDValue Glue;
20830 SmallVector<SDValue, 4> RetOps(1, Chain);
20831
20832 // Copy the result values into the output registers.
20833 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20834 SDValue Val = OutVals[OutIdx];
20835 CCValAssign &VA = RVLocs[i];
20836 assert(VA.isRegLoc() && "Can only return in registers!");
20837
20838 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20839 // Handle returning f64 on RV32D with a soft float ABI.
20840 assert(VA.isRegLoc() && "Expected return via registers");
20841 assert(VA.needsCustom());
20842 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
20843 DAG.getVTList(MVT::i32, MVT::i32), Val);
20844 SDValue Lo = SplitF64.getValue(0);
20845 SDValue Hi = SplitF64.getValue(1);
20846 Register RegLo = VA.getLocReg();
20847 Register RegHi = RVLocs[++i].getLocReg();
20848
20849 if (STI.isRegisterReservedByUser(RegLo) ||
20850 STI.isRegisterReservedByUser(RegHi))
20852 MF.getFunction(),
20853 "Return value register required, but has been reserved."});
20854
20855 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
20856 Glue = Chain.getValue(1);
20857 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20858 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
20859 Glue = Chain.getValue(1);
20860 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20861 } else {
20862 // Handle a 'normal' return.
20863 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
20864 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
20865
20866 if (STI.isRegisterReservedByUser(VA.getLocReg()))
20868 MF.getFunction(),
20869 "Return value register required, but has been reserved."});
20870
20871 // Guarantee that all emitted copies are stuck together.
20872 Glue = Chain.getValue(1);
20873 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20874 }
20875 }
20876
20877 RetOps[0] = Chain; // Update chain.
20878
20879 // Add the glue node if we have it.
20880 if (Glue.getNode()) {
20881 RetOps.push_back(Glue);
20882 }
20883
20884 if (any_of(RVLocs,
20885 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20886 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20887
20888 unsigned RetOpc = RISCVISD::RET_GLUE;
20889 // Interrupt service routines use different return instructions.
20890 const Function &Func = DAG.getMachineFunction().getFunction();
20891 if (Func.hasFnAttribute("interrupt")) {
20892 if (!Func.getReturnType()->isVoidTy())
20894 "Functions with the interrupt attribute must have void return type!");
20895
20897 StringRef Kind =
20898 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20899
20900 if (Kind == "supervisor")
20901 RetOpc = RISCVISD::SRET_GLUE;
20902 else
20903 RetOpc = RISCVISD::MRET_GLUE;
20904 }
20905
20906 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
20907}
20908
20909void RISCVTargetLowering::validateCCReservedRegs(
20910 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20911 MachineFunction &MF) const {
20912 const Function &F = MF.getFunction();
20913 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20914
20915 if (llvm::any_of(Regs, [&STI](auto Reg) {
20916 return STI.isRegisterReservedByUser(Reg.first);
20917 }))
20918 F.getContext().diagnose(DiagnosticInfoUnsupported{
20919 F, "Argument register required, but has been reserved."});
20920}
20921
20922// Check if the result of the node is only used as a return value, as
20923// otherwise we can't perform a tail-call.
20925 if (N->getNumValues() != 1)
20926 return false;
20927 if (!N->hasNUsesOfValue(1, 0))
20928 return false;
20929
20930 SDNode *Copy = *N->user_begin();
20931
20932 if (Copy->getOpcode() == ISD::BITCAST) {
20933 return isUsedByReturnOnly(Copy, Chain);
20934 }
20935
20936 // TODO: Handle additional opcodes in order to support tail-calling libcalls
20937 // with soft float ABIs.
20938 if (Copy->getOpcode() != ISD::CopyToReg) {
20939 return false;
20940 }
20941
20942 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
20943 // isn't safe to perform a tail call.
20944 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20945 return false;
20946
20947 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20948 bool HasRet = false;
20949 for (SDNode *Node : Copy->users()) {
20950 if (Node->getOpcode() != RISCVISD::RET_GLUE)
20951 return false;
20952 HasRet = true;
20953 }
20954 if (!HasRet)
20955 return false;
20956
20957 Chain = Copy->getOperand(0);
20958 return true;
20959}
20960
20962 return CI->isTailCall();
20963}
20964
20965const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20966#define NODE_NAME_CASE(NODE) \
20967 case RISCVISD::NODE: \
20968 return "RISCVISD::" #NODE;
20969 // clang-format off
20970 switch ((RISCVISD::NodeType)Opcode) {
20972 break;
20973 NODE_NAME_CASE(RET_GLUE)
20974 NODE_NAME_CASE(SRET_GLUE)
20975 NODE_NAME_CASE(MRET_GLUE)
20976 NODE_NAME_CASE(CALL)
20977 NODE_NAME_CASE(TAIL)
20978 NODE_NAME_CASE(SELECT_CC)
20979 NODE_NAME_CASE(BR_CC)
20980 NODE_NAME_CASE(BuildGPRPair)
20981 NODE_NAME_CASE(SplitGPRPair)
20982 NODE_NAME_CASE(BuildPairF64)
20983 NODE_NAME_CASE(SplitF64)
20984 NODE_NAME_CASE(ADD_LO)
20985 NODE_NAME_CASE(HI)
20986 NODE_NAME_CASE(LLA)
20987 NODE_NAME_CASE(ADD_TPREL)
20988 NODE_NAME_CASE(MULHSU)
20989 NODE_NAME_CASE(SHL_ADD)
20990 NODE_NAME_CASE(SLLW)
20991 NODE_NAME_CASE(SRAW)
20992 NODE_NAME_CASE(SRLW)
20993 NODE_NAME_CASE(DIVW)
20994 NODE_NAME_CASE(DIVUW)
20995 NODE_NAME_CASE(REMUW)
20996 NODE_NAME_CASE(ROLW)
20997 NODE_NAME_CASE(RORW)
20998 NODE_NAME_CASE(CLZW)
20999 NODE_NAME_CASE(CTZW)
21000 NODE_NAME_CASE(ABSW)
21001 NODE_NAME_CASE(FMV_H_X)
21002 NODE_NAME_CASE(FMV_X_ANYEXTH)
21003 NODE_NAME_CASE(FMV_X_SIGNEXTH)
21004 NODE_NAME_CASE(FMV_W_X_RV64)
21005 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
21006 NODE_NAME_CASE(FCVT_X)
21007 NODE_NAME_CASE(FCVT_XU)
21008 NODE_NAME_CASE(FCVT_W_RV64)
21009 NODE_NAME_CASE(FCVT_WU_RV64)
21010 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
21011 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
21012 NODE_NAME_CASE(FROUND)
21013 NODE_NAME_CASE(FCLASS)
21014 NODE_NAME_CASE(FSGNJX)
21015 NODE_NAME_CASE(FMAX)
21016 NODE_NAME_CASE(FMIN)
21017 NODE_NAME_CASE(FLI)
21018 NODE_NAME_CASE(READ_COUNTER_WIDE)
21019 NODE_NAME_CASE(BREV8)
21020 NODE_NAME_CASE(ORC_B)
21021 NODE_NAME_CASE(ZIP)
21022 NODE_NAME_CASE(UNZIP)
21023 NODE_NAME_CASE(CLMUL)
21024 NODE_NAME_CASE(CLMULH)
21025 NODE_NAME_CASE(CLMULR)
21026 NODE_NAME_CASE(MOPR)
21027 NODE_NAME_CASE(MOPRR)
21028 NODE_NAME_CASE(SHA256SIG0)
21029 NODE_NAME_CASE(SHA256SIG1)
21030 NODE_NAME_CASE(SHA256SUM0)
21031 NODE_NAME_CASE(SHA256SUM1)
21032 NODE_NAME_CASE(SM4KS)
21033 NODE_NAME_CASE(SM4ED)
21034 NODE_NAME_CASE(SM3P0)
21035 NODE_NAME_CASE(SM3P1)
21036 NODE_NAME_CASE(TH_LWD)
21037 NODE_NAME_CASE(TH_LWUD)
21038 NODE_NAME_CASE(TH_LDD)
21039 NODE_NAME_CASE(TH_SWD)
21040 NODE_NAME_CASE(TH_SDD)
21041 NODE_NAME_CASE(VMV_V_V_VL)
21042 NODE_NAME_CASE(VMV_V_X_VL)
21043 NODE_NAME_CASE(VFMV_V_F_VL)
21044 NODE_NAME_CASE(VMV_X_S)
21045 NODE_NAME_CASE(VMV_S_X_VL)
21046 NODE_NAME_CASE(VFMV_S_F_VL)
21047 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
21048 NODE_NAME_CASE(READ_VLENB)
21049 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
21050 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
21051 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
21052 NODE_NAME_CASE(VSLIDEUP_VL)
21053 NODE_NAME_CASE(VSLIDE1UP_VL)
21054 NODE_NAME_CASE(VSLIDEDOWN_VL)
21055 NODE_NAME_CASE(VSLIDE1DOWN_VL)
21056 NODE_NAME_CASE(VFSLIDE1UP_VL)
21057 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
21058 NODE_NAME_CASE(VID_VL)
21059 NODE_NAME_CASE(VFNCVT_ROD_VL)
21060 NODE_NAME_CASE(VECREDUCE_ADD_VL)
21061 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
21062 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
21063 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
21064 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
21065 NODE_NAME_CASE(VECREDUCE_AND_VL)
21066 NODE_NAME_CASE(VECREDUCE_OR_VL)
21067 NODE_NAME_CASE(VECREDUCE_XOR_VL)
21068 NODE_NAME_CASE(VECREDUCE_FADD_VL)
21069 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
21070 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
21071 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
21072 NODE_NAME_CASE(ADD_VL)
21073 NODE_NAME_CASE(AND_VL)
21074 NODE_NAME_CASE(MUL_VL)
21075 NODE_NAME_CASE(OR_VL)
21076 NODE_NAME_CASE(SDIV_VL)
21077 NODE_NAME_CASE(SHL_VL)
21078 NODE_NAME_CASE(SREM_VL)
21079 NODE_NAME_CASE(SRA_VL)
21080 NODE_NAME_CASE(SRL_VL)
21081 NODE_NAME_CASE(ROTL_VL)
21082 NODE_NAME_CASE(ROTR_VL)
21083 NODE_NAME_CASE(SUB_VL)
21084 NODE_NAME_CASE(UDIV_VL)
21085 NODE_NAME_CASE(UREM_VL)
21086 NODE_NAME_CASE(XOR_VL)
21087 NODE_NAME_CASE(AVGFLOORS_VL)
21088 NODE_NAME_CASE(AVGFLOORU_VL)
21089 NODE_NAME_CASE(AVGCEILS_VL)
21090 NODE_NAME_CASE(AVGCEILU_VL)
21091 NODE_NAME_CASE(SADDSAT_VL)
21092 NODE_NAME_CASE(UADDSAT_VL)
21093 NODE_NAME_CASE(SSUBSAT_VL)
21094 NODE_NAME_CASE(USUBSAT_VL)
21095 NODE_NAME_CASE(FADD_VL)
21096 NODE_NAME_CASE(FSUB_VL)
21097 NODE_NAME_CASE(FMUL_VL)
21098 NODE_NAME_CASE(FDIV_VL)
21099 NODE_NAME_CASE(FNEG_VL)
21100 NODE_NAME_CASE(FABS_VL)
21101 NODE_NAME_CASE(FSQRT_VL)
21102 NODE_NAME_CASE(FCLASS_VL)
21103 NODE_NAME_CASE(VFMADD_VL)
21104 NODE_NAME_CASE(VFNMADD_VL)
21105 NODE_NAME_CASE(VFMSUB_VL)
21106 NODE_NAME_CASE(VFNMSUB_VL)
21107 NODE_NAME_CASE(VFWMADD_VL)
21108 NODE_NAME_CASE(VFWNMADD_VL)
21109 NODE_NAME_CASE(VFWMSUB_VL)
21110 NODE_NAME_CASE(VFWNMSUB_VL)
21111 NODE_NAME_CASE(FCOPYSIGN_VL)
21112 NODE_NAME_CASE(SMIN_VL)
21113 NODE_NAME_CASE(SMAX_VL)
21114 NODE_NAME_CASE(UMIN_VL)
21115 NODE_NAME_CASE(UMAX_VL)
21116 NODE_NAME_CASE(BITREVERSE_VL)
21117 NODE_NAME_CASE(BSWAP_VL)
21118 NODE_NAME_CASE(CTLZ_VL)
21119 NODE_NAME_CASE(CTTZ_VL)
21120 NODE_NAME_CASE(CTPOP_VL)
21121 NODE_NAME_CASE(VFMIN_VL)
21122 NODE_NAME_CASE(VFMAX_VL)
21123 NODE_NAME_CASE(MULHS_VL)
21124 NODE_NAME_CASE(MULHU_VL)
21125 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
21126 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
21127 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
21128 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
21129 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
21130 NODE_NAME_CASE(SINT_TO_FP_VL)
21131 NODE_NAME_CASE(UINT_TO_FP_VL)
21132 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
21133 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
21134 NODE_NAME_CASE(FP_EXTEND_VL)
21135 NODE_NAME_CASE(FP_ROUND_VL)
21136 NODE_NAME_CASE(STRICT_FADD_VL)
21137 NODE_NAME_CASE(STRICT_FSUB_VL)
21138 NODE_NAME_CASE(STRICT_FMUL_VL)
21139 NODE_NAME_CASE(STRICT_FDIV_VL)
21140 NODE_NAME_CASE(STRICT_FSQRT_VL)
21141 NODE_NAME_CASE(STRICT_VFMADD_VL)
21142 NODE_NAME_CASE(STRICT_VFNMADD_VL)
21143 NODE_NAME_CASE(STRICT_VFMSUB_VL)
21144 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
21145 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
21146 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
21147 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
21148 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
21149 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
21150 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
21151 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
21152 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
21153 NODE_NAME_CASE(STRICT_FSETCC_VL)
21154 NODE_NAME_CASE(STRICT_FSETCCS_VL)
21155 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
21156 NODE_NAME_CASE(VWMUL_VL)
21157 NODE_NAME_CASE(VWMULU_VL)
21158 NODE_NAME_CASE(VWMULSU_VL)
21159 NODE_NAME_CASE(VWADD_VL)
21160 NODE_NAME_CASE(VWADDU_VL)
21161 NODE_NAME_CASE(VWSUB_VL)
21162 NODE_NAME_CASE(VWSUBU_VL)
21163 NODE_NAME_CASE(VWADD_W_VL)
21164 NODE_NAME_CASE(VWADDU_W_VL)
21165 NODE_NAME_CASE(VWSUB_W_VL)
21166 NODE_NAME_CASE(VWSUBU_W_VL)
21167 NODE_NAME_CASE(VWSLL_VL)
21168 NODE_NAME_CASE(VFWMUL_VL)
21169 NODE_NAME_CASE(VFWADD_VL)
21170 NODE_NAME_CASE(VFWSUB_VL)
21171 NODE_NAME_CASE(VFWADD_W_VL)
21172 NODE_NAME_CASE(VFWSUB_W_VL)
21173 NODE_NAME_CASE(VWMACC_VL)
21174 NODE_NAME_CASE(VWMACCU_VL)
21175 NODE_NAME_CASE(VWMACCSU_VL)
21176 NODE_NAME_CASE(SETCC_VL)
21177 NODE_NAME_CASE(VMERGE_VL)
21178 NODE_NAME_CASE(VMAND_VL)
21179 NODE_NAME_CASE(VMOR_VL)
21180 NODE_NAME_CASE(VMXOR_VL)
21181 NODE_NAME_CASE(VMCLR_VL)
21182 NODE_NAME_CASE(VMSET_VL)
21183 NODE_NAME_CASE(VRGATHER_VX_VL)
21184 NODE_NAME_CASE(VRGATHER_VV_VL)
21185 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
21186 NODE_NAME_CASE(VSEXT_VL)
21187 NODE_NAME_CASE(VZEXT_VL)
21188 NODE_NAME_CASE(VCPOP_VL)
21189 NODE_NAME_CASE(VFIRST_VL)
21190 NODE_NAME_CASE(READ_CSR)
21191 NODE_NAME_CASE(WRITE_CSR)
21192 NODE_NAME_CASE(SWAP_CSR)
21193 NODE_NAME_CASE(CZERO_EQZ)
21194 NODE_NAME_CASE(CZERO_NEZ)
21195 NODE_NAME_CASE(SW_GUARDED_BRIND)
21196 NODE_NAME_CASE(SW_GUARDED_CALL)
21197 NODE_NAME_CASE(SW_GUARDED_TAIL)
21198 NODE_NAME_CASE(TUPLE_INSERT)
21199 NODE_NAME_CASE(TUPLE_EXTRACT)
21200 NODE_NAME_CASE(SF_VC_XV_SE)
21201 NODE_NAME_CASE(SF_VC_IV_SE)
21202 NODE_NAME_CASE(SF_VC_VV_SE)
21203 NODE_NAME_CASE(SF_VC_FV_SE)
21204 NODE_NAME_CASE(SF_VC_XVV_SE)
21205 NODE_NAME_CASE(SF_VC_IVV_SE)
21206 NODE_NAME_CASE(SF_VC_VVV_SE)
21207 NODE_NAME_CASE(SF_VC_FVV_SE)
21208 NODE_NAME_CASE(SF_VC_XVW_SE)
21209 NODE_NAME_CASE(SF_VC_IVW_SE)
21210 NODE_NAME_CASE(SF_VC_VVW_SE)
21211 NODE_NAME_CASE(SF_VC_FVW_SE)
21212 NODE_NAME_CASE(SF_VC_V_X_SE)
21213 NODE_NAME_CASE(SF_VC_V_I_SE)
21214 NODE_NAME_CASE(SF_VC_V_XV_SE)
21215 NODE_NAME_CASE(SF_VC_V_IV_SE)
21216 NODE_NAME_CASE(SF_VC_V_VV_SE)
21217 NODE_NAME_CASE(SF_VC_V_FV_SE)
21218 NODE_NAME_CASE(SF_VC_V_XVV_SE)
21219 NODE_NAME_CASE(SF_VC_V_IVV_SE)
21220 NODE_NAME_CASE(SF_VC_V_VVV_SE)
21221 NODE_NAME_CASE(SF_VC_V_FVV_SE)
21222 NODE_NAME_CASE(SF_VC_V_XVW_SE)
21223 NODE_NAME_CASE(SF_VC_V_IVW_SE)
21224 NODE_NAME_CASE(SF_VC_V_VVW_SE)
21225 NODE_NAME_CASE(SF_VC_V_FVW_SE)
21226 NODE_NAME_CASE(PROBED_ALLOCA)
21227 }
21228 // clang-format on
21229 return nullptr;
21230#undef NODE_NAME_CASE
21231}
21232
21233/// getConstraintType - Given a constraint letter, return the type of
21234/// constraint it is for this target.
// NOTE(review): the function signature (orig. lines 21235-21236) was dropped
// by the documentation extraction; the surviving body below classifies the
// RISC-V inline-asm constraint strings.
21237  if (Constraint.size() == 1) {
21238    switch (Constraint[0]) {
21239    default:
21240      break;
    // 'f' selects a floating-point register and 'R' a register pair; the
    // concrete classes are chosen in getRegForInlineAsmConstraint.
21241    case 'f':
21242    case 'R':
21243      return C_RegisterClass;
    // 'I'/'J'/'K' are immediates; their values are validated in
    // LowerAsmOperandForConstraint (12-bit signed / zero / 5-bit unsigned).
21244    case 'I':
21245    case 'J':
21246    case 'K':
21247      return C_Immediate;
21248    case 'A':
21249      return C_Memory;
21250    case 's':
21251    case 'S': // A symbolic address
21252      return C_Other;
21253    }
21254  } else {
    // Multi-character constraints: the vector classes ("vr"/"vd"/"vm") and
    // the 'c*' variants that map to the *C register classes below.
21255    if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
21256      return C_RegisterClass;
21257    if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
21258      return C_RegisterClass;
21259  }
  // Anything unrecognized falls back to the generic handling.
21260  return TargetLowering::getConstraintType(Constraint);
21261}
21262
21263std::pair<unsigned, const TargetRegisterClass *>
21265 StringRef Constraint,
21266 MVT VT) const {
21267 // First, see if this is a constraint that directly corresponds to a RISC-V
21268 // register class.
21269 if (Constraint.size() == 1) {
21270 switch (Constraint[0]) {
21271 case 'r':
21272 // TODO: Support fixed vectors up to XLen for P extension?
21273 if (VT.isVector())
21274 break;
21275 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
21276 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
21277 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
21278 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
21279 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21280 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21281 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
21282 case 'f':
21283 if (VT == MVT::f16) {
21284 if (Subtarget.hasStdExtZfhmin())
21285 return std::make_pair(0U, &RISCV::FPR16RegClass);
21286 if (Subtarget.hasStdExtZhinxmin())
21287 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
21288 } else if (VT == MVT::f32) {
21289 if (Subtarget.hasStdExtF())
21290 return std::make_pair(0U, &RISCV::FPR32RegClass);
21291 if (Subtarget.hasStdExtZfinx())
21292 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
21293 } else if (VT == MVT::f64) {
21294 if (Subtarget.hasStdExtD())
21295 return std::make_pair(0U, &RISCV::FPR64RegClass);
21296 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21297 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21298 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
21299 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
21300 }
21301 break;
21302 case 'R':
21303 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21304 default:
21305 break;
21306 }
21307 } else if (Constraint == "vr") {
21308 for (const auto *RC :
21309 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
21310 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
21311 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
21312 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
21313 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
21314 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
21315 &RISCV::VRN2M4RegClass}) {
21316 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
21317 return std::make_pair(0U, RC);
21318 }
21319 } else if (Constraint == "vd") {
21320 for (const auto *RC :
21321 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
21322 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
21323 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
21324 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
21325 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
21326 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
21327 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
21328 &RISCV::VRN2M4NoV0RegClass}) {
21329 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
21330 return std::make_pair(0U, RC);
21331 }
21332 } else if (Constraint == "vm") {
21333 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
21334 return std::make_pair(0U, &RISCV::VMV0RegClass);
21335 } else if (Constraint == "cr") {
21336 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
21337 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
21338 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
21339 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
21340 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21341 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21342 if (!VT.isVector())
21343 return std::make_pair(0U, &RISCV::GPRCRegClass);
21344 } else if (Constraint == "cR") {
21345 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21346 } else if (Constraint == "cf") {
21347 if (VT == MVT::f16) {
21348 if (Subtarget.hasStdExtZfhmin())
21349 return std::make_pair(0U, &RISCV::FPR16CRegClass);
21350 if (Subtarget.hasStdExtZhinxmin())
21351 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
21352 } else if (VT == MVT::f32) {
21353 if (Subtarget.hasStdExtF())
21354 return std::make_pair(0U, &RISCV::FPR32CRegClass);
21355 if (Subtarget.hasStdExtZfinx())
21356 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
21357 } else if (VT == MVT::f64) {
21358 if (Subtarget.hasStdExtD())
21359 return std::make_pair(0U, &RISCV::FPR64CRegClass);
21360 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21361 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21362 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
21363 return std::make_pair(0U, &RISCV::GPRCRegClass);
21364 }
21365 }
21366
21367 // Clang will correctly decode the usage of register name aliases into their
21368 // official names. However, other frontends like `rustc` do not. This allows
21369 // users of these frontends to use the ABI names for registers in LLVM-style
21370 // register constraints.
21371 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
21372 .Case("{zero}", RISCV::X0)
21373 .Case("{ra}", RISCV::X1)
21374 .Case("{sp}", RISCV::X2)
21375 .Case("{gp}", RISCV::X3)
21376 .Case("{tp}", RISCV::X4)
21377 .Case("{t0}", RISCV::X5)
21378 .Case("{t1}", RISCV::X6)
21379 .Case("{t2}", RISCV::X7)
21380 .Cases("{s0}", "{fp}", RISCV::X8)
21381 .Case("{s1}", RISCV::X9)
21382 .Case("{a0}", RISCV::X10)
21383 .Case("{a1}", RISCV::X11)
21384 .Case("{a2}", RISCV::X12)
21385 .Case("{a3}", RISCV::X13)
21386 .Case("{a4}", RISCV::X14)
21387 .Case("{a5}", RISCV::X15)
21388 .Case("{a6}", RISCV::X16)
21389 .Case("{a7}", RISCV::X17)
21390 .Case("{s2}", RISCV::X18)
21391 .Case("{s3}", RISCV::X19)
21392 .Case("{s4}", RISCV::X20)
21393 .Case("{s5}", RISCV::X21)
21394 .Case("{s6}", RISCV::X22)
21395 .Case("{s7}", RISCV::X23)
21396 .Case("{s8}", RISCV::X24)
21397 .Case("{s9}", RISCV::X25)
21398 .Case("{s10}", RISCV::X26)
21399 .Case("{s11}", RISCV::X27)
21400 .Case("{t3}", RISCV::X28)
21401 .Case("{t4}", RISCV::X29)
21402 .Case("{t5}", RISCV::X30)
21403 .Case("{t6}", RISCV::X31)
21404 .Default(RISCV::NoRegister);
21405 if (XRegFromAlias != RISCV::NoRegister)
21406 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
21407
21408 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
21409 // TableGen record rather than the AsmName to choose registers for InlineAsm
21410 // constraints, plus we want to match those names to the widest floating point
21411 // register type available, manually select floating point registers here.
21412 //
21413 // The second case is the ABI name of the register, so that frontends can also
21414 // use the ABI names in register constraint lists.
21415 if (Subtarget.hasStdExtF()) {
21416 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
21417 .Cases("{f0}", "{ft0}", RISCV::F0_F)
21418 .Cases("{f1}", "{ft1}", RISCV::F1_F)
21419 .Cases("{f2}", "{ft2}", RISCV::F2_F)
21420 .Cases("{f3}", "{ft3}", RISCV::F3_F)
21421 .Cases("{f4}", "{ft4}", RISCV::F4_F)
21422 .Cases("{f5}", "{ft5}", RISCV::F5_F)
21423 .Cases("{f6}", "{ft6}", RISCV::F6_F)
21424 .Cases("{f7}", "{ft7}", RISCV::F7_F)
21425 .Cases("{f8}", "{fs0}", RISCV::F8_F)
21426 .Cases("{f9}", "{fs1}", RISCV::F9_F)
21427 .Cases("{f10}", "{fa0}", RISCV::F10_F)
21428 .Cases("{f11}", "{fa1}", RISCV::F11_F)
21429 .Cases("{f12}", "{fa2}", RISCV::F12_F)
21430 .Cases("{f13}", "{fa3}", RISCV::F13_F)
21431 .Cases("{f14}", "{fa4}", RISCV::F14_F)
21432 .Cases("{f15}", "{fa5}", RISCV::F15_F)
21433 .Cases("{f16}", "{fa6}", RISCV::F16_F)
21434 .Cases("{f17}", "{fa7}", RISCV::F17_F)
21435 .Cases("{f18}", "{fs2}", RISCV::F18_F)
21436 .Cases("{f19}", "{fs3}", RISCV::F19_F)
21437 .Cases("{f20}", "{fs4}", RISCV::F20_F)
21438 .Cases("{f21}", "{fs5}", RISCV::F21_F)
21439 .Cases("{f22}", "{fs6}", RISCV::F22_F)
21440 .Cases("{f23}", "{fs7}", RISCV::F23_F)
21441 .Cases("{f24}", "{fs8}", RISCV::F24_F)
21442 .Cases("{f25}", "{fs9}", RISCV::F25_F)
21443 .Cases("{f26}", "{fs10}", RISCV::F26_F)
21444 .Cases("{f27}", "{fs11}", RISCV::F27_F)
21445 .Cases("{f28}", "{ft8}", RISCV::F28_F)
21446 .Cases("{f29}", "{ft9}", RISCV::F29_F)
21447 .Cases("{f30}", "{ft10}", RISCV::F30_F)
21448 .Cases("{f31}", "{ft11}", RISCV::F31_F)
21449 .Default(RISCV::NoRegister);
21450 if (FReg != RISCV::NoRegister) {
21451 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
21452 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
21453 unsigned RegNo = FReg - RISCV::F0_F;
21454 unsigned DReg = RISCV::F0_D + RegNo;
21455 return std::make_pair(DReg, &RISCV::FPR64RegClass);
21456 }
21457 if (VT == MVT::f32 || VT == MVT::Other)
21458 return std::make_pair(FReg, &RISCV::FPR32RegClass);
21459 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
21460 unsigned RegNo = FReg - RISCV::F0_F;
21461 unsigned HReg = RISCV::F0_H + RegNo;
21462 return std::make_pair(HReg, &RISCV::FPR16RegClass);
21463 }
21464 }
21465 }
21466
21467 if (Subtarget.hasVInstructions()) {
21468 Register VReg = StringSwitch<Register>(Constraint.lower())
21469 .Case("{v0}", RISCV::V0)
21470 .Case("{v1}", RISCV::V1)
21471 .Case("{v2}", RISCV::V2)
21472 .Case("{v3}", RISCV::V3)
21473 .Case("{v4}", RISCV::V4)
21474 .Case("{v5}", RISCV::V5)
21475 .Case("{v6}", RISCV::V6)
21476 .Case("{v7}", RISCV::V7)
21477 .Case("{v8}", RISCV::V8)
21478 .Case("{v9}", RISCV::V9)
21479 .Case("{v10}", RISCV::V10)
21480 .Case("{v11}", RISCV::V11)
21481 .Case("{v12}", RISCV::V12)
21482 .Case("{v13}", RISCV::V13)
21483 .Case("{v14}", RISCV::V14)
21484 .Case("{v15}", RISCV::V15)
21485 .Case("{v16}", RISCV::V16)
21486 .Case("{v17}", RISCV::V17)
21487 .Case("{v18}", RISCV::V18)
21488 .Case("{v19}", RISCV::V19)
21489 .Case("{v20}", RISCV::V20)
21490 .Case("{v21}", RISCV::V21)
21491 .Case("{v22}", RISCV::V22)
21492 .Case("{v23}", RISCV::V23)
21493 .Case("{v24}", RISCV::V24)
21494 .Case("{v25}", RISCV::V25)
21495 .Case("{v26}", RISCV::V26)
21496 .Case("{v27}", RISCV::V27)
21497 .Case("{v28}", RISCV::V28)
21498 .Case("{v29}", RISCV::V29)
21499 .Case("{v30}", RISCV::V30)
21500 .Case("{v31}", RISCV::V31)
21501 .Default(RISCV::NoRegister);
21502 if (VReg != RISCV::NoRegister) {
21503 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
21504 return std::make_pair(VReg, &RISCV::VMRegClass);
21505 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
21506 return std::make_pair(VReg, &RISCV::VRRegClass);
21507 for (const auto *RC :
21508 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
21509 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
21510 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
21511 return std::make_pair(VReg, RC);
21512 }
21513 }
21514 }
21515 }
21516
21517 std::pair<Register, const TargetRegisterClass *> Res =
21519
21520 // If we picked one of the Zfinx register classes, remap it to the GPR class.
21521 // FIXME: When Zfinx is supported in CodeGen this will need to take the
21522 // Subtarget into account.
21523 if (Res.second == &RISCV::GPRF16RegClass ||
21524 Res.second == &RISCV::GPRF32RegClass ||
21525 Res.second == &RISCV::GPRPairRegClass)
21526 return std::make_pair(Res.first, &RISCV::GPRRegClass);
21527
21528 return Res;
21529}
21530
// NOTE(review): the function signature (orig. lines 21531-21532) and the
// return statement of the 'A' case (orig. line 21537, presumably
// `return InlineAsm::ConstraintCode::A;`) were dropped by the documentation
// extraction -- TODO confirm against the upstream source.
21533  // Currently only support length 1 constraints.
21534  if (ConstraintCode.size() == 1) {
21535    switch (ConstraintCode[0]) {
21536    case 'A':
21538    default:
21539      break;
21540    }
21541  }
21542
  // Unknown memory constraints defer to the generic implementation.
21543  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
21544}
21545
// NOTE(review): the first line of the signature (orig. line 21546, presumably
// `void RISCVTargetLowering::LowerAsmOperandForConstraint(`) and the body of
// the 'S' case (orig. line 21577) were dropped by the documentation
// extraction -- TODO confirm against the upstream source.
// Lowers an inline-asm operand for a single-letter immediate/symbol
// constraint; a validated operand is appended to Ops, otherwise Ops is left
// unchanged.
21547    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
21548    SelectionDAG &DAG) const {
21549  // Currently only support length 1 constraints.
21550  if (Constraint.size() == 1) {
21551    switch (Constraint[0]) {
21552    case 'I':
21553      // Validate & create a 12-bit signed immediate operand.
21554      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21555        uint64_t CVal = C->getSExtValue();
21556        if (isInt<12>(CVal))
21557          Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
21558                                                    Subtarget.getXLenVT()));
21559      }
21560      return;
21561    case 'J':
21562      // Validate & create an integer zero operand.
21563      if (isNullConstant(Op))
21564        Ops.push_back(
21565            DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
21566      return;
21567    case 'K':
21568      // Validate & create a 5-bit unsigned immediate operand.
21569      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21570        uint64_t CVal = C->getZExtValue();
21571        if (isUInt<5>(CVal))
21572          Ops.push_back(
21573              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
21574      }
21575      return;
21576    case 'S':
21578      return;
21579    default:
21580      break;
21581    }
21582  }
  // Anything else is handled by the target-independent implementation.
21583  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
21584}
21585
// NOTE(review): the first line of the signature (orig. line 21586, presumably
// `Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,`)
// was dropped by the documentation extraction -- TODO confirm.
// Emits the fence required *before* an atomic load/store: a seq_cst load
// always gets a leading fence; without the Ztso extension, stores that are
// release-or-stronger additionally get a Release fence. Returns nullptr when
// no fence is needed.
21587                                                 Instruction *Inst,
21588                                                 AtomicOrdering Ord) const {
21589  if (Subtarget.hasStdExtZtso()) {
21590    if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21591      return Builder.CreateFence(Ord);
21592    return nullptr;
21593  }
21594
21595  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21596    return Builder.CreateFence(Ord);
21597  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
21598    return Builder.CreateFence(AtomicOrdering::Release);
21599  return nullptr;
21600}
21601
// NOTE(review): the first line of the signature (orig. line 21602) and the
// continuation of the final condition plus its return (orig. lines
// 21614-21615, presumably the SequentiallyConsistent check and a seq_cst
// fence) were dropped by the documentation extraction -- TODO confirm against
// the upstream source.
// Emits the fence required *after* an atomic load/store: under Ztso only a
// seq_cst store needs one; otherwise acquire-or-stronger loads get an
// Acquire fence, and seq_cst stores may get a trailing fence when
// enableTrailingSeqCstFence() is set.
21603                                                  Instruction *Inst,
21604                                                  AtomicOrdering Ord) const {
21605  if (Subtarget.hasStdExtZtso()) {
21606    if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21607      return Builder.CreateFence(Ord);
21608    return nullptr;
21609  }
21610
21611  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
21612    return Builder.CreateFence(AtomicOrdering::Acquire);
21613  if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
21616  return nullptr;
21617}
21618
21621 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
21622 // point operations can't be used in an lr/sc sequence without breaking the
21623 // forward-progress guarantee.
21624 if (AI->isFloatingPointOperation() ||
21630
21631 // Don't expand forced atomics, we want to have __sync libcalls instead.
21632 if (Subtarget.hasForcedAtomics())
21634
21635 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21636 if (AI->getOperation() == AtomicRMWInst::Nand) {
21637 if (Subtarget.hasStdExtZacas() &&
21638 (Size >= 32 || Subtarget.hasStdExtZabha()))
21640 if (Size < 32)
21642 }
21643
21644 if (Size < 32 && !Subtarget.hasStdExtZabha())
21646
21648}
21649
21650static Intrinsic::ID
21652 if (XLen == 32) {
21653 switch (BinOp) {
21654 default:
21655 llvm_unreachable("Unexpected AtomicRMW BinOp");
21657 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
21658 case AtomicRMWInst::Add:
21659 return Intrinsic::riscv_masked_atomicrmw_add_i32;
21660 case AtomicRMWInst::Sub:
21661 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
21663 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
21664 case AtomicRMWInst::Max:
21665 return Intrinsic::riscv_masked_atomicrmw_max_i32;
21666 case AtomicRMWInst::Min:
21667 return Intrinsic::riscv_masked_atomicrmw_min_i32;
21669 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
21671 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
21672 }
21673 }
21674
21675 if (XLen == 64) {
21676 switch (BinOp) {
21677 default:
21678 llvm_unreachable("Unexpected AtomicRMW BinOp");
21680 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
21681 case AtomicRMWInst::Add:
21682 return Intrinsic::riscv_masked_atomicrmw_add_i64;
21683 case AtomicRMWInst::Sub:
21684 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
21686 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
21687 case AtomicRMWInst::Max:
21688 return Intrinsic::riscv_masked_atomicrmw_max_i64;
21689 case AtomicRMWInst::Min:
21690 return Intrinsic::riscv_masked_atomicrmw_min_i64;
21692 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
21694 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
21695 }
21696 }
21697
21698 llvm_unreachable("Unexpected XLen\n");
21699}
21700
21702 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
21703 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
21704 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
21705 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
21706 // mask, as this produces better code than the LR/SC loop emitted by
21707 // int_riscv_masked_atomicrmw_xchg.
21708 if (AI->getOperation() == AtomicRMWInst::Xchg &&
21709 isa<ConstantInt>(AI->getValOperand())) {
21710 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
21711 if (CVal->isZero())
21712 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
21713 Builder.CreateNot(Mask, "Inv_Mask"),
21714 AI->getAlign(), Ord);
21715 if (CVal->isMinusOne())
21716 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
21717 AI->getAlign(), Ord);
21718 }
21719
21720 unsigned XLen = Subtarget.getXLen();
21721 Value *Ordering =
21722 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
21723 Type *Tys[] = {AlignedAddr->getType()};
21725 AI->getModule(),
21727
21728 if (XLen == 64) {
21729 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
21730 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21731 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
21732 }
21733
21734 Value *Result;
21735
21736 // Must pass the shift amount needed to sign extend the loaded value prior
21737 // to performing a signed comparison for min/max. ShiftAmt is the number of
21738 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
21739 // is the number of bits to left+right shift the value in order to
21740 // sign-extend.
21741 if (AI->getOperation() == AtomicRMWInst::Min ||
21743 const DataLayout &DL = AI->getDataLayout();
21744 unsigned ValWidth =
21745 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
21746 Value *SextShamt =
21747 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
21748 Result = Builder.CreateCall(LrwOpScwLoop,
21749 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
21750 } else {
21751 Result =
21752 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
21753 }
21754
21755 if (XLen == 64)
21756 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21757 return Result;
21758}
21759
21762 AtomicCmpXchgInst *CI) const {
21763 // Don't expand forced atomics, we want to have __sync libcalls instead.
21764 if (Subtarget.hasForcedAtomics())
21766
21768 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
21769 (Size == 8 || Size == 16))
21772}
21773
// NOTE(review): the first line of the signature (orig. line 21774, presumably
// `Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(`) was
// dropped by the documentation extraction -- TODO confirm.
// Emits a call to the riscv_masked_cmpxchg_{i32,i64} intrinsic for a
// sub-word compare-exchange that was expanded to an aligned-word form.
// On RV64 the i32 operands are sign-extended to i64 for the intrinsic and
// the result is truncated back to i32.
21775    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
21776    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
21777  unsigned XLen = Subtarget.getXLen();
  // The ordering is passed to the intrinsic as an XLen-wide integer constant.
21778  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
21779  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
21780  if (XLen == 64) {
21781    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
21782    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
21783    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21784    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
21785  }
21786  Type *Tys[] = {AlignedAddr->getType()};
21787  Value *Result = Builder.CreateIntrinsic(
21788      CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
21789  if (XLen == 64)
21790    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21791  return Result;
21792}
21793
// NOTE(review): the first line of the signature (orig. line 21794) was
// dropped by the documentation extraction -- TODO confirm. Decides whether a
// zero-extend of a gather/scatter index can be folded away: legal on both
// the extended and the source type, and never for i1 element vectors.
21795                                                     EVT DataVT) const {
21796  // We have indexed loads for all supported EEW types. Indices are always
21797  // zero extended.
21798  return Extend.getOpcode() == ISD::ZERO_EXTEND &&
21799         isTypeLegal(Extend.getValueType()) &&
21800         isTypeLegal(Extend.getOperand(0).getValueType()) &&
21801         Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
21802}
21803
// NOTE(review): the first line(s) of the signature (orig. lines 21803-21804)
// were dropped by the documentation extraction -- TODO confirm. Allows the
// FP->int saturating-conversion combine only when the saturating opcode is
// legal/custom for VT and the FP type is covered by an enabled FP extension.
21805                                                EVT VT) const {
21806  if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
21807    return false;
21808
21809  switch (FPVT.getSimpleVT().SimpleTy) {
21810  case MVT::f16:
21811    return Subtarget.hasStdExtZfhmin();
21812  case MVT::f32:
21813    return Subtarget.hasStdExtF();
21814  case MVT::f64:
21815    return Subtarget.hasStdExtD();
21816  default:
21817    return false;
21818  }
21819}
21820
21822 // If we are using the small code model, we can reduce size of jump table
21823 // entry to 4 bytes.
21824 if (Subtarget.is64Bit() && !isPositionIndependent() &&
21827 }
21829}
21830
21832 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
21833 unsigned uid, MCContext &Ctx) const {
21834 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
21836 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
21837}
21838
// NOTE(review): the signature line (orig. 21839, presumably
// `bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {`) was
// dropped by the documentation extraction -- TODO confirm.
21840  // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
21841  // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
21842  // a power of two as well.
21843  // FIXME: This doesn't work for zve32, but that's already broken
21844  // elsewhere for the same reason.
21845  assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
21846  static_assert(RISCV::RVVBitsPerBlock == 64,
21847                "RVVBitsPerBlock changed, audit needed");
21848  return true;
21849}
21850
// NOTE(review): the first line of the signature (orig. line 21851) and the
// `ISD::MemIndexedMode &AM,` parameter line (orig. line 21853) were dropped
// by the documentation extraction -- TODO confirm against the upstream
// source. Splits an ADD/SUB address computation into Base + Offset when the
// offset is encodable by the XTHeadMemIdx indexed load/store instructions.
21852                                                 SDValue &Offset,
21854                                                 SelectionDAG &DAG) const {
21855  // Target does not support indexed loads.
21856  if (!Subtarget.hasVendorXTHeadMemIdx())
21857    return false;
21858
21859  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
21860    return false;
21861
21862  Base = Op->getOperand(0);
21863  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
21864    int64_t RHSC = RHS->getSExtValue();
    // A subtracted constant is handled as the negated addend.
21865    if (Op->getOpcode() == ISD::SUB)
21866      RHSC = -(uint64_t)RHSC;
21867
21868    // The constants that can be encoded in the THeadMemIdx instructions
21869    // are of the form (sign_extend(imm5) << imm2).
21870    bool isLegalIndexedOffset = false;
21871    for (unsigned i = 0; i < 4; i++)
21872      if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
21873        isLegalIndexedOffset = true;
21874        break;
21875      }
21876
21877    if (!isLegalIndexedOffset)
21878      return false;
21879
21880    Offset = Op->getOperand(1);
21881    return true;
21882  }
21883
  // Non-constant offsets are not supported.
21884  return false;
21885}
21886
// NOTE(review): the first line of the signature (orig. line 21887) and the
// `ISD::MemIndexedMode &AM,` parameter line (orig. line 21889) were dropped
// by the documentation extraction -- TODO confirm. Recognizes a pre-increment
// addressing opportunity for a load/store by decomposing its base pointer
// via getIndexedAddressParts; on success sets AM = ISD::PRE_INC.
21888                                                SDValue &Offset,
21890                                                SelectionDAG &DAG) const {
21891  EVT VT;
21892  SDValue Ptr;
21893  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21894    VT = LD->getMemoryVT();
21895    Ptr = LD->getBasePtr();
21896  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21897    VT = ST->getMemoryVT();
21898    Ptr = ST->getBasePtr();
21899  } else
21900    return false;
21901
21902  if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
21903    return false;
21904
21905  AM = ISD::PRE_INC;
21906  return true;
21907}
21908
// NOTE(review): the first line of the signature (orig. line 21909) and the
// `ISD::MemIndexedMode &AM,` parameter line (orig. line 21912) were dropped
// by the documentation extraction -- TODO confirm. Recognizes a
// post-increment addressing opportunity: a dedicated XCVmem (RV32) path that
// accepts any register offset, and a generic path that reuses
// getIndexedAddressParts (XTHeadMemIdx immediate offsets).
21910                                                 SDValue &Base,
21911                                                 SDValue &Offset,
21913                                                 SelectionDAG &DAG) const {
21914  if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
21915    if (Op->getOpcode() != ISD::ADD)
21916      return false;
21917
21918    if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
21919      Base = LS->getBasePtr();
21920    else
21921      return false;
21922
    // ADD is commutative: accept the base on either side and take the other
    // operand as the offset.
21923    if (Base == Op->getOperand(0))
21924      Offset = Op->getOperand(1);
21925    else if (Base == Op->getOperand(1))
21926      Offset = Op->getOperand(0);
21927    else
21928      return false;
21929
21930    AM = ISD::POST_INC;
21931    return true;
21932  }
21933
21934  EVT VT;
21935  SDValue Ptr;
21936  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21937    VT = LD->getMemoryVT();
21938    Ptr = LD->getBasePtr();
21939  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21940    VT = ST->getMemoryVT();
21941    Ptr = ST->getBasePtr();
21942  } else
21943    return false;
21944
21945  if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
21946    return false;
21947  // Post-indexing updates the base, so it's not a valid transform
21948  // if that's not the same as the load's pointer.
21949  if (Ptr != Base)
21950    return false;
21951
21952  AM = ISD::POST_INC;
21953  return true;
21954}
21955
21957 EVT VT) const {
21958 EVT SVT = VT.getScalarType();
21959
21960 if (!SVT.isSimple())
21961 return false;
21962
21963 switch (SVT.getSimpleVT().SimpleTy) {
21964 case MVT::f16:
21965 return VT.isVector() ? Subtarget.hasVInstructionsF16()
21966 : Subtarget.hasStdExtZfhOrZhinx();
21967 case MVT::f32:
21968 return Subtarget.hasStdExtFOrZfinx();
21969 case MVT::f64:
21970 return Subtarget.hasStdExtDOrZdinx();
21971 default:
21972 break;
21973 }
21974
21975 return false;
21976}
21977
21979 // Zacas will use amocas.w which does not require extension.
21980 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
21981}
21982
21984 const Constant *PersonalityFn) const {
21985 return RISCV::X10;
21986}
21987
21989 const Constant *PersonalityFn) const {
21990 return RISCV::X11;
21991}
21992
21994 // Return false to suppress the unnecessary extensions if the LibCall
21995 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
21996 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
21997 Type.getSizeInBits() < Subtarget.getXLen()))
21998 return false;
21999
22000 return true;
22001}
22002
22004 bool IsSigned) const {
22005 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
22006 return true;
22007
22008 return IsSigned;
22009}
22010
22012 SDValue C) const {
22013 // Check integral scalar types.
22014 if (!VT.isScalarInteger())
22015 return false;
22016
22017 // Omit the optimization if the sub target has the M extension and the data
22018 // size exceeds XLen.
22019 const bool HasZmmul = Subtarget.hasStdExtZmmul();
22020 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
22021 return false;
22022
22023 auto *ConstNode = cast<ConstantSDNode>(C);
22024 const APInt &Imm = ConstNode->getAPIntValue();
22025
22026 // Break the MUL to a SLLI and an ADD/SUB.
22027 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
22028 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
22029 return true;
22030
22031 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
22032 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
22033 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
22034 (Imm - 8).isPowerOf2()))
22035 return true;
22036
22037 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
22038 // a pair of LUI/ADDI.
22039 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
22040 ConstNode->hasOneUse()) {
22041 APInt ImmS = Imm.ashr(Imm.countr_zero());
22042 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
22043 (1 - ImmS).isPowerOf2())
22044 return true;
22045 }
22046
22047 return false;
22048}
22049
22051 SDValue ConstNode) const {
22052 // Let the DAGCombiner decide for vectors.
22053 EVT VT = AddNode.getValueType();
22054 if (VT.isVector())
22055 return true;
22056
22057 // Let the DAGCombiner decide for larger types.
22058 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
22059 return true;
22060
22061 // It is worse if c1 is simm12 while c1*c2 is not.
22062 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
22063 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
22064 const APInt &C1 = C1Node->getAPIntValue();
22065 const APInt &C2 = C2Node->getAPIntValue();
22066 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
22067 return false;
22068
22069 // Default to true and let the DAGCombiner decide.
22070 return true;
22071}
22072
22074 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
22075 unsigned *Fast) const {
22076 if (!VT.isVector()) {
22077 if (Fast)
22078 *Fast = Subtarget.enableUnalignedScalarMem();
22079 return Subtarget.enableUnalignedScalarMem();
22080 }
22081
22082 // All vector implementations must support element alignment
22083 EVT ElemVT = VT.getVectorElementType();
22084 if (Alignment >= ElemVT.getStoreSize()) {
22085 if (Fast)
22086 *Fast = 1;
22087 return true;
22088 }
22089
22090 // Note: We lower an unmasked unaligned vector access to an equally sized
22091 // e8 element type access. Given this, we effectively support all unmasked
22092 // misaligned accesses. TODO: Work through the codegen implications of
22093 // allowing such accesses to be formed, and considered fast.
22094 if (Fast)
22095 *Fast = Subtarget.enableUnalignedVectorMem();
22096 return Subtarget.enableUnalignedVectorMem();
22097}
22098
22099
22101 const AttributeList &FuncAttributes) const {
22102 if (!Subtarget.hasVInstructions())
22103 return MVT::Other;
22104
22105 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
22106 return MVT::Other;
22107
22108 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
22109 // has an expansion threshold, and we want the number of hardware memory
22110 // operations to correspond roughly to that threshold. LMUL>1 operations
22111 // are typically expanded linearly internally, and thus correspond to more
22112 // than one actual memory operation. Note that store merging and load
22113 // combining will typically form larger LMUL operations from the LMUL1
22114 // operations emitted here, and that's okay because combining isn't
22115 // introducing new memory operations; it's just merging existing ones.
22116 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
22117 if (Op.size() < MinVLenInBytes)
22118 // TODO: Figure out short memops. For the moment, do the default thing
22119 // which ends up using scalar sequences.
22120 return MVT::Other;
22121
22122 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
22123 // fixed vectors.
22124 if (MinVLenInBytes <= RISCV::RVVBitsPerBlock / 8)
22125 return MVT::Other;
22126
22127 // Prefer i8 for non-zero memset as it allows us to avoid materializing
22128 // a large scalar constant and instead use vmv.v.x/i to do the
22129 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
22130 // maximize the chance we can encode the size in the vsetvli.
22131 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
22132 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
22133
22134 // Do we have sufficient alignment for our preferred VT? If not, revert
22135 // to largest size allowed by our alignment criteria.
22136 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
22137 Align RequiredAlign(PreferredVT.getStoreSize());
22138 if (Op.isFixedDstAlign())
22139 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
22140 if (Op.isMemcpy())
22141 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
22142 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
22143 }
22144 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
22145}
22146
22148 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
22149 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
22150 bool IsABIRegCopy = CC.has_value();
22151 EVT ValueVT = Val.getValueType();
22152
22153 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
22154 if ((ValueVT == PairVT ||
22155 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
22156 ValueVT == MVT::f64)) &&
22157 NumParts == 1 && PartVT == MVT::Untyped) {
22158 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
22159 MVT XLenVT = Subtarget.getXLenVT();
22160 if (ValueVT == MVT::f64)
22161 Val = DAG.getBitcast(MVT::i64, Val);
22162 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
22163 // Always creating an MVT::Untyped part, so always use
22164 // RISCVISD::BuildGPRPair.
22165 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
22166 return true;
22167 }
22168
22169 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
22170 PartVT == MVT::f32) {
22171 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
22172 // nan, and cast to f32.
22173 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
22174 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
22175 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
22176 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
22177 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
22178 Parts[0] = Val;
22179 return true;
22180 }
22181
22182 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
22183#ifndef NDEBUG
22184 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
22185 [[maybe_unused]] unsigned ValLMUL =
22187 ValNF * RISCV::RVVBitsPerBlock);
22188 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
22189 [[maybe_unused]] unsigned PartLMUL =
22191 PartNF * RISCV::RVVBitsPerBlock);
22192 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
22193 "RISC-V vector tuple type only accepts same register class type "
22194 "TUPLE_INSERT");
22195#endif
22196
22197 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
22198 Val, DAG.getVectorIdxConstant(0, DL));
22199 Parts[0] = Val;
22200 return true;
22201 }
22202
22203 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
22204 LLVMContext &Context = *DAG.getContext();
22205 EVT ValueEltVT = ValueVT.getVectorElementType();
22206 EVT PartEltVT = PartVT.getVectorElementType();
22207 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
22208 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
22209 if (PartVTBitSize % ValueVTBitSize == 0) {
22210 assert(PartVTBitSize >= ValueVTBitSize);
22211 // If the element types are different, bitcast to the same element type of
22212 // PartVT first.
22213 // Give an example here, we want copy a <vscale x 1 x i8> value to
22214 // <vscale x 4 x i16>.
22215 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
22216 // subvector, then we can bitcast to <vscale x 4 x i16>.
22217 if (ValueEltVT != PartEltVT) {
22218 if (PartVTBitSize > ValueVTBitSize) {
22219 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
22220 assert(Count != 0 && "The number of element should not be zero.");
22221 EVT SameEltTypeVT =
22222 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
22223 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
22224 DAG.getUNDEF(SameEltTypeVT), Val,
22225 DAG.getVectorIdxConstant(0, DL));
22226 }
22227 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
22228 } else {
22229 Val =
22230 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
22231 Val, DAG.getVectorIdxConstant(0, DL));
22232 }
22233 Parts[0] = Val;
22234 return true;
22235 }
22236 }
22237
22238 return false;
22239}
22240
22242 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
22243 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
22244 bool IsABIRegCopy = CC.has_value();
22245
22246 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
22247 if ((ValueVT == PairVT ||
22248 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
22249 ValueVT == MVT::f64)) &&
22250 NumParts == 1 && PartVT == MVT::Untyped) {
22251 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
22252 MVT XLenVT = Subtarget.getXLenVT();
22253
22254 SDValue Val = Parts[0];
22255 // Always starting with an MVT::Untyped part, so always use
22256 // RISCVISD::SplitGPRPair
22257 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
22258 Val);
22259 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
22260 Val.getValue(1));
22261 if (ValueVT == MVT::f64)
22262 Val = DAG.getBitcast(ValueVT, Val);
22263 return Val;
22264 }
22265
22266 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
22267 PartVT == MVT::f32) {
22268 SDValue Val = Parts[0];
22269
22270 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
22271 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
22272 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
22273 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
22274 return Val;
22275 }
22276
22277 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
22278 LLVMContext &Context = *DAG.getContext();
22279 SDValue Val = Parts[0];
22280 EVT ValueEltVT = ValueVT.getVectorElementType();
22281 EVT PartEltVT = PartVT.getVectorElementType();
22282 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
22283 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
22284 if (PartVTBitSize % ValueVTBitSize == 0) {
22285 assert(PartVTBitSize >= ValueVTBitSize);
22286 EVT SameEltTypeVT = ValueVT;
22287 // If the element types are different, convert it to the same element type
22288 // of PartVT.
22289 // Give an example here, we want copy a <vscale x 1 x i8> value from
22290 // <vscale x 4 x i16>.
22291 // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
22292 // then we can extract <vscale x 1 x i8>.
22293 if (ValueEltVT != PartEltVT) {
22294 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
22295 assert(Count != 0 && "The number of element should not be zero.");
22296 SameEltTypeVT =
22297 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
22298 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
22299 }
22300 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
22301 DAG.getVectorIdxConstant(0, DL));
22302 return Val;
22303 }
22304 }
22305 return SDValue();
22306}
22307
22309 // When aggressively optimizing for code size, we prefer to use a div
22310 // instruction, as it is usually smaller than the alternative sequence.
22311 // TODO: Add vector division?
22312 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
22313 return OptSize && !VT.isVector();
22314}
22315
22317 // Scalarize zero_ext and sign_ext might stop match to widening instruction in
22318 // some situation.
22319 unsigned Opc = N->getOpcode();
22320 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
22321 return false;
22322 return true;
22323}
22324
22325static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
22326 Module *M = IRB.GetInsertBlock()->getModule();
22327 Function *ThreadPointerFunc =
22328 Intrinsic::getOrInsertDeclaration(M, Intrinsic::thread_pointer);
22329 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
22330 IRB.CreateCall(ThreadPointerFunc), Offset);
22331}
22332
22334 // Fuchsia provides a fixed TLS slot for the stack cookie.
22335 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
22336 if (Subtarget.isTargetFuchsia())
22337 return useTpOffset(IRB, -0x10);
22338
22339 // Android provides a fixed TLS slot for the stack cookie. See the definition
22340 // of TLS_SLOT_STACK_GUARD in
22341 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
22342 if (Subtarget.isTargetAndroid())
22343 return useTpOffset(IRB, -0x18);
22344
22345 Module *M = IRB.GetInsertBlock()->getModule();
22346
22347 if (M->getStackProtectorGuard() == "tls") {
22348 // Users must specify the offset explicitly
22349 int Offset = M->getStackProtectorGuardOffset();
22350 return useTpOffset(IRB, Offset);
22351 }
22352
22354}
22355
22357 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
22358 const DataLayout &DL) const {
22359 EVT VT = getValueType(DL, VTy);
22360 // Don't lower vlseg/vsseg for vector types that can't be split.
22361 if (!isTypeLegal(VT))
22362 return false;
22363
22365 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
22366 Alignment))
22367 return false;
22368
22369 MVT ContainerVT = VT.getSimpleVT();
22370
22371 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
22372 if (!Subtarget.useRVVForFixedLengthVectors())
22373 return false;
22374 // Sometimes the interleaved access pass picks up splats as interleaves of
22375 // one element. Don't lower these.
22376 if (FVTy->getNumElements() < 2)
22377 return false;
22378
22380 } else {
22381 // The intrinsics for scalable vectors are not overloaded on pointer type
22382 // and can only handle the default address space.
22383 if (AddrSpace)
22384 return false;
22385 }
22386
22387 // Need to make sure that EMUL * NFIELDS ≤ 8
22388 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
22389 if (Fractional)
22390 return true;
22391 return Factor * LMUL <= 8;
22392}
22393
22395 Align Alignment) const {
22396 if (!Subtarget.hasVInstructions())
22397 return false;
22398
22399 // Only support fixed vectors if we know the minimum vector size.
22400 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
22401 return false;
22402
22403 EVT ScalarType = DataType.getScalarType();
22404 if (!isLegalElementTypeForRVV(ScalarType))
22405 return false;
22406
22407 if (!Subtarget.enableUnalignedVectorMem() &&
22408 Alignment < ScalarType.getStoreSize())
22409 return false;
22410
22411 return true;
22412}
22413
22415 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
22416 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
22417 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
22418 Intrinsic::riscv_seg8_load};
22419
22420/// Lower an interleaved load into a vlsegN intrinsic.
22421///
22422/// E.g. Lower an interleaved load (Factor = 2):
22423/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
22424/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
22425/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
22426///
22427/// Into:
22428/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
22429/// %ptr, i64 4)
22430/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
22431/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
22434 ArrayRef<unsigned> Indices, unsigned Factor) const {
22435 assert(Indices.size() == Shuffles.size());
22436
22437 IRBuilder<> Builder(LI);
22438
22439 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
22440 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
22442 LI->getDataLayout()))
22443 return false;
22444
22445 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22446
22447 // If the segment load is going to be performed segment at a time anyways
22448 // and there's only one element used, use a strided load instead. This
22449 // will be equally fast, and create less vector register pressure.
22450 if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
22451 unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
22452 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22453 Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
22454 Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
22455 Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
22456 Value *VL = Builder.getInt32(VTy->getNumElements());
22457
22458 CallInst *CI =
22459 Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
22460 {VTy, BasePtr->getType(), Stride->getType()},
22461 {BasePtr, Stride, Mask, VL});
22462 CI->addParamAttr(
22464 Shuffles[0]->replaceAllUsesWith(CI);
22465 return true;
22466 };
22467
22468 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22469
22470 CallInst *VlsegN = Builder.CreateIntrinsic(
22471 FixedVlsegIntrIds[Factor - 2], {VTy, LI->getPointerOperandType(), XLenTy},
22472 {LI->getPointerOperand(), VL});
22473
22474 for (unsigned i = 0; i < Shuffles.size(); i++) {
22475 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
22476 Shuffles[i]->replaceAllUsesWith(SubVec);
22477 }
22478
22479 return true;
22480}
22481
22483 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
22484 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
22485 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
22486 Intrinsic::riscv_seg8_store};
22487
22488/// Lower an interleaved store into a vssegN intrinsic.
22489///
22490/// E.g. Lower an interleaved store (Factor = 3):
22491/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
22492/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
22493/// store <12 x i32> %i.vec, <12 x i32>* %ptr
22494///
22495/// Into:
22496/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
22497/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
22498/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
22499/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
22500/// %ptr, i32 4)
22501///
22502/// Note that the new shufflevectors will be removed and we'll only generate one
22503/// vsseg3 instruction in CodeGen.
22505 ShuffleVectorInst *SVI,
22506 unsigned Factor) const {
22507 IRBuilder<> Builder(SI);
22508 auto Mask = SVI->getShuffleMask();
22509 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
22510 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
22511 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
22512 ShuffleVTy->getNumElements() / Factor);
22513 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
22514 SI->getPointerAddressSpace(),
22515 SI->getDataLayout()))
22516 return false;
22517
22518 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22519
22520 unsigned Index;
22521 // If the segment store only has one active lane (i.e. the interleave is
22522 // just a spread shuffle), we can use a strided store instead. This will
22523 // be equally fast, and create less vector register pressure.
22524 if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
22525 isSpreadMask(Mask, Factor, Index)) {
22526 unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
22527 Value *Data = SVI->getOperand(0);
22528 auto *DataVTy = cast<FixedVectorType>(Data->getType());
22529 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22530 Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
22531 Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
22532 Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
22533 Value *VL = Builder.getInt32(VTy->getNumElements());
22534
22535 CallInst *CI = Builder.CreateIntrinsic(
22536 Intrinsic::experimental_vp_strided_store,
22537 {Data->getType(), BasePtr->getType(), Stride->getType()},
22538 {Data, BasePtr, Stride, Mask, VL});
22539 CI->addParamAttr(
22540 1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
22541
22542 return true;
22543 }
22544
22546 SI->getModule(), FixedVssegIntrIds[Factor - 2],
22547 {VTy, SI->getPointerOperandType(), XLenTy});
22548
22550
22551 for (unsigned i = 0; i < Factor; i++) {
22552 Value *Shuffle = Builder.CreateShuffleVector(
22553 SVI->getOperand(0), SVI->getOperand(1),
22554 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
22555 Ops.push_back(Shuffle);
22556 }
22557 // This VL should be OK (should be executable in one vsseg instruction,
22558 // potentially under larger LMULs) because we checked that the fixed vector
22559 // type fits in isLegalInterleavedAccessType
22560 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22561 Ops.append({SI->getPointerOperand(), VL});
22562
22563 Builder.CreateCall(VssegNFunc, Ops);
22564
22565 return true;
22566}
22567
22569 LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const {
22570 unsigned Factor = DeinterleaveValues.size();
22571 if (Factor > 8)
22572 return false;
22573
22574 assert(LI->isSimple());
22575 IRBuilder<> Builder(LI);
22576
22577 auto *ResVTy = cast<VectorType>(DeinterleaveValues[0]->getType());
22578
22579 const DataLayout &DL = LI->getDataLayout();
22580
22581 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
22582 LI->getPointerAddressSpace(), DL))
22583 return false;
22584
22585 Value *Return;
22586 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22587
22588 if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
22589 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22590 Return =
22591 Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
22592 {ResVTy, LI->getPointerOperandType(), XLenTy},
22593 {LI->getPointerOperand(), VL});
22594 } else {
22595 static const Intrinsic::ID IntrIds[] = {
22596 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
22597 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
22598 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
22599 Intrinsic::riscv_vlseg8};
22600
22601 unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType());
22602 unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
22603 Type *VecTupTy = TargetExtType::get(
22604 LI->getContext(), "riscv.vector.tuple",
22606 NumElts * SEW / 8),
22607 Factor);
22608
22609 Value *VL = Constant::getAllOnesValue(XLenTy);
22610
22611 Value *Vlseg = Builder.CreateIntrinsic(
22612 IntrIds[Factor - 2], {VecTupTy, XLenTy},
22613 {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL,
22614 ConstantInt::get(XLenTy, Log2_64(SEW))});
22615
22616 SmallVector<Type *, 2> AggrTypes{Factor, ResVTy};
22617 Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes));
22618 for (unsigned i = 0; i < Factor; ++i) {
22619 Value *VecExtract = Builder.CreateIntrinsic(
22620 Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy},
22621 {Vlseg, Builder.getInt32(i)});
22622 Return = Builder.CreateInsertValue(Return, VecExtract, i);
22623 }
22624 }
22625
22626 for (auto [Idx, DIV] : enumerate(DeinterleaveValues)) {
22627 // We have to create a brand new ExtractValue to replace each
22628 // of these old ExtractValue instructions.
22629 Value *NewEV =
22630 Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
22631 DIV->replaceAllUsesWith(NewEV);
22632 }
22633
22634 return true;
22635}
22636
22638 StoreInst *SI, ArrayRef<Value *> InterleaveValues) const {
22639 unsigned Factor = InterleaveValues.size();
22640 if (Factor > 8)
22641 return false;
22642
22643 assert(SI->isSimple());
22644 IRBuilder<> Builder(SI);
22645
22646 auto *InVTy = cast<VectorType>(InterleaveValues[0]->getType());
22647 const DataLayout &DL = SI->getDataLayout();
22648
22649 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
22650 SI->getPointerAddressSpace(), DL))
22651 return false;
22652
22653 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22654
22655 if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
22657 SI->getModule(), FixedVssegIntrIds[Factor - 2],
22658 {InVTy, SI->getPointerOperandType(), XLenTy});
22659
22660 SmallVector<Value *, 10> Ops(InterleaveValues.begin(),
22661 InterleaveValues.end());
22662 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22663 Ops.append({SI->getPointerOperand(), VL});
22664
22665 Builder.CreateCall(VssegNFunc, Ops);
22666 } else {
22667 static const Intrinsic::ID IntrIds[] = {
22668 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
22669 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
22670 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
22671 Intrinsic::riscv_vsseg8};
22672
22673 unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
22674 unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
22675 Type *VecTupTy = TargetExtType::get(
22676 SI->getContext(), "riscv.vector.tuple",
22677 ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
22678 NumElts * SEW / 8),
22679 Factor);
22680
22682 SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy});
22683
22684 Value *VL = Constant::getAllOnesValue(XLenTy);
22685
22686 Value *StoredVal = PoisonValue::get(VecTupTy);
22687 for (unsigned i = 0; i < Factor; ++i)
22688 StoredVal = Builder.CreateIntrinsic(
22689 Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
22690 {StoredVal, InterleaveValues[i], Builder.getInt32(i)});
22691
22692 Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL,
22693 ConstantInt::get(XLenTy, Log2_64(SEW))});
22694 }
22695
22696 return true;
22697}
22698
22702 const TargetInstrInfo *TII) const {
22703 assert(MBBI->isCall() && MBBI->getCFIType() &&
22704 "Invalid call instruction for a KCFI check");
22705 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
22706 MBBI->getOpcode()));
22707
22708 MachineOperand &Target = MBBI->getOperand(0);
22709 Target.setIsRenamable(false);
22710
22711 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
22712 .addReg(Target.getReg())
22713 .addImm(MBBI->getCFIType())
22714 .getInstr();
22715}
22716
22717#define GET_REGISTER_MATCHER
22718#include "RISCVGenAsmMatcher.inc"
22719
22722 const MachineFunction &MF) const {
22724 if (Reg == RISCV::NoRegister)
22726 if (Reg == RISCV::NoRegister)
22728 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
22729 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
22730 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
22731 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
22732 StringRef(RegName) + "\"."));
22733 return Reg;
22734}
22735
22738 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
22739
22740 if (NontemporalInfo == nullptr)
22742
22743 // 1 for default value work as __RISCV_NTLH_ALL
22744 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
22745 // 3 -> __RISCV_NTLH_ALL_PRIVATE
22746 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
22747 // 5 -> __RISCV_NTLH_ALL
22748 int NontemporalLevel = 5;
22749 const MDNode *RISCVNontemporalInfo =
22750 I.getMetadata("riscv-nontemporal-domain");
22751 if (RISCVNontemporalInfo != nullptr)
22752 NontemporalLevel =
22753 cast<ConstantInt>(
22754 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
22755 ->getValue())
22756 ->getZExtValue();
22757
22758 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
22759 "RISC-V target doesn't support this non-temporal domain.");
22760
22761 NontemporalLevel -= 2;
22763 if (NontemporalLevel & 0b1)
22764 Flags |= MONontemporalBit0;
22765 if (NontemporalLevel & 0b10)
22766 Flags |= MONontemporalBit1;
22767
22768 return Flags;
22769}
22770
22773
22774 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
22776 TargetFlags |= (NodeFlags & MONontemporalBit0);
22777 TargetFlags |= (NodeFlags & MONontemporalBit1);
22778 return TargetFlags;
22779}
22780
22782 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
22783 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
22784}
22785
22787 if (VT.isScalableVector())
22788 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
22789 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
22790 return true;
22791 return Subtarget.hasStdExtZbb() &&
22792 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
22793}
22794
22796 ISD::CondCode Cond) const {
22797 return isCtpopFast(VT) ? 0 : 1;
22798}
22799
// Decide whether AtomicExpand should bracket the atomic instruction I with
// explicit fence instructions during lowering.
// NOTE(review): line 22800 (the start of the signature) is missing from this
// rendering -- confirm against the original source file.
22801 const Instruction *I) const {
22802 if (Subtarget.hasStdExtZalasr()) {
22803 if (Subtarget.hasStdExtZtso()) {
22804 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
22805 // should be lowered to plain load/store. The easiest way to do this is
22806 // to say we should insert fences for them, and the fence insertion code
22807 // will just not insert any fences
22808 auto *LI = dyn_cast<LoadInst>(I);
22809 auto *SI = dyn_cast<StoreInst>(I);
22810 if ((LI &&
22811 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
22812 (SI &&
22813 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
22814 // Here, this is a load or store which is seq_cst, and needs a .aq or
22815 // .rl therefore we shouldn't try to insert fences
22816 return false;
22817 }
22818 // Here, we are a TSO inst that isn't a seq_cst load/store
22819 return isa<LoadInst>(I) || isa<StoreInst>(I);
22820 }
// Zalasr without Ztso: never insert fences here.
22821 return false;
22822 }
22823 // Note that one specific case requires fence insertion for an
22824 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
22825 // than this hook due to limitations in the interface here.
// Without Zalasr, plain atomic loads/stores get explicit fences.
22826 return isa<LoadInst>(I) || isa<StoreInst>(I);
22827}
22828
// Return true when GlobalISel cannot yet handle Inst and instruction
// selection must fall back to SelectionDAG.
// NOTE(review): line 22829 (the function signature, presumably
// RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst)) is
// missing from this rendering.
22830
22831 // GISel support is in progress or complete for these opcodes.
22832 unsigned Op = Inst.getOpcode();
22833 if (Op == Instruction::Add || Op == Instruction::Sub ||
22834 Op == Instruction::And || Op == Instruction::Or ||
22835 Op == Instruction::Xor || Op == Instruction::InsertElement ||
22836 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
22837 Op == Instruction::Freeze || Op == Instruction::Store)
22838 return false;
22839
// Any other instruction producing a scalable-vector value forces a fallback.
22840 if (Inst.getType()->isScalableTy())
22841 return true;
22842
// Likewise for scalable-vector operands, except on returns.
22843 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
22844 if (Inst.getOperand(i)->getType()->isScalableTy() &&
22845 !isa<ReturnInst>(&Inst))
22846 return true;
22847
// Allocas of scalable types are also unsupported by GISel.
22848 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
22849 if (AI->getAllocatedType()->isScalableTy())
22850 return true;
22851 }
22852
22853 return false;
22854}
22855
/// Build an optimized signed-divide-by-power-of-2 sequence using the
/// CMov-style expansion, but only when the short-forward-branch optimization
/// is available, the type is i32 (or i64 on RV64), and the divisor is small
/// enough that 2**k-1 fits a single addi/addiw immediate.
22856SDValue
22857RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
22858 SelectionDAG &DAG,
22859 SmallVectorImpl<SDNode *> &Created) const {
// NOTE(review): line 22860 is missing from this rendering; it presumably
// declares the Attr (attribute list) value tested below -- confirm against
// the original source file.
22861 if (isIntDivCheap(N->getValueType(0), Attr))
22862 return SDValue(N, 0); // Lower SDIV as SDIV
22863
22864 // Only perform this transform if short forward branch opt is supported.
22865 if (!Subtarget.hasShortForwardBranchOpt())
22866 return SDValue();
22867 EVT VT = N->getValueType(0);
22868 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
22869 return SDValue();
22870
22871 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
22872 if (Divisor.sgt(2048) || Divisor.slt(-2048))
22873 return SDValue();
22874 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
22875}
22876
// Decide whether a select fed by a single-bit AND test should be folded.
// With a conditional-move style extension (Zicond or XVentanaCondOps),
// only fold when Zbs is absent and the mask is too large for a simple
// immediate test (ugt 1024 -- TODO confirm the exact rationale for this
// bound against the original source).
22877bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
22878 EVT VT, const APInt &AndMask) const {
22879 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
22880 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
// NOTE(review): line 22881 (the fall-through return, presumably delegating
// to the base-class default) is missing from this rendering -- confirm.
22882}
22883
22884unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
22885 return Subtarget.getMinimumJumpTableEntries();
22886}
22887
// Expand an indirect jump-table branch. With Zicfilp, the indirect jump
// must be emitted as the software-guarded branch node (SW_GUARDED_BRIND);
// otherwise the generic TargetLowering expansion is used.
// NOTE(review): lines 22888-22889 (the start of the expandIndirectJTBranch
// signature) are missing from this rendering -- confirm.
22890 int JTI,
22891 SelectionDAG &DAG) const {
22892 if (Subtarget.hasStdExtZicfilp()) {
22893 // When Zicfilp enabled, we need to use software guarded branch for jump
22894 // table branch.
22895 SDValue Chain = Value;
22896 // Jump table debug info is only needed if CodeView is enabled.
// NOTE(review): line 22897 is missing from this rendering; given the
// preceding comment it is presumably the CodeView-enabled check guarding
// the getJumpTableDebugInfo call -- confirm.
22898 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
22899 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
22900 }
22901 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
22902}
22903
22904// If an output pattern produces multiple instructions tablegen may pick an
22905// arbitrary type from an instructions destination register class to use for the
22906// VT of that MachineSDNode. This VT may be used to look up the representative
22907// register class. If the type isn't legal, the default implementation will
22908// not find a register class.
22909//
22910// Some integer types smaller than XLen are listed in the GPR register class to
22911// support isel patterns for GISel, but are not legal in SelectionDAG. The
22912// arbitrary type tablegen picks may be one of these smaller types.
22913//
22914// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
22915// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
22916std::pair<const TargetRegisterClass *, uint8_t>
22917RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
22918 MVT VT) const {
22919 switch (VT.SimpleTy) {
22920 default:
22921 break;
22922 case MVT::i8:
22923 case MVT::i16:
22924 case MVT::i32:
// NOTE(review): line 22925 is missing from this rendering; presumably the
// return for the small-integer cases (a GPR-based register class, per the
// comment above) -- confirm against the original source file.
22926 case MVT::bf16:
22927 case MVT::f16:
// NOTE(review): lines 22928 and 22931 are missing from this rendering;
// presumably the return for the f16/bf16 cases and the final delegation to
// the base-class findRepresentativeClass -- confirm.
22929 }
22930
22932}
22933
// Instantiate the tablegen-generated searchable table of RVV intrinsics.
// NOTE(review): line 22934 (presumably the opening
// "namespace llvm::RISCVVIntrinsicsTable {" line matching the close below)
// is missing from this rendering -- confirm.
22935
22936#define GET_RISCVVIntrinsicsTable_IMPL
22937#include "RISCVGenSearchableTables.inc"
22938
22939} // namespace llvm::RISCVVIntrinsicsTable
22940
// Whether stack probing should be emitted inline for this function,
// controlled by the "probe-stack" function attribute being "inline-asm".
// NOTE(review): line 22941 (the function signature, presumably
// RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF)) is
// missing from this rendering.
22942
22943 // If the function specifically requests inline stack probes, emit them.
22944 if (MF.getFunction().hasFnAttribute("probe-stack"))
22945 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
22946 "inline-asm";
22947
// Default: no inline stack probing.
22948 return false;
22949}
22950
// Compute the interval, in bytes, between inline stack probes, from the
// "stack-probe-size" function attribute (default 4096), rounded down to the
// stack alignment.
// NOTE(review): line 22951 (the start of the signature) is missing from
// this rendering -- confirm against the original source file.
22952 Align StackAlign) const {
22953 // The default stack probe size is 4096 if the function has no
22954 // stack-probe-size attribute.
22955 const Function &Fn = MF.getFunction();
22956 unsigned StackProbeSize =
22957 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
22958 // Round down to the stack alignment.
22959 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
// If rounding down produced 0 (attribute value smaller than the alignment),
// fall back to probing once per stack-alignment unit.
22960 return StackProbeSize ? StackProbeSize : StackAlign.value();
22961}
22962
/// Lower ISD::DYNAMIC_STACKALLOC when inline stack probing is required:
/// compute the new SP value in a GPR and emit a PROBED_ALLOCA node that
/// performs the probing loop, instead of a plain SP adjustment. Returns an
/// empty SDValue (generic lowering) when inline probing is not requested.
22963SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
22964 SelectionDAG &DAG) const {
// NOTE(review): line 22965 is missing from this rendering; presumably the
// MachineFunction reference (MF) used on the next line -- confirm.
22966 if (!hasInlineStackProbe(MF))
22967 return SDValue();
22968
22969 MVT XLenVT = Subtarget.getXLenVT();
22970 // Get the inputs.
22971 SDValue Chain = Op.getOperand(0);
22972 SDValue Size = Op.getOperand(1);
22973
// NOTE(review): line 22974 is missing from this rendering; presumably the
// declaration of the Align (MaybeAlign) value initialized from operand 2
// on the next line -- confirm.
22975 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
22976 SDLoc dl(Op);
22977 EVT VT = Op.getValueType();
22978
22979 // Construct the new SP value in a GPR.
22980 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
22981 Chain = SP.getValue(1);
22982 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
// Round the new SP down to the requested alignment, if one was given.
22983 if (Align)
22984 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
22985 DAG.getSignedConstant(-(uint64_t)Align->value(), dl, VT))
22986
22987 // Set the real SP to the new value with a probing loop.
22988 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
22989 return DAG.getMergeValues({SP, Chain}, dl);
22990}
22991
// Expand the PROBED_ALLOCA pseudo: emit a loop that repeatedly decrements SP
// by the probe size and stores zero to the new stack top until SP drops to
// TargetReg, then sets SP exactly to TargetReg.
// NOTE(review): this rendering is missing lines 22992-22993 (the start of
// the signature), 22997 (presumably the DebugLoc DL used below), 23008 and
// 23010 (presumably the MF.CreateMachineBasicBlock calls initializing
// LoopTestMBB and ExitMBB), and 23044 (after the splice, presumably
// transferring/recomputing block state) -- confirm against the original.
22994 MachineBasicBlock *MBB) const {
22995 MachineFunction &MF = *MBB->getParent();
22996 MachineBasicBlock::iterator MBBI = MI.getIterator();
22998 Register TargetReg = MI.getOperand(1).getReg();
22999
23000 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
23001 bool IsRV64 = Subtarget.is64Bit();
23002 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
23003 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
23004 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
23005
// Insert the loop-test and exit blocks immediately after the current block.
23006 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
23007 MachineBasicBlock *LoopTestMBB =
23009 MF.insert(MBBInsertPoint, LoopTestMBB);
23011 MF.insert(MBBInsertPoint, ExitMBB);
23012 Register SPReg = RISCV::X2;
23013 Register ScratchReg =
23014 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
23015
23016 // ScratchReg = ProbeSize
23017 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
23018
23019 // LoopTest:
23020 // SUB SP, SP, ProbeSize
23021 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
23022 .addReg(SPReg)
23023 .addReg(ScratchReg);
23024
23025 // s[d|w] zero, 0(sp)
23026 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
23027 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
23028 .addReg(RISCV::X0)
23029 .addReg(SPReg)
23030 .addImm(0);
23031
23032 // BLT TargetReg, SP, LoopTest
23033 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
23034 .addReg(TargetReg)
23035 .addReg(SPReg)
23036 .addMBB(LoopTestMBB);
23037
23038 // Adjust with: MV SP, TargetReg.
23039 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
23040 .addReg(TargetReg)
23041 .addImm(0);
23042
// Move the remainder of the original block into the exit block and wire up
// the CFG: MBB -> LoopTest, LoopTest -> {LoopTest, Exit}.
23043 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
23045
23046 LoopTestMBB->addSuccessor(ExitMBB);
23047 LoopTestMBB->addSuccessor(LoopTestMBB);
23048 MBB->addSuccessor(LoopTestMBB);
23049
// Remove the pseudo and record that this function performs dynamic
// allocation (consumed by frame lowering).
23050 MI.eraseFromParent();
23051 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
23052 return ExitMBB->begin()->getParent();
23053}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr Register SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static bool hasPassthruOp(unsigned Opcode)
Return true if a RISC-V target specified op has a passthru operand.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
#define ROTR(x, n)
Definition: SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
static bool isCommutative(Instruction *I)
#define ROTL(x, b)
Definition: SipHash.cpp:32
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1334
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1326
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1111
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1618
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1397
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:370
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1710
bool isMask(unsigned numBits) const
Definition: APInt.h:488
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:334
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1389
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:455
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:63
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:827
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:734
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:732
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:738
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:736
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
bool isFloatingPointOperation() const
Definition: Instructions.h:882
BinOp getOperation() const
Definition: Instructions.h:805
Value * getValOperand()
Definition: Instructions.h:874
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:847
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:396
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1494
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:364
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:202
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:314
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
Type * getReturnType() const
Definition: DerivedTypes.h:126
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:778
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:716
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:277
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:365
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Argument * getArg(unsigned i) const
Definition: Function.h:898
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:306
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:530
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1887
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2562
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1842
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2045
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1987
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
Value * getAllOnesMask(ElementCount NumElts)
Return an all true boolean vector (mask) with NumElts lanes.
Definition: IRBuilder.h:867
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:516
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2533
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1862
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:68
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:310
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
Value * getPointerOperand()
Definition: Instructions.h:255
Type * getPointerOperandType() const
Definition: Instructions.h:258
bool isSimple() const
Definition: Instructions.h:247
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
MCContext & getContext() const
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Metadata node.
Definition: Metadata.h:1073
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1434
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:237
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:307
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:71
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:406
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:587
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
bool hasStdExtDOrZdinx() const
unsigned getMaxLoadsPerMemcmp(bool OptSize) const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
unsigned getMaxStoresPerMemset(bool OptSize) const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
unsigned getMaxStoresPerMemmove(bool OptSize) const
bool hasVInstructionsF16Minimal() const
unsigned getMaxGluedStoresPerMemcpy() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
const RISCVFrameLowering * getFrameLowering() const override
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool lowerInterleaveIntrinsicToStore(StoreInst *SI, ArrayRef< Value * > InterleaveValues) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
static RISCVII::VLMUL getLMUL(MVT VT)
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI, ArrayRef< Value * > DeinterleaveValues) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from SrcVT to DstVT is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the type of the node is undefined.
iterator_range< user_iterator > users()
op_iterator op_end() const
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition: Type.cpp:812
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:751
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:802
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:397
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:761
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:857
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:828
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
Definition: SelectionDAG.h:505
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:713
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:891
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:701
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:797
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:874
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:907
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:286
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
std::string lower() const
Definition: StringRef.cpp:113
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:68
R Default(T Value)
Definition: StringSwitch.h:177
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:87
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
static TargetExtType * get(LLVMContext &Context, StringRef Name, ArrayRef< Type * > Types={}, ArrayRef< unsigned > Ints={})
Return a target extension type having the specified name and optional type and integer parameters.
Definition: Type.cpp:895
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:80
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:758
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:258
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition: Type.h:203
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:64
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1094
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1417
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1360
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1450
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1340
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1342
@ STRICT_FCEIL
Definition: ISDOpcodes.h:441
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1343
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1435
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1439
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1449
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1338
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1339
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1270
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1494
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:936
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1259
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1432
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1436
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ STRICT_LROUND
Definition: ISDOpcodes.h:446
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1341
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:601
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:661
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:445
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:931
@ STRICT_FP_TO_FP16
Definition: ISDOpcodes.h:967
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ STRICT_FP16_TO_FP
Definition: ISDOpcodes.h:966
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1336
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1407
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1344
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ STRICT_LRINT
Definition: ISDOpcodes.h:448
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:606
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ STRICT_FROUND
Definition: ISDOpcodes.h:443
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1372
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1452
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:442
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:444
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:973
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1334
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1335
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:651
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:449
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:627
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1333
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:669
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:421
@ STRICT_LLROUND
Definition: ISDOpcodes.h:447
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:438
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1440
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ STRICT_FRINT
Definition: ISDOpcodes.h:437
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:595
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1055
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:832
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1276
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:836
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1572
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1572
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1559
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1610
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1590
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1655
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:732
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SplitF64
Turns a f64 into a pair of i32s.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
@ BuildPairF64
Turns a pair of i32s into an f64.
@ BuildGPRPair
Turn a pair of i<xlen>s into an even-odd register pair (untyped).
@ SplitGPRPair
Turn an even-odd register pair (untyped) into a pair of i<xlen>s.
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint32_t read32le(const void *P)
Definition: Endian.h:425
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
@ Offset
Definition: DWP.cpp:480
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:556
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:297
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1547
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:347
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:395
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:341
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:274
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:404
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:302
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:315
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:397
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:425
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a vector value type.
Definition: ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:414
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:287
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1049
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:65
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:266
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:153
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:288
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1009
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:272
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)