1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/IntrinsicsRISCV.h"
45#include "llvm/Support/Debug.h"
51#include <optional>
52
53using namespace llvm;
54
55#define DEBUG_TYPE "riscv-lower"
56
57STATISTIC(NumTailCalls, "Number of tail calls");
58
60 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
61 cl::desc("Give the maximum size (in number of nodes) of the web of "
62 "instructions that we will consider for VW expansion"),
63 cl::init(18));
64
65static cl::opt<bool>
66 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
67 cl::desc("Allow the formation of VW_W operations (e.g., "
68 "VWADD_W) with splat constants"),
69 cl::init(false));
70
72 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
73 cl::desc("Set the minimum number of repetitions of a divisor to allow "
74 "transformation to multiplications by the reciprocal"),
75 cl::init(2));
76
77static cl::opt<int>
79 cl::desc("Give the maximum number of instructions that we will "
80 "use for creating a floating-point immediate value"),
81 cl::init(2));
82
83static cl::opt<bool>
84 ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
85 cl::desc("Swap add and addi in cases where the add may "
86 "be combined with a shift"),
87 cl::init(true));
88
90 const RISCVSubtarget &STI)
91 : TargetLowering(TM), Subtarget(STI) {
92
93 RISCVABI::ABI ABI = Subtarget.getTargetABI();
94 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
95
96 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
97 !Subtarget.hasStdExtF()) {
98 errs() << "Hard-float 'f' ABI can't be used for a target that "
99 "doesn't support the F instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
103 !Subtarget.hasStdExtD()) {
104 errs() << "Hard-float 'd' ABI can't be used for a target that "
105 "doesn't support the D instruction set extension (ignoring "
106 "target-abi)\n";
107 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
108 }
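  // Illustrative example (assumed scenario, not from the source): requesting
  // -mabi=ilp32d while building for a target without the D extension trips
  // the second check above; the diagnostic is printed and the ABI falls back
  // to the soft-float ILP32 calling convention.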
109
110 switch (ABI) {
111 default:
112 reportFatalUsageError("Don't know how to lower this ABI");
121 break;
122 }
123
124 MVT XLenVT = Subtarget.getXLenVT();
125
126 // Set up the register classes.
127 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
128
129 if (Subtarget.hasStdExtZfhmin())
130 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
131 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
132 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
133 if (Subtarget.hasStdExtF())
134 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
135 if (Subtarget.hasStdExtD())
136 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
137 if (Subtarget.hasStdExtZhinxmin())
138 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
139 if (Subtarget.hasStdExtZfinx())
140 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
141 if (Subtarget.hasStdExtZdinx()) {
142 if (Subtarget.is64Bit())
143 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
144 else
145 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
146 }
147
148 static const MVT::SimpleValueType BoolVecVTs[] = {
149 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
150 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
151 static const MVT::SimpleValueType IntVecVTs[] = {
152 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
153 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
154 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
155 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
156 MVT::nxv4i64, MVT::nxv8i64};
157 static const MVT::SimpleValueType F16VecVTs[] = {
158 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
159 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
160 static const MVT::SimpleValueType BF16VecVTs[] = {
161 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
162 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
163 static const MVT::SimpleValueType F32VecVTs[] = {
164 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
165 static const MVT::SimpleValueType F64VecVTs[] = {
166 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
167 static const MVT::SimpleValueType VecTupleVTs[] = {
168 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
169 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
170 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
171 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
172 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
173 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
174 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
175 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
176 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
177 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
178 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
179
180 if (Subtarget.hasVInstructions()) {
181 auto addRegClassForRVV = [this](MVT VT) {
182 // Disable the smallest fractional LMUL types if ELEN is less than
183 // RVVBitsPerBlock.
184 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
185 if (VT.getVectorMinNumElements() < MinElts)
186 return;
187
188 unsigned Size = VT.getSizeInBits().getKnownMinValue();
189 const TargetRegisterClass *RC;
191 RC = &RISCV::VRRegClass;
192 else if (Size == 2 * RISCV::RVVBitsPerBlock)
193 RC = &RISCV::VRM2RegClass;
194 else if (Size == 4 * RISCV::RVVBitsPerBlock)
195 RC = &RISCV::VRM4RegClass;
196 else if (Size == 8 * RISCV::RVVBitsPerBlock)
197 RC = &RISCV::VRM8RegClass;
198 else
199 llvm_unreachable("Unexpected size");
200
201 addRegisterClass(VT, RC);
202 };
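    // Worked example (illustrative, assuming RVVBitsPerBlock = 64): nxv8i32
    // has a known-minimum size of 256 bits = 4 * RVVBitsPerBlock and is
    // assigned VRM4 (an LMUL=4 register group), while nxv1i32 fits in a
    // single block and takes the plain VR class. On a Zve32x target
    // (ELEN=32), MinElts becomes 2, so the smallest fractional types such as
    // nxv1i8 are skipped entirely.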
203
204 for (MVT VT : BoolVecVTs)
205 addRegClassForRVV(VT);
206 for (MVT VT : IntVecVTs) {
207 if (VT.getVectorElementType() == MVT::i64 &&
208 !Subtarget.hasVInstructionsI64())
209 continue;
210 addRegClassForRVV(VT);
211 }
212
213 if (Subtarget.hasVInstructionsF16Minimal() ||
214 Subtarget.hasVendorXAndesVPackFPH())
215 for (MVT VT : F16VecVTs)
216 addRegClassForRVV(VT);
217
218 if (Subtarget.hasVInstructionsBF16Minimal() ||
219 Subtarget.hasVendorXAndesVBFHCvt())
220 for (MVT VT : BF16VecVTs)
221 addRegClassForRVV(VT);
222
223 if (Subtarget.hasVInstructionsF32())
224 for (MVT VT : F32VecVTs)
225 addRegClassForRVV(VT);
226
227 if (Subtarget.hasVInstructionsF64())
228 for (MVT VT : F64VecVTs)
229 addRegClassForRVV(VT);
230
231 if (Subtarget.useRVVForFixedLengthVectors()) {
232 auto addRegClassForFixedVectors = [this](MVT VT) {
233 MVT ContainerVT = getContainerForFixedLengthVector(VT);
234 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
235 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
236 addRegisterClass(VT, TRI.getRegClass(RCID));
237 };
239 if (useRVVForFixedLengthVectorVT(VT))
240 addRegClassForFixedVectors(VT);
241
243 if (useRVVForFixedLengthVectorVT(VT))
244 addRegClassForFixedVectors(VT);
245 }
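      // Illustrative mapping (assumed, VLEN >= 128): a fixed-length v4i32 is
      // given the register class of its scalable "container" type, e.g.
      // nxv2i32, so fixed-length vectors live in the same V registers and are
      // operated on with VL set to the fixed element count.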
246
247 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
248 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
249 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
250 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
251 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
252 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
253 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
254 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
255 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
256 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
257 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
258 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
259 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
260 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
261 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
262 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
263 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
264 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
265 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
266 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
267 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
268 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
269 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
270 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
271 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
272 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
273 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
274 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
275 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
276 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
277 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
278 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
279 }
280
281 // Compute derived properties from the register classes.
283
285
287 MVT::i1, Promote);
288 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
290 MVT::i1, Promote);
291
292 // TODO: add all necessary setOperationAction calls.
294
299
304 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
307 }
308
310
313
314 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
315 !Subtarget.hasVendorXAndesPerf())
317
319
320 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
321 !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
322 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
323 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
324
325 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
328 }
329
330 if (Subtarget.is64Bit()) {
332
335 MVT::i32, Custom);
337 if (!Subtarget.hasStdExtZbb())
340 Custom);
342 }
343 if (!Subtarget.hasStdExtZmmul()) {
345 } else if (Subtarget.is64Bit()) {
348 } else {
350 }
351
352 if (!Subtarget.hasStdExtM()) {
354 Expand);
355 } else if (Subtarget.is64Bit()) {
357 {MVT::i8, MVT::i16, MVT::i32}, Custom);
358 }
359
362 Expand);
363
365 Custom);
366
367 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
368 if (Subtarget.is64Bit())
370 } else if (Subtarget.hasVendorXTHeadBb()) {
371 if (Subtarget.is64Bit())
374 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
376 } else {
378 }
379
380 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
381 // pattern match it directly in isel.
383 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
384 Subtarget.hasVendorXTHeadBb())
385 ? Legal
386 : Expand);
387
388 if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
389 !Subtarget.is64Bit()) {
391 } else {
392 // Zbkb can use rev8+brev8 to implement bitreverse.
394 Subtarget.hasStdExtZbkb() ? Custom : Expand);
395 if (Subtarget.hasStdExtZbkb())
397 }
398
399 if (Subtarget.hasStdExtZbb() ||
400 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
402 Legal);
403 }
404
405 if (Subtarget.hasStdExtZbb() ||
406 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
407 if (Subtarget.is64Bit())
409 } else {
411 // TODO: These should be set to LibCall, but this currently breaks
412 // the Linux kernel build. See #101786. Lacks i128 tests, too.
413 if (Subtarget.is64Bit())
415 else
418 }
419
420 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
421 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
422 // We need the custom lowering to make sure that the resulting sequence
423 // for the 32bit case is efficient on 64bit targets.
424 if (Subtarget.is64Bit())
426 } else {
428 }
429
430 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
432 } else if (Subtarget.hasShortForwardBranchOpt()) {
433 // We can use PseudoCCSUB to implement ABS.
435 } else if (Subtarget.is64Bit()) {
437 }
438
439 if (!Subtarget.useCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov() &&
440 !Subtarget.hasVendorXqcicm() && !Subtarget.hasVendorXqcics())
442
443 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
450 }
451
452 static const unsigned FPLegalNodeTypes[] = {
460
461 static const ISD::CondCode FPCCToExpand[] = {
465
466 static const unsigned FPOpToExpand[] = {
468 ISD::FREM};
469
470 static const unsigned FPRndMode[] = {
473
474 static const unsigned ZfhminZfbfminPromoteOps[] = {
484
485 if (Subtarget.hasStdExtZfbfmin()) {
491 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
498 }
499
500 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
501 if (Subtarget.hasStdExtZfhOrZhinx()) {
502 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
503 setOperationAction(FPRndMode, MVT::f16,
504 Subtarget.hasStdExtZfa() ? Legal : Custom);
507 Subtarget.hasStdExtZfa() ? Legal : Custom);
508 if (Subtarget.hasStdExtZfa())
510 } else {
511 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
516 setOperationAction(Op, MVT::f16, Custom);
522 }
523
525
528 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
532
534 ISD::FNEARBYINT, MVT::f16,
535 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
540 MVT::f16, Promote);
541
542 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
543 // complete support for all operations in LegalizeDAG.
548 MVT::f16, Promote);
549
550 // We need to custom promote this.
551 if (Subtarget.is64Bit())
553 }
554
555 if (Subtarget.hasStdExtFOrZfinx()) {
556 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
557 setOperationAction(FPRndMode, MVT::f32,
558 Subtarget.hasStdExtZfa() ? Legal : Custom);
559 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
563 setOperationAction(FPOpToExpand, MVT::f32, Expand);
564 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
565 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
566 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
567 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
571 Subtarget.isSoftFPABI() ? LibCall : Custom);
576
577 if (Subtarget.hasStdExtZfa()) {
581 } else {
583 }
584 }
585
586 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
588
589 if (Subtarget.hasStdExtDOrZdinx()) {
590 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
591
592 if (!Subtarget.is64Bit())
594
595 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
596 !Subtarget.is64Bit()) {
599 }
600
601 if (Subtarget.hasStdExtZfa()) {
603 setOperationAction(FPRndMode, MVT::f64, Legal);
606 } else {
607 if (Subtarget.is64Bit())
608 setOperationAction(FPRndMode, MVT::f64, Custom);
609
611 }
612
615 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
619 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
620 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
621 setOperationAction(FPOpToExpand, MVT::f64, Expand);
622 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
623 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
624 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
625 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
629 Subtarget.isSoftFPABI() ? LibCall : Custom);
634 }
635
636 if (Subtarget.is64Bit()) {
639 MVT::i32, Custom);
641 }
642
643 if (Subtarget.hasStdExtFOrZfinx()) {
645 Custom);
646
647 // f16/bf16 require custom handling.
649 Custom);
651 Custom);
652
661 }
662
665 XLenVT, Custom);
666
668
669 if (Subtarget.is64Bit())
671
672 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
673 // Unfortunately this can't be determined just from the ISA naming string.
675 Subtarget.is64Bit() ? Legal : Custom);
677 Subtarget.is64Bit() ? Legal : Custom);
678
679 if (Subtarget.is64Bit()) {
682 }
683
686 if (Subtarget.is64Bit())
688
689 if (Subtarget.hasVendorXMIPSCBOP())
691 else if (Subtarget.hasStdExtZicbop())
693
694 if (Subtarget.hasStdExtA()) {
696 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
698 else
700 } else if (Subtarget.hasForcedAtomics()) {
702 } else {
704 }
705
707
709
710 if (getTargetMachine().getTargetTriple().isOSLinux()) {
711 // Custom lowering of llvm.clear_cache.
713 }
714
715 if (Subtarget.hasVInstructions()) {
717
719
720 // RVV intrinsics may have illegal operands.
721 // We also need to custom legalize vmv.x.s.
724 {MVT::i8, MVT::i16}, Custom);
725 if (Subtarget.is64Bit())
727 MVT::i32, Custom);
728 else
730 MVT::i64, Custom);
731
733 MVT::Other, Custom);
734
735 static const unsigned IntegerVPOps[] = {
736 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
737 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
738 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
739 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
740 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
741 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
742 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
743 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
744 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
745 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
746 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
747 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
748 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
749 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
750 ISD::EXPERIMENTAL_VP_SPLAT};
751
752 static const unsigned FloatingPointVPOps[] = {
753 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
754 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
755 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
756 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
757 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
758 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
759 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
760 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
761 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
762 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
763 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
764 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
765 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
766
767 static const unsigned IntegerVecReduceOps[] = {
771
772 static const unsigned FloatingPointVecReduceOps[] = {
775
776 static const unsigned FloatingPointLibCallOps[] = {
779
780 if (!Subtarget.is64Bit()) {
781 // We must custom-lower certain vXi64 operations on RV32 due to the vector
782 // element type being illegal.
784 MVT::i64, Custom);
785
786 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
787
788 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
789 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
790 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
791 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
792 MVT::i64, Custom);
793 }
794
795 for (MVT VT : BoolVecVTs) {
796 if (!isTypeLegal(VT))
797 continue;
798
800
801 // Mask VTs are custom-expanded into a series of standard nodes
805 VT, Custom);
806
808 Custom);
809
811 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
812 Expand);
813 setOperationAction(ISD::VP_MERGE, VT, Custom);
814
815 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
816 Custom);
817
818 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
819
822 Custom);
823
825 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
826 Custom);
827
828 // RVV has native int->float & float->int conversions where the
829 // element type sizes are within one power-of-two of each other. Any
830 // wider distances between type sizes have to be lowered as sequences
831 // which progressively narrow the gap in stages.
836 VT, Custom);
838 Custom);
839
840 // Expand all extending loads to types larger than this, and truncating
841 // stores from types larger than this.
843 setTruncStoreAction(VT, OtherVT, Expand);
845 OtherVT, Expand);
846 }
847
848 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
849 ISD::VP_TRUNCATE, ISD::VP_SETCC},
850 VT, Custom);
851
854
856
857 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
858 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
859 setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
860
863 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
864 }
865
866 for (MVT VT : IntVecVTs) {
867 if (!isTypeLegal(VT))
868 continue;
869
872
873 // Vectors implement MULHS/MULHU.
875
876 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
877 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
879
881 Legal);
882
884
885 // Custom-lower extensions and truncations from/to mask types.
887 VT, Custom);
888
889 // RVV has native int->float & float->int conversions where the
890 // element type sizes are within one power-of-two of each other. Any
891 // wider distances between type sizes have to be lowered as sequences
892 // which progressively narrow the gap in stages.
897 VT, Custom);
899 Custom);
903 VT, Legal);
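      // Illustrative example (not from the source): a direct nxv2i8 ->
      // nxv2f64 conversion spans more than one power of two, so it is lowered
      // in stages, e.g. widening the integer to nxv2i32 first and then
      // converting nxv2i32 -> nxv2f64, where each step stays within the
      // natively supported one-power-of-two distance.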
904
905 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
906 // nodes which truncate by one power of two at a time.
909 Custom);
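      // Illustrative example (sketch of the intended lowering, not a quote
      // from this file): truncating nxv4i64 to nxv4i8 becomes a chain of
      // RISCVISD::TRUNCATE_VECTOR_VL nodes, nxv4i64 -> nxv4i32 -> nxv4i16 ->
      // nxv4i8, each step mapping to a vnsrl with a zero shift amount.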
910
911 // Custom-lower insert/extract operations to simplify patterns.
913 Custom);
914
915 // Custom-lower reduction operations to set up the corresponding custom
916 // nodes' operands.
917 setOperationAction(IntegerVecReduceOps, VT, Custom);
918
919 setOperationAction(IntegerVPOps, VT, Custom);
920
922
924 VT, Custom);
925
927 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
928 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
929 VT, Custom);
930 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
931
934 VT, Custom);
935
938
940
942 setTruncStoreAction(VT, OtherVT, Expand);
944 OtherVT, Expand);
945 }
946
949
950 // Splice
952
953 if (Subtarget.hasStdExtZvkb()) {
955 setOperationAction(ISD::VP_BSWAP, VT, Custom);
956 } else {
957 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
959 }
960
961 if (Subtarget.hasStdExtZvbb()) {
963 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
964 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
965 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
966 VT, Custom);
967 } else {
968 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
970 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
971 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
972 VT, Expand);
973
974        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the elements of VT are
975        // in the range of f32.
976 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
977 if (isTypeLegal(FloatVT)) {
979 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
980 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
981 VT, Custom);
982 }
983 }
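        // Illustrative sketch of that trick (assumed, not quoted from this
        // file): for i8/i16 elements, cttz(x) can be derived by converting
        // (x & -x) to f32 (e.g. with vfcvt.f.xu.v) and reading the biased
        // exponent out of the bit pattern; ctlz applies the same idea to x
        // itself.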
984
986 }
987
988 for (MVT VT : VecTupleVTs) {
989 if (!isTypeLegal(VT))
990 continue;
991
993 }
994
995 // Expand various CCs to best match the RVV ISA, which natively supports UNE
996 // but no other unordered comparisons, and supports all ordered comparisons
997 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
998 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
999 // and we pattern-match those back to the "original", swapping operands once
1000 // more. This way we catch both operations and both "vf" and "fv" forms with
1001 // fewer patterns.
1002 static const ISD::CondCode VFPCCToExpand[] = {
1006 };
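    // Illustrative example of the expansion described above (assumed): a
    // (setcc ogt, %x, %y) is rewritten by swapping operands into
    // (setcc olt, %y, %x); isel then pattern-matches the swapped form back,
    // so one set of vmflt/vmfle patterns covers the GT/GE comparisons as well
    // as both the vector-scalar ("vf") and scalar-vector ("fv") operand
    // orders.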
1007
1008 // TODO: support more ops.
1009 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1014 ISD::FADD,
1015 ISD::FSUB,
1016 ISD::FMUL,
1017 ISD::FMA,
1018 ISD::FDIV,
1019 ISD::FSQRT,
1020 ISD::FCEIL,
1025 ISD::FRINT,
1028 ISD::SETCC,
1041
1042 // TODO: support more vp ops.
1043 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1044 ISD::VP_FADD,
1045 ISD::VP_FSUB,
1046 ISD::VP_FMUL,
1047 ISD::VP_FDIV,
1048 ISD::VP_FMA,
1049 ISD::VP_REDUCE_FMIN,
1050 ISD::VP_REDUCE_FMAX,
1051 ISD::VP_SQRT,
1052 ISD::VP_FMINNUM,
1053 ISD::VP_FMAXNUM,
1054 ISD::VP_FCEIL,
1055 ISD::VP_FFLOOR,
1056 ISD::VP_FROUND,
1057 ISD::VP_FROUNDEVEN,
1058 ISD::VP_FROUNDTOZERO,
1059 ISD::VP_FRINT,
1060 ISD::VP_FNEARBYINT,
1061 ISD::VP_SETCC,
1062 ISD::VP_FMINIMUM,
1063 ISD::VP_FMAXIMUM,
1064 ISD::VP_REDUCE_FMINIMUM,
1065 ISD::VP_REDUCE_FMAXIMUM};
1066
1067 // Sets common operation actions on RVV floating-point vector types.
1068 const auto SetCommonVFPActions = [&](MVT VT) {
1070 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1071 // sizes are within one power-of-two of each other. Therefore conversions
1072 // between vXf16 and vXf64 must be lowered as sequences which convert via
1073 // vXf32.
1077 // Custom-lower insert/extract operations to simplify patterns.
1079 Custom);
1080 // Expand various condition codes (explained above).
1081 setCondCodeAction(VFPCCToExpand, VT, Expand);
1082
1085 Legal);
1087
1091 VT, Custom);
1092
1093 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1094
1095 // Expand FP operations that need libcalls.
1096 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1097
1099
1101
1103 VT, Custom);
1104
1106 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1107 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1108 VT, Custom);
1109 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1110
1113
1116 VT, Custom);
1117
1120
1122 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1123 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1124
1125 setOperationAction(FloatingPointVPOps, VT, Custom);
1126
1128 Custom);
1131 VT, Legal);
1136 VT, Custom);
1137
1139 };
1140
1141 // Sets common extload/truncstore actions on RVV floating-point vector
1142 // types.
1143 const auto SetCommonVFPExtLoadTruncStoreActions =
1144 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1145 for (auto SmallVT : SmallerVTs) {
1146 setTruncStoreAction(VT, SmallVT, Expand);
1147 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1148 }
1149 };
1150
1151 // Sets common actions for f16 and bf16 for when there's only
1152 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1153 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1156 Custom);
1157 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1160 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1161 Custom);
1163 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1169 VT, Custom);
1170 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1171 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1172 MVT EltVT = VT.getVectorElementType();
1173 if (isTypeLegal(EltVT))
1174 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1176 VT, Custom);
1177 else
1178 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1179 EltVT, Custom);
1181 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1182 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1183 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1184 ISD::VP_SCATTER},
1185 VT, Custom);
1186 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1187
1191
1192 // Expand FP operations that need libcalls.
1193 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1194
1195 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1196 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1197 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1198 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1199 } else {
1200 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1201 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1202 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1203 }
1204 };
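    // Illustrative effect of the promotion above (assumed): with only
    // Zvfhmin, an fadd on nxv4f16 is widened to nxv4f32, the add is performed
    // in f32, and the result is narrowed back to f16. Only nxv32f16/nxv32bf16
    // take the custom-split path, because the matching f32 type (nxv32f32)
    // is not available.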
1205
1206 if (Subtarget.hasVInstructionsF16()) {
1207 for (MVT VT : F16VecVTs) {
1208 if (!isTypeLegal(VT))
1209 continue;
1210 SetCommonVFPActions(VT);
1211 }
1212 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1213 for (MVT VT : F16VecVTs) {
1214 if (!isTypeLegal(VT))
1215 continue;
1216 SetCommonPromoteToF32Actions(VT);
1217 }
1218 }
1219
1220 if (Subtarget.hasVInstructionsBF16Minimal()) {
1221 for (MVT VT : BF16VecVTs) {
1222 if (!isTypeLegal(VT))
1223 continue;
1224 SetCommonPromoteToF32Actions(VT);
1225 }
1226 }
1227
1228 if (Subtarget.hasVInstructionsF32()) {
1229 for (MVT VT : F32VecVTs) {
1230 if (!isTypeLegal(VT))
1231 continue;
1232 SetCommonVFPActions(VT);
1233 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1234 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1235 }
1236 }
1237
1238 if (Subtarget.hasVInstructionsF64()) {
1239 for (MVT VT : F64VecVTs) {
1240 if (!isTypeLegal(VT))
1241 continue;
1242 SetCommonVFPActions(VT);
1243 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1244 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1245 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1246 }
1247 }
1248
1249 if (Subtarget.useRVVForFixedLengthVectors()) {
1251 if (!useRVVForFixedLengthVectorVT(VT))
1252 continue;
1253
1254 // By default everything must be expanded.
1255 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1258 setTruncStoreAction(VT, OtherVT, Expand);
1260 OtherVT, Expand);
1261 }
1262
1263 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1264 // expansion to a build_vector of 0s.
1266
1267 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1269 Custom);
1270
1273 Custom);
1274
1276 VT, Custom);
1277
1279 VT, Custom);
1280
1282
1284
1286
1288
1291 Custom);
1292
1294
1297 Custom);
1298
1300 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1301 Custom);
1302
1304 {
1313 },
1314 VT, Custom);
1316 Custom);
1317
1319
1320      // Operations below differ between masks and other vectors.
1321 if (VT.getVectorElementType() == MVT::i1) {
1322 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1323 ISD::OR, ISD::XOR},
1324 VT, Custom);
1325
1326 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1327 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1328 VT, Custom);
1329
1330 setOperationAction(ISD::VP_MERGE, VT, Custom);
1331
1332 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1333 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1334 continue;
1335 }
1336
1337 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1338 // it before type legalization for i64 vectors on RV32. It will then be
1339 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1340 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1341 // improvements first.
1342 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1345
1346 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1348 }
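        // Illustrative example (assumed): on RV32, a splat of a non-constant
        // i64 is type-legalized into SPLAT_VECTOR_PARTS carrying separate
        // lo/hi i32 operands; the custom handling here is what turns that
        // back into a genuine 64-bit element splat.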
1349
1352
1353 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1354 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1355 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1356 ISD::VP_SCATTER},
1357 VT, Custom);
1358 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1359
1363 VT, Custom);
1364
1367
1369
1370 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1371 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1373
1377 VT, Custom);
1378
1380
1383
1384 // Custom-lower reduction operations to set up the corresponding custom
1385 // nodes' operands.
1389 VT, Custom);
1390
1391 setOperationAction(IntegerVPOps, VT, Custom);
1392
1393 if (Subtarget.hasStdExtZvkb())
1395
1396 if (Subtarget.hasStdExtZvbb()) {
1399 VT, Custom);
1400 } else {
1401        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the elements of VT are
1402        // in the range of f32.
1403 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1404 if (isTypeLegal(FloatVT))
1407 Custom);
1408 }
1409
1411 }
1412
1414 // There are no extending loads or truncating stores.
1415 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1416 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1417 setTruncStoreAction(VT, InnerVT, Expand);
1418 }
1419
1420 if (!useRVVForFixedLengthVectorVT(VT))
1421 continue;
1422
1423 // By default everything must be expanded.
1424 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1426
1427 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1428 // expansion to a build_vector of 0s.
1430
1435 VT, Custom);
1436 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1437 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1438
1440 VT, Custom);
1441
1444 VT, Custom);
1445 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1446 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1447 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1448 VT, Custom);
1449 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1450
1453 Custom);
1454
1455 if (VT.getVectorElementType() == MVT::f16 &&
1456 !Subtarget.hasVInstructionsF16()) {
1458 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1460 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1461 Custom);
1462 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1463 Custom);
1466 if (Subtarget.hasStdExtZfhmin()) {
1468 } else {
1469 // We need to custom legalize f16 build vectors if Zfhmin isn't
1470 // available.
1472 }
1476 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1477 // Don't promote f16 vector operations to f32 if f32 vector type is
1478 // not legal.
1479 // TODO: could split the f16 vector into two vectors and do promotion.
1480 if (!isTypeLegal(F32VecVT))
1481 continue;
1482 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1483 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1484 continue;
1485 }
1486
1487 if (VT.getVectorElementType() == MVT::bf16) {
1489 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1492 if (Subtarget.hasStdExtZfbfmin()) {
1494 } else {
1495 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1496 // available.
1498 }
1500 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1501 Custom);
1502 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1503        // Don't promote bf16 vector operations to f32 if the f32 vector type
1504        // is not legal.
1505        // TODO: could split the bf16 vector into two vectors and do promotion.
1506 if (!isTypeLegal(F32VecVT))
1507 continue;
1508 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1509 // TODO: Promote VP ops to fp32.
1510 continue;
1511 }
1512
1514 Custom);
1515
1521 VT, Custom);
1522
1527 VT, Custom);
1528
1529 setCondCodeAction(VFPCCToExpand, VT, Expand);
1530
1533
1535
1536 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1537
1538 setOperationAction(FloatingPointVPOps, VT, Custom);
1539
1546 VT, Custom);
1547 }
1548
1549 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1550 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1551 if (Subtarget.is64Bit())
1553 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1555 if (Subtarget.hasStdExtZfbfmin())
1557 if (Subtarget.hasStdExtFOrZfinx())
1559 if (Subtarget.hasStdExtDOrZdinx())
1561 }
1562 }
1563
1564 if (Subtarget.hasStdExtA())
1566
1567 if (Subtarget.hasForcedAtomics()) {
1568 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1574 XLenVT, LibCall);
1575 }
1576
1577 if (Subtarget.hasVendorXTHeadMemIdx()) {
1578 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1579 setIndexedLoadAction(im, MVT::i8, Legal);
1580 setIndexedStoreAction(im, MVT::i8, Legal);
1581 setIndexedLoadAction(im, MVT::i16, Legal);
1582 setIndexedStoreAction(im, MVT::i16, Legal);
1583 setIndexedLoadAction(im, MVT::i32, Legal);
1584 setIndexedStoreAction(im, MVT::i32, Legal);
1585
1586 if (Subtarget.is64Bit()) {
1587 setIndexedLoadAction(im, MVT::i64, Legal);
1588 setIndexedStoreAction(im, MVT::i64, Legal);
1589 }
1590 }
1591 }
1592
1593 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1597
1601 }
1602
1603 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1604 if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1605 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1608 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1609 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1610 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1611 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1612 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
1613
1614 if (Subtarget.useRVVForFixedLengthVectors()) {
1616 if (VT.getVectorElementType() != MVT::i32 ||
1617 !useRVVForFixedLengthVectorVT(VT))
1618 continue;
1619 ElementCount EC = VT.getVectorElementCount();
1620 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1621 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1622 }
1623 }
1624 }
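  // Illustrative target of the hook above (assumed): a PARTIAL_REDUCE_SMLA
  // that accumulates nxv16i8 x nxv16i8 products into an nxv4i32 accumulator
  // can be selected to a single vqdot.vv, with the unsigned and mixed-sign
  // forms mapping to vqdotu.vv / vqdotsu.vv.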
1625
1626  // Customize load and store operations for bf16 if Zfh isn't enabled.
1627 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1628 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1630 }
1631
1632 // Function alignments.
1633 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1634 setMinFunctionAlignment(FunctionAlignment);
1635 // Set preferred alignments.
1638
1644
1645 if (Subtarget.hasStdExtFOrZfinx())
1647
1648 if (Subtarget.hasStdExtZbb())
1650
1651 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1652 Subtarget.hasVInstructions())
1654
1655 if (Subtarget.hasStdExtZbkb())
1657
1658 if (Subtarget.hasStdExtFOrZfinx())
1661 if (Subtarget.hasVInstructions())
1664 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1667 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1672
1673 if (Subtarget.hasVendorXTHeadMemPair())
1675 if (Subtarget.useRVVForFixedLengthVectors())
1677
1678 // Disable strict node mutation.
1679 IsStrictFPEnabled = true;
1680 EnableExtLdPromotion = true;
1681
1682 // Let the subtarget decide if a predictable select is more expensive than the
1683 // corresponding branch. This information is used in CGP/SelectOpt to decide
1684 // when to convert selects into branches.
1685 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1686
1687 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1688 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1689
1691 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1692 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1693
1695 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1696 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1697
1698 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1699 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1700}
1701
1703 LLVMContext &Context,
1704 EVT VT) const {
1705 if (!VT.isVector())
1706 return getPointerTy(DL);
1707 if (Subtarget.hasVInstructions() &&
1708 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1709 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1711}
1712
1713MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1714 return Subtarget.getXLenVT();
1715}
1716
1717// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1718bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1719 unsigned VF,
1720 bool IsScalable) const {
1721 if (!Subtarget.hasVInstructions())
1722 return true;
1723
1724 if (!IsScalable)
1725 return true;
1726
1727 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1728 return true;
1729
1730  // Don't allow VF=1 if those types aren't legal.
1731 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1732 return true;
1733
1734 // VLEN=32 support is incomplete.
1735 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1736 return true;
1737
1738 // The maximum VF is for the smallest element width with LMUL=8.
1739 // VF must be a power of 2.
1740 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
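  // Worked example (illustrative): with RVVBytesPerBlock = 8 (one 64-bit
  // block), MaxVF is 64, matching nxv64i8 (i8 elements at LMUL=8); a
  // requested VF of 128, or a non-power-of-two VF such as 48, is expanded
  // instead of being lowered to a vsetvli.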
1741 return VF > MaxVF || !isPowerOf2_32(VF);
1742}
1743
1745 return !Subtarget.hasVInstructions() ||
1746 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1747}
1748
1750 const CallInst &I,
1751 MachineFunction &MF,
1752 unsigned Intrinsic) const {
1753 auto &DL = I.getDataLayout();
1754
1755 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1756 bool IsUnitStrided, bool UsePtrVal = false) {
1758 // We can't use ptrVal if the intrinsic can access memory before the
1759 // pointer. This means we can't use it for strided or indexed intrinsics.
1760 if (UsePtrVal)
1761 Info.ptrVal = I.getArgOperand(PtrOp);
1762 else
1763 Info.fallbackAddressSpace =
1764 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1765 Type *MemTy;
1766 if (IsStore) {
1767 // Store value is the first operand.
1768 MemTy = I.getArgOperand(0)->getType();
1769 } else {
1770 // Use return type. If it's segment load, return type is a struct.
1771 MemTy = I.getType();
1772 if (MemTy->isStructTy())
1773 MemTy = MemTy->getStructElementType(0);
1774 }
1775 if (!IsUnitStrided)
1776 MemTy = MemTy->getScalarType();
1777
1778 Info.memVT = getValueType(DL, MemTy);
1779 if (MemTy->isTargetExtTy()) {
1780 // RISC-V vector tuple type's alignment type should be its element type.
1781 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1782 MemTy = Type::getIntNTy(
1783 MemTy->getContext(),
1784 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1785 ->getZExtValue());
1786 Info.align = DL.getABITypeAlign(MemTy);
1787 } else {
1788 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
1789 }
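    // Illustrative example (assumed operand encoding): a "riscv.vector.tuple"
    // whose trailing log2(SEW) operand is 5 gets an alignment type of i32
    // (1 << 5 bits), i.e. the tuple is aligned like its 32-bit elements.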
1791 Info.flags |=
1793 return true;
1794 };
1795
1796 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1798
1800 switch (Intrinsic) {
1801 default:
1802 return false;
1803 case Intrinsic::riscv_masked_atomicrmw_xchg:
1804 case Intrinsic::riscv_masked_atomicrmw_add:
1805 case Intrinsic::riscv_masked_atomicrmw_sub:
1806 case Intrinsic::riscv_masked_atomicrmw_nand:
1807 case Intrinsic::riscv_masked_atomicrmw_max:
1808 case Intrinsic::riscv_masked_atomicrmw_min:
1809 case Intrinsic::riscv_masked_atomicrmw_umax:
1810 case Intrinsic::riscv_masked_atomicrmw_umin:
1811 case Intrinsic::riscv_masked_cmpxchg:
1812 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
1813 // narrow atomic operation. These will be expanded to an LR/SC loop that
1814 // reads/writes to/from an aligned 4 byte location. And, or, shift, etc.
1815 // will be used to modify the appropriate part of the 4 byte data and
1816 // preserve the rest.
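    // Rough shape of the expansion (illustrative only, not the emitted code):
    //   1: lr.w   t0, (a0)       ; load the aligned 32-bit word
    //      ...and/or/shift t0 under the mask to update only the narrow field
    //      sc.w   t1, t2, (a0)   ; try to store the merged word back
    //      bnez   t1, 1b         ; retry if the reservation was lost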
1818 Info.memVT = MVT::i32;
1819 Info.ptrVal = I.getArgOperand(0);
1820 Info.offset = 0;
1821 Info.align = Align(4);
1824 return true;
1825 case Intrinsic::riscv_seg2_load_mask:
1826 case Intrinsic::riscv_seg3_load_mask:
1827 case Intrinsic::riscv_seg4_load_mask:
1828 case Intrinsic::riscv_seg5_load_mask:
1829 case Intrinsic::riscv_seg6_load_mask:
1830 case Intrinsic::riscv_seg7_load_mask:
1831 case Intrinsic::riscv_seg8_load_mask:
1832 case Intrinsic::riscv_sseg2_load_mask:
1833 case Intrinsic::riscv_sseg3_load_mask:
1834 case Intrinsic::riscv_sseg4_load_mask:
1835 case Intrinsic::riscv_sseg5_load_mask:
1836 case Intrinsic::riscv_sseg6_load_mask:
1837 case Intrinsic::riscv_sseg7_load_mask:
1838 case Intrinsic::riscv_sseg8_load_mask:
1839 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1840 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1841 case Intrinsic::riscv_seg2_store_mask:
1842 case Intrinsic::riscv_seg3_store_mask:
1843 case Intrinsic::riscv_seg4_store_mask:
1844 case Intrinsic::riscv_seg5_store_mask:
1845 case Intrinsic::riscv_seg6_store_mask:
1846 case Intrinsic::riscv_seg7_store_mask:
1847 case Intrinsic::riscv_seg8_store_mask:
1848 // Operands are (vec, ..., vec, ptr, mask, vl)
1849 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1850 /*IsStore*/ true,
1851 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1852 case Intrinsic::riscv_sseg2_store_mask:
1853 case Intrinsic::riscv_sseg3_store_mask:
1854 case Intrinsic::riscv_sseg4_store_mask:
1855 case Intrinsic::riscv_sseg5_store_mask:
1856 case Intrinsic::riscv_sseg6_store_mask:
1857 case Intrinsic::riscv_sseg7_store_mask:
1858 case Intrinsic::riscv_sseg8_store_mask:
1859 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
1860 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1861 /*IsStore*/ true,
1862 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1863 case Intrinsic::riscv_vlm:
1864 return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
1865 /*IsStore*/ false,
1866 /*IsUnitStrided*/ true,
1867 /*UsePtrVal*/ true);
1868 case Intrinsic::riscv_vle:
1869 case Intrinsic::riscv_vle_mask:
1870 case Intrinsic::riscv_vleff:
1871 case Intrinsic::riscv_vleff_mask:
1872 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1873 /*IsStore*/ false,
1874 /*IsUnitStrided*/ true,
1875 /*UsePtrVal*/ true);
1876 case Intrinsic::riscv_vsm:
1877 case Intrinsic::riscv_vse:
1878 case Intrinsic::riscv_vse_mask:
1879 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1880 /*IsStore*/ true,
1881 /*IsUnitStrided*/ true,
1882 /*UsePtrVal*/ true);
1883 case Intrinsic::riscv_vlse:
1884 case Intrinsic::riscv_vlse_mask:
1885 case Intrinsic::riscv_vloxei:
1886 case Intrinsic::riscv_vloxei_mask:
1887 case Intrinsic::riscv_vluxei:
1888 case Intrinsic::riscv_vluxei_mask:
1889 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1890 /*IsStore*/ false,
1891 /*IsUnitStrided*/ false);
1892 case Intrinsic::riscv_vsse:
1893 case Intrinsic::riscv_vsse_mask:
1894 case Intrinsic::riscv_vsoxei:
1895 case Intrinsic::riscv_vsoxei_mask:
1896 case Intrinsic::riscv_vsuxei:
1897 case Intrinsic::riscv_vsuxei_mask:
1898 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1899 /*IsStore*/ true,
1900 /*IsUnitStrided*/ false);
1901 case Intrinsic::riscv_vlseg2:
1902 case Intrinsic::riscv_vlseg3:
1903 case Intrinsic::riscv_vlseg4:
1904 case Intrinsic::riscv_vlseg5:
1905 case Intrinsic::riscv_vlseg6:
1906 case Intrinsic::riscv_vlseg7:
1907 case Intrinsic::riscv_vlseg8:
1908 case Intrinsic::riscv_vlseg2ff:
1909 case Intrinsic::riscv_vlseg3ff:
1910 case Intrinsic::riscv_vlseg4ff:
1911 case Intrinsic::riscv_vlseg5ff:
1912 case Intrinsic::riscv_vlseg6ff:
1913 case Intrinsic::riscv_vlseg7ff:
1914 case Intrinsic::riscv_vlseg8ff:
1915 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1916 /*IsStore*/ false,
1917 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1918 case Intrinsic::riscv_vlseg2_mask:
1919 case Intrinsic::riscv_vlseg3_mask:
1920 case Intrinsic::riscv_vlseg4_mask:
1921 case Intrinsic::riscv_vlseg5_mask:
1922 case Intrinsic::riscv_vlseg6_mask:
1923 case Intrinsic::riscv_vlseg7_mask:
1924 case Intrinsic::riscv_vlseg8_mask:
1925 case Intrinsic::riscv_vlseg2ff_mask:
1926 case Intrinsic::riscv_vlseg3ff_mask:
1927 case Intrinsic::riscv_vlseg4ff_mask:
1928 case Intrinsic::riscv_vlseg5ff_mask:
1929 case Intrinsic::riscv_vlseg6ff_mask:
1930 case Intrinsic::riscv_vlseg7ff_mask:
1931 case Intrinsic::riscv_vlseg8ff_mask:
1932 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1933 /*IsStore*/ false,
1934 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1935 case Intrinsic::riscv_vlsseg2:
1936 case Intrinsic::riscv_vlsseg3:
1937 case Intrinsic::riscv_vlsseg4:
1938 case Intrinsic::riscv_vlsseg5:
1939 case Intrinsic::riscv_vlsseg6:
1940 case Intrinsic::riscv_vlsseg7:
1941 case Intrinsic::riscv_vlsseg8:
1942 case Intrinsic::riscv_vloxseg2:
1943 case Intrinsic::riscv_vloxseg3:
1944 case Intrinsic::riscv_vloxseg4:
1945 case Intrinsic::riscv_vloxseg5:
1946 case Intrinsic::riscv_vloxseg6:
1947 case Intrinsic::riscv_vloxseg7:
1948 case Intrinsic::riscv_vloxseg8:
1949 case Intrinsic::riscv_vluxseg2:
1950 case Intrinsic::riscv_vluxseg3:
1951 case Intrinsic::riscv_vluxseg4:
1952 case Intrinsic::riscv_vluxseg5:
1953 case Intrinsic::riscv_vluxseg6:
1954 case Intrinsic::riscv_vluxseg7:
1955 case Intrinsic::riscv_vluxseg8:
1956 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1957 /*IsStore*/ false,
1958 /*IsUnitStrided*/ false);
1959 case Intrinsic::riscv_vlsseg2_mask:
1960 case Intrinsic::riscv_vlsseg3_mask:
1961 case Intrinsic::riscv_vlsseg4_mask:
1962 case Intrinsic::riscv_vlsseg5_mask:
1963 case Intrinsic::riscv_vlsseg6_mask:
1964 case Intrinsic::riscv_vlsseg7_mask:
1965 case Intrinsic::riscv_vlsseg8_mask:
1966 case Intrinsic::riscv_vloxseg2_mask:
1967 case Intrinsic::riscv_vloxseg3_mask:
1968 case Intrinsic::riscv_vloxseg4_mask:
1969 case Intrinsic::riscv_vloxseg5_mask:
1970 case Intrinsic::riscv_vloxseg6_mask:
1971 case Intrinsic::riscv_vloxseg7_mask:
1972 case Intrinsic::riscv_vloxseg8_mask:
1973 case Intrinsic::riscv_vluxseg2_mask:
1974 case Intrinsic::riscv_vluxseg3_mask:
1975 case Intrinsic::riscv_vluxseg4_mask:
1976 case Intrinsic::riscv_vluxseg5_mask:
1977 case Intrinsic::riscv_vluxseg6_mask:
1978 case Intrinsic::riscv_vluxseg7_mask:
1979 case Intrinsic::riscv_vluxseg8_mask:
1980 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1981 /*IsStore*/ false,
1982 /*IsUnitStrided*/ false);
1983 case Intrinsic::riscv_vsseg2:
1984 case Intrinsic::riscv_vsseg3:
1985 case Intrinsic::riscv_vsseg4:
1986 case Intrinsic::riscv_vsseg5:
1987 case Intrinsic::riscv_vsseg6:
1988 case Intrinsic::riscv_vsseg7:
1989 case Intrinsic::riscv_vsseg8:
1990 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1991 /*IsStore*/ true,
1992 /*IsUnitStrided*/ false);
1993 case Intrinsic::riscv_vsseg2_mask:
1994 case Intrinsic::riscv_vsseg3_mask:
1995 case Intrinsic::riscv_vsseg4_mask:
1996 case Intrinsic::riscv_vsseg5_mask:
1997 case Intrinsic::riscv_vsseg6_mask:
1998 case Intrinsic::riscv_vsseg7_mask:
1999 case Intrinsic::riscv_vsseg8_mask:
2000 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2001 /*IsStore*/ true,
2002 /*IsUnitStrided*/ false);
2003 case Intrinsic::riscv_vssseg2:
2004 case Intrinsic::riscv_vssseg3:
2005 case Intrinsic::riscv_vssseg4:
2006 case Intrinsic::riscv_vssseg5:
2007 case Intrinsic::riscv_vssseg6:
2008 case Intrinsic::riscv_vssseg7:
2009 case Intrinsic::riscv_vssseg8:
2010 case Intrinsic::riscv_vsoxseg2:
2011 case Intrinsic::riscv_vsoxseg3:
2012 case Intrinsic::riscv_vsoxseg4:
2013 case Intrinsic::riscv_vsoxseg5:
2014 case Intrinsic::riscv_vsoxseg6:
2015 case Intrinsic::riscv_vsoxseg7:
2016 case Intrinsic::riscv_vsoxseg8:
2017 case Intrinsic::riscv_vsuxseg2:
2018 case Intrinsic::riscv_vsuxseg3:
2019 case Intrinsic::riscv_vsuxseg4:
2020 case Intrinsic::riscv_vsuxseg5:
2021 case Intrinsic::riscv_vsuxseg6:
2022 case Intrinsic::riscv_vsuxseg7:
2023 case Intrinsic::riscv_vsuxseg8:
2024 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2025 /*IsStore*/ true,
2026 /*IsUnitStrided*/ false);
2027 case Intrinsic::riscv_vssseg2_mask:
2028 case Intrinsic::riscv_vssseg3_mask:
2029 case Intrinsic::riscv_vssseg4_mask:
2030 case Intrinsic::riscv_vssseg5_mask:
2031 case Intrinsic::riscv_vssseg6_mask:
2032 case Intrinsic::riscv_vssseg7_mask:
2033 case Intrinsic::riscv_vssseg8_mask:
2034 case Intrinsic::riscv_vsoxseg2_mask:
2035 case Intrinsic::riscv_vsoxseg3_mask:
2036 case Intrinsic::riscv_vsoxseg4_mask:
2037 case Intrinsic::riscv_vsoxseg5_mask:
2038 case Intrinsic::riscv_vsoxseg6_mask:
2039 case Intrinsic::riscv_vsoxseg7_mask:
2040 case Intrinsic::riscv_vsoxseg8_mask:
2041 case Intrinsic::riscv_vsuxseg2_mask:
2042 case Intrinsic::riscv_vsuxseg3_mask:
2043 case Intrinsic::riscv_vsuxseg4_mask:
2044 case Intrinsic::riscv_vsuxseg5_mask:
2045 case Intrinsic::riscv_vsuxseg6_mask:
2046 case Intrinsic::riscv_vsuxseg7_mask:
2047 case Intrinsic::riscv_vsuxseg8_mask:
2048 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2049 /*IsStore*/ true,
2050 /*IsUnitStrided*/ false);
2051 }
2052}
2053
2055 const AddrMode &AM, Type *Ty,
2056 unsigned AS,
2057 Instruction *I) const {
2058 // No global is ever allowed as a base.
2059 if (AM.BaseGV)
2060 return false;
2061
2062 // None of our addressing modes allows a scalable offset
2063 if (AM.ScalableOffset)
2064 return false;
2065
2066 // RVV instructions only support register addressing.
2067 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2068 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2069
2070 // Require a 12-bit signed offset.
2071 if (!isInt<12>(AM.BaseOffs))
2072 return false;
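  // Illustrative example (assumed): an access like lw a0, 2047(a1) fits the
  // 12-bit signed immediate and is accepted here, while an offset of 2048
  // would be rejected and has to be formed with a separate lui/add first.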
2073
2074 switch (AM.Scale) {
2075 case 0: // "r+i" or just "i", depending on HasBaseReg.
2076 break;
2077 case 1:
2078 if (!AM.HasBaseReg) // allow "r+i".
2079 break;
2080 return false; // disallow "r+r" or "r+r+i".
2081 default:
2082 return false;
2083 }
2084
2085 return true;
2086}
2087
2089 return isInt<12>(Imm);
2090}
2091
2093 return isInt<12>(Imm);
2094}
2095
2096// On RV32, 64-bit integers are split into their high and low parts and held
2097// in two different registers, so the trunc is free since the low register can
2098// just be used.
2099// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2100// isTruncateFree?
2102 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2103 return false;
2104 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2105 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2106 return (SrcBits == 64 && DestBits == 32);
2107}
2108
2110 // We consider i64->i32 free on RV64 since we have good selection of W
2111 // instructions that make promoting operations back to i64 free in many cases.
2112 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2113 !DstVT.isInteger())
2114 return false;
2115 unsigned SrcBits = SrcVT.getSizeInBits();
2116 unsigned DestBits = DstVT.getSizeInBits();
2117 return (SrcBits == 64 && DestBits == 32);
2118}
2119
2121 EVT SrcVT = Val.getValueType();
2122  // The truncate is free if it can be folded into a vnsrl/vnsra narrowing shift.
2123 if (Subtarget.hasVInstructions() &&
2124 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2125 SrcVT.isVector() && VT2.isVector()) {
2126 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2127 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2128 if (SrcBits == DestBits * 2) {
2129 return true;
2130 }
2131 }
2132 return TargetLowering::isTruncateFree(Val, VT2);
2133}
2134
2136 // Zexts are free if they can be combined with a load.
2137 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2138 // poorly with type legalization of compares preferring sext.
2139 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2140 EVT MemVT = LD->getMemoryVT();
2141 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2142 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2143 LD->getExtensionType() == ISD::ZEXTLOAD))
2144 return true;
2145 }
2146
2147 return TargetLowering::isZExtFree(Val, VT2);
2148}
2149
2151 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2152}
2153
2155 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2156}
2157
2159 return Subtarget.hasStdExtZbb() ||
2160 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2161}
2162
2164 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2165 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2166}
2167
2169 const Instruction &AndI) const {
2170 // We expect to be able to match a bit extraction instruction if the Zbs
2171 // extension is supported and the mask is a power of two. However, we
2172 // conservatively return false if the mask would fit in an ANDI instruction,
2173 // on the basis that it's possible the sinking+duplication of the AND in
2174 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2175 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2176 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2177 return false;
2178 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2179 if (!Mask)
2180 return false;
2181 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2182}
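// A minimal sketch of the predicate above on plain integers (the helper name
// is an assumption): only single-bit masks that do not fit the signed 12-bit
// ANDI immediate are considered, e.g. 0x800 qualifies while 0x400 does not,
// since the latter can already be tested with ANDI+BNEZ.
static bool exampleWantsBitExtract(uint64_t Mask) {
  bool FitsInAndi = isInt<12>(static_cast<int64_t>(Mask)); // ANDI range is [-2048, 2047]
  return !FitsInAndi && isPowerOf2_64(Mask);
}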
2183
2185 EVT VT = Y.getValueType();
2186
2187 if (VT.isVector())
2188 return false;
2189
2190 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2191 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2192}
2193
2195 EVT VT = Y.getValueType();
2196
2197 if (!VT.isVector())
2198 return hasAndNotCompare(Y);
2199
2200 return Subtarget.hasStdExtZvkb();
2201}
2202
2204 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2205 if (Subtarget.hasStdExtZbs())
2206 return X.getValueType().isScalarInteger();
2207 auto *C = dyn_cast<ConstantSDNode>(Y);
2208 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2209 if (Subtarget.hasVendorXTHeadBs())
2210 return C != nullptr;
2211 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2212 return C && C->getAPIntValue().ule(10);
2213}
2214
2216 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2217 SDValue Y) const {
2218 if (SelectOpcode != ISD::VSELECT)
2219 return false;
2220
2221 // Only enable for rvv.
2222 if (!VT.isVector() || !Subtarget.hasVInstructions())
2223 return false;
2224
2225 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2226 return false;
2227
2228 return true;
2229}
2230
2232 Type *Ty) const {
2233 assert(Ty->isIntegerTy());
2234
2235 unsigned BitSize = Ty->getIntegerBitWidth();
2236 if (BitSize > Subtarget.getXLen())
2237 return false;
2238
2239 // Fast path, assume 32-bit immediates are cheap.
2240 int64_t Val = Imm.getSExtValue();
2241 if (isInt<32>(Val))
2242 return true;
2243
2244 // A constant pool entry may be more aligned than the load we're trying to
2245 // replace. If we don't support unaligned scalar mem, prefer the constant
2246 // pool.
2247 // TODO: Can the caller pass down the alignment?
2248 if (!Subtarget.enableUnalignedScalarMem())
2249 return true;
2250
2251 // Prefer to keep the load if it would require many instructions.
2252 // This uses the same threshold we use for constant pools but doesn't
2253 // check useConstantPoolForLargeInts.
2254 // TODO: Should we keep the load only when we're definitely going to emit a
2255 // constant pool?
2256
2258 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2259}
2260
2264 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2265 SelectionDAG &DAG) const {
2266 // One interesting pattern that we'd want to form is 'bit extract':
2267 // ((1 >> Y) & 1) ==/!= 0
2268 // But we also need to be careful not to try to reverse that fold.
2269
2270 // Is this '((1 >> Y) & 1)'?
2271 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2272 return false; // Keep the 'bit extract' pattern.
2273
2274 // Will this be '((1 >> Y) & 1)' after the transform?
2275 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2276 return true; // Do form the 'bit extract' pattern.
2277
2278 // If 'X' is a constant, and we transform, then we will immediately
2279 // try to undo the fold, thus causing endless combine loop.
2280 // So only do the transform if X is not a constant. This matches the default
2281 // implementation of this function.
2282 return !XC;
2283}
2284
2286 unsigned Opc = VecOp.getOpcode();
2287
2288 // Assume target opcodes can't be scalarized.
2289 // TODO - do we have any exceptions?
2290 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2291 return false;
2292
2293 // If the vector op is not supported, try to convert to scalar.
2294 EVT VecVT = VecOp.getValueType();
2296 return true;
2297
2298 // If the vector op is supported, but the scalar op is not, the transform may
2299 // not be worthwhile.
2300 // Permit the transform when a vector binary operation can be converted to a
2301 // scalar binary operation that is custom lowered with an illegal type.
2302 EVT ScalarVT = VecVT.getScalarType();
2303 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2304 isOperationCustom(Opc, ScalarVT);
2305}
2306
2308 const GlobalAddressSDNode *GA) const {
2309 // In order to maximise the opportunity for common subexpression elimination,
2310 // keep a separate ADD node for the global address offset instead of folding
2311 // it in the global address node. Later peephole optimisations may choose to
2312 // fold it back in when profitable.
2313 return false;
2314}
2315
2316// Returns 0-31 if the fli instruction is available for the type and this is
2317 // a legal FP immediate for the type. Returns -1 otherwise.
2319 if (!Subtarget.hasStdExtZfa())
2320 return -1;
2321
2322 bool IsSupportedVT = false;
2323 if (VT == MVT::f16) {
2324 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2325 } else if (VT == MVT::f32) {
2326 IsSupportedVT = true;
2327 } else if (VT == MVT::f64) {
2328 assert(Subtarget.hasStdExtD() && "Expect D extension");
2329 IsSupportedVT = true;
2330 }
2331
2332 if (!IsSupportedVT)
2333 return -1;
2334
2335 return RISCVLoadFPImm::getLoadFPImm(Imm);
2336}
2337
2339 bool ForCodeSize) const {
2340 bool IsLegalVT = false;
2341 if (VT == MVT::f16)
2342 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2343 else if (VT == MVT::f32)
2344 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2345 else if (VT == MVT::f64)
2346 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2347 else if (VT == MVT::bf16)
2348 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2349
2350 if (!IsLegalVT)
2351 return false;
2352
2353 if (getLegalZfaFPImm(Imm, VT) >= 0)
2354 return true;
2355
2356 // Some constants can be produced by fli+fneg.
2357 if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
2358 return true;
2359
2360 // Cannot create a 64-bit floating-point immediate value for RV32.
2361 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2362 // td can handle +0.0 or -0.0 already.
2363 // -0.0 can be created by fmv + fneg.
2364 return Imm.isZero();
2365 }
2366
2367 // Special case: fmv + fneg
2368 if (Imm.isNegZero())
2369 return true;
2370
2371 // Building an integer and then converting requires a fmv at the end of
2372 // the integer sequence. The fmv is not required for Zfinx.
2373 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2374 const int Cost =
2375 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2376 Subtarget.getXLen(), Subtarget);
2377 return Cost <= FPImmCost;
2378}
2379
2380// TODO: This is very conservative.
2382 unsigned Index) const {
2384 return false;
2385
2386 // Extracts from index 0 are just subreg extracts.
2387 if (Index == 0)
2388 return true;
2389
2390 // Only support extracting a fixed from a fixed vector for now.
2391 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2392 return false;
2393
2394 EVT EltVT = ResVT.getVectorElementType();
2395 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2396
2397 // The smallest type we can slide is i8.
2398 // TODO: We can extract index 0 from a mask vector without a slide.
2399 if (EltVT == MVT::i1)
2400 return false;
2401
2402 unsigned ResElts = ResVT.getVectorNumElements();
2403 unsigned SrcElts = SrcVT.getVectorNumElements();
2404
2405 unsigned MinVLen = Subtarget.getRealMinVLen();
2406 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2407
2408 // If we're extracting only data from the first VLEN bits of the source
2409 // then we can always do this with an m1 vslidedown.vx. Restricting the
2410 // Index ensures we can use a vslidedown.vi.
2411 // TODO: We can generalize this when the exact VLEN is known.
2412 if (Index + ResElts <= MinVLMAX && Index < 31)
2413 return true;
2414
2415 // Conservatively only handle extracting half of a vector.
2416 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2417 // the upper half of a vector until we have more test coverage.
2418 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2419 // a cheap extract. However, this case is important in practice for
2420 // shuffled extracts of longer vectors. How should we resolve this?
2421 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2422}
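// Worked sketch of the "first VLEN bits" test above (the helper name and the
// concrete VLEN are assumptions for illustration): with a guaranteed VLEN of
// 128, a v2i32 extracted at index 2 of a v8i32 satisfies
// Index + ResElts <= MinVLMAX (2 + 2 <= 128 / 32), so a single m1
// vslidedown.vi suffices.
static bool exampleFitsInFirstVReg(unsigned Index, unsigned ResElts,
                                   unsigned MinVLen, unsigned EltBits) {
  unsigned MinVLMAX = MinVLen / EltBits;            // elements per m1 register
  return Index + ResElts <= MinVLMAX && Index < 31; // vslidedown.vi immediate limit
}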
2423
2425 CallingConv::ID CC,
2426 EVT VT) const {
2427 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2428 // We might still end up using a GPR but that will be decided based on ABI.
2429 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2430 !Subtarget.hasStdExtZfhminOrZhinxmin())
2431 return MVT::f32;
2432
2434
2435 return PartVT;
2436}
2437
2438unsigned
2440 std::optional<MVT> RegisterVT) const {
2441 // Pair inline assembly operand
2442 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2443 *RegisterVT == MVT::Untyped)
2444 return 1;
2445
2446 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2447}
2448
2450 CallingConv::ID CC,
2451 EVT VT) const {
2452 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2453 // We might still end up using a GPR but that will be decided based on ABI.
2454 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2455 !Subtarget.hasStdExtZfhminOrZhinxmin())
2456 return 1;
2457
2459}
2460
2462 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2463 unsigned &NumIntermediates, MVT &RegisterVT) const {
2465 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2466
2467 return NumRegs;
2468}
2469
2470// Changes the condition code and swaps operands if necessary, so the SetCC
2471// operation matches one of the comparisons supported directly by branches
2472// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2473// with 1/-1.
2474static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2475 ISD::CondCode &CC, SelectionDAG &DAG,
2476 const RISCVSubtarget &Subtarget) {
2477 // If this is a single bit test that can't be handled by ANDI, shift the
2478 // bit to be tested to the MSB and perform a signed compare with 0.
2479 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2480 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2481 isa<ConstantSDNode>(LHS.getOperand(1)) &&
2482 // XAndesPerf supports branch on test bit.
2483 !Subtarget.hasVendorXAndesPerf()) {
2484 uint64_t Mask = LHS.getConstantOperandVal(1);
2485 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2486 unsigned ShAmt = 0;
2487 if (isPowerOf2_64(Mask)) {
2488 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2489 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2490 } else {
2491 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2492 }
2493
2494 LHS = LHS.getOperand(0);
2495 if (ShAmt != 0)
2496 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2497 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2498 return;
2499 }
2500 }
2501
2502 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2503 int64_t C = RHSC->getSExtValue();
2504 switch (CC) {
2505 default: break;
2506 case ISD::SETGT:
2507 // Convert X > -1 to X >= 0.
2508 if (C == -1) {
2509 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2510 CC = ISD::SETGE;
2511 return;
2512 }
2513 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
2514 // We have a branch immediate instruction for SETGE but not SETGT.
2515 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2516 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2517 CC = ISD::SETGE;
2518 return;
2519 }
2520 break;
2521 case ISD::SETLT:
2522 // Convert X < 1 to 0 >= X.
2523 if (C == 1) {
2524 RHS = LHS;
2525 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2526 CC = ISD::SETGE;
2527 return;
2528 }
2529 break;
2530 case ISD::SETUGT:
2531 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
2532 // We have a branch immediate instruction for SETUGE but not SETUGT.
2533 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
2534 // immediate.
2535 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2536 CC = ISD::SETUGE;
2537 return;
2538 }
2539 break;
2540 }
2541 }
2542
2543 switch (CC) {
2544 default:
2545 break;
2546 case ISD::SETGT:
2547 case ISD::SETLE:
2548 case ISD::SETUGT:
2549 case ISD::SETULE:
2551 std::swap(LHS, RHS);
2552 break;
2553 }
2554}
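// Scalar sketch of the single-bit-test rewrite above (the helper name is
// hypothetical; the usual LLVM MathExtras helpers are assumed): for a
// power-of-two mask too large for ANDI, (X & Mask) == 0 is rewritten as a
// sign test after shifting the tested bit into the MSB, mirroring the
// SETEQ -> SETGE / SETLT adjustment performed on the DAG.
static bool exampleBitIsClear(uint64_t X, uint64_t Mask) {
  assert(isPowerOf2_64(Mask) && "sketch assumes a single-bit mask");
  unsigned ShAmt = 63 - Log2_64(Mask);          // XLEN - 1 - log2(Mask) on RV64
  return static_cast<int64_t>(X << ShAmt) >= 0; // bit clear <=> shifted value >= 0
}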
2555
2557 if (VT.isRISCVVectorTuple()) {
2558 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2559 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2560 return RISCVVType::LMUL_F8;
2561 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2562 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2563 return RISCVVType::LMUL_F4;
2564 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2565 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2566 return RISCVVType::LMUL_F2;
2567 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2568 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2569 return RISCVVType::LMUL_1;
2570 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2571 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2572 return RISCVVType::LMUL_2;
2573 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2574 return RISCVVType::LMUL_4;
2575 llvm_unreachable("Invalid vector tuple type LMUL.");
2576 }
2577
2578 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2579 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2580 if (VT.getVectorElementType() == MVT::i1)
2581 KnownSize *= 8;
2582
2583 switch (KnownSize) {
2584 default:
2585 llvm_unreachable("Invalid LMUL.");
2586 case 8:
2587 return RISCVVType::LMUL_F8;
2588 case 16:
2589 return RISCVVType::LMUL_F4;
2590 case 32:
2591 return RISCVVType::LMUL_F2;
2592 case 64:
2593 return RISCVVType::LMUL_1;
2594 case 128:
2595 return RISCVVType::LMUL_2;
2596 case 256:
2597 return RISCVVType::LMUL_4;
2598 case 512:
2599 return RISCVVType::LMUL_8;
2600 }
2601}
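// A small sketch restating the scalable-vector switch above as arithmetic,
// assuming the standard RVVBitsPerBlock of 64 (the helper name is made up).
// The result is LMUL as a (numerator, denominator) pair, so 16 known bits
// (e.g. nxv1i16) give {1, 4}, i.e. LMUL_F4, and 128 known bits give {2, 1}.
static std::pair<unsigned, unsigned> exampleLMULFraction(unsigned KnownMinBits) {
  if (KnownMinBits < 64)
    return {1, 64 / KnownMinBits}; // fractional: LMUL_F8, LMUL_F4 or LMUL_F2
  return {KnownMinBits / 64, 1};   // whole: LMUL_1, LMUL_2, LMUL_4 or LMUL_8
}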
2602
2604 switch (LMul) {
2605 default:
2606 llvm_unreachable("Invalid LMUL.");
2610 case RISCVVType::LMUL_1:
2611 return RISCV::VRRegClassID;
2612 case RISCVVType::LMUL_2:
2613 return RISCV::VRM2RegClassID;
2614 case RISCVVType::LMUL_4:
2615 return RISCV::VRM4RegClassID;
2616 case RISCVVType::LMUL_8:
2617 return RISCV::VRM8RegClassID;
2618 }
2619}
2620
2621unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2622 RISCVVType::VLMUL LMUL = getLMUL(VT);
2623 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2624 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2625 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2626 "Unexpected subreg numbering");
2627 return RISCV::sub_vrm1_0 + Index;
2628 }
2629 if (LMUL == RISCVVType::LMUL_2) {
2630 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2631 "Unexpected subreg numbering");
2632 return RISCV::sub_vrm2_0 + Index;
2633 }
2634 if (LMUL == RISCVVType::LMUL_4) {
2635 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2636 "Unexpected subreg numbering");
2637 return RISCV::sub_vrm4_0 + Index;
2638 }
2639 llvm_unreachable("Invalid vector type.");
2640}
2641
2643 if (VT.isRISCVVectorTuple()) {
2644 unsigned NF = VT.getRISCVVectorTupleNumFields();
2645 unsigned RegsPerField =
2646 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2647 (NF * RISCV::RVVBitsPerBlock));
2648 switch (RegsPerField) {
2649 case 1:
2650 if (NF == 2)
2651 return RISCV::VRN2M1RegClassID;
2652 if (NF == 3)
2653 return RISCV::VRN3M1RegClassID;
2654 if (NF == 4)
2655 return RISCV::VRN4M1RegClassID;
2656 if (NF == 5)
2657 return RISCV::VRN5M1RegClassID;
2658 if (NF == 6)
2659 return RISCV::VRN6M1RegClassID;
2660 if (NF == 7)
2661 return RISCV::VRN7M1RegClassID;
2662 if (NF == 8)
2663 return RISCV::VRN8M1RegClassID;
2664 break;
2665 case 2:
2666 if (NF == 2)
2667 return RISCV::VRN2M2RegClassID;
2668 if (NF == 3)
2669 return RISCV::VRN3M2RegClassID;
2670 if (NF == 4)
2671 return RISCV::VRN4M2RegClassID;
2672 break;
2673 case 4:
2674 assert(NF == 2);
2675 return RISCV::VRN2M4RegClassID;
2676 default:
2677 break;
2678 }
2679 llvm_unreachable("Invalid vector tuple type RegClass.");
2680 }
2681
2682 if (VT.getVectorElementType() == MVT::i1)
2683 return RISCV::VRRegClassID;
2684 return getRegClassIDForLMUL(getLMUL(VT));
2685}
2686
2687// Attempt to decompose a subvector insert/extract between VecVT and
2688// SubVecVT via subregister indices. Returns the subregister index that
2689// can perform the subvector insert/extract with the given element index, as
2690// well as the index corresponding to any leftover subvectors that must be
2691// further inserted/extracted within the register class for SubVecVT.
2692std::pair<unsigned, unsigned>
2694 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2695 const RISCVRegisterInfo *TRI) {
2696 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2697 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2698 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2699 "Register classes not ordered");
2700 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2701 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2702
2703 // If VecVT is a vector tuple type, either it is a tuple type with the same
2704 // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2705 if (VecVT.isRISCVVectorTuple()) {
2706 if (VecRegClassID == SubRegClassID)
2707 return {RISCV::NoSubRegister, 0};
2708
2709 assert(SubVecVT.isScalableVector() &&
2710 "Only allow scalable vector subvector.");
2711 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2712 "Invalid vector tuple insert/extract for vector and subvector with "
2713 "different LMUL.");
2714 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2715 }
2716
2717 // Try to compose a subregister index that takes us from the incoming
2718 // LMUL>1 register class down to the outgoing one. At each step we halve
2719 // the LMUL:
2720 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2721 // Note that this is not guaranteed to find a subregister index, such as
2722 // when we are extracting from one VR type to another.
2723 unsigned SubRegIdx = RISCV::NoSubRegister;
2724 for (const unsigned RCID :
2725 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2726 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2727 VecVT = VecVT.getHalfNumVectorElementsVT();
2728 bool IsHi =
2729 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2730 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2731 getSubregIndexByMVT(VecVT, IsHi));
2732 if (IsHi)
2733 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2734 }
2735 return {SubRegIdx, InsertExtractIdx};
2736}
2737
2738// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2739// stores for those types.
2740bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2741 return !Subtarget.useRVVForFixedLengthVectors() ||
2742 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2743}
2744
2746 if (!ScalarTy.isSimple())
2747 return false;
2748 switch (ScalarTy.getSimpleVT().SimpleTy) {
2749 case MVT::iPTR:
2750 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2751 case MVT::i8:
2752 case MVT::i16:
2753 case MVT::i32:
2754 return true;
2755 case MVT::i64:
2756 return Subtarget.hasVInstructionsI64();
2757 case MVT::f16:
2758 return Subtarget.hasVInstructionsF16Minimal();
2759 case MVT::bf16:
2760 return Subtarget.hasVInstructionsBF16Minimal();
2761 case MVT::f32:
2762 return Subtarget.hasVInstructionsF32();
2763 case MVT::f64:
2764 return Subtarget.hasVInstructionsF64();
2765 default:
2766 return false;
2767 }
2768}
2769
2770
2771unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2772 return NumRepeatedDivisors;
2773}
2774
2776 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2777 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2778 "Unexpected opcode");
2779 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2780 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2782 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2783 if (!II)
2784 return SDValue();
2785 return Op.getOperand(II->VLOperand + 1 + HasChain);
2786}
2787
2789 const RISCVSubtarget &Subtarget) {
2790 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2791 if (!Subtarget.useRVVForFixedLengthVectors())
2792 return false;
2793
2794 // We only support a set of vector types with a consistent maximum fixed size
2795 // across all supported vector element types to avoid legalization issues.
2796 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2797 // fixed-length vector type we support is 1024 bytes.
2798 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
2799 return false;
2800
2801 unsigned MinVLen = Subtarget.getRealMinVLen();
2802
2803 MVT EltVT = VT.getVectorElementType();
2804
2805 // Don't use RVV for vectors we cannot scalarize if required.
2806 switch (EltVT.SimpleTy) {
2807 // i1 is supported but has different rules.
2808 default:
2809 return false;
2810 case MVT::i1:
2811 // Masks can only use a single register.
2812 if (VT.getVectorNumElements() > MinVLen)
2813 return false;
2814 MinVLen /= 8;
2815 break;
2816 case MVT::i8:
2817 case MVT::i16:
2818 case MVT::i32:
2819 break;
2820 case MVT::i64:
2821 if (!Subtarget.hasVInstructionsI64())
2822 return false;
2823 break;
2824 case MVT::f16:
2825 if (!Subtarget.hasVInstructionsF16Minimal())
2826 return false;
2827 break;
2828 case MVT::bf16:
2829 if (!Subtarget.hasVInstructionsBF16Minimal())
2830 return false;
2831 break;
2832 case MVT::f32:
2833 if (!Subtarget.hasVInstructionsF32())
2834 return false;
2835 break;
2836 case MVT::f64:
2837 if (!Subtarget.hasVInstructionsF64())
2838 return false;
2839 break;
2840 }
2841
2842 // Reject elements larger than ELEN.
2843 if (EltVT.getSizeInBits() > Subtarget.getELen())
2844 return false;
2845
2846 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2847 // Don't use RVV for types that don't fit.
2848 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2849 return false;
2850
2851 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2852 // the base fixed length RVV support in place.
2853 if (!VT.isPow2VectorType())
2854 return false;
2855
2856 return true;
2857}
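// Sketch of the register-group check above (the VLEN value and helper name
// are assumptions): with a guaranteed VLEN of 128, a 512-bit v16i32 needs
// ceil(512 / 128) = 4 vector registers, which is acceptable whenever the
// maximum LMUL configured for fixed-length vectors is at least 4.
static bool exampleFitsInMaxLMUL(unsigned FixedSizeInBits, unsigned MinVLen,
                                 unsigned MaxLMUL) {
  return divideCeil(FixedSizeInBits, MinVLen) <= MaxLMUL;
}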
2858
2859bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2860 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2861}
2862
2863// Return the largest legal scalable vector type that matches VT's element type.
2865 const RISCVSubtarget &Subtarget) {
2866 // This may be called before legal types are setup.
2867 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2868 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2869 "Expected legal fixed length vector!");
2870
2871 unsigned MinVLen = Subtarget.getRealMinVLen();
2872 unsigned MaxELen = Subtarget.getELen();
2873
2874 MVT EltVT = VT.getVectorElementType();
2875 switch (EltVT.SimpleTy) {
2876 default:
2877 llvm_unreachable("unexpected element type for RVV container");
2878 case MVT::i1:
2879 case MVT::i8:
2880 case MVT::i16:
2881 case MVT::i32:
2882 case MVT::i64:
2883 case MVT::bf16:
2884 case MVT::f16:
2885 case MVT::f32:
2886 case MVT::f64: {
2887 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2888 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2889 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2890 unsigned NumElts =
2892 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2893 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2894 return MVT::getScalableVectorVT(EltVT, NumElts);
2895 }
2896 }
2897}
2898
2900 const RISCVSubtarget &Subtarget) {
2902 Subtarget);
2903}
2904
2906 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2907}
2908
2909// Grow V to consume an entire RVV register.
2911 const RISCVSubtarget &Subtarget) {
2912 assert(VT.isScalableVector() &&
2913 "Expected to convert into a scalable vector!");
2914 assert(V.getValueType().isFixedLengthVector() &&
2915 "Expected a fixed length vector operand!");
2916 SDLoc DL(V);
2917 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
2918}
2919
2920// Shrink V so it's just big enough to maintain a VT's worth of data.
2922 const RISCVSubtarget &Subtarget) {
2924 "Expected to convert into a fixed length vector!");
2925 assert(V.getValueType().isScalableVector() &&
2926 "Expected a scalable vector operand!");
2927 SDLoc DL(V);
2928 return DAG.getExtractSubvector(DL, VT, V, 0);
2929}
2930
2931 /// Return the mask type suitable for masking the provided
2932/// vector type. This is simply an i1 element type vector of the same
2933/// (possibly scalable) length.
2934static MVT getMaskTypeFor(MVT VecVT) {
2935 assert(VecVT.isVector());
2937 return MVT::getVectorVT(MVT::i1, EC);
2938}
2939
2940 /// Creates an all-ones mask suitable for masking a vector of type VecVT with
2941 /// vector length VL.
2942static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2943 SelectionDAG &DAG) {
2944 MVT MaskVT = getMaskTypeFor(VecVT);
2945 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2946}
2947
2948static std::pair<SDValue, SDValue>
2950 const RISCVSubtarget &Subtarget) {
2951 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2952 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2953 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2954 return {Mask, VL};
2955}
2956
2957static std::pair<SDValue, SDValue>
2958getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2959 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2960 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2961 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2962 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2963 return {Mask, VL};
2964}
2965
2966// Gets the two common "VL" operands: an all-ones mask and the vector length.
2967// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2968// the vector type that the fixed-length vector is contained in. Otherwise if
2969// VecVT is scalable, then ContainerVT should be the same as VecVT.
2970static std::pair<SDValue, SDValue>
2971getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2972 const RISCVSubtarget &Subtarget) {
2973 if (VecVT.isFixedLengthVector())
2974 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2975 Subtarget);
2976 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2977 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2978}
2979
2981 SelectionDAG &DAG) const {
2982 assert(VecVT.isScalableVector() && "Expected scalable vector");
2983 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2984 VecVT.getVectorElementCount());
2985}
2986
2987std::pair<unsigned, unsigned>
2989 const RISCVSubtarget &Subtarget) {
2990 assert(VecVT.isScalableVector() && "Expected scalable vector");
2991
2992 unsigned EltSize = VecVT.getScalarSizeInBits();
2993 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2994
2995 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2996 unsigned MaxVLMAX =
2997 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2998
2999 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
3000 unsigned MinVLMAX =
3001 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
3002
3003 return std::make_pair(MinVLMAX, MaxVLMAX);
3004}
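// Worked sketch of the VLMAX identity behind the bounds above, namely
// VLMAX = (VLEN / SEW) * LMUL with LMUL = MinSize / RVVBitsPerBlock (the
// helper name and the hard-coded 64 restate that constant). For nxv2i32
// (SEW 32, MinSize 64) this gives 4 at VLEN = 128 and 16 at VLEN = 512.
static unsigned exampleComputeVLMAX(unsigned VectorBits, unsigned EltSize,
                                    unsigned MinSize) {
  return (VectorBits / EltSize) * MinSize / 64;
}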
3005
3006 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very
3007 // little of either is (currently) supported. This can get us into an infinite loop
3008// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
3009// as a ..., etc.
3010// Until either (or both) of these can reliably lower any node, reporting that
3011// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
3012// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
3013// which is not desirable.
3015 EVT VT, unsigned DefinedValues) const {
3016 return false;
3017}
3018
3020 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
3021 // implementation-defined.
3022 if (!VT.isVector())
3024 unsigned DLenFactor = Subtarget.getDLenFactor();
3025 unsigned Cost;
3026 if (VT.isScalableVector()) {
3027 unsigned LMul;
3028 bool Fractional;
3029 std::tie(LMul, Fractional) =
3031 if (Fractional)
3032 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
3033 else
3034 Cost = (LMul * DLenFactor);
3035 } else {
3036 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
3037 }
3038 return Cost;
3039}
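// Worked sketch of the scalable-vector branch of the cost model above (the
// helper name and concrete numbers are illustrative): with DLEN = VLEN / 2,
// i.e. a DLenFactor of 2, an LMUL_4 type costs 4 * 2 = 8 while a fractional
// LMUL_F2 type (LMul = 2, Fractional = true) costs 2 / 2 = 1.
static unsigned exampleScalableLMULCost(unsigned LMul, bool Fractional,
                                        unsigned DLenFactor) {
  if (Fractional)
    return LMul <= DLenFactor ? DLenFactor / LMul : 1;
  return LMul * DLenFactor;
}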
3040
3041
3042/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
3043 /// may be quadratic in the number of vregs implied by LMUL, and is assumed to
3044 /// be so by default. VRGatherCostModel reflects the available options. Note
3045 /// that the operands (index and possibly mask) are handled separately.
3047 auto LMULCost = getLMULCost(VT);
3048 bool Log2CostModel =
3050 if (Log2CostModel && LMULCost.isValid()) {
3051 unsigned Log = Log2_64(LMULCost.getValue());
3052 if (Log > 0)
3053 return LMULCost * Log;
3054 }
3055 return LMULCost * LMULCost;
3056}
3057
3058/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3059/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3060/// or may track the vrgather.vv cost. It is implementation-dependent.
3062 return getLMULCost(VT);
3063}
3064
3065/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3066/// for the type VT. (This does not cover the vslide1up or vslide1down
3067/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3068/// or may track the vrgather.vv cost. It is implementation-dependent.
3070 return getLMULCost(VT);
3071}
3072
3073/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3074/// for the type VT. (This does not cover the vslide1up or vslide1down
3075/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3076/// or may track the vrgather.vv cost. It is implementation-dependent.
3078 return getLMULCost(VT);
3079}
3080
3082 const RISCVSubtarget &Subtarget) {
3083 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3084 // bf16 conversions are always promoted to f32.
3085 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3086 Op.getValueType() == MVT::bf16) {
3087 bool IsStrict = Op->isStrictFPOpcode();
3088
3089 SDLoc DL(Op);
3090 if (IsStrict) {
3091 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
3092 {Op.getOperand(0), Op.getOperand(1)});
3093 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
3094 {Op.getValueType(), MVT::Other},
3095 {Val.getValue(1), Val.getValue(0),
3096 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
3097 }
3098 return DAG.getNode(
3099 ISD::FP_ROUND, DL, Op.getValueType(),
3100 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
3101 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3102 }
3103
3104 // Other operations are legal.
3105 return Op;
3106}
3107
3109 const RISCVSubtarget &Subtarget) {
3110 // RISC-V FP-to-int conversions saturate to the destination register size, but
3111 // don't produce 0 for nan. We can use a conversion instruction and fix the
3112 // nan case with a compare and a select.
3113 SDValue Src = Op.getOperand(0);
3114
3115 MVT DstVT = Op.getSimpleValueType();
3116 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3117
3118 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3119
3120 if (!DstVT.isVector()) {
3121 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3122 // the result.
3123 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3124 Src.getValueType() == MVT::bf16) {
3125 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3126 }
3127
3128 unsigned Opc;
3129 if (SatVT == DstVT)
3130 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3131 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3132 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3133 else
3134 return SDValue();
3135 // FIXME: Support other SatVTs by clamping before or after the conversion.
3136
3137 SDLoc DL(Op);
3138 SDValue FpToInt = DAG.getNode(
3139 Opc, DL, DstVT, Src,
3141
3142 if (Opc == RISCVISD::FCVT_WU_RV64)
3143 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3144
3145 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3146 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3148 }
3149
3150 // Vectors.
3151
3152 MVT DstEltVT = DstVT.getVectorElementType();
3153 MVT SrcVT = Src.getSimpleValueType();
3154 MVT SrcEltVT = SrcVT.getVectorElementType();
3155 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3156 unsigned DstEltSize = DstEltVT.getSizeInBits();
3157
3158 // Only handle saturating to the destination type.
3159 if (SatVT != DstEltVT)
3160 return SDValue();
3161
3162 MVT DstContainerVT = DstVT;
3163 MVT SrcContainerVT = SrcVT;
3164 if (DstVT.isFixedLengthVector()) {
3165 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3166 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3167 assert(DstContainerVT.getVectorElementCount() ==
3168 SrcContainerVT.getVectorElementCount() &&
3169 "Expected same element count");
3170 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3171 }
3172
3173 SDLoc DL(Op);
3174
3175 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3176
3177 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3178 {Src, Src, DAG.getCondCode(ISD::SETNE),
3179 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3180
3181 // Need to widen by more than 1 step, promote the FP type, then do a widening
3182 // convert.
3183 if (DstEltSize > (2 * SrcEltSize)) {
3184 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3185 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3186 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3187 }
3188
3189 MVT CvtContainerVT = DstContainerVT;
3190 MVT CvtEltVT = DstEltVT;
3191 if (SrcEltSize > (2 * DstEltSize)) {
3192 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3193 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3194 }
3195
3196 unsigned RVVOpc =
3197 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3198 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3199
3200 while (CvtContainerVT != DstContainerVT) {
3201 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3202 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3203 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3204 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3205 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3206 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3207 }
3208
3209 SDValue SplatZero = DAG.getNode(
3210 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3211 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3212 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3213 Res, DAG.getUNDEF(DstContainerVT), VL);
3214
3215 if (DstVT.isFixedLengthVector())
3216 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3217
3218 return Res;
3219}
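// Scalar sketch of the saturating-conversion semantics produced above
// (illustrative only; assumes <cstdint>): the hardware fcvt already clamps
// out-of-range inputs to the extremes, so only the NaN case needs the extra
// compare-and-select to force a zero result.
static int32_t exampleFpToSIntSat32(float F) {
  if (F != F)                     // NaN: fcvt.w.s would give INT32_MAX, the
    return 0;                     // saturating form must return 0 instead
  if (F >= 2147483648.0f)         // >= 2^31 clamps to INT32_MAX
    return INT32_MAX;
  if (F < -2147483648.0f)         // below -2^31 clamps to INT32_MIN
    return INT32_MIN;
  return static_cast<int32_t>(F); // in range: plain truncating conversion
}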
3220
3222 const RISCVSubtarget &Subtarget) {
3223 bool IsStrict = Op->isStrictFPOpcode();
3224 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3225
3226 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3227 // bf16 conversions are always promoted to f32.
3228 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3229 SrcVal.getValueType() == MVT::bf16) {
3230 SDLoc DL(Op);
3231 if (IsStrict) {
3232 SDValue Ext =
3233 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3234 {Op.getOperand(0), SrcVal});
3235 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3236 {Ext.getValue(1), Ext.getValue(0)});
3237 }
3238 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3239 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3240 }
3241
3242 // Other operations are legal.
3243 return Op;
3244}
3245
3247 switch (Opc) {
3248 case ISD::FROUNDEVEN:
3250 case ISD::VP_FROUNDEVEN:
3251 return RISCVFPRndMode::RNE;
3252 case ISD::FTRUNC:
3253 case ISD::STRICT_FTRUNC:
3254 case ISD::VP_FROUNDTOZERO:
3255 return RISCVFPRndMode::RTZ;
3256 case ISD::FFLOOR:
3257 case ISD::STRICT_FFLOOR:
3258 case ISD::VP_FFLOOR:
3259 return RISCVFPRndMode::RDN;
3260 case ISD::FCEIL:
3261 case ISD::STRICT_FCEIL:
3262 case ISD::VP_FCEIL:
3263 return RISCVFPRndMode::RUP;
3264 case ISD::FROUND:
3265 case ISD::LROUND:
3266 case ISD::LLROUND:
3267 case ISD::STRICT_FROUND:
3268 case ISD::STRICT_LROUND:
3270 case ISD::VP_FROUND:
3271 return RISCVFPRndMode::RMM;
3272 case ISD::FRINT:
3273 case ISD::LRINT:
3274 case ISD::LLRINT:
3275 case ISD::STRICT_FRINT:
3276 case ISD::STRICT_LRINT:
3277 case ISD::STRICT_LLRINT:
3278 case ISD::VP_FRINT:
3279 case ISD::VP_LRINT:
3280 case ISD::VP_LLRINT:
3281 return RISCVFPRndMode::DYN;
3282 }
3283
3285}
3286
3287// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3288// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3289// the integer domain and back. Taking care to avoid converting values that are
3290// nan or already correct.
3291static SDValue
3293 const RISCVSubtarget &Subtarget) {
3294 MVT VT = Op.getSimpleValueType();
3295 assert(VT.isVector() && "Unexpected type");
3296
3297 SDLoc DL(Op);
3298
3299 SDValue Src = Op.getOperand(0);
3300
3301 // Freeze the source since we are increasing the number of uses.
3302 Src = DAG.getFreeze(Src);
3303
3304 MVT ContainerVT = VT;
3305 if (VT.isFixedLengthVector()) {
3306 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3307 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3308 }
3309
3310 SDValue Mask, VL;
3311 if (Op->isVPOpcode()) {
3312 Mask = Op.getOperand(1);
3313 if (VT.isFixedLengthVector())
3314 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3315 Subtarget);
3316 VL = Op.getOperand(2);
3317 } else {
3318 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3319 }
3320
3321 // We do the conversion on the absolute value and fix the sign at the end.
3322 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3323
3324 // Determine the largest integer that can be represented exactly. This and
3325 // values larger than it don't have any fractional bits so don't need to
3326 // be converted.
3327 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3328 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3329 APFloat MaxVal = APFloat(FltSem);
3330 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3331 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3332 SDValue MaxValNode =
3333 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3334 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3335 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3336
3337 // If abs(Src) was larger than MaxVal or nan, keep it.
3338 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3339 Mask =
3340 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3341 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3342 Mask, Mask, VL});
3343
3344 // Truncate to integer and convert back to FP.
3345 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3346 MVT XLenVT = Subtarget.getXLenVT();
3347 SDValue Truncated;
3348
3349 switch (Op.getOpcode()) {
3350 default:
3351 llvm_unreachable("Unexpected opcode");
3352 case ISD::FRINT:
3353 case ISD::VP_FRINT:
3354 case ISD::FCEIL:
3355 case ISD::VP_FCEIL:
3356 case ISD::FFLOOR:
3357 case ISD::VP_FFLOOR:
3358 case ISD::FROUND:
3359 case ISD::FROUNDEVEN:
3360 case ISD::VP_FROUND:
3361 case ISD::VP_FROUNDEVEN:
3362 case ISD::VP_FROUNDTOZERO: {
3365 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3366 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3367 break;
3368 }
3369 case ISD::FTRUNC:
3370 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3371 Mask, VL);
3372 break;
3373 case ISD::FNEARBYINT:
3374 case ISD::VP_FNEARBYINT:
3375 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3376 Mask, VL);
3377 break;
3378 }
3379
3380 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3381 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3382 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3383 Mask, VL);
3384
3385 // Restore the original sign so that -0.0 is preserved.
3386 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3387 Src, Src, Mask, VL);
3388
3389 if (!VT.isFixedLengthVector())
3390 return Truncated;
3391
3392 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3393}
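// Scalar sketch of the round-trip trick above (assumes <cmath>; not the
// vector lowering itself): any value with magnitude >= 2^(precision - 1)
// already has no fractional bits, so only smaller values are converted to
// integer and back, and copysign restores the sign so that -0.0 survives.
static float exampleTruncToIntegral(float X) {
  const float MaxExact = 8388608.0f;                       // 2^23 for f32
  if (!(std::fabs(X) < MaxExact))                          // also catches NaN
    return X;
  float T = static_cast<float>(static_cast<long long>(X)); // RTZ, like FTRUNC
  return std::copysign(T, X);                              // preserve -0.0
}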
3394
3395// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3396// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
3397// qNan and converting the new source to integer and back to FP.
3398static SDValue
3400 const RISCVSubtarget &Subtarget) {
3401 SDLoc DL(Op);
3402 MVT VT = Op.getSimpleValueType();
3403 SDValue Chain = Op.getOperand(0);
3404 SDValue Src = Op.getOperand(1);
3405
3406 MVT ContainerVT = VT;
3407 if (VT.isFixedLengthVector()) {
3408 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3409 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3410 }
3411
3412 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3413
3414 // Freeze the source since we are increasing the number of uses.
3415 Src = DAG.getFreeze(Src);
3416
3417 // Convert sNan to qNan by executing x + x for all unordered element x in Src.
3418 MVT MaskVT = Mask.getSimpleValueType();
3419 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3420 DAG.getVTList(MaskVT, MVT::Other),
3421 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3422 DAG.getUNDEF(MaskVT), Mask, VL});
3423 Chain = Unorder.getValue(1);
3424 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3425 DAG.getVTList(ContainerVT, MVT::Other),
3426 {Chain, Src, Src, Src, Unorder, VL});
3427 Chain = Src.getValue(1);
3428
3429 // We do the conversion on the absolute value and fix the sign at the end.
3430 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3431
3432 // Determine the largest integer that can be represented exactly. This and
3433 // values larger than it don't have any fractional bits so don't need to
3434 // be converted.
3435 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3436 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3437 APFloat MaxVal = APFloat(FltSem);
3438 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3439 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3440 SDValue MaxValNode =
3441 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3442 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3443 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3444
3445 // If abs(Src) was larger than MaxVal or nan, keep it.
3446 Mask = DAG.getNode(
3447 RISCVISD::SETCC_VL, DL, MaskVT,
3448 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3449
3450 // Truncate to integer and convert back to FP.
3451 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3452 MVT XLenVT = Subtarget.getXLenVT();
3453 SDValue Truncated;
3454
3455 switch (Op.getOpcode()) {
3456 default:
3457 llvm_unreachable("Unexpected opcode");
3458 case ISD::STRICT_FCEIL:
3459 case ISD::STRICT_FFLOOR:
3460 case ISD::STRICT_FROUND:
3464 Truncated = DAG.getNode(
3465 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3466 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3467 break;
3468 }
3469 case ISD::STRICT_FTRUNC:
3470 Truncated =
3471 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3472 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3473 break;
3475 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3476 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3477 Mask, VL);
3478 break;
3479 }
3480 Chain = Truncated.getValue(1);
3481
3482 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3483 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3484 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3485 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3486 Truncated, Mask, VL);
3487 Chain = Truncated.getValue(1);
3488 }
3489
3490 // Restore the original sign so that -0.0 is preserved.
3491 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3492 Src, Src, Mask, VL);
3493
3494 if (VT.isFixedLengthVector())
3495 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3496 return DAG.getMergeValues({Truncated, Chain}, DL);
3497}
3498
3499static SDValue
3501 const RISCVSubtarget &Subtarget) {
3502 MVT VT = Op.getSimpleValueType();
3503 if (VT.isVector())
3504 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3505
3506 if (DAG.shouldOptForSize())
3507 return SDValue();
3508
3509 SDLoc DL(Op);
3510 SDValue Src = Op.getOperand(0);
3511
3512 // Create an integer the size of the mantissa with the MSB set. This and all
3513 // values larger than it don't have any fractional bits so don't need to be
3514 // converted.
3515 const fltSemantics &FltSem = VT.getFltSemantics();
3516 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3517 APFloat MaxVal = APFloat(FltSem);
3518 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3519 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3520 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3521
3523 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3524 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3525}
3526
3527// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3529 const RISCVSubtarget &Subtarget) {
3530 SDLoc DL(Op);
3531 MVT DstVT = Op.getSimpleValueType();
3532 SDValue Src = Op.getOperand(0);
3533 MVT SrcVT = Src.getSimpleValueType();
3534 assert(SrcVT.isVector() && DstVT.isVector() &&
3535 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3536 "Unexpected type");
3537
3538 MVT DstContainerVT = DstVT;
3539 MVT SrcContainerVT = SrcVT;
3540
3541 if (DstVT.isFixedLengthVector()) {
3542 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3543 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3544 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3545 }
3546
3547 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3548
3549 // [b]f16 -> f32
3550 MVT SrcElemType = SrcVT.getVectorElementType();
3551 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3552 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3553 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3554 }
3555
3556 SDValue Res =
3557 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3558 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3559 Subtarget.getXLenVT()),
3560 VL);
3561
3562 if (!DstVT.isFixedLengthVector())
3563 return Res;
3564
3565 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3566}
3567
3568static SDValue
3570 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3571 SDValue Offset, SDValue Mask, SDValue VL,
3573 if (Passthru.isUndef())
3575 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3576 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3577 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3578}
3579
3580static SDValue
3581getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3582 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3583 SDValue VL,
3585 if (Passthru.isUndef())
3587 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3588 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3589 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3590}
3591
3595 int64_t Addend;
3596};
3597
3598static std::optional<APInt> getExactInteger(const APFloat &APF,
3600 // We will use a SINT_TO_FP to materialize this constant so we should use a
3601 // signed APSInt here.
3602 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3603 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3604 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3605 // the rounding mode changes the output value, then it is not an exact
3606 // integer.
3608 bool IsExact;
3609 // If it is out of signed integer range, it will return an invalid operation.
3610 // If it is not an exact integer, IsExact is false.
3611 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3613 !IsExact)
3614 return std::nullopt;
3615 return ValInt.extractBits(BitWidth, 0);
3616}
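// Small sketch of what getExactInteger accepts, using plain doubles instead
// of APFloat (assumes <cmath>; the helper name is made up): 3.0 maps to 3,
// while 2.5, NaN or anything outside the signed 64-bit range is rejected.
static std::optional<int64_t> exampleExactInteger(double V) {
  double IntPart;
  if (std::modf(V, &IntPart) != 0.0) // any fractional bits (or NaN) -> reject
    return std::nullopt;
  if (V < -9223372036854775808.0 || V >= 9223372036854775808.0)
    return std::nullopt;             // out of signed i64 range (or +/-inf)
  return static_cast<int64_t>(V);
}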
3617
3618// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3619 // to the (non-zero) step S and start value X. This can then be lowered as the
3620// RVV sequence (VID * S) + X, for example.
3621// The step S is represented as an integer numerator divided by a positive
3622// denominator. Note that the implementation currently only identifies
3623// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3624// cannot detect 2/3, for example.
3625// Note that this method will also match potentially unappealing index
3626// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3627// determine whether this is worth generating code for.
3628//
3629// EltSizeInBits is the size of the type that the sequence will be calculated
3630// in, i.e. SEW for build_vectors or XLEN for address calculations.
3631static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3632 unsigned EltSizeInBits) {
3633 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3634 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3635 return std::nullopt;
3636 bool IsInteger = Op.getValueType().isInteger();
3637
3638 std::optional<unsigned> SeqStepDenom;
3639 std::optional<APInt> SeqStepNum;
3640 std::optional<APInt> SeqAddend;
3641 std::optional<std::pair<APInt, unsigned>> PrevElt;
3642 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3643
3644 // First extract the ops into a list of constant integer values. This may not
3645 // be possible for floats if they're not all representable as integers.
3647 const unsigned OpSize = Op.getScalarValueSizeInBits();
3648 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3649 if (Elt.isUndef()) {
3650 Elts[Idx] = std::nullopt;
3651 continue;
3652 }
3653 if (IsInteger) {
3654 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3655 } else {
3656 auto ExactInteger =
3657 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3658 if (!ExactInteger)
3659 return std::nullopt;
3660 Elts[Idx] = *ExactInteger;
3661 }
3662 }
3663
3664 for (auto [Idx, Elt] : enumerate(Elts)) {
3665 // Assume undef elements match the sequence; we just have to be careful
3666 // when interpolating across them.
3667 if (!Elt)
3668 continue;
3669
3670 if (PrevElt) {
3671 // Calculate the step since the last non-undef element, and ensure
3672 // it's consistent across the entire sequence.
3673 unsigned IdxDiff = Idx - PrevElt->second;
3674 APInt ValDiff = *Elt - PrevElt->first;
3675
3676 // A zero value difference means that we're somewhere in the middle
3677 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3678 // step change before evaluating the sequence.
3679 if (ValDiff == 0)
3680 continue;
3681
3682 int64_t Remainder = ValDiff.srem(IdxDiff);
3683 // Normalize the step if it's greater than 1.
3684 if (Remainder != ValDiff.getSExtValue()) {
3685 // The difference must cleanly divide the element span.
3686 if (Remainder != 0)
3687 return std::nullopt;
3688 ValDiff = ValDiff.sdiv(IdxDiff);
3689 IdxDiff = 1;
3690 }
3691
3692 if (!SeqStepNum)
3693 SeqStepNum = ValDiff;
3694 else if (ValDiff != SeqStepNum)
3695 return std::nullopt;
3696
3697 if (!SeqStepDenom)
3698 SeqStepDenom = IdxDiff;
3699 else if (IdxDiff != *SeqStepDenom)
3700 return std::nullopt;
3701 }
3702
3703 // Record this non-undef element for later.
3704 if (!PrevElt || PrevElt->first != *Elt)
3705 PrevElt = std::make_pair(*Elt, Idx);
3706 }
3707
3708 // We need to have logged a step for this to count as a legal index sequence.
3709 if (!SeqStepNum || !SeqStepDenom)
3710 return std::nullopt;
3711
3712 // Loop back through the sequence and validate elements we might have skipped
3713 // while waiting for a valid step. While doing this, log any sequence addend.
3714 for (auto [Idx, Elt] : enumerate(Elts)) {
3715 if (!Elt)
3716 continue;
3717 APInt ExpectedVal =
3718 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3719 *SeqStepNum)
3720 .sdiv(*SeqStepDenom);
3721
3722 APInt Addend = *Elt - ExpectedVal;
3723 if (!SeqAddend)
3724 SeqAddend = Addend;
3725 else if (Addend != SeqAddend)
3726 return std::nullopt;
3727 }
3728
3729 assert(SeqAddend && "Must have an addend if we have a step");
3730
3731 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3732 SeqAddend->getSExtValue()};
3733}
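// Tiny sketch of the sequence model recovered above (the helper name is an
// assumption): element Idx of a matched BUILD_VECTOR equals
// (Idx * StepNumerator) / StepDenominator + Addend, so <1, 3, 5, 7> matches
// with StepNumerator = 2, StepDenominator = 1 and Addend = 1.
static int64_t exampleVIDElement(unsigned Idx, const VIDSequence &Seq) {
  return (int64_t(Idx) * Seq.StepNumerator) / Seq.StepDenominator + Seq.Addend;
}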
3734
3735// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3736// and lower it as a VRGATHER_VX_VL from the source vector.
3737static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3738 SelectionDAG &DAG,
3739 const RISCVSubtarget &Subtarget) {
3740 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3741 return SDValue();
3742 SDValue Src = SplatVal.getOperand(0);
3743 // Don't perform this optimization for i1 vectors, or if the element types are
3744 // different
3745 // FIXME: Support i1 vectors, maybe by promoting to i8?
3746 MVT EltTy = VT.getVectorElementType();
3747 MVT SrcVT = Src.getSimpleValueType();
3748 if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType() ||
3749 !DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
3750 return SDValue();
3751 SDValue Idx = SplatVal.getOperand(1);
3752 // The index must be a legal type.
3753 if (Idx.getValueType() != Subtarget.getXLenVT())
3754 return SDValue();
3755
3756 // Check that we know Idx lies within VT
3757 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3758 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3759 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3760 return SDValue();
3761 }
3762
3763 // Convert fixed length vectors to scalable
3764 MVT ContainerVT = VT;
3765 if (VT.isFixedLengthVector())
3766 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3767
3768 MVT SrcContainerVT = SrcVT;
3769 if (SrcVT.isFixedLengthVector()) {
3770 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3771 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3772 }
3773
3774 // Put Src in a VT-sized vector.
3775 if (SrcContainerVT.getVectorMinNumElements() <
3776 ContainerVT.getVectorMinNumElements())
3777 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
3778 else
3779 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
3780
3781 // We checked that Idx fits inside VT earlier
3782 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3783 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3784 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3785 if (VT.isFixedLengthVector())
3786 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3787 return Gather;
3788}
3789
3791 const RISCVSubtarget &Subtarget) {
3792 MVT VT = Op.getSimpleValueType();
3793 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3794
3795 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3796
3797 SDLoc DL(Op);
3798 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3799
3800 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3801 int64_t StepNumerator = SimpleVID->StepNumerator;
3802 unsigned StepDenominator = SimpleVID->StepDenominator;
3803 int64_t Addend = SimpleVID->Addend;
3804
3805 assert(StepNumerator != 0 && "Invalid step");
3806 bool Negate = false;
3807 int64_t SplatStepVal = StepNumerator;
3808 unsigned StepOpcode = ISD::MUL;
3809 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3810 // anyway as the shift of 63 won't fit in uimm5.
3811 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3812 isPowerOf2_64(std::abs(StepNumerator))) {
3813 Negate = StepNumerator < 0;
3814 StepOpcode = ISD::SHL;
3815 SplatStepVal = Log2_64(std::abs(StepNumerator));
3816 }
3817
3818 // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
3819 // since it's the immediate value many RVV instructions accept. There is
3820    // no vmul.vi instruction, so ensure the multiply constant can fit in a
3821    // single addi instruction. For the addend, we allow up to 32 bits.
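    // For example, <0, 2, 4, 6> matches with StepNumerator == 2 and lowers to
    // vid.v followed by a left shift of 1, while <3, 4, 5, 6> matches with
    // Addend == 3 and lowers to vid.v followed by an add of 3.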
3822 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3823 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3824 isPowerOf2_32(StepDenominator) &&
3825 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
3826      MVT VIDVT =
3827          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3828 MVT VIDContainerVT =
3829 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3830 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3831 // Convert right out of the scalable type so we can use standard ISD
3832 // nodes for the rest of the computation. If we used scalable types with
3833 // these, we'd lose the fixed-length vector info and generate worse
3834 // vsetvli code.
3835 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3836 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3837 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3838 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3839 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3840 }
3841 if (StepDenominator != 1) {
3842 SDValue SplatStep =
3843 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3844 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3845 }
3846 if (Addend != 0 || Negate) {
3847 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3848 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3849 VID);
3850 }
3851 if (VT.isFloatingPoint()) {
3852 // TODO: Use vfwcvt to reduce register pressure.
3853 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3854 }
3855 return VID;
3856 }
3857 }
3858
3859 return SDValue();
3860}
3861
3862/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3863/// which constitute a large proportion of the elements. In such cases we can
3864/// splat a vector with the dominant element and make up the shortfall with
3865/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3866/// Note that this includes vectors of 2 elements by association. The
3867/// upper-most element is the "dominant" one, allowing us to use a splat to
3868/// "insert" the upper element, and an insert of the lower element at position
3869/// 0, which improves codegen.
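/// For example, <a, a, b, a, a, c, a, a> can be lowered as a splat of 'a'
/// followed by two single-element inserts of 'b' and 'c', rather than
/// materializing and inserting all eight elements individually.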
3870static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3871                                                 const RISCVSubtarget &Subtarget) {
3872 MVT VT = Op.getSimpleValueType();
3873 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3874
3875 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3876
3877 SDLoc DL(Op);
3878 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3879
3880 MVT XLenVT = Subtarget.getXLenVT();
3881 unsigned NumElts = Op.getNumOperands();
3882
3883 SDValue DominantValue;
3884 unsigned MostCommonCount = 0;
3885 DenseMap<SDValue, unsigned> ValueCounts;
3886 unsigned NumUndefElts =
3887 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3888
3889 // Track the number of scalar loads we know we'd be inserting, estimated as
3890 // any non-zero floating-point constant. Other kinds of element are either
3891 // already in registers or are materialized on demand. The threshold at which
3892  // a vector load is more desirable than several scalar materialization and
3893 // vector-insertion instructions is not known.
3894 unsigned NumScalarLoads = 0;
3895
3896 for (SDValue V : Op->op_values()) {
3897 if (V.isUndef())
3898 continue;
3899
3900 unsigned &Count = ValueCounts[V];
3901 if (0 == Count)
3902 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3903 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3904
3905 // Is this value dominant? In case of a tie, prefer the highest element as
3906 // it's cheaper to insert near the beginning of a vector than it is at the
3907 // end.
3908 if (++Count >= MostCommonCount) {
3909 DominantValue = V;
3910 MostCommonCount = Count;
3911 }
3912 }
3913
3914 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3915 unsigned NumDefElts = NumElts - NumUndefElts;
3916 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3917
3918 // Don't perform this optimization when optimizing for size, since
3919 // materializing elements and inserting them tends to cause code bloat.
3920 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3921 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3922 ((MostCommonCount > DominantValueCountThreshold) ||
3923 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3924 // Start by splatting the most common element.
3925 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3926
3927 DenseSet<SDValue> Processed{DominantValue};
3928
3929 // We can handle an insert into the last element (of a splat) via
3930 // v(f)slide1down. This is slightly better than the vslideup insert
3931 // lowering as it avoids the need for a vector group temporary. It
3932 // is also better than using vmerge.vx as it avoids the need to
3933 // materialize the mask in a vector register.
3934 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3935 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3936 LastOp != DominantValue) {
3937 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3938 auto OpCode =
3939 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3940 if (!VT.isFloatingPoint())
3941 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3942 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3943 LastOp, Mask, VL);
3944 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3945 Processed.insert(LastOp);
3946 }
3947
3948 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3949 for (const auto &OpIdx : enumerate(Op->ops())) {
3950 const SDValue &V = OpIdx.value();
3951 if (V.isUndef() || !Processed.insert(V).second)
3952 continue;
3953 if (ValueCounts[V] == 1) {
3954 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
3955 } else {
3956 // Blend in all instances of this value using a VSELECT, using a
3957 // mask where each bit signals whether that element is the one
3958 // we're after.
3959        SmallVector<SDValue> Ops;
3960        transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3961 return DAG.getConstant(V == V1, DL, XLenVT);
3962 });
3963 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3964 DAG.getBuildVector(SelMaskTy, DL, Ops),
3965 DAG.getSplatBuildVector(VT, DL, V), Vec);
3966 }
3967 }
3968
3969 return Vec;
3970 }
3971
3972 return SDValue();
3973}
3974
3975static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3976                                           const RISCVSubtarget &Subtarget) {
3977 MVT VT = Op.getSimpleValueType();
3978 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3979
3980 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3981
3982 SDLoc DL(Op);
3983 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3984
3985 MVT XLenVT = Subtarget.getXLenVT();
3986 unsigned NumElts = Op.getNumOperands();
3987
3988 if (VT.getVectorElementType() == MVT::i1) {
3989 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3990 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3991 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3992 }
3993
3994 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3995 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3996 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3997 }
3998
3999 // Lower constant mask BUILD_VECTORs via an integer vector type, in
4000 // scalar integer chunks whose bit-width depends on the number of mask
4001 // bits and XLEN.
4002 // First, determine the most appropriate scalar integer type to use. This
4003 // is at most XLenVT, but may be shrunk to a smaller vector element type
4004 // according to the size of the final vector - use i8 chunks rather than
4005 // XLenVT if we're producing a v8i1. This results in more consistent
4006 // codegen across RV32 and RV64.
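    // For example, the constant v8i1 mask <1,0,1,1,0,0,1,0> packs into the
    // single i8 value 0b01001101 (bit i holds element i), is built as a v1i8
    // vector, and is then bitcast back to v8i1.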
4007 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4008 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4009 // If we have to use more than one INSERT_VECTOR_ELT then this
4010 // optimization is likely to increase code size; avoid performing it in
4011 // such a case. We can use a load from a constant pool in this case.
4012 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4013 return SDValue();
4014 // Now we can create our integer vector type. Note that it may be larger
4015 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4016 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4017 MVT IntegerViaVecVT =
4018 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4019 IntegerViaVecElts);
4020
4021 uint64_t Bits = 0;
4022 unsigned BitPos = 0, IntegerEltIdx = 0;
4023 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4024
4025 for (unsigned I = 0; I < NumElts;) {
4026 SDValue V = Op.getOperand(I);
4027 bool BitValue = !V.isUndef() && V->getAsZExtVal();
4028 Bits |= ((uint64_t)BitValue << BitPos);
4029 ++BitPos;
4030 ++I;
4031
4032 // Once we accumulate enough bits to fill our scalar type or process the
4033 // last element, insert into our vector and clear our accumulated data.
4034 if (I % NumViaIntegerBits == 0 || I == NumElts) {
4035 if (NumViaIntegerBits <= 32)
4036 Bits = SignExtend64<32>(Bits);
4037 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4038 Elts[IntegerEltIdx] = Elt;
4039 Bits = 0;
4040 BitPos = 0;
4041 IntegerEltIdx++;
4042 }
4043 }
4044
4045 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4046
4047 if (NumElts < NumViaIntegerBits) {
4048 // If we're producing a smaller vector than our minimum legal integer
4049 // type, bitcast to the equivalent (known-legal) mask type, and extract
4050 // our final mask.
4051 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4052 Vec = DAG.getBitcast(MVT::v8i1, Vec);
4053 Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4054 } else {
4055 // Else we must have produced an integer type with the same size as the
4056 // mask type; bitcast for the final result.
4057 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4058 Vec = DAG.getBitcast(VT, Vec);
4059 }
4060
4061 return Vec;
4062 }
4063
4064 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4065 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4066 : RISCVISD::VMV_V_X_VL;
4067 if (!VT.isFloatingPoint())
4068 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4069 Splat =
4070 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4071 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4072 }
4073
4074 // Try and match index sequences, which we can lower to the vid instruction
4075 // with optional modifications. An all-undef vector is matched by
4076 // getSplatValue, above.
4077 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4078 return Res;
4079
4080 // For very small build_vectors, use a single scalar insert of a constant.
4081 // TODO: Base this on constant rematerialization cost, not size.
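  // For example, v4i8 <1, 2, 3, 4> amalgamates into the single i32 constant
  // 0x04030201, which is inserted into element 0 of an integer vector and
  // bitcast back to v4i8.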
4082 const unsigned EltBitSize = VT.getScalarSizeInBits();
4083  if (VT.getSizeInBits() <= 32 &&
4084      ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
4085 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4086 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4087 "Unexpected sequence type");
4088 // If we can use the original VL with the modified element type, this
4089 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4090 // be moved into InsertVSETVLI?
4091 unsigned ViaVecLen =
4092 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4093 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4094
4095 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4096 uint64_t SplatValue = 0;
4097 // Construct the amalgamated value at this larger vector type.
4098 for (const auto &OpIdx : enumerate(Op->op_values())) {
4099 const auto &SeqV = OpIdx.value();
4100 if (!SeqV.isUndef())
4101 SplatValue |=
4102 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4103 }
4104
4105 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4106    // achieve better constant materialization.
4107 // On RV32, we need to sign-extend to use getSignedConstant.
4108 if (ViaIntVT == MVT::i32)
4109 SplatValue = SignExtend64<32>(SplatValue);
4110
4111 SDValue Vec = DAG.getInsertVectorElt(
4112 DL, DAG.getUNDEF(ViaVecVT),
4113 DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4114 if (ViaVecLen != 1)
4115 Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4116 return DAG.getBitcast(VT, Vec);
4117 }
4118
4119
4120 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4121 // when re-interpreted as a vector with a larger element type. For example,
4122 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4123 // could be instead splat as
4124 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4125 // TODO: This optimization could also work on non-constant splats, but it
4126 // would require bit-manipulation instructions to construct the splat value.
4127 SmallVector<SDValue> Sequence;
4128 const auto *BV = cast<BuildVectorSDNode>(Op);
4129  if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4130      ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
4131 BV->getRepeatedSequence(Sequence) &&
4132 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4133 unsigned SeqLen = Sequence.size();
4134 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4135 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4136 ViaIntVT == MVT::i64) &&
4137 "Unexpected sequence type");
4138
4139 // If we can use the original VL with the modified element type, this
4140 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4141 // be moved into InsertVSETVLI?
4142 const unsigned RequiredVL = NumElts / SeqLen;
4143 const unsigned ViaVecLen =
4144 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4145 NumElts : RequiredVL;
4146 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4147
4148 unsigned EltIdx = 0;
4149 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4150 uint64_t SplatValue = 0;
4151 // Construct the amalgamated value which can be splatted as this larger
4152 // vector type.
4153 for (const auto &SeqV : Sequence) {
4154 if (!SeqV.isUndef())
4155 SplatValue |=
4156 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4157 EltIdx++;
4158 }
4159
4160 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4161    // achieve better constant materialization.
4162 // On RV32, we need to sign-extend to use getSignedConstant.
4163 if (ViaIntVT == MVT::i32)
4164 SplatValue = SignExtend64<32>(SplatValue);
4165
4166 // Since we can't introduce illegal i64 types at this stage, we can only
4167 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4168 // way we can use RVV instructions to splat.
4169 assert((ViaIntVT.bitsLE(XLenVT) ||
4170 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4171 "Unexpected bitcast sequence");
4172 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4173 SDValue ViaVL =
4174 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4175 MVT ViaContainerVT =
4176 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4177 SDValue Splat =
4178 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4179 DAG.getUNDEF(ViaContainerVT),
4180 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4181 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4182    if (ViaVecLen != RequiredVL)
4183      Splat = DAG.getExtractSubvector(
4184 DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4185 return DAG.getBitcast(VT, Splat);
4186 }
4187 }
4188
4189 // If the number of signbits allows, see if we can lower as a <N x i8>.
4190 // Our main goal here is to reduce LMUL (and thus work) required to
4191 // build the constant, but we will also narrow if the resulting
4192 // narrow vector is known to materialize cheaply.
4193 // TODO: We really should be costing the smaller vector. There are
4194 // profitable cases this misses.
4195 if (EltBitSize > 8 && VT.isInteger() &&
4196 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4197 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4198 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4199 DL, Op->ops());
4200 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4201 Source, DAG, Subtarget);
4202 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4203 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4204 }
4205
4206 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4207 return Res;
4208
4209 // For constant vectors, use generic constant pool lowering. Otherwise,
4210 // we'd have to materialize constants in GPRs just to move them into the
4211 // vector.
4212 return SDValue();
4213}
4214
4215static unsigned getPACKOpcode(unsigned DestBW,
4216 const RISCVSubtarget &Subtarget) {
4217 switch (DestBW) {
4218 default:
4219 llvm_unreachable("Unsupported pack size");
4220 case 16:
4221 return RISCV::PACKH;
4222 case 32:
4223 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4224 case 64:
4225 assert(Subtarget.is64Bit());
4226 return RISCV::PACK;
4227 }
4228}
4229
4230/// Double the element size of the build vector to reduce the number
4231/// of vslide1down in the build vector chain. In the worst case, this
4232/// trades three scalar operations for 1 vector operation. Scalar
4233/// operations are generally lower latency, and for out-of-order cores
4234/// we also benefit from additional parallelism.
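/// For example, a v8i16 build_vector can be re-expressed as a v4i32
/// build_vector whose elements each pack two adjacent i16 scalars (with
/// pack/packw when Zbkb is available, or mask+shift+or otherwise), halving
/// the length of the vslide1down chain.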
4235static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4236                                          const RISCVSubtarget &Subtarget) {
4237 SDLoc DL(Op);
4238 MVT VT = Op.getSimpleValueType();
4239 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4240 MVT ElemVT = VT.getVectorElementType();
4241 if (!ElemVT.isInteger())
4242 return SDValue();
4243
4244 // TODO: Relax these architectural restrictions, possibly with costing
4245 // of the actual instructions required.
4246 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4247 return SDValue();
4248
4249 unsigned NumElts = VT.getVectorNumElements();
4250 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4251 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4252 NumElts % 2 != 0)
4253 return SDValue();
4254
4255 // Produce [B,A] packed into a type twice as wide. Note that all
4256 // scalars are XLenVT, possibly masked (see below).
4257 MVT XLenVT = Subtarget.getXLenVT();
4258 SDValue Mask = DAG.getConstant(
4259 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4260 auto pack = [&](SDValue A, SDValue B) {
4261 // Bias the scheduling of the inserted operations to near the
4262 // definition of the element - this tends to reduce register
4263 // pressure overall.
4264 SDLoc ElemDL(B);
4265 if (Subtarget.hasStdExtZbkb())
4266 // Note that we're relying on the high bits of the result being
4267 // don't care. For PACKW, the result is *sign* extended.
4268 return SDValue(
4269 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4270 ElemDL, XLenVT, A, B),
4271 0);
4272
4273 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4274 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4275 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4276 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4277 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4278                       SDNodeFlags::Disjoint);
4279  };
4280
4281 SmallVector<SDValue> NewOperands;
4282 NewOperands.reserve(NumElts / 2);
4283 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4284 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4285 assert(NumElts == NewOperands.size() * 2);
4286 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4287 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4288 return DAG.getNode(ISD::BITCAST, DL, VT,
4289 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4290}
4291
4292static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4293                                 const RISCVSubtarget &Subtarget) {
4294 MVT VT = Op.getSimpleValueType();
4295 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4296
4297 MVT EltVT = VT.getVectorElementType();
4298 MVT XLenVT = Subtarget.getXLenVT();
4299
4300 SDLoc DL(Op);
4301
4302 // Proper support for f16 requires Zvfh. bf16 always requires special
4303 // handling. We need to cast the scalar to integer and create an integer
4304 // build_vector.
4305 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4306 MVT IVT = VT.changeVectorElementType(MVT::i16);
4307    SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4308    for (const auto &[I, U] : enumerate(Op->ops())) {
4309 SDValue Elem = U.get();
4310 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4311 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4312 // Called by LegalizeDAG, we need to use XLenVT operations since we
4313 // can't create illegal types.
4314 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4315 // Manually constant fold so the integer build_vector can be lowered
4316 // better. Waiting for DAGCombine will be too late.
4317 APInt V =
4318 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4319 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4320 } else {
4321 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4322 }
4323 } else {
4324 // Called by scalar type legalizer, we can use i16.
4325 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4326 }
4327 }
4328 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4329 return DAG.getBitcast(VT, Res);
4330 }
4331
4332  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4333      ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4334 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4335
4336 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4337
4338 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4339
4340 if (VT.getVectorElementType() == MVT::i1) {
4341 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4342 // vector type, we have a legal equivalently-sized i8 type, so we can use
4343 // that.
4344 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4345 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4346
4347 SDValue WideVec;
4348 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4349 // For a splat, perform a scalar truncate before creating the wider
4350 // vector.
4351 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4352 DAG.getConstant(1, DL, Splat.getValueType()));
4353 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4354 } else {
4355 SmallVector<SDValue, 8> Ops(Op->op_values());
4356 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4357 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4358 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4359 }
4360
4361 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4362 }
4363
4364 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4365 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4366 return Gather;
4367
4368 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4369 // pressure at high LMUL.
4370 if (all_of(Op->ops().drop_front(),
4371 [](const SDUse &U) { return U.get().isUndef(); })) {
4372 unsigned Opc =
4373 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4374 if (!VT.isFloatingPoint())
4375 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4376 Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4377 Splat, VL);
4378 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4379 }
4380
4381 unsigned Opc =
4382 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4383 if (!VT.isFloatingPoint())
4384 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4385 Splat =
4386 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4387 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4388 }
4389
4390 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4391 return Res;
4392
4393 // If we're compiling for an exact VLEN value, we can split our work per
4394 // register in the register group.
4395 if (const auto VLen = Subtarget.getRealVLen();
4396 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4397 MVT ElemVT = VT.getVectorElementType();
4398 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4399 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4400 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4401 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4402 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4403
4404 // The following semantically builds up a fixed length concat_vector
4405 // of the component build_vectors. We eagerly lower to scalable and
4406 // insert_subvector here to avoid DAG combining it back to a large
4407 // build_vector.
4408 SmallVector<SDValue> BuildVectorOps(Op->ops());
4409 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4410 SDValue Vec = DAG.getUNDEF(ContainerVT);
4411 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4412 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4413 SDValue SubBV =
4414 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4415 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4416 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4417 Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4418 }
4419 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4420 }
4421
4422 // If we're about to resort to vslide1down (or stack usage), pack our
4423 // elements into the widest scalar type we can. This will force a VL/VTYPE
4424 // toggle, but reduces the critical path, the number of vslide1down ops
4425 // required, and possibly enables scalar folds of the values.
4426 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4427 return Res;
4428
4429 // For m1 vectors, if we have non-undef values in both halves of our vector,
4430 // split the vector into low and high halves, build them separately, then
4431 // use a vselect to combine them. For long vectors, this cuts the critical
4432 // path of the vslide1down sequence in half, and gives us an opportunity
4433 // to special case each half independently. Note that we don't change the
4434 // length of the sub-vectors here, so if both fallback to the generic
4435 // vslide1down path, we should be able to fold the vselect into the final
4436 // vslidedown (for the undef tail) for the first half w/ masking.
4437 unsigned NumElts = VT.getVectorNumElements();
4438 unsigned NumUndefElts =
4439 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4440 unsigned NumDefElts = NumElts - NumUndefElts;
4441 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4442 ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4443 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4444 SmallVector<SDValue> MaskVals;
4445 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4446 SubVecAOps.reserve(NumElts);
4447 SubVecBOps.reserve(NumElts);
4448 for (const auto &[Idx, U] : enumerate(Op->ops())) {
4449 SDValue Elem = U.get();
4450 if (Idx < NumElts / 2) {
4451 SubVecAOps.push_back(Elem);
4452 SubVecBOps.push_back(UndefElem);
4453 } else {
4454 SubVecAOps.push_back(UndefElem);
4455 SubVecBOps.push_back(Elem);
4456 }
4457 bool SelectMaskVal = (Idx < NumElts / 2);
4458 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4459 }
4460 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4461 MaskVals.size() == NumElts);
4462
4463 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4464 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4465 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4466 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4467 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4468 }
4469
4470 // Cap the cost at a value linear to the number of elements in the vector.
4471 // The default lowering is to use the stack. The vector store + scalar loads
4472 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4473 // being (at least) linear in LMUL. As a result, using the vslidedown
4474  // lowering for every element ends up being VL*LMUL.
4475 // TODO: Should we be directly costing the stack alternative? Doing so might
4476 // give us a more accurate upper bound.
4477 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4478
4479 // TODO: unify with TTI getSlideCost.
4480 InstructionCost PerSlideCost = 1;
4481 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4482 default: break;
4483 case RISCVVType::LMUL_2:
4484 PerSlideCost = 2;
4485 break;
4486 case RISCVVType::LMUL_4:
4487 PerSlideCost = 4;
4488 break;
4489 case RISCVVType::LMUL_8:
4490 PerSlideCost = 8;
4491 break;
4492 }
4493
4494 // TODO: Should we be using the build instseq then cost + evaluate scheme
4495 // we use for integer constants here?
4496 unsigned UndefCount = 0;
4497 for (const SDValue &V : Op->ops()) {
4498 if (V.isUndef()) {
4499 UndefCount++;
4500 continue;
4501 }
4502 if (UndefCount) {
4503 LinearBudget -= PerSlideCost;
4504 UndefCount = 0;
4505 }
4506 LinearBudget -= PerSlideCost;
4507 }
4508 if (UndefCount) {
4509 LinearBudget -= PerSlideCost;
4510 }
4511
4512 if (LinearBudget < 0)
4513 return SDValue();
4514
4515 assert((!VT.isFloatingPoint() ||
4516 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4517 "Illegal type which will result in reserved encoding");
4518
4519 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4520
4521 SDValue Vec;
4522 UndefCount = 0;
4523 for (SDValue V : Op->ops()) {
4524 if (V.isUndef()) {
4525 UndefCount++;
4526 continue;
4527 }
4528
4529 // Start our sequence with a TA splat in the hopes that hardware is able to
4530 // recognize there's no dependency on the prior value of our temporary
4531 // register.
4532 if (!Vec) {
4533 Vec = DAG.getSplatVector(VT, DL, V);
4534 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4535 UndefCount = 0;
4536 continue;
4537 }
4538
4539 if (UndefCount) {
4540 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4541 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4542 Vec, Offset, Mask, VL, Policy);
4543 UndefCount = 0;
4544 }
4545 auto OpCode =
4546 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4547 if (!VT.isFloatingPoint())
4548 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4549 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4550 V, Mask, VL);
4551 }
4552 if (UndefCount) {
4553 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4554 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4555 Vec, Offset, Mask, VL, Policy);
4556 }
4557 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4558}
4559
4560static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4561                                   SDValue Lo, SDValue Hi, SDValue VL,
4562                                   SelectionDAG &DAG) {
4563 if (!Passthru)
4564 Passthru = DAG.getUNDEF(VT);
4565 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4566 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4567 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4568 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4569 // node in order to try and match RVV vector/scalar instructions.
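    // For example, splatting the i64 constant 5 on RV32 gives Lo = 5 and
    // Hi = 0; since (5 >> 31) == 0 == Hi, a single vmv.v.x of Lo suffices.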
4570 if ((LoC >> 31) == HiC)
4571 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4572
4573 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
4574    // VL. This can temporarily increase VL if VL is less than VLMAX.
4575 if (LoC == HiC) {
4576 SDValue NewVL;
4577 if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4578 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4579 else
4580 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4581 MVT InterVT =
4582 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4583 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4584 DAG.getUNDEF(InterVT), Lo, NewVL);
4585 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4586 }
4587 }
4588
4589 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4590 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4591 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4592 Hi.getConstantOperandVal(1) == 31)
4593 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4594
4595 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4596 // even if it might be sign extended.
4597 if (Hi.isUndef())
4598 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4599
4600 // Fall back to a stack store and stride x0 vector load.
4601 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4602 Hi, VL);
4603}
4604
4605// Called by type legalization to handle splat of i64 on RV32.
4606// FIXME: We can optimize this when the type has sign or zero bits in one
4607// of the halves.
4608static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4609 SDValue Scalar, SDValue VL,
4610 SelectionDAG &DAG) {
4611 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4612 SDValue Lo, Hi;
4613 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4614 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4615}
4616
4617// This function lowers a splat of a scalar operand Splat with the vector
4618// length VL. It ensures the final sequence is type legal, which is useful when
4619// lowering a splat after type legalization.
4620static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4621 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4622 const RISCVSubtarget &Subtarget) {
4623 bool HasPassthru = Passthru && !Passthru.isUndef();
4624 if (!HasPassthru && !Passthru)
4625 Passthru = DAG.getUNDEF(VT);
4626
4627 MVT EltVT = VT.getVectorElementType();
4628 MVT XLenVT = Subtarget.getXLenVT();
4629
4630 if (VT.isFloatingPoint()) {
4631 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4632 EltVT == MVT::bf16) {
4633 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4634 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4635 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4636 else
4637 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4638 MVT IVT = VT.changeVectorElementType(MVT::i16);
4639 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4640 SDValue Splat =
4641 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4642 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4643 }
4644 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4645 }
4646
4647 // Simplest case is that the operand needs to be promoted to XLenVT.
4648 if (Scalar.getValueType().bitsLE(XLenVT)) {
4649 // If the operand is a constant, sign extend to increase our chances
4650 // of being able to use a .vi instruction. ANY_EXTEND would become a
4651    // of being able to use a .vi instruction. ANY_EXTEND would become
4652 // FIXME: Should we ignore the upper bits in isel instead?
4653 unsigned ExtOpc =
4654 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4655 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4656 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4657 }
4658
4659 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4660 "Unexpected scalar for splat lowering!");
4661
4662 if (isOneConstant(VL) && isNullConstant(Scalar))
4663 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4664 DAG.getConstant(0, DL, XLenVT), VL);
4665
4666 // Otherwise use the more complicated splatting algorithm.
4667 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4668}
4669
4670// This function lowers an insert of a scalar operand Scalar into lane
4671// 0 of the vector regardless of the value of VL. The contents of the
4672// remaining lanes of the result vector are unspecified. VL is assumed
4673// to be non-zero.
4674static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4675                                 const SDLoc &DL, SelectionDAG &DAG,
4676 const RISCVSubtarget &Subtarget) {
4677 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4678
4679 const MVT XLenVT = Subtarget.getXLenVT();
4680 SDValue Passthru = DAG.getUNDEF(VT);
4681
4682 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4683 isNullConstant(Scalar.getOperand(1))) {
4684 SDValue ExtractedVal = Scalar.getOperand(0);
4685 // The element types must be the same.
4686 if (ExtractedVal.getValueType().getVectorElementType() ==
4687 VT.getVectorElementType()) {
4688 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4689 MVT ExtractedContainerVT = ExtractedVT;
4690 if (ExtractedContainerVT.isFixedLengthVector()) {
4691 ExtractedContainerVT = getContainerForFixedLengthVector(
4692 DAG, ExtractedContainerVT, Subtarget);
4693 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4694 ExtractedVal, DAG, Subtarget);
4695 }
4696 if (ExtractedContainerVT.bitsLE(VT))
4697 return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
4698 return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
4699 }
4700 }
4701
4702 if (VT.isFloatingPoint())
4703 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4704 VL);
4705
4706 // Avoid the tricky legalization cases by falling back to using the
4707 // splat code which already handles it gracefully.
4708 if (!Scalar.getValueType().bitsLE(XLenVT))
4709 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4710 DAG.getConstant(1, DL, XLenVT),
4711 VT, DL, DAG, Subtarget);
4712
4713 // If the operand is a constant, sign extend to increase our chances
4714  // of being able to use a .vi instruction. ANY_EXTEND would become
4715 // a zero extend and the simm5 check in isel would fail.
4716 // FIXME: Should we ignore the upper bits in isel instead?
4717 unsigned ExtOpc =
4718 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4719 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4720 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4721 VL);
4722}
4723
4724/// If concat_vector(V1,V2) could be folded away to some existing
4725/// vector source, return it. Note that the source may be larger
4726/// than the requested concat_vector (i.e. an extract_subvector
4727/// might be required.)
4728static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4729  EVT VT = V1.getValueType();
4730 assert(VT == V2.getValueType() && "argument types must match");
4731 // Both input must be extracts.
4732  if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4733      V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4734 return SDValue();
4735
4736 // Extracting from the same source.
4737 SDValue Src = V1.getOperand(0);
4738 if (Src != V2.getOperand(0) ||
4739 VT.isScalableVector() != Src.getValueType().isScalableVector())
4740 return SDValue();
4741
4742 // The extracts must extract the two halves of the source.
4743  if (V1.getConstantOperandVal(1) != 0 ||
4744      V2.getConstantOperandVal(1) != VT.getVectorMinNumElements())
4745 return SDValue();
4746
4747 return Src;
4748}
4749
4750// Can this shuffle be performed on exactly one (possibly larger) input?
4751static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
4752
4753 if (V2.isUndef())
4754 return V1;
4755
4756 unsigned NumElts = VT.getVectorNumElements();
4757 // Src needs to have twice the number of elements.
4758 // TODO: Update shuffle lowering to add the extract subvector
4759 if (SDValue Src = foldConcatVector(V1, V2);
4760 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4761 return Src;
4762
4763 return SDValue();
4764}
4765
4766/// Is this shuffle interleaving contiguous elements from one vector into the
4767/// even elements and contiguous elements from another vector into the odd
4768/// elements. \p EvenSrc will contain the element that should be in the first
4769/// even element. \p OddSrc will contain the element that should be in the first
4770/// odd element. These can be the first element in a source or the element half
4771/// way through the source.
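/// For example, for two v8i8 sources, mask <0, 8, 1, 9, 2, 10, 3, 11> yields
/// EvenSrc == 0 (low half of the first source) and OddSrc == 8 (low half of
/// the second source).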
4772static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4773 int &OddSrc, const RISCVSubtarget &Subtarget) {
4774 // We need to be able to widen elements to the next larger integer type or
4775 // use the zip2a instruction at e64.
4776 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
4777 !Subtarget.hasVendorXRivosVizip())
4778 return false;
4779
4780 int Size = Mask.size();
4781 int NumElts = VT.getVectorNumElements();
4782 assert(Size == (int)NumElts && "Unexpected mask size");
4783
4784 SmallVector<unsigned, 2> StartIndexes;
4785 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4786 return false;
4787
4788 EvenSrc = StartIndexes[0];
4789 OddSrc = StartIndexes[1];
4790
4791 // One source should be low half of first vector.
4792 if (EvenSrc != 0 && OddSrc != 0)
4793 return false;
4794
4795  // Subvectors will be extracted either at the start of the two input
4796  // vectors, or at the start and middle of the first vector if it's a unary
4797 // interleave.
4798 // In both cases, HalfNumElts will be extracted.
4799 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4800 // we'll create an illegal extract_subvector.
4801 // FIXME: We could support other values using a slidedown first.
4802 int HalfNumElts = NumElts / 2;
4803 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4804}
4805
4806/// Is this mask representing a masked combination of two slides?
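/// For example, with two v4 sources, mask <1, 2, 3, 7> is the first source
/// slid down by one element, combined under a mask with the last element of
/// the second source left in place.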
4807static bool isMaskedSlidePair(ArrayRef<int> Mask,
4808                              std::array<std::pair<int, int>, 2> &SrcInfo) {
4809 if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
4810 return false;
4811
4812 // Avoid matching vselect idioms
4813 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
4814 return false;
4815 // Prefer vslideup as the second instruction, and identity
4816 // only as the initial instruction.
4817 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
4818 SrcInfo[1].second == 0)
4819 std::swap(SrcInfo[0], SrcInfo[1]);
4820 assert(SrcInfo[0].first != -1 && "Must find one slide");
4821 return true;
4822}
4823
4824// Exactly matches the semantics of a previously existing custom matcher
4825// to allow migration to new matcher without changing output.
4826static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
4827 unsigned NumElts) {
4828 if (SrcInfo[1].first == -1)
4829 return true;
4830 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
4831 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
4832}
4833
4834static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
4835 ArrayRef<int> Mask, unsigned Factor,
4836 bool RequiredPolarity) {
4837 int NumElts = Mask.size();
4838 for (const auto &[Idx, M] : enumerate(Mask)) {
4839 if (M < 0)
4840 continue;
4841 int Src = M >= NumElts;
4842 int Diff = (int)Idx - (M % NumElts);
4843 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
4844 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
4845 "Must match exactly one of the two slides");
4846 if (RequiredPolarity != (C == (Idx / Factor) % 2))
4847 return false;
4848 }
4849 return true;
4850}
4851
4852/// Given a shuffle which can be represented as a pair of two slides,
4853/// see if it is a zipeven idiom. Zipeven is:
4854/// vs2: a0 a1 a2 a3
4855/// vs1: b0 b1 b2 b3
4856/// vd: a0 b0 a2 b2
4857static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
4858 ArrayRef<int> Mask, unsigned &Factor) {
4859 Factor = SrcInfo[1].second;
4860 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4861 Mask.size() % Factor == 0 &&
4862 isAlternating(SrcInfo, Mask, Factor, true);
4863}
4864
4865/// Given a shuffle which can be represented as a pair of two slides,
4866/// see if it is a zipodd idiom. Zipodd is:
4867/// vs2: a0 a1 a2 a3
4868/// vs1: b0 b1 b2 b3
4869/// vd: a1 b1 a3 b3
4870/// Note that the operand order is swapped due to the way we canonicalize
4871/// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
4872static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
4873 ArrayRef<int> Mask, unsigned &Factor) {
4874 Factor = -SrcInfo[1].second;
4875 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4876 Mask.size() % Factor == 0 &&
4877 isAlternating(SrcInfo, Mask, Factor, false);
4878}
4879
4880// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4881// 2, 4, 8 and the integer type Factor-times larger than VT's
4882// element type must be a legal element type.
4883// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4884// -> [p, q, r, s] (Factor=2, Index=1)
4885static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4886                                            SDValue Src, unsigned Factor,
4887 unsigned Index, SelectionDAG &DAG) {
4888 unsigned EltBits = VT.getScalarSizeInBits();
4889 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4890 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4891 SrcEC.divideCoefficientBy(Factor));
4892 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4893 SrcEC.divideCoefficientBy(Factor));
4894 Src = DAG.getBitcast(WideSrcVT, Src);
4895
4896 unsigned Shift = Index * EltBits;
4897 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4898 DAG.getConstant(Shift, DL, WideSrcVT));
4899 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4900  MVT CastVT = ResVT.changeVectorElementType(VT.getVectorElementType());
4901  Res = DAG.getBitcast(CastVT, Res);
4902 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
4903}
4904
4905/// Match a single source shuffle which is an identity except that some
4906/// particular element is repeated. This can be lowered as a masked
4907/// vrgather.vi/vx. Note that the two source form of this is handled
4908/// by the recursive splitting logic and doesn't need special handling.
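/// For example, mask <0, 2, 2, 3> is an identity except that element 2 is
/// repeated into lane 1; it lowers to a splat of element 2 vselect'ed into
/// the original vector at each lane whose mask index is 2.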
4909static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN,
4910                                               const RISCVSubtarget &Subtarget,
4911 SelectionDAG &DAG) {
4912
4913 SDLoc DL(SVN);
4914 MVT VT = SVN->getSimpleValueType(0);
4915 SDValue V1 = SVN->getOperand(0);
4916 assert(SVN->getOperand(1).isUndef());
4917 ArrayRef<int> Mask = SVN->getMask();
4918 const unsigned NumElts = VT.getVectorNumElements();
4919 MVT XLenVT = Subtarget.getXLenVT();
4920
4921 std::optional<int> SplatIdx;
4922 for (auto [I, M] : enumerate(Mask)) {
4923 if (M == -1 || I == (unsigned)M)
4924 continue;
4925 if (SplatIdx && *SplatIdx != M)
4926 return SDValue();
4927 SplatIdx = M;
4928 }
4929
4930 if (!SplatIdx)
4931 return SDValue();
4932
4933 SmallVector<SDValue> MaskVals;
4934 for (int MaskIndex : Mask) {
4935 bool SelectMaskVal = MaskIndex == *SplatIdx;
4936 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4937 }
4938 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
4939 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4940 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4941 SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
4942 SmallVector<int>(NumElts, *SplatIdx));
4943 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
4944}
4945
4946// Lower the following shuffle to vslidedown.
4947// a)
4948// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4949// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4950// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4951// b)
4952// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4953// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4954// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4955// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4956// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4957// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4958static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4959                                               SDValue V1, SDValue V2,
4960 ArrayRef<int> Mask,
4961 const RISCVSubtarget &Subtarget,
4962 SelectionDAG &DAG) {
4963 auto findNonEXTRACT_SUBVECTORParent =
4964 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4965 uint64_t Offset = 0;
4966 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4967 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4968 // a scalable vector. But we don't want to match the case.
4969 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4970 Offset += Parent.getConstantOperandVal(1);
4971 Parent = Parent.getOperand(0);
4972 }
4973 return std::make_pair(Parent, Offset);
4974 };
4975
4976 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4977 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4978
4979 // Extracting from the same source.
4980 SDValue Src = V1Src;
4981 if (Src != V2Src)
4982 return SDValue();
4983
4984 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4985 SmallVector<int, 16> NewMask(Mask);
4986 for (size_t i = 0; i != NewMask.size(); ++i) {
4987 if (NewMask[i] == -1)
4988 continue;
4989
4990 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4991 NewMask[i] = NewMask[i] + V1IndexOffset;
4992 } else {
4993 // Minus NewMask.size() is needed. Otherwise, the b case would be
4994 // <5,6,7,12> instead of <5,6,7,8>.
4995 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4996 }
4997 }
4998
4999 // First index must be known and non-zero. It will be used as the slidedown
5000 // amount.
5001 if (NewMask[0] <= 0)
5002 return SDValue();
5003
5004 // NewMask is also continuous.
5005 for (unsigned i = 1; i != NewMask.size(); ++i)
5006 if (NewMask[i - 1] + 1 != NewMask[i])
5007 return SDValue();
5008
5009 MVT XLenVT = Subtarget.getXLenVT();
5010 MVT SrcVT = Src.getSimpleValueType();
5011 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
5012 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
5013 SDValue Slidedown =
5014 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5015 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
5016 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
5017 return DAG.getExtractSubvector(
5018 DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
5019}
5020
5021// Because vslideup leaves the destination elements at the start intact, we can
5022// use it to perform shuffles that insert subvectors:
5023//
5024// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5025// ->
5026// vsetvli zero, 8, e8, mf2, ta, ma
5027// vslideup.vi v8, v9, 4
5028//
5029// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5030// ->
5031// vsetvli zero, 5, e8, mf2, tu, ma
5032// vslideup.v1 v8, v9, 2
5033static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
5034                                             SDValue V1, SDValue V2,
5035 ArrayRef<int> Mask,
5036 const RISCVSubtarget &Subtarget,
5037 SelectionDAG &DAG) {
5038 unsigned NumElts = VT.getVectorNumElements();
5039 int NumSubElts, Index;
5040 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
5041 Index))
5042 return SDValue();
5043
5044 bool OpsSwapped = Mask[Index] < (int)NumElts;
5045 SDValue InPlace = OpsSwapped ? V2 : V1;
5046 SDValue ToInsert = OpsSwapped ? V1 : V2;
5047
5048 MVT XLenVT = Subtarget.getXLenVT();
5049 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5050 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
5051 // We slide up by the index that the subvector is being inserted at, and set
5052 // VL to the index + the number of elements being inserted.
5053  unsigned Policy =
5054      RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVVType::MASK_AGNOSTIC;
5055  // If we're adding a suffix to the in place vector, i.e. inserting right
5056 // up to the very end of it, then we don't actually care about the tail.
5057 if (NumSubElts + Index >= (int)NumElts)
5058 Policy |= RISCVVType::TAIL_AGNOSTIC;
5059
5060 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
5061 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
5062 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
5063
5064 SDValue Res;
5065 // If we're inserting into the lowest elements, use a tail undisturbed
5066 // vmv.v.v.
5067 if (Index == 0)
5068 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
5069 VL);
5070 else
5071 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
5072 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
5073 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5074}
5075
5076/// Match v(f)slide1up/down idioms. These operations involve sliding
5077/// N-1 elements to make room for an inserted scalar at one end.
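/// For example, with v4 operands, shuffling vector %v against a splat of
/// scalar %x with mask <4, 0, 1, 2> produces [x, v0, v1, v2], i.e. a
/// vslide1up.vx of %v by %x.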
5078static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
5079                                            SDValue V1, SDValue V2,
5080 ArrayRef<int> Mask,
5081 const RISCVSubtarget &Subtarget,
5082 SelectionDAG &DAG) {
5083 bool OpsSwapped = false;
5084 if (!isa<BuildVectorSDNode>(V1)) {
5085 if (!isa<BuildVectorSDNode>(V2))
5086 return SDValue();
5087 std::swap(V1, V2);
5088 OpsSwapped = true;
5089 }
5090 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
5091 if (!Splat)
5092 return SDValue();
5093
5094 // Return true if the mask could describe a slide of Mask.size() - 1
5095 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
5096 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5097 const unsigned S = (Offset > 0) ? 0 : -Offset;
5098 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5099 for (unsigned i = S; i != E; ++i)
5100 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5101 return false;
5102 return true;
5103 };
5104
5105 const unsigned NumElts = VT.getVectorNumElements();
5106 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5107 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5108 return SDValue();
5109
5110 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
5111  // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
5112 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5113 return SDValue();
5114
5115 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5116 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5117
5118 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5119 // vslide1{down,up}.vx instead.
5120 if (VT.getVectorElementType() == MVT::bf16 ||
5121 (VT.getVectorElementType() == MVT::f16 &&
5122 !Subtarget.hasVInstructionsF16())) {
5123 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5124 Splat =
5125 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
5126 V2 = DAG.getBitcast(
5127 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
5128 SDValue Vec = DAG.getNode(
5129 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5130 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
5131 Vec = DAG.getBitcast(ContainerVT, Vec);
5132 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5133 }
5134
5135 auto OpCode = IsVSlidedown ?
5136 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5137 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5138 if (!VT.isFloatingPoint())
5139 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
5140 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
5141 DAG.getUNDEF(ContainerVT),
5142 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
5143 Splat, TrueMask, VL);
5144 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5145}
5146
5147/// Match a mask which "spreads" the leading elements of a vector evenly
5148/// across the result. Factor is the spread amount, and Index is the
5149/// offset applied. (on success, Index < Factor) This is the inverse
5150/// of a deinterleave with the same Factor and Index. This is analogous
5151/// to an interleave, except that all but one lane is undef.
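/// For example, with Factor == 2, mask <-1, 0, -1, 1, -1, 2, -1, 3> spreads
/// elements 0..3 across the odd result lanes, giving Index == 1.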
5152static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor,
5153                         unsigned &Index) {
5154 SmallVector<bool> LaneIsUndef(Factor, true);
5155 for (unsigned i = 0; i < Mask.size(); i++)
5156 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5157
5158 bool Found = false;
5159 for (unsigned i = 0; i < Factor; i++) {
5160 if (LaneIsUndef[i])
5161 continue;
5162 if (Found)
5163 return false;
5164 Index = i;
5165 Found = true;
5166 }
5167 if (!Found)
5168 return false;
5169
5170 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5171 unsigned j = i * Factor + Index;
5172 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5173 return false;
5174 }
5175 return true;
5176}
5177
5178static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5179 const SDLoc &DL, SelectionDAG &DAG,
5180 const RISCVSubtarget &Subtarget) {
5181 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5182 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5183 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5185
5186 MVT VT = Op0.getSimpleValueType();
5187  MVT IntVT = VT.changeVectorElementTypeToInteger();
5188  Op0 = DAG.getBitcast(IntVT, Op0);
5189 Op1 = DAG.getBitcast(IntVT, Op1);
5190
5191 MVT ContainerVT = IntVT;
5192 if (VT.isFixedLengthVector()) {
5193 ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
5194 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
5195 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5196 }
5197
5198 MVT InnerVT = ContainerVT;
5199 auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
5200 if (Op1.isUndef() &&
5201 ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
5202 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5203 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5204 VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
5205 Subtarget.getXLenVT());
5206 Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
5207 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5208 Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
5209 Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
5210 }
5211
5212 SDValue Passthru = DAG.getUNDEF(InnerVT);
5213 SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
5214 if (InnerVT.bitsLT(ContainerVT))
5215 Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
5216 if (IntVT.isFixedLengthVector())
5217 Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
5218 Res = DAG.getBitcast(VT, Res);
5219 return Res;
5220}
5221
5222// Given a vector a, b, c, d return a vector Factor times longer
5223// with Factor-1 undef's between elements. Ex:
5224// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5225// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
5226static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5227 const SDLoc &DL, SelectionDAG &DAG) {
5228
5229 MVT VT = V.getSimpleValueType();
5230 unsigned EltBits = VT.getScalarSizeInBits();
5231 ElementCount EC = VT.getVectorElementCount();
5232 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
5233
5234 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
5235
5236 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
5237 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5238 // allow the SHL to fold away if Index is 0.
5239 if (Index != 0)
5240 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
5241 DAG.getConstant(EltBits * Index, DL, WideVT));
5242 // Make sure to use original element type
5243 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
5244 EC.multiplyCoefficientBy(Factor));
5245 return DAG.getBitcast(ResultVT, Result);
5246}
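// For illustration, with i8 elements, Factor = 2, Index = 1, and V = <a, b>:
// the zero-extend produces the i16 vector <a, b>, the shift by 8 gives
// <a << 8, b << 8>, and the final bitcast (little-endian element order) yields
// the i8 vector <0, a, 0, b>, i.e. the spread with zeros in the undef lanes.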
5247
5248// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5249// to create an interleaved vector of <[vscale x] n*2 x ty>.
5250// This requires that the size of ty is less than the subtarget's maximum ELEN.
5251 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
5252 const SDLoc &DL, SelectionDAG &DAG,
5253 const RISCVSubtarget &Subtarget) {
5254
5255 // FIXME: Not only does this optimize the code, it fixes some correctness
5256 // issues because MIR does not have freeze.
5257 if (EvenV.isUndef())
5258 return getWideningSpread(OddV, 2, 1, DL, DAG);
5259 if (OddV.isUndef())
5260 return getWideningSpread(EvenV, 2, 0, DL, DAG);
5261
5262 MVT VecVT = EvenV.getSimpleValueType();
5263 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5264 // Convert fixed vectors to scalable if needed
5265 if (VecContainerVT.isFixedLengthVector()) {
5266 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
5267 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
5268 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
5269 }
5270
5271 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5272
5273 // We're working with a vector of the same size as the resulting
5274 // interleaved vector, but with half the number of elements and
5275 // twice the SEW (Hence the restriction on not using the maximum
5276 // ELEN)
5277 MVT WideVT =
5278 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
5279 VecVT.getVectorElementCount());
5280 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5281 if (WideContainerVT.isFixedLengthVector())
5282 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
5283
5284 // Bitcast the input vectors to integers in case they are FP
5285 VecContainerVT = VecContainerVT.changeTypeToInteger();
5286 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
5287 OddV = DAG.getBitcast(VecContainerVT, OddV);
5288
5289 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
5290 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
5291
5292 SDValue Interleaved;
5293 if (Subtarget.hasStdExtZvbb()) {
5294 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5295 SDValue OffsetVec =
5296 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5297 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5298 OffsetVec, Passthru, Mask, VL);
5299 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5300 Interleaved, EvenV, Passthru, Mask, VL);
5301 } else {
5302 // FIXME: We should freeze the odd vector here. We already handled the case
5303 // of provably undef/poison above.
5304
5305 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5306 // vwaddu.vv
5307 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5308 OddV, Passthru, Mask, VL);
5309
5310 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. the all-ones value.
5311 SDValue AllOnesVec = DAG.getSplatVector(
5312 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5313 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5314 OddV, AllOnesVec, Passthru, Mask, VL);
5315
5316 // Add the two together so we get
5317 // (OddV * 0xff...ff) + (OddV + EvenV)
5318 // = (OddV * 0x100...00) + EvenV
5319 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5320 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
5321 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5322 Interleaved, OddsMul, Passthru, Mask, VL);
5323 }
5324
5325 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
5326 MVT ResultContainerVT = MVT::getVectorVT(
5327 VecVT.getVectorElementType(), // Make sure to use original type
5328 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5329 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5330
5331 // Convert back to a fixed vector if needed
5332 MVT ResultVT =
5333 MVT::getVectorVT(VecVT.getVectorElementType(),
5334 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5335 if (ResultVT.isFixedLengthVector())
5336 Interleaved =
5337 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5338
5339 return Interleaved;
5340}
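// For illustration of the non-Zvbb path with i8 elements, EvenV = <1, 2> and
// OddV = <3, 4>: vwaddu gives the i16 lanes <4, 6>, the widening multiply by
// 0xFF gives <765, 1020>, and the sum is <0x0301, 0x0402>, which bitcasts
// (little-endian) to the interleaved i8 vector <1, 3, 2, 4>.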
5341
5342// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5343// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5344 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5345 SelectionDAG &DAG,
5346 const RISCVSubtarget &Subtarget) {
5347 SDLoc DL(SVN);
5348 MVT VT = SVN->getSimpleValueType(0);
5349 SDValue V = SVN->getOperand(0);
5350 unsigned NumElts = VT.getVectorNumElements();
5351
5352 assert(VT.getVectorElementType() == MVT::i1);
5353
5354 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5355 SVN->getMask().size()) ||
5356 !SVN->getOperand(1).isUndef())
5357 return SDValue();
5358
5359 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5360 EVT ViaVT = EVT::getVectorVT(
5361 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5362 EVT ViaBitVT =
5363 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5364
5365 // If we don't have zvbb or the larger element type is wider than ELEN, the
5366 // operation will be illegal.
5367 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5368 ViaVT) ||
5369 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5370 return SDValue();
5371
5372 // If the bit vector doesn't fit exactly into the larger element type, we need
5373 // to insert it into the larger vector and then shift the reversed bits back
5374 // down afterwards to get rid of the gap introduced.
5375 if (ViaEltSize > NumElts)
5376 V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);
5377
5378 SDValue Res =
5379 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5380
5381 // Shift the reversed bits back down if the vector didn't exactly fit into the
5382 // larger element type.
5383 if (ViaEltSize > NumElts)
5384 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5385 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5386
5387 Res = DAG.getBitcast(ViaBitVT, Res);
5388
5389 if (ViaEltSize > NumElts)
5390 Res = DAG.getExtractSubvector(DL, VT, Res, 0);
5391 return Res;
5392}
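// For illustration, reversing a v4i1 mask: ViaVT is v1i8, so the four bits are
// inserted into the low half of an i8, BITREVERSE leaves them (reversed) in
// the high half, and the SRL by 8 - 4 = 4 moves them back to the low half
// before extracting the v4i1 result.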
5393
5394 static bool isLegalBitRotate(ArrayRef<int> Mask, EVT VT,
5395 const RISCVSubtarget &Subtarget,
5396 MVT &RotateVT, unsigned &RotateAmt) {
5397 unsigned NumElts = VT.getVectorNumElements();
5398 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5399 unsigned NumSubElts;
5400 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
5401 NumElts, NumSubElts, RotateAmt))
5402 return false;
5403 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5404 NumElts / NumSubElts);
5405
5406 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5407 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5408}
5409
5410// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5411// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5412// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5413 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5414 SelectionDAG &DAG,
5415 const RISCVSubtarget &Subtarget) {
5416 SDLoc DL(SVN);
5417
5418 EVT VT = SVN->getValueType(0);
5419 unsigned RotateAmt;
5420 MVT RotateVT;
5421 if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5422 return SDValue();
5423
5424 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5425
5426 SDValue Rotate;
5427 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5428 // so canonicalize to vrev8.
5429 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5430 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5431 else
5432 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5433 DAG.getConstant(RotateAmt, DL, RotateVT));
5434
5435 return DAG.getBitcast(VT, Rotate);
5436}
5437
5438// If compiling with an exactly known VLEN, see if we can split a
5439// shuffle on m2 or larger into a small number of m1 sized shuffles
5440 // which write each destination register exactly once.
5441 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5442 SelectionDAG &DAG,
5443 const RISCVSubtarget &Subtarget) {
5444 SDLoc DL(SVN);
5445 MVT VT = SVN->getSimpleValueType(0);
5446 SDValue V1 = SVN->getOperand(0);
5447 SDValue V2 = SVN->getOperand(1);
5448 ArrayRef<int> Mask = SVN->getMask();
5449
5450 // If we don't know exact data layout, not much we can do. If this
5451 // is already m1 or smaller, no point in splitting further.
5452 const auto VLen = Subtarget.getRealVLen();
5453 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5454 return SDValue();
5455
5456 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5457 // expansion for.
5458 unsigned RotateAmt;
5459 MVT RotateVT;
5460 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5461 return SDValue();
5462
5463 MVT ElemVT = VT.getVectorElementType();
5464 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5465
5466 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5467 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5468 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5469 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5470 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5471 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5472 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5473 unsigned NumOfDestRegs = NumElts / NumOpElts;
5474 // The following semantically builds up a fixed length concat_vector
5475 // of the component shuffle_vectors. We eagerly lower to scalable here
5476 // to avoid DAG combining it back to a large shuffle_vector again.
5477 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5478 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5479 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5480 Operands;
5481 processShuffleMasks(
5482 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5483 [&]() { Operands.emplace_back(); },
5484 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5485 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
5486 SmallVector<int>(SrcSubMask));
5487 },
5488 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5489 if (NewReg)
5490 Operands.emplace_back();
5491 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
5492 });
5493 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5494 // Note: check that we do not emit too many shuffles here to prevent code
5495 // size explosion.
5496 // TODO: Investigate whether this can be improved by extra analysis of the
5497 // masks to check if the code is more profitable.
5498 unsigned NumShuffles = std::accumulate(
5499 Operands.begin(), Operands.end(), 0u,
5500 [&](unsigned N,
5501 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5502 if (Data.empty())
5503 return N;
5504 N += Data.size();
5505 for (const auto &P : Data) {
5506 unsigned Idx2 = std::get<1>(P);
5507 ArrayRef<int> Mask = std::get<2>(P);
5508 if (Idx2 != UINT_MAX)
5509 ++N;
5510 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5511 --N;
5512 }
5513 return N;
5514 });
5515 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5516 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5517 return SDValue();
5518 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5519 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
5520 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5521 return SubVec;
5522 };
5523 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5524 ArrayRef<int> Mask) {
5525 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5526 return SubVec;
5527 };
5528 SDValue Vec = DAG.getUNDEF(ContainerVT);
5529 for (auto [I, Data] : enumerate(Operands)) {
5530 if (Data.empty())
5531 continue;
5532 SmallDenseMap<unsigned, SDValue, 4> Values;
5533 for (unsigned I : seq<unsigned>(Data.size())) {
5534 const auto &[Idx1, Idx2, _] = Data[I];
5535 // If the shuffle contains a permutation of an odd number of elements,
5536 // Idx1 might be used already in the first iteration.
5537 //
5538 // Idx1 = shuffle Idx1, Idx2
5539 // Idx1 = shuffle Idx1, Idx3
5540 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
5541 if (!V)
5542 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5543 (Idx1 % NumOfSrcRegs) * NumOpElts);
5544 if (Idx2 != UINT_MAX) {
5545 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
5546 if (!V)
5547 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5548 (Idx2 % NumOfSrcRegs) * NumOpElts);
5549 }
5550 }
5551 SDValue V;
5552 for (const auto &[Idx1, Idx2, Mask] : Data) {
5553 SDValue V1 = Values.at(Idx1);
5554 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5555 V = PerformShuffle(V1, V2, Mask);
5556 Values[Idx1] = V;
5557 }
5558
5559 unsigned InsertIdx = I * NumOpElts;
5560 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5561 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
5562 }
5563 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5564}
5565
5566// Matches a subset of compress masks with a contiguous prefix of output
5567// elements. This could be extended to allow gaps by deciding which
5568// source elements to spuriously demand.
5569 static bool isCompressMask(ArrayRef<int> Mask) {
5570 int Last = -1;
5571 bool SawUndef = false;
5572 for (const auto &[Idx, M] : enumerate(Mask)) {
5573 if (M == -1) {
5574 SawUndef = true;
5575 continue;
5576 }
5577 if (SawUndef)
5578 return false;
5579 if (Idx > (unsigned)M)
5580 return false;
5581 if (M <= Last)
5582 return false;
5583 Last = M;
5584 }
5585 return true;
5586}
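// For illustration, <0, 2, 3, -1> is accepted (indices strictly increase, each
// index is at least its position, and undefs only appear at the end), while
// <1, 0, 2, 3> is rejected since the selected elements are not in increasing
// order.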
5587
5588/// Given a shuffle where the indices are disjoint between the two sources,
5589/// e.g.:
5590///
5591/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5592///
5593/// Merge the two sources into one and do a single source shuffle:
5594///
5595/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5596/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5597///
5598/// A vselect will either be merged into a masked instruction or be lowered as a
5599/// vmerge.vvm, which is cheaper than a vrgather.vv.
5600 static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5601 SelectionDAG &DAG,
5602 const RISCVSubtarget &Subtarget) {
5603 MVT VT = SVN->getSimpleValueType(0);
5604 MVT XLenVT = Subtarget.getXLenVT();
5605 SDLoc DL(SVN);
5606
5607 const ArrayRef<int> Mask = SVN->getMask();
5608
5609 // Work out which source each lane will come from.
5610 SmallVector<int, 16> Srcs(Mask.size(), -1);
5611
5612 for (int Idx : Mask) {
5613 if (Idx == -1)
5614 continue;
5615 unsigned SrcIdx = Idx % Mask.size();
5616 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5617 if (Srcs[SrcIdx] == -1)
5618 // Mark this source as using this lane.
5619 Srcs[SrcIdx] = Src;
5620 else if (Srcs[SrcIdx] != Src)
5621 // The other source is using this lane: not disjoint.
5622 return SDValue();
5623 }
5624
5625 SmallVector<SDValue> SelectMaskVals;
5626 for (int Lane : Srcs) {
5627 if (Lane == -1)
5628 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5629 else
5630 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5631 }
5632 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5633 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5634 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5635 SVN->getOperand(0), SVN->getOperand(1));
5636
5637 // Move all indices relative to the first source.
5638 SmallVector<int> NewMask(Mask.size());
5639 for (unsigned I = 0; I < Mask.size(); I++) {
5640 if (Mask[I] == -1)
5641 NewMask[I] = -1;
5642 else
5643 NewMask[I] = Mask[I] % Mask.size();
5644 }
5645
5646 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5647}
5648
5649/// Is this mask local (i.e. elements only move within their local span), and
5650/// repeating (that is, the same rearrangement is being done within each span)?
5651static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5652 // Require a prefix from the original mask until the consumer code
5653 // is adjusted to rewrite the mask instead of just taking a prefix.
5654 for (auto [I, M] : enumerate(Mask)) {
5655 if (M == -1)
5656 continue;
5657 if ((M / Span) != (int)(I / Span))
5658 return false;
5659 int SpanIdx = I % Span;
5660 int Expected = M % Span;
5661 if (Mask[SpanIdx] != Expected)
5662 return false;
5663 }
5664 return true;
5665}
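// For illustration, with Span = 4 the mask <1, 0, 3, 2, 5, 4, 7, 6> is local
// and repeating (each span swaps adjacent pairs), while <1, 0, 3, 2, 4, 5, 6, 7>
// is not, since the second span uses a different rearrangement.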
5666
5667/// Is this mask only using elements from the first span of the input?
5668static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
5669 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
5670}
5671
5672/// Return true for a mask which performs an arbitrary shuffle within the first
5673/// span, and then repeats that same result across all remaining spans. Note
5674/// that this doesn't check if all the inputs come from a single span!
5675static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
5676 // Require a prefix from the original mask until the consumer code
5677 // is adjusted to rewrite the mask instead of just taking a prefix.
5678 for (auto [I, M] : enumerate(Mask)) {
5679 if (M == -1)
5680 continue;
5681 int SpanIdx = I % Span;
5682 if (Mask[SpanIdx] != M)
5683 return false;
5684 }
5685 return true;
5686}
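// For illustration, with Span = 4 the mask <2, 0, 3, 1, 2, 0, 3, 1> is a span
// splat: every span repeats the result produced for the first span. Note the
// sources all lie within the first span here, but this helper does not check
// that on its own.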
5687
5688/// Try to widen element type to get a new mask value for a better permutation
5689/// sequence. This doesn't try to inspect the widened mask for profitability;
5690/// we speculate the widened form is equal or better. This has the effect of
5691/// reducing mask constant sizes - allowing cheaper materialization sequences
5692/// - and index sequence sizes - reducing register pressure and materialization
5693/// cost, at the cost of (possibly) an extra VTYPE toggle.
5694 static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5695 SDLoc DL(Op);
5696 MVT VT = Op.getSimpleValueType();
5697 MVT ScalarVT = VT.getVectorElementType();
5698 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5699 SDValue V0 = Op.getOperand(0);
5700 SDValue V1 = Op.getOperand(1);
5701 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5702
5703 // Avoid wasted work leading to isTypeLegal check failing below
5704 if (ElementSize > 32)
5705 return SDValue();
5706
5707 SmallVector<int, 8> NewMask;
5708 if (!widenShuffleMaskElts(Mask, NewMask))
5709 return SDValue();
5710
5711 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5712 : MVT::getIntegerVT(ElementSize * 2);
5713 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5714 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5715 return SDValue();
5716 V0 = DAG.getBitcast(NewVT, V0);
5717 V1 = DAG.getBitcast(NewVT, V1);
5718 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5719}
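// For illustration, a v8i8 shuffle with mask <0, 1, 4, 5, 2, 3, 6, 7> widens
// to a v4i16 shuffle with mask <0, 2, 1, 3>, halving the number of mask
// elements that need to be materialized.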
5720
5721 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5722 const RISCVSubtarget &Subtarget) {
5723 SDValue V1 = Op.getOperand(0);
5724 SDValue V2 = Op.getOperand(1);
5725 SDLoc DL(Op);
5726 MVT XLenVT = Subtarget.getXLenVT();
5727 MVT VT = Op.getSimpleValueType();
5728 unsigned NumElts = VT.getVectorNumElements();
5729 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5730
5731 if (VT.getVectorElementType() == MVT::i1) {
5732 // Lower to a vror.vi of a larger element type if possible before we promote
5733 // i1s to i8s.
5734 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5735 return V;
5736 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5737 return V;
5738
5739 // Promote i1 shuffle to i8 shuffle.
5740 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5741 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5742 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5743 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5744 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5745 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5746 ISD::SETNE);
5747 }
5748
5749 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5750
5751 // Store the return value in a single variable instead of structured bindings
5752 // so that we can pass it to GetSlide below, which cannot capture structured
5753 // bindings until C++20.
5754 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5755 auto [TrueMask, VL] = TrueMaskVL;
5756
5757 if (SVN->isSplat()) {
5758 const int Lane = SVN->getSplatIndex();
5759 if (Lane >= 0) {
5760 MVT SVT = VT.getVectorElementType();
5761
5762 // Turn splatted vector load into a strided load with an X0 stride.
5763 SDValue V = V1;
5764 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5765 // with undef.
5766 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5767 int Offset = Lane;
5768 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5769 int OpElements =
5770 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5771 V = V.getOperand(Offset / OpElements);
5772 Offset %= OpElements;
5773 }
5774
5775 // We need to ensure the load isn't atomic or volatile.
5776 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5777 auto *Ld = cast<LoadSDNode>(V);
5778 Offset *= SVT.getStoreSize();
5779 SDValue NewAddr = DAG.getMemBasePlusOffset(
5780 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5781
5782 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5783 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5784 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5785 SDValue IntID =
5786 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5787 SDValue Ops[] = {Ld->getChain(),
5788 IntID,
5789 DAG.getUNDEF(ContainerVT),
5790 NewAddr,
5791 DAG.getRegister(RISCV::X0, XLenVT),
5792 VL};
5793 SDValue NewLoad = DAG.getMemIntrinsicNode(
5794 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5795 DAG.getMachineFunction().getMachineMemOperand(
5796 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5797 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5798 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5799 }
5800
5801 MVT SplatVT = ContainerVT;
5802
5803 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5804 if (SVT == MVT::bf16 ||
5805 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5806 SVT = MVT::i16;
5807 SplatVT = ContainerVT.changeVectorElementType(SVT);
5808 }
5809
5810 // Otherwise use a scalar load and splat. This will give the best
5811 // opportunity to fold a splat into the operation. ISel can turn it into
5812 // the x0 strided load if we aren't able to fold away the select.
5813 if (SVT.isFloatingPoint())
5814 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5815 Ld->getPointerInfo().getWithOffset(Offset),
5816 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
5817 else
5818 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5819 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5820 Ld->getBaseAlign(),
5821 Ld->getMemOperand()->getFlags());
5822 DAG.makeEquivalentMemoryOrdering(Ld, V);
5823
5824 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5825 : RISCVISD::VMV_V_X_VL;
5826 SDValue Splat =
5827 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5828 Splat = DAG.getBitcast(ContainerVT, Splat);
5829 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5830 }
5831
5832 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5833 assert(Lane < (int)NumElts && "Unexpected lane!");
5834 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5835 V1, DAG.getConstant(Lane, DL, XLenVT),
5836 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5837 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5838 }
5839 }
5840
5841 // For exact VLEN m2 or greater, try to split to m1 operations if we
5842 // can split cleanly.
5843 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5844 return V;
5845
5846 ArrayRef<int> Mask = SVN->getMask();
5847
5848 if (SDValue V =
5849 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5850 return V;
5851
5852 if (SDValue V =
5853 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5854 return V;
5855
5856 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5857 // available.
5858 if (Subtarget.hasStdExtZvkb())
5859 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5860 return V;
5861
5862 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
5863 NumElts != 2)
5864 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5865
5866 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5867 // use shift and truncate to perform the shuffle.
5868 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5869 // shift-and-trunc reducing total cost for everything except an mf8 result.
5870 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5871 // to do the entire operation.
5872 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5873 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5874 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5875 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5876 unsigned Index = 0;
5877 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5878 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5879 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
5880 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5881 if (1 < count_if(Mask,
5882 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5883 1 < count_if(Mask, [&Mask](int Idx) {
5884 return Idx >= (int)Mask.size();
5885 })) {
5886 // Narrow each source and concatenate them.
5887 // FIXME: For small LMUL it is better to concatenate first.
5888 MVT EltVT = VT.getVectorElementType();
5889 auto EltCnt = VT.getVectorElementCount();
5890 MVT SubVT =
5891 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
5892
5893 SDValue Lo =
5894 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
5895 SDValue Hi =
5896 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
5897
5898 SDValue Concat =
5899 DAG.getNode(ISD::CONCAT_VECTORS, DL,
5900 SubVT.getDoubleNumVectorElementsVT(), Lo, Hi);
5901 if (Factor == 2)
5902 return Concat;
5903
5904 SDValue Vec = DAG.getUNDEF(VT);
5905 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
5906 }
5907 }
5908 }
5909 }
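// For illustration of the shift-and-truncate deinterleave above: extracting
// the odd elements of an i8 vector <a, b, c, d> (Factor = 2, Index = 1) can be
// done by reinterpreting it as the i16 vector <b:a, d:c>, shifting right by 8,
// and truncating back to i8 to obtain <b, d>.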
5910
5911 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
5912 // e64 which can't match above.
5913 unsigned Index = 0;
5914 if (Subtarget.hasVendorXRivosVizip() &&
5915 ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
5916 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5917 unsigned Opc =
5918 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
5919 if (V2.isUndef())
5920 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
5921 if (auto VLEN = Subtarget.getRealVLen();
5922 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
5923 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
5924 if (SDValue Src = foldConcatVector(V1, V2)) {
5925 EVT NewVT = VT.getDoubleNumVectorElementsVT();
5926 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
5927 SDValue Res =
5928 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
5929 return DAG.getExtractSubvector(DL, VT, Res, 0);
5930 }
5931 // Deinterleave each source and concatenate them, or concat first, then
5932 // deinterleave.
5933 if (1 < count_if(Mask,
5934 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5935 1 < count_if(Mask,
5936 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
5937
5938 const unsigned EltSize = VT.getScalarSizeInBits();
5939 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
5940 if (NumElts < MinVLMAX) {
5941 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
5942 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
5943 SDValue Res =
5944 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
5945 return DAG.getExtractSubvector(DL, VT, Res, 0);
5946 }
5947
5948 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
5949 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
5950
5951 MVT SubVT = VT.getHalfNumVectorElementsVT();
5952 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
5953 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
5954 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
5955 }
5956 }
5957
5958 if (SDValue V =
5959 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5960 return V;
5961
5962 // Detect an interleave shuffle and lower to
5963 // (vwmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5964 int EvenSrc, OddSrc;
5965 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
5966 !(NumElts == 2 &&
5967 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
5968 // Extract the halves of the vectors.
5969 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5970
5971 // Recognize if one half is actually undef; the matching above will
5972 // otherwise reuse the even stream for the undef one. This improves
5973 // spread(2) shuffles.
5974 bool LaneIsUndef[2] = { true, true};
5975 for (const auto &[Idx, M] : enumerate(Mask))
5976 LaneIsUndef[Idx % 2] &= (M == -1);
5977
5978 int Size = Mask.size();
5979 SDValue EvenV, OddV;
5980 if (LaneIsUndef[0]) {
5981 EvenV = DAG.getUNDEF(HalfVT);
5982 } else {
5983 assert(EvenSrc >= 0 && "Undef source?");
5984 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5985 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
5986 }
5987
5988 if (LaneIsUndef[1]) {
5989 OddV = DAG.getUNDEF(HalfVT);
5990 } else {
5991 assert(OddSrc >= 0 && "Undef source?");
5992 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5993 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
5994 }
5995
5996 // Prefer vzip2a if available.
5997 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
5998 if (Subtarget.hasVendorXRivosVizip()) {
5999 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6000 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6001 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6002 }
6003 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6004 }
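// For illustration, a v4i8 shuffle of two v4i8 sources with mask <0, 4, 1, 5>
// is matched as an interleave of the low halves of V1 and V2, and is lowered
// either via RISCVISD::RI_VZIP2A_VL (XRivosVizip) or with the vwaddu/vwmaccu
// sequence built by getWideningInterleave.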
6005
6006 // Recognize a pattern which can be handled via a pair of vslideup/vslidedown
6007 // instructions (in any combination) with masking on the second instruction.
6008 // Also handles masked slides into an identity source, and single slides
6009 // without masking. Avoid matching bit rotates (which are not also element
6010 // rotates) as slide pairs. This is a performance heuristic, not a
6011 // functional check.
6012 std::array<std::pair<int, int>, 2> SrcInfo;
6013 unsigned RotateAmt;
6014 MVT RotateVT;
6015 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6016 (isElementRotate(SrcInfo, NumElts) ||
6017 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6018 SDValue Sources[2];
6019 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6020 int SrcIdx = Info.first;
6021 assert(SrcIdx == 0 || SrcIdx == 1);
6022 SDValue &Src = Sources[SrcIdx];
6023 if (!Src) {
6024 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6025 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6026 }
6027 return Src;
6028 };
6029 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6030 SDValue Passthru) {
6031 auto [TrueMask, VL] = TrueMaskVL;
6032 SDValue SrcV = GetSourceFor(Src);
6033 int SlideAmt = Src.second;
6034 if (SlideAmt == 0) {
6035 // Should never be second operation
6036 assert(Mask == TrueMask);
6037 return SrcV;
6038 }
6039 if (SlideAmt < 0)
6040 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6041 DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6042 RISCVVType::TAIL_AGNOSTIC);
6043 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6044 DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6045 RISCVVType::TAIL_AGNOSTIC);
6046 };
6047
6048 if (SrcInfo[1].first == -1) {
6049 SDValue Res = DAG.getUNDEF(ContainerVT);
6050 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6051 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6052 }
6053
6054 if (Subtarget.hasVendorXRivosVizip()) {
6055 bool TryWiden = false;
6056 unsigned Factor;
6057 if (isZipEven(SrcInfo, Mask, Factor)) {
6058 if (Factor == 1) {
6059 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6060 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6061 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6062 Subtarget);
6063 }
6064 TryWiden = true;
6065 }
6066 if (isZipOdd(SrcInfo, Mask, Factor)) {
6067 if (Factor == 1) {
6068 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6069 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6070 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6071 Subtarget);
6072 }
6073 TryWiden = true;
6074 }
6075 // If we found a widening opportunity which would let us form a
6076 // zipeven or zipodd, use the generic code to widen the shuffle
6077 // and recurse through this logic.
6078 if (TryWiden)
6079 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6080 return V;
6081 }
6082
6083 // Build the mask. Note that vslideup unconditionally preserves elements
6084 // below the slide amount in the destination, and thus those elements are
6085 // undefined in the mask. If the mask ends up all true (or undef), it
6086 // will be folded away by general logic.
6087 SmallVector<SDValue> MaskVals;
6088 for (const auto &[Idx, M] : enumerate(Mask)) {
6089 if (M < 0 ||
6090 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6091 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6092 continue;
6093 }
6094 int Src = M >= (int)NumElts;
6095 int Diff = (int)Idx - (M % NumElts);
6096 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6097 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6098 "Must match exactly one of the two slides");
6099 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6100 }
6101 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6102 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6103 SDValue SelectMask = convertToScalableVector(
6104 ContainerVT.changeVectorElementType(MVT::i1),
6105 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6106
6107 SDValue Res = DAG.getUNDEF(ContainerVT);
6108 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6109 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6110 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6111 }
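// For illustration, a v4i8 shuffle with mask <1, 2, 3, 4> forms a slide pair:
// V1 slid down by 1 supplies lanes 0-2 (SrcInfo[0] = {0, -1}) and V2 slid up
// by 3 supplies lane 3 (SrcInfo[1] = {1, 3}), with the second slide masked so
// it only writes the final lane.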
6112
6113 // Handle any remaining single source shuffles
6114 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6115 if (V2.isUndef()) {
6116 // We might be able to express the shuffle as a bitrotate. But even if we
6117 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6118 // shifts and a vor will have a higher throughput than a vrgather.
6119 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6120 return V;
6121
6122 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6123 return V;
6124
6125 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6126 // is fully covered in interleave(2) above, so it is ignored here.
6127 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6128 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6129 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6130 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6131 unsigned Index;
6132 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6133 MVT NarrowVT =
6134 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6135 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6136 return getWideningSpread(Src, Factor, Index, DL, DAG);
6137 }
6138 }
6139 }
6140
6141 // If only a prefix of the source elements influences a prefix of the
6142 // destination elements, try to see if we can reduce the required LMUL.
6143 unsigned MinVLen = Subtarget.getRealMinVLen();
6144 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6145 if (NumElts > MinVLMAX) {
6146 unsigned MaxIdx = 0;
6147 for (auto [I, M] : enumerate(Mask)) {
6148 if (M == -1)
6149 continue;
6150 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6151 }
6152 unsigned NewNumElts =
6153 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6154 if (NewNumElts != NumElts) {
6155 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6156 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6157 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6158 Mask.take_front(NewNumElts));
6159 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6160 }
6161 }
6162
6163 // Before hitting generic lowering fallbacks, try to widen the mask
6164 // to a wider SEW.
6165 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6166 return V;
6167
6168 // Can we generate a vcompress instead of a vrgather? These scale better
6169 // at high LMUL, at the cost of not being able to fold a following select
6170 // into them. The mask constants are also smaller than the index vector
6171 // constants, and thus easier to materialize.
6172 if (isCompressMask(Mask)) {
6173 SmallVector<SDValue> MaskVals(NumElts,
6174 DAG.getConstant(false, DL, XLenVT));
6175 for (auto Idx : Mask) {
6176 if (Idx == -1)
6177 break;
6178 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6179 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6180 }
6181 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6182 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6183 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6184 DAG.getUNDEF(VT));
6185 }
6186
6187 if (VT.getScalarSizeInBits() == 8 &&
6188 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6189 // On such a vector we're unable to use i8 as the index type.
6190 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6191 // may involve vector splitting if we're already at LMUL=8, or our
6192 // user-supplied maximum fixed-length LMUL.
6193 return SDValue();
6194 }
6195
6196 // Base case for the two operand recursion below - handle the worst case
6197 // single source shuffle.
6198 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6199 MVT IndexVT = VT.changeTypeToInteger();
6200 // Since we can't introduce illegal index types at this stage, use i16 and
6201 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6202 // than XLenVT.
6203 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6204 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6205 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6206 }
6207
6208 // If the mask allows, we can do all the index computation in 16 bits. This
6209 // requires less work and less register pressure at high LMUL, and creates
6210 // smaller constants which may be cheaper to materialize.
6211 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6212 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6213 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6214 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6215 }
6216
6217 MVT IndexContainerVT =
6218 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6219
6220 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6221 SmallVector<SDValue> GatherIndicesLHS;
6222 for (int MaskIndex : Mask) {
6223 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6224 GatherIndicesLHS.push_back(IsLHSIndex
6225 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6226 : DAG.getUNDEF(XLenVT));
6227 }
6228 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6229 LHSIndices =
6230 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6231 // At m1 and less, there's no point trying any of the high LMUL splitting
6232 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6233 if (NumElts <= MinVLMAX) {
6234 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6235 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6236 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6237 }
6238
6239 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6240 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6241 auto [InnerTrueMask, InnerVL] =
6242 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6243 int N =
6244 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6245 assert(isPowerOf2_32(N) && N <= 8);
6246
6247 // If we have a locally repeating mask, then we can reuse the first
6248 // register in the index register group for all registers within the
6249 // source register group. TODO: This generalizes to m2, and m4.
6250 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6251 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6252 SDValue Gather = DAG.getUNDEF(ContainerVT);
6253 for (int i = 0; i < N; i++) {
6254 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6255 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6256 SDValue SubVec =
6257 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6258 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6259 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6260 }
6261 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6262 }
6263
6264 // If we have a shuffle which only uses the first register in our source
6265 // register group, and repeats the same index across all spans, we can
6266 // use a single vrgather (and possibly some register moves).
6267 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6268 // which we can do a linear number of shuffles to form an m1 which
6269 // contains all the output elements.
6270 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6271 isSpanSplatShuffle(Mask, MinVLMAX)) {
6272 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6273 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6274 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6275 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6276 SDValue Gather = DAG.getUNDEF(ContainerVT);
6277 for (int i = 0; i < N; i++)
6278 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6279 M1VT.getVectorMinNumElements() * i);
6280 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6281 }
6282
6283 // If we have a shuffle which only uses the first register in our
6284 // source register group, we can do a linear number of m1 vrgathers
6285 // reusing the same source register (but with different indices)
6286 // TODO: This can be generalized for m2 or m4, or for any shuffle
6287 // for which we can do a vslidedown followed by this expansion.
6288 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6289 SDValue SlideAmt =
6290 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6291 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6292 SDValue Gather = DAG.getUNDEF(ContainerVT);
6293 for (int i = 0; i < N; i++) {
6294 if (i != 0)
6295 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6296 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6297 SlideAmt, TrueMask, VL);
6298 SDValue SubIndex =
6299 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6300 SDValue SubVec =
6301 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6302 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6303 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6304 M1VT.getVectorMinNumElements() * i);
6305 }
6306 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6307 }
6308
6309 // Fallback to generic vrgather if we can't find anything better.
6310 // On many machines, this will be O(LMUL^2)
6311 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6312 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6313 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6314 }
6315
6316 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6317 // merged with a second vrgather.
6318 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6319
6320 // Now construct the mask that will be used by the blended vrgather operation.
6321 // Construct the appropriate indices into each vector.
6322 for (int MaskIndex : Mask) {
6323 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6324 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6325 ? MaskIndex : -1);
6326 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6327 }
6328
6329 // If the mask indices are disjoint between the two sources, we can lower it
6330 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6331 // operands may end up being lowered to something cheaper than a vrgather.vv.
6332 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6333 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6334 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6335 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6336 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6337 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6338 return V;
6339
6340 // Before hitting generic lowering fallbacks, try to widen the mask
6341 // to a wider SEW.
6342 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6343 return V;
6344
6345 // Try to pick a profitable operand order.
6346 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6347 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6348
6349 // Recursively invoke lowering for each operand if we had two
6350 // independent single source shuffles, and then combine the result via a
6351 // vselect. Note that the vselect will likely be folded back into the
6352 // second permute (vrgather, or other) by the post-isel combine.
6353 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6354 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6355
6356 SmallVector<SDValue> MaskVals;
6357 for (int MaskIndex : Mask) {
6358 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6359 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6360 }
6361
6362 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6363 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6364 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6365
6366 if (SwapOps)
6367 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6368 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6369}
6370
6371 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6372 // Only support legal VTs for other shuffles for now.
6373 if (!isTypeLegal(VT))
6374 return false;
6375
6376 // Support splats for any type. These should type legalize well.
6377 if (ShuffleVectorSDNode::isSplatMask(M))
6378 return true;
6379
6380 const unsigned NumElts = M.size();
6381 MVT SVT = VT.getSimpleVT();
6382
6383 // Not for i1 vectors.
6384 if (SVT.getScalarType() == MVT::i1)
6385 return false;
6386
6387 std::array<std::pair<int, int>, 2> SrcInfo;
6388 int Dummy1, Dummy2;
6389 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6390 (::isMaskedSlidePair(M, SrcInfo) &&
6391 isElementRotate(SrcInfo, NumElts)) ||
6392 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6393}
6394
6395// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
6396// the exponent.
6397SDValue
6398RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
6399 SelectionDAG &DAG) const {
6400 MVT VT = Op.getSimpleValueType();
6401 unsigned EltSize = VT.getScalarSizeInBits();
6402 SDValue Src = Op.getOperand(0);
6403 SDLoc DL(Op);
6404 MVT ContainerVT = VT;
6405
6406 SDValue Mask, VL;
6407 if (Op->isVPOpcode()) {
6408 Mask = Op.getOperand(1);
6409 if (VT.isFixedLengthVector())
6410 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6411 Subtarget);
6412 VL = Op.getOperand(2);
6413 }
6414
6415 // We choose FP type that can represent the value if possible. Otherwise, we
6416 // use rounding to zero conversion for correct exponent of the result.
6417 // TODO: Use f16 for i8 when possible?
6418 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
6419 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
6420 FloatEltVT = MVT::f32;
6421 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
6422
6423 // Legal types should have been checked in the RISCVTargetLowering
6424 // constructor.
6425 // TODO: Splitting may make sense in some cases.
6426 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
6427 "Expected legal float type!");
6428
6429 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
6430 // The trailing zero count is equal to log2 of this single bit value.
6431 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
6432 SDValue Neg = DAG.getNegative(Src, DL, VT);
6433 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
6434 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
6435 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
6436 Src, Mask, VL);
6437 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
6438 }
6439
6440 // We have a legal FP type, convert to it.
6441 SDValue FloatVal;
6442 if (FloatVT.bitsGT(VT)) {
6443 if (Op->isVPOpcode())
6444 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
6445 else
6446 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
6447 } else {
6448 // Use RTZ to avoid rounding influencing exponent of FloatVal.
6449 if (VT.isFixedLengthVector()) {
6450 ContainerVT = getContainerForFixedLengthVector(VT);
6451 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6452 }
6453 if (!Op->isVPOpcode())
6454 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6455 SDValue RTZRM =
6456 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
6457 MVT ContainerFloatVT =
6458 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
6459 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
6460 Src, Mask, RTZRM, VL);
6461 if (VT.isFixedLengthVector())
6462 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
6463 }
6464 // Bitcast to integer and shift the exponent to the LSB.
6465 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
6466 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
6467 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
6468
6469 SDValue Exp;
6470 // Restore back to original type. Truncation after SRL is to generate vnsrl.
6471 if (Op->isVPOpcode()) {
6472 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
6473 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
6474 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
6475 } else {
6476 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
6477 DAG.getConstant(ShiftAmt, DL, IntVT));
6478 if (IntVT.bitsLT(VT))
6479 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
6480 else if (IntVT.bitsGT(VT))
6481 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
6482 }
6483
6484 // The exponent contains log2 of the value in biased form.
6485 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
6486 // For trailing zeros, we just need to subtract the bias.
6487 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
6488 return DAG.getNode(ISD::SUB, DL, VT, Exp,
6489 DAG.getConstant(ExponentBias, DL, VT));
6490 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
6491 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
6492 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
6493
6494 // For leading zeros, we need to remove the bias and convert from log2 to
6495 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
6496 unsigned Adjust = ExponentBias + (EltSize - 1);
6497 SDValue Res;
6498 if (Op->isVPOpcode())
6499 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
6500 Mask, VL);
6501 else
6502 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
6503
6504 // The above result with zero input equals to Adjust which is greater than
6505 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
6506 if (Op.getOpcode() == ISD::CTLZ)
6507 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
6508 else if (Op.getOpcode() == ISD::VP_CTLZ)
6509 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
6510 DAG.getConstant(EltSize, DL, VT), Mask, VL);
6511 return Res;
6512}
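// For illustration with i32 elements and x = 16: for CTTZ_ZERO_UNDEF, x & -x
// isolates the lowest set bit (still 16 here); uint-to-fp gives 2^4, whose
// biased exponent field is 4 + 127 = 131, so shifting right by 23 and
// subtracting the bias 127 recovers cttz(16) = 4. For CTLZ, 158 - 131 = 27
// matches the 27 leading zeros of 16.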
6513
6514SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6515 SelectionDAG &DAG) const {
6516 SDLoc DL(Op);
6517 MVT XLenVT = Subtarget.getXLenVT();
6518 SDValue Source = Op->getOperand(0);
6519 MVT SrcVT = Source.getSimpleValueType();
6520 SDValue Mask = Op->getOperand(1);
6521 SDValue EVL = Op->getOperand(2);
6522
6523 if (SrcVT.isFixedLengthVector()) {
6524 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
6525 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
6526 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6527 Subtarget);
6528 SrcVT = ContainerVT;
6529 }
6530
6531 // Convert to boolean vector.
6532 if (SrcVT.getScalarType() != MVT::i1) {
6533 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
6534 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
6535 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
6536 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
6537 DAG.getUNDEF(SrcVT), Mask, EVL});
6538 }
6539
6540 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
6541 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6542 // In this case, we can interpret poison as -1, so nothing to do further.
6543 return Res;
6544
6545 // Convert -1 to VL.
6546 SDValue SetCC =
6547 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
6548 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
6549 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
6550}
6551
6552// While RVV has alignment restrictions, we should always be able to load as a
6553// legal equivalently-sized byte-typed vector instead. This method is
6554// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
6555// the load is already correctly-aligned, it returns SDValue().
6556SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6557 SelectionDAG &DAG) const {
6558 auto *Load = cast<LoadSDNode>(Op);
6559 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6560
6561 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6562 Load->getMemoryVT(),
6563 *Load->getMemOperand()))
6564 return SDValue();
6565
6566 SDLoc DL(Op);
6567 MVT VT = Op.getSimpleValueType();
6568 unsigned EltSizeBits = VT.getScalarSizeInBits();
6569 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6570 "Unexpected unaligned RVV load type");
6571 MVT NewVT =
6572 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6573 assert(NewVT.isValid() &&
6574 "Expecting equally-sized RVV vector types to be legal");
6575 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6576 Load->getPointerInfo(), Load->getBaseAlign(),
6577 Load->getMemOperand()->getFlags());
6578 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6579}
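// For illustration, an underaligned (align 1) load of v8i16 is re-expressed
// above as a v16i8 load of the same bytes followed by a bitcast back to v8i16,
// since byte-typed vector accesses only require byte alignment.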
6580
6581// While RVV has alignment restrictions, we should always be able to store as a
6582// legal equivalently-sized byte-typed vector instead. This method is
6583 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
6584// returns SDValue() if the store is already correctly aligned.
6585SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6586 SelectionDAG &DAG) const {
6587 auto *Store = cast<StoreSDNode>(Op);
6588 assert(Store && Store->getValue().getValueType().isVector() &&
6589 "Expected vector store");
6590
6591 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6592 Store->getMemoryVT(),
6593 *Store->getMemOperand()))
6594 return SDValue();
6595
6596 SDLoc DL(Op);
6597 SDValue StoredVal = Store->getValue();
6598 MVT VT = StoredVal.getSimpleValueType();
6599 unsigned EltSizeBits = VT.getScalarSizeInBits();
6600 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6601 "Unexpected unaligned RVV store type");
6602 MVT NewVT =
6603 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6604 assert(NewVT.isValid() &&
6605 "Expecting equally-sized RVV vector types to be legal");
6606 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6607 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
6608 Store->getPointerInfo(), Store->getBaseAlign(),
6609 Store->getMemOperand()->getFlags());
6610}
6611
6612static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
6613 const RISCVSubtarget &Subtarget) {
6614 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
6615
6616 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
6617
6618 // All simm32 constants should be handled by isel.
6619 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
6620 // this check redundant, but small immediates are common, so checking here
6621 // first gives better compile time.
6622 if (isInt<32>(Imm))
6623 return Op;
6624
6625 // We only need to cost the immediate if constant pool lowering is enabled.
6626 if (!Subtarget.useConstantPoolForLargeInts())
6627 return Op;
6628
6629 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
6630 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6631 return Op;
6632
6633 // Optimizations below are disabled for opt size. If we're optimizing for
6634 // size, use a constant pool.
6635 if (DAG.shouldOptForSize())
6636 return SDValue();
6637
6638 // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
6639 // do that if it avoids a constant pool, even though it requires an extra
6640 // temporary register.
6641 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6642 // low and high 32 bits are the same and bit 31 and 63 are set.
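 // For example, 0xABCD1234ABCD1234 can be built by materializing the
 // sign-extended low half 0xFFFFFFFFABCD1234 in X and computing
 // (ADD_UW X, (SLLI X, 32)), since ADD_UW zero-extends the low 32 bits of X
 // before the add.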
6643 unsigned ShiftAmt, AddOpc;
6644 RISCVMatInt::InstSeq SeqLo =
6645 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6646 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6647 return Op;
6648
6649 return SDValue();
6650}
6651
6652SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6653 SelectionDAG &DAG) const {
6654 MVT VT = Op.getSimpleValueType();
6655 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6656
6657 // Can this constant be selected by a Zfa FLI instruction?
6658 bool Negate = false;
6659 int Index = getLegalZfaFPImm(Imm, VT);
6660
6661 // If the constant is negative, try negating.
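 // For example, -0.5 is not an FLI immediate, but 0.5 is, so it can be
 // materialized as FLI 0.5 followed by an FNEG.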
6662 if (Index < 0 && Imm.isNegative()) {
6663 Index = getLegalZfaFPImm(-Imm, VT);
6664 Negate = true;
6665 }
6666
6667 // If we couldn't find an FLI lowering, fall back to generic code.
6668 if (Index < 0)
6669 return SDValue();
6670
6671 // Emit an FLI+FNEG. We use a custom node to hide it from constant folding.
6672 SDLoc DL(Op);
6673 SDValue Const =
6674 DAG.getNode(RISCVISD::FLI, DL, VT,
6675 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6676 if (!Negate)
6677 return Const;
6678
6679 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6680}
6681
6682static SDValue LowerPREFETCH(SDValue Op, const RISCVSubtarget &Subtarget,
6683 SelectionDAG &DAG) {
6684
6685 unsigned IsData = Op.getConstantOperandVal(4);
6686
6687 // mips-p8700 only supports data prefetches for now; drop other prefetches.
6688 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
6689 return Op.getOperand(0);
6690 return Op;
6691}
6692
6693static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6694 const RISCVSubtarget &Subtarget) {
6695 SDLoc dl(Op);
6696 AtomicOrdering FenceOrdering =
6697 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6698 SyncScope::ID FenceSSID =
6699 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6700
6701 if (Subtarget.hasStdExtZtso()) {
6702 // The only fence that needs an instruction is a sequentially-consistent
6703 // cross-thread fence.
6704 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6705 FenceSSID == SyncScope::System)
6706 return Op;
6707
6708 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6709 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6710 }
6711
6712 // singlethread fences only synchronize with signal handlers on the same
6713 // thread and thus only need to preserve instruction order, not actually
6714 // enforce memory ordering.
6715 if (FenceSSID == SyncScope::SingleThread)
6716 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6717 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6718
6719 return Op;
6720}
6721
6722SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6723 SelectionDAG &DAG) const {
6724 SDLoc DL(Op);
6725 MVT VT = Op.getSimpleValueType();
6726 MVT XLenVT = Subtarget.getXLenVT();
6727 unsigned Check = Op.getConstantOperandVal(1);
6728 unsigned TDCMask = 0;
6729 if (Check & fcSNan)
6730 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6731 if (Check & fcQNan)
6732 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6733 if (Check & fcPosInf)
6734 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6735 if (Check & fcNegInf)
6736 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6737 if (Check & fcPosNormal)
6738 TDCMask |= RISCV::FPMASK_Positive_Normal;
6739 if (Check & fcNegNormal)
6740 TDCMask |= RISCV::FPMASK_Negative_Normal;
6741 if (Check & fcPosSubnormal)
6742 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6743 if (Check & fcNegSubnormal)
6744 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6745 if (Check & fcPosZero)
6746 TDCMask |= RISCV::FPMASK_Positive_Zero;
6747 if (Check & fcNegZero)
6748 TDCMask |= RISCV::FPMASK_Negative_Zero;
6749
6750 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6751
6752 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6753
6754 if (VT.isVector()) {
6755 SDValue Op0 = Op.getOperand(0);
6756 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6757
6758 if (VT.isScalableVector()) {
6759 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6760 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6761 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6762 Mask = Op.getOperand(2);
6763 VL = Op.getOperand(3);
6764 }
6765 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6766 VL, Op->getFlags());
6767 if (IsOneBitMask)
6768 return DAG.getSetCC(DL, VT, FPCLASS,
6769 DAG.getConstant(TDCMask, DL, DstVT),
6770 ISD::SETEQ);
6771 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6772 DAG.getConstant(TDCMask, DL, DstVT));
6773 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6774 ISD::SETNE);
6775 }
6776
6777 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6778 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6779 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6780 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6781 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6782 Mask = Op.getOperand(2);
6783 MVT MaskContainerVT =
6784 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6785 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6786 VL = Op.getOperand(3);
6787 }
6788 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6789
6790 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6791 Mask, VL, Op->getFlags());
6792
6793 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6794 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6795 if (IsOneBitMask) {
6796 SDValue VMSEQ =
6797 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6798 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6799 DAG.getUNDEF(ContainerVT), Mask, VL});
6800 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6801 }
6802 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6803 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6804
6805 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6806 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6807 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6808
6809 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6810 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6811 DAG.getUNDEF(ContainerVT), Mask, VL});
6812 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6813 }
6814
6815 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6816 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6817 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6818 ISD::SETNE);
6819 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6820}
6821
6822// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6823// operations propagate nans.
6824static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6825 const RISCVSubtarget &Subtarget) {
6826 SDLoc DL(Op);
6827 MVT VT = Op.getSimpleValueType();
6828
6829 SDValue X = Op.getOperand(0);
6830 SDValue Y = Op.getOperand(1);
6831
6832 if (!VT.isVector()) {
6833 MVT XLenVT = Subtarget.getXLenVT();
6834
6835 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6836 // ensures that when one input is a nan, the other will also be a nan,
6837 // allowing the nan to propagate. If both inputs are nan, this swaps the
6838 // inputs, which is harmless.
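 // For example, fmaximum(NaN, 3.0): X is a nan, so NewY becomes X (nan) and
 // the fmax below sees two nan operands, which yields a nan as required.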
6839
6840 SDValue NewY = Y;
6841 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6842 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6843 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6844 }
6845
6846 SDValue NewX = X;
6847 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6848 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6849 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6850 }
6851
6852 unsigned Opc =
6853 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6854 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6855 }
6856
6857 // Check for no-NaN inputs before converting fixed-length vectors to scalable.
6858 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6859 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6860
6861 MVT ContainerVT = VT;
6862 if (VT.isFixedLengthVector()) {
6863 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6864 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6865 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6866 }
6867
6868 SDValue Mask, VL;
6869 if (Op->isVPOpcode()) {
6870 Mask = Op.getOperand(2);
6871 if (VT.isFixedLengthVector())
6872 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6873 Subtarget);
6874 VL = Op.getOperand(3);
6875 } else {
6876 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6877 }
6878
6879 SDValue NewY = Y;
6880 if (!XIsNeverNan) {
6881 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6882 {X, X, DAG.getCondCode(ISD::SETOEQ),
6883 DAG.getUNDEF(ContainerVT), Mask, VL});
6884 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6885 DAG.getUNDEF(ContainerVT), VL);
6886 }
6887
6888 SDValue NewX = X;
6889 if (!YIsNeverNan) {
6890 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6891 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6892 DAG.getUNDEF(ContainerVT), Mask, VL});
6893 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6894 DAG.getUNDEF(ContainerVT), VL);
6895 }
6896
6897 unsigned Opc =
6898 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6899 ? RISCVISD::VFMAX_VL
6900 : RISCVISD::VFMIN_VL;
6901 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6902 DAG.getUNDEF(ContainerVT), Mask, VL);
6903 if (VT.isFixedLengthVector())
6904 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6905 return Res;
6906}
6907
6908static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6909 const RISCVSubtarget &Subtarget) {
6910 bool IsFABS = Op.getOpcode() == ISD::FABS;
6911 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6912 "Wrong opcode for lowering FABS or FNEG.");
6913
6914 MVT XLenVT = Subtarget.getXLenVT();
6915 MVT VT = Op.getSimpleValueType();
6916 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6917
6918 SDLoc DL(Op);
6919 SDValue Fmv =
6920 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6921
6922 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6923 Mask = Mask.sext(Subtarget.getXLen());
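 // For FABS the mask is 0x7fff (clear the sign bit with AND); for FNEG it is
 // the sign-extended 0x8000 (flip the sign bit with XOR). Bits above bit 15
 // are don't-care since FMV_H_X only consumes the low 16 bits.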
6924
6925 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6926 SDValue Logic =
6927 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6928 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6929}
6930
6931static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
6932 const RISCVSubtarget &Subtarget) {
6933 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6934
6935 MVT XLenVT = Subtarget.getXLenVT();
6936 MVT VT = Op.getSimpleValueType();
6937 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6938
6939 SDValue Mag = Op.getOperand(0);
6940 SDValue Sign = Op.getOperand(1);
6941
6942 SDLoc DL(Op);
6943
6944 // Get sign bit into an integer value.
6945 unsigned SignSize = Sign.getValueSizeInBits();
6946 SDValue SignAsInt = [&]() {
6947 if (SignSize == Subtarget.getXLen())
6948 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
6949 switch (SignSize) {
6950 case 16:
6951 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
6952 case 32:
6953 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
6954 case 64: {
6955 assert(XLenVT == MVT::i32 && "Unexpected type");
6956 // Copy the upper word to integer.
6957 SignSize = 32;
6958 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
6959 .getValue(1);
6960 }
6961 default:
6962 llvm_unreachable("Unexpected sign size");
6963 }
6964 }();
6965
6966 // Get the signbit at the right position for MagAsInt.
6967 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
6968 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
6969 SignAsInt,
6970 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
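 // For example, with an f32 sign operand and an f16/bf16 magnitude,
 // SignSize - MagSize = 32 - 16 = 16 > 0, so the sign word is shifted right
 // by 16 to move the sign bit down to bit 15.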
6971
6972 // Mask the sign bit and any bits above it. The extra bits will be dropped
6973 // when we convert back to FP.
6974 SDValue SignMask = DAG.getConstant(
6975 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
6976 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
6977
6978 // Transform Mag value to integer, and clear the sign bit.
6979 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
6980 SDValue ClearSignMask = DAG.getConstant(
6981 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
6982 SDValue ClearedSign =
6983 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
6984
6985 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
6986 SDNodeFlags::Disjoint);
6987
6988 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
6989}
6990
6991/// Get a RISC-V target-specific VL op for a given SDNode.
6992static unsigned getRISCVVLOp(SDValue Op) {
6993#define OP_CASE(NODE) \
6994 case ISD::NODE: \
6995 return RISCVISD::NODE##_VL;
6996#define VP_CASE(NODE) \
6997 case ISD::VP_##NODE: \
6998 return RISCVISD::NODE##_VL;
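// For example, OP_CASE(ADD) expands to
//   case ISD::ADD: return RISCVISD::ADD_VL;
// and VP_CASE(ADD) expands to
//   case ISD::VP_ADD: return RISCVISD::ADD_VL;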
6999 // clang-format off
7000 switch (Op.getOpcode()) {
7001 default:
7002 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7003 OP_CASE(ADD)
7004 OP_CASE(SUB)
7005 OP_CASE(MUL)
7006 OP_CASE(MULHS)
7007 OP_CASE(MULHU)
7008 OP_CASE(SDIV)
7009 OP_CASE(SREM)
7010 OP_CASE(UDIV)
7011 OP_CASE(UREM)
7012 OP_CASE(SHL)
7013 OP_CASE(SRA)
7014 OP_CASE(SRL)
7015 OP_CASE(ROTL)
7016 OP_CASE(ROTR)
7017 OP_CASE(BSWAP)
7018 OP_CASE(CTTZ)
7019 OP_CASE(CTLZ)
7020 OP_CASE(CTPOP)
7021 OP_CASE(BITREVERSE)
7022 OP_CASE(SADDSAT)
7023 OP_CASE(UADDSAT)
7024 OP_CASE(SSUBSAT)
7025 OP_CASE(USUBSAT)
7026 OP_CASE(AVGFLOORS)
7027 OP_CASE(AVGFLOORU)
7028 OP_CASE(AVGCEILS)
7029 OP_CASE(AVGCEILU)
7030 OP_CASE(FADD)
7031 OP_CASE(FSUB)
7032 OP_CASE(FMUL)
7033 OP_CASE(FDIV)
7034 OP_CASE(FNEG)
7035 OP_CASE(FABS)
7036 OP_CASE(FCOPYSIGN)
7037 OP_CASE(FSQRT)
7038 OP_CASE(SMIN)
7039 OP_CASE(SMAX)
7040 OP_CASE(UMIN)
7041 OP_CASE(UMAX)
7042 OP_CASE(STRICT_FADD)
7043 OP_CASE(STRICT_FSUB)
7044 OP_CASE(STRICT_FMUL)
7045 OP_CASE(STRICT_FDIV)
7046 OP_CASE(STRICT_FSQRT)
7047 VP_CASE(ADD) // VP_ADD
7048 VP_CASE(SUB) // VP_SUB
7049 VP_CASE(MUL) // VP_MUL
7050 VP_CASE(SDIV) // VP_SDIV
7051 VP_CASE(SREM) // VP_SREM
7052 VP_CASE(UDIV) // VP_UDIV
7053 VP_CASE(UREM) // VP_UREM
7054 VP_CASE(SHL) // VP_SHL
7055 VP_CASE(FADD) // VP_FADD
7056 VP_CASE(FSUB) // VP_FSUB
7057 VP_CASE(FMUL) // VP_FMUL
7058 VP_CASE(FDIV) // VP_FDIV
7059 VP_CASE(FNEG) // VP_FNEG
7060 VP_CASE(FABS) // VP_FABS
7061 VP_CASE(SMIN) // VP_SMIN
7062 VP_CASE(SMAX) // VP_SMAX
7063 VP_CASE(UMIN) // VP_UMIN
7064 VP_CASE(UMAX) // VP_UMAX
7065 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7066 VP_CASE(SETCC) // VP_SETCC
7067 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7068 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7069 VP_CASE(BITREVERSE) // VP_BITREVERSE
7070 VP_CASE(SADDSAT) // VP_SADDSAT
7071 VP_CASE(UADDSAT) // VP_UADDSAT
7072 VP_CASE(SSUBSAT) // VP_SSUBSAT
7073 VP_CASE(USUBSAT) // VP_USUBSAT
7074 VP_CASE(BSWAP) // VP_BSWAP
7075 VP_CASE(CTLZ) // VP_CTLZ
7076 VP_CASE(CTTZ) // VP_CTTZ
7077 VP_CASE(CTPOP) // VP_CTPOP
7078 case ISD::CTLZ_ZERO_UNDEF:
7079 case ISD::VP_CTLZ_ZERO_UNDEF:
7080 return RISCVISD::CTLZ_VL;
7081 case ISD::CTTZ_ZERO_UNDEF:
7082 case ISD::VP_CTTZ_ZERO_UNDEF:
7083 return RISCVISD::CTTZ_VL;
7084 case ISD::FMA:
7085 case ISD::VP_FMA:
7086 return RISCVISD::VFMADD_VL;
7087 case ISD::STRICT_FMA:
7088 return RISCVISD::STRICT_VFMADD_VL;
7089 case ISD::AND:
7090 case ISD::VP_AND:
7091 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7092 return RISCVISD::VMAND_VL;
7093 return RISCVISD::AND_VL;
7094 case ISD::OR:
7095 case ISD::VP_OR:
7096 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7097 return RISCVISD::VMOR_VL;
7098 return RISCVISD::OR_VL;
7099 case ISD::XOR:
7100 case ISD::VP_XOR:
7101 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7102 return RISCVISD::VMXOR_VL;
7103 return RISCVISD::XOR_VL;
7104 case ISD::ANY_EXTEND:
7105 case ISD::ZERO_EXTEND:
7106 return RISCVISD::VZEXT_VL;
7107 case ISD::SIGN_EXTEND:
7108 return RISCVISD::VSEXT_VL;
7109 case ISD::SETCC:
7110 return RISCVISD::SETCC_VL;
7111 case ISD::VSELECT:
7112 return RISCVISD::VMERGE_VL;
7113 case ISD::VP_SELECT:
7114 case ISD::VP_MERGE:
7115 return RISCVISD::VMERGE_VL;
7116 case ISD::VP_SRA:
7117 return RISCVISD::SRA_VL;
7118 case ISD::VP_SRL:
7119 return RISCVISD::SRL_VL;
7120 case ISD::VP_SQRT:
7121 return RISCVISD::FSQRT_VL;
7122 case ISD::VP_SIGN_EXTEND:
7123 return RISCVISD::VSEXT_VL;
7124 case ISD::VP_ZERO_EXTEND:
7125 return RISCVISD::VZEXT_VL;
7126 case ISD::VP_FP_TO_SINT:
7127 return RISCVISD::VFCVT_RTZ_X_F_VL;
7128 case ISD::VP_FP_TO_UINT:
7129 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7130 case ISD::FMINNUM:
7131 case ISD::FMINIMUMNUM:
7132 case ISD::VP_FMINNUM:
7133 return RISCVISD::VFMIN_VL;
7134 case ISD::FMAXNUM:
7135 case ISD::FMAXIMUMNUM:
7136 case ISD::VP_FMAXNUM:
7137 return RISCVISD::VFMAX_VL;
7138 case ISD::LRINT:
7139 case ISD::VP_LRINT:
7140 case ISD::LLRINT:
7141 case ISD::VP_LLRINT:
7142 return RISCVISD::VFCVT_RM_X_F_VL;
7143 }
7144 // clang-format on
7145#undef OP_CASE
7146#undef VP_CASE
7147}
7148
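// nxv32f16 (under Zvfhmin-only configurations) and nxv32bf16 are the largest
// (LMUL=8) types whose operations get promoted to f32; promoting them
// element-wise would require an illegal nxv32f32, so such ops must be split
// first.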
7149static bool isPromotedOpNeedingSplit(SDValue Op,
7150 const RISCVSubtarget &Subtarget) {
7151 return (Op.getValueType() == MVT::nxv32f16 &&
7152 (Subtarget.hasVInstructionsF16Minimal() &&
7153 !Subtarget.hasVInstructionsF16())) ||
7154 Op.getValueType() == MVT::nxv32bf16;
7155}
7156
7157static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
7158 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7159 SDLoc DL(Op);
7160
7161 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7162 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7163
7164 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7165 if (!Op.getOperand(j).getValueType().isVector()) {
7166 LoOperands[j] = Op.getOperand(j);
7167 HiOperands[j] = Op.getOperand(j);
7168 continue;
7169 }
7170 std::tie(LoOperands[j], HiOperands[j]) =
7171 DAG.SplitVector(Op.getOperand(j), DL);
7172 }
7173
7174 SDValue LoRes =
7175 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7176 SDValue HiRes =
7177 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7178
7179 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7180}
7181
7182static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
7183 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7184 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7185 SDLoc DL(Op);
7186
7187 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7188 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7189
7190 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7191 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7192 std::tie(LoOperands[j], HiOperands[j]) =
7193 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7194 continue;
7195 }
7196 if (!Op.getOperand(j).getValueType().isVector()) {
7197 LoOperands[j] = Op.getOperand(j);
7198 HiOperands[j] = Op.getOperand(j);
7199 continue;
7200 }
7201 std::tie(LoOperands[j], HiOperands[j]) =
7202 DAG.SplitVector(Op.getOperand(j), DL);
7203 }
7204
7205 SDValue LoRes =
7206 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7207 SDValue HiRes =
7208 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7209
7210 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7211}
7212
7213static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
7214 SDLoc DL(Op);
7215
7216 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7217 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7218 auto [EVLLo, EVLHi] =
7219 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7220
7221 SDValue ResLo =
7222 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7223 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
7224 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7225 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7226}
7227
7229
7228static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
7230 assert(Op->isStrictFPOpcode());
7231
7232 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7233
7234 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7235 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7236
7237 SDLoc DL(Op);
7238
7239 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7240 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7241
7242 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7243 if (!Op.getOperand(j).getValueType().isVector()) {
7244 LoOperands[j] = Op.getOperand(j);
7245 HiOperands[j] = Op.getOperand(j);
7246 continue;
7247 }
7248 std::tie(LoOperands[j], HiOperands[j]) =
7249 DAG.SplitVector(Op.getOperand(j), DL);
7250 }
7251
7252 SDValue LoRes =
7253 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
7254 HiOperands[0] = LoRes.getValue(1);
7255 SDValue HiRes =
7256 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7257
7258 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7259 LoRes.getValue(0), HiRes.getValue(0));
7260 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7261}
7262
7263SDValue
7264RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7265 SelectionDAG &DAG) const {
7266 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7267 "Unexpected bfloat16 load lowering");
7268
7269 SDLoc DL(Op);
7270 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7271 EVT MemVT = LD->getMemoryVT();
7272 SDValue Load = DAG.getExtLoad(
7273 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7274 LD->getBasePtr(),
7275 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
7276 LD->getMemOperand());
7277 // Use a mask to make the bf16 value NaN-boxed when we don't have the flh
7278 // instruction. -65536 is treated as a small negative number, so it can be
7279 // materialized directly with a single lui.
7280 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7281 SDValue OrSixteenOne =
7282 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7283 SDValue ConvertedResult =
7284 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7285 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7286}
7287
7288SDValue
7289RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7290 SelectionDAG &DAG) const {
7291 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7292 "Unexpected bfloat16 store lowering");
7293
7294 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7295 SDLoc DL(Op);
7296 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7297 Subtarget.getXLenVT(), ST->getValue());
7298 return DAG.getTruncStore(
7299 ST->getChain(), DL, FMV, ST->getBasePtr(),
7300 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7301 ST->getMemOperand());
7302}
7303
7304SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
7305 SelectionDAG &DAG) const {
7306 switch (Op.getOpcode()) {
7307 default:
7309 "Unimplemented RISCVTargetLowering::LowerOperation Case");
7310 case ISD::PREFETCH:
7311 return LowerPREFETCH(Op, Subtarget, DAG);
7312 case ISD::ATOMIC_FENCE:
7313 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7314 case ISD::GlobalAddress:
7315 return lowerGlobalAddress(Op, DAG);
7316 case ISD::BlockAddress:
7317 return lowerBlockAddress(Op, DAG);
7318 case ISD::ConstantPool:
7319 return lowerConstantPool(Op, DAG);
7320 case ISD::JumpTable:
7321 return lowerJumpTable(Op, DAG);
7322 case ISD::GlobalTLSAddress:
7323 return lowerGlobalTLSAddress(Op, DAG);
7324 case ISD::Constant:
7325 return lowerConstant(Op, DAG, Subtarget);
7326 case ISD::ConstantFP:
7327 return lowerConstantFP(Op, DAG);
7328 case ISD::SELECT:
7329 return lowerSELECT(Op, DAG);
7330 case ISD::BRCOND:
7331 return lowerBRCOND(Op, DAG);
7332 case ISD::VASTART:
7333 return lowerVASTART(Op, DAG);
7334 case ISD::FRAMEADDR:
7335 return lowerFRAMEADDR(Op, DAG);
7336 case ISD::RETURNADDR:
7337 return lowerRETURNADDR(Op, DAG);
7338 case ISD::SHL_PARTS:
7339 return lowerShiftLeftParts(Op, DAG);
7340 case ISD::SRA_PARTS:
7341 return lowerShiftRightParts(Op, DAG, true);
7342 case ISD::SRL_PARTS:
7343 return lowerShiftRightParts(Op, DAG, false);
7344 case ISD::ROTL:
7345 case ISD::ROTR:
7346 if (Op.getValueType().isFixedLengthVector()) {
7347 assert(Subtarget.hasStdExtZvkb());
7348 return lowerToScalableOp(Op, DAG);
7349 }
7350 assert(Subtarget.hasVendorXTHeadBb() &&
7351 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7352 "Unexpected custom legalization");
7353 // XTHeadBb only supports rotate by constant.
7354 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7355 return SDValue();
7356 return Op;
7357 case ISD::BITCAST: {
7358 SDLoc DL(Op);
7359 EVT VT = Op.getValueType();
7360 SDValue Op0 = Op.getOperand(0);
7361 EVT Op0VT = Op0.getValueType();
7362 MVT XLenVT = Subtarget.getXLenVT();
7363 if (Op0VT == MVT::i16 &&
7364 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7365 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7366 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7367 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7368 }
7369 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7370 Subtarget.hasStdExtFOrZfinx()) {
7371 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7372 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7373 }
7374 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7375 Subtarget.hasStdExtDOrZdinx()) {
7376 SDValue Lo, Hi;
7377 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7378 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7379 }
7380
7381 // Consider other scalar<->scalar casts as legal if the types are legal.
7382 // Otherwise expand them.
7383 if (!VT.isVector() && !Op0VT.isVector()) {
7384 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7385 return Op;
7386 return SDValue();
7387 }
7388
7389 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7390 "Unexpected types");
7391
7392 if (VT.isFixedLengthVector()) {
7393 // We can handle fixed length vector bitcasts with a simple replacement
7394 // in isel.
7395 if (Op0VT.isFixedLengthVector())
7396 return Op;
7397 // When bitcasting from scalar to fixed-length vector, insert the scalar
7398 // into a one-element vector of the result type, and perform a vector
7399 // bitcast.
7400 if (!Op0VT.isVector()) {
7401 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7402 if (!isTypeLegal(BVT))
7403 return SDValue();
7404 return DAG.getBitcast(
7405 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7406 }
7407 return SDValue();
7408 }
7409 // Custom-legalize bitcasts from fixed-length vector types to scalar types
7410 // thus: bitcast the vector to a one-element vector type whose element type
7411 // is the same as the result type, and extract the first element.
7412 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
7413 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
7414 if (!isTypeLegal(BVT))
7415 return SDValue();
7416 SDValue BVec = DAG.getBitcast(BVT, Op0);
7417 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
7418 }
7419 return SDValue();
7420 }
7421 case ISD::INTRINSIC_WO_CHAIN:
7422 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7423 case ISD::INTRINSIC_W_CHAIN:
7424 return LowerINTRINSIC_W_CHAIN(Op, DAG);
7425 case ISD::INTRINSIC_VOID:
7426 return LowerINTRINSIC_VOID(Op, DAG);
7427 case ISD::IS_FPCLASS:
7428 return LowerIS_FPCLASS(Op, DAG);
7429 case ISD::BITREVERSE: {
7430 MVT VT = Op.getSimpleValueType();
7431 if (VT.isFixedLengthVector()) {
7432 assert(Subtarget.hasStdExtZvbb());
7433 return lowerToScalableOp(Op, DAG);
7434 }
7435 SDLoc DL(Op);
7436 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
7437 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
7438 // Expand bitreverse to a bswap(rev8) followed by brev8.
7439 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
7440 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
7441 }
7442 case ISD::TRUNCATE:
7443 case ISD::TRUNCATE_SSAT_S:
7444 case ISD::TRUNCATE_USAT_U:
7445 // Only custom-lower vector truncates
7446 if (!Op.getSimpleValueType().isVector())
7447 return Op;
7448 return lowerVectorTruncLike(Op, DAG);
7449 case ISD::ANY_EXTEND:
7450 case ISD::ZERO_EXTEND:
7451 if (Op.getOperand(0).getValueType().isVector() &&
7452 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7453 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
7454 if (Op.getValueType().isScalableVector())
7455 return Op;
7456 return lowerToScalableOp(Op, DAG);
7457 case ISD::SIGN_EXTEND:
7458 if (Op.getOperand(0).getValueType().isVector() &&
7459 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7460 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
7461 if (Op.getValueType().isScalableVector())
7462 return Op;
7463 return lowerToScalableOp(Op, DAG);
7464 case ISD::SPLAT_VECTOR_PARTS:
7465 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
7466 case ISD::INSERT_VECTOR_ELT:
7467 return lowerINSERT_VECTOR_ELT(Op, DAG);
7468 case ISD::EXTRACT_VECTOR_ELT:
7469 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7470 case ISD::SCALAR_TO_VECTOR: {
7471 MVT VT = Op.getSimpleValueType();
7472 SDLoc DL(Op);
7473 SDValue Scalar = Op.getOperand(0);
7474 if (VT.getVectorElementType() == MVT::i1) {
7475 MVT WideVT = VT.changeVectorElementType(MVT::i8);
7476 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
7477 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
7478 }
7479 MVT ContainerVT = VT;
7480 if (VT.isFixedLengthVector())
7481 ContainerVT = getContainerForFixedLengthVector(VT);
7482 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7483
7484 SDValue V;
7485 if (VT.isFloatingPoint()) {
7486 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
7487 DAG.getUNDEF(ContainerVT), Scalar, VL);
7488 } else {
7489 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
7490 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
7491 DAG.getUNDEF(ContainerVT), Scalar, VL);
7492 }
7493 if (VT.isFixedLengthVector())
7494 V = convertFromScalableVector(VT, V, DAG, Subtarget);
7495 return V;
7496 }
7497 case ISD::VSCALE: {
7498 MVT XLenVT = Subtarget.getXLenVT();
7499 MVT VT = Op.getSimpleValueType();
7500 SDLoc DL(Op);
7501 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7502 // We define our scalable vector types for lmul=1 to use a 64 bit known
7503 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
7504 // vscale as VLENB / 8.
7505 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
7506 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
7507 reportFatalInternalError("Support for VLEN==32 is incomplete.");
7508 // We assume VLENB is a multiple of 8. We manually choose the best shift
7509 // here because SimplifyDemandedBits isn't always able to simplify it.
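 // For example: Val == 4 -> srl vlenb by 1 (exact), Val == 16 -> shl by 1,
 // Val == 24 -> multiply vlenb by 3, Val == 6 -> srl by 3 then multiply by 6.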
7510 uint64_t Val = Op.getConstantOperandVal(0);
7511 if (isPowerOf2_64(Val)) {
7512 uint64_t Log2 = Log2_64(Val);
7513 if (Log2 < 3) {
7514 SDNodeFlags Flags;
7515 Flags.setExact(true);
7516 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7517 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
7518 } else if (Log2 > 3) {
7519 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
7520 DAG.getConstant(Log2 - 3, DL, XLenVT));
7521 }
7522 } else if ((Val % 8) == 0) {
7523 // If the multiplier is a multiple of 8, scale it down to avoid needing
7524 // to shift the VLENB value.
7525 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
7526 DAG.getConstant(Val / 8, DL, XLenVT));
7527 } else {
7528 SDNodeFlags Flags;
7529 Flags.setExact(true);
7530 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7531 DAG.getConstant(3, DL, XLenVT), Flags);
7532 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
7533 DAG.getConstant(Val, DL, XLenVT));
7534 }
7535 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7536 }
7537 case ISD::FPOWI: {
7538 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
7539 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
7540 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
7541 Op.getOperand(1).getValueType() == MVT::i32) {
7542 SDLoc DL(Op);
7543 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7544 SDValue Powi =
7545 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
7546 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
7547 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7548 }
7549 return SDValue();
7550 }
7551 case ISD::FMAXIMUM:
7552 case ISD::FMINIMUM:
7553 if (isPromotedOpNeedingSplit(Op, Subtarget))
7554 return SplitVectorOp(Op, DAG);
7555 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7556 case ISD::FP_EXTEND:
7557 case ISD::FP_ROUND:
7558 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7559 case ISD::STRICT_FP_ROUND:
7560 case ISD::STRICT_FP_EXTEND:
7561 return lowerStrictFPExtendOrRoundLike(Op, DAG);
7562 case ISD::SINT_TO_FP:
7563 case ISD::UINT_TO_FP:
7564 if (Op.getValueType().isVector() &&
7565 ((Op.getValueType().getScalarType() == MVT::f16 &&
7566 (Subtarget.hasVInstructionsF16Minimal() &&
7567 !Subtarget.hasVInstructionsF16())) ||
7568 Op.getValueType().getScalarType() == MVT::bf16)) {
7569 if (isPromotedOpNeedingSplit(Op, Subtarget))
7570 return SplitVectorOp(Op, DAG);
7571 // int -> f32
7572 SDLoc DL(Op);
7573 MVT NVT =
7574 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7575 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7576 // f32 -> [b]f16
7577 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7578 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7579 }
7580 [[fallthrough]];
7581 case ISD::FP_TO_SINT:
7582 case ISD::FP_TO_UINT:
7583 if (SDValue Op1 = Op.getOperand(0);
7584 Op1.getValueType().isVector() &&
7585 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7586 (Subtarget.hasVInstructionsF16Minimal() &&
7587 !Subtarget.hasVInstructionsF16())) ||
7588 Op1.getValueType().getScalarType() == MVT::bf16)) {
7589 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7590 return SplitVectorOp(Op, DAG);
7591 // [b]f16 -> f32
7592 SDLoc DL(Op);
7593 MVT NVT = MVT::getVectorVT(MVT::f32,
7594 Op1.getValueType().getVectorElementCount());
7595 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7596 // f32 -> int
7597 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
7598 }
7599 [[fallthrough]];
7600 case ISD::STRICT_FP_TO_SINT:
7601 case ISD::STRICT_FP_TO_UINT:
7602 case ISD::STRICT_SINT_TO_FP:
7603 case ISD::STRICT_UINT_TO_FP: {
7604 // RVV can only do fp<->int conversions to types half/double the size as
7605 // the source. We custom-lower any conversions that do two hops into
7606 // sequences.
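 // For example, i8 -> f32 becomes an integer extend to i16 followed by a
 // single widening i16 -> f32 convert, and f16 -> i64 becomes an fp_extend
 // to f32 followed by a single widening f32 -> i64 convert.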
7607 MVT VT = Op.getSimpleValueType();
7608 if (VT.isScalarInteger())
7609 return lowerFP_TO_INT(Op, DAG, Subtarget);
7610 bool IsStrict = Op->isStrictFPOpcode();
7611 SDValue Src = Op.getOperand(0 + IsStrict);
7612 MVT SrcVT = Src.getSimpleValueType();
7613 if (SrcVT.isScalarInteger())
7614 return lowerINT_TO_FP(Op, DAG, Subtarget);
7615 if (!VT.isVector())
7616 return Op;
7617 SDLoc DL(Op);
7618 MVT EltVT = VT.getVectorElementType();
7619 MVT SrcEltVT = SrcVT.getVectorElementType();
7620 unsigned EltSize = EltVT.getSizeInBits();
7621 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
7622 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
7623 "Unexpected vector element types");
7624
7625 bool IsInt2FP = SrcEltVT.isInteger();
7626 // Widening conversions
7627 if (EltSize > (2 * SrcEltSize)) {
7628 if (IsInt2FP) {
7629 // Do a regular integer sign/zero extension then convert to float.
7630 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
7631 VT.getVectorElementCount());
7632 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
7633 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
7634 ? ISD::ZERO_EXTEND
7635 : ISD::SIGN_EXTEND;
7636 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
7637 if (IsStrict)
7638 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
7639 Op.getOperand(0), Ext);
7640 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
7641 }
7642 // FP2Int
7643 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
7644 // Do one doubling fp_extend then complete the operation by converting
7645 // to int.
7646 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7647 if (IsStrict) {
7648 auto [FExt, Chain] =
7649 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
7650 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
7651 }
7652 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7653 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7654 }
7655
7656 // Narrowing conversions
7657 if (SrcEltSize > (2 * EltSize)) {
7658 if (IsInt2FP) {
7659 // One narrowing int_to_fp, then an fp_round.
7660 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7661 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7662 if (IsStrict) {
7663 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7664 DAG.getVTList(InterimFVT, MVT::Other),
7665 Op.getOperand(0), Src);
7666 SDValue Chain = Int2FP.getValue(1);
7667 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7668 }
7669 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7670 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7671 }
7672 // FP2Int
7673 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7674 // representable by the integer, the result is poison.
7675 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7676 VT.getVectorElementCount());
7677 if (IsStrict) {
7678 SDValue FP2Int =
7679 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7680 Op.getOperand(0), Src);
7681 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7682 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7683 }
7684 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7685 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7686 }
7687
7688 // Scalable vectors can exit here; isel patterns handle the equally-sized
7689 // and halving/doubling conversions.
7690 if (!VT.isFixedLengthVector())
7691 return Op;
7692
7693 // For fixed-length vectors we lower to a custom "VL" node.
7694 unsigned RVVOpc = 0;
7695 switch (Op.getOpcode()) {
7696 default:
7697 llvm_unreachable("Impossible opcode");
7698 case ISD::FP_TO_SINT:
7699 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7700 break;
7701 case ISD::FP_TO_UINT:
7702 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7703 break;
7704 case ISD::SINT_TO_FP:
7705 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7706 break;
7707 case ISD::UINT_TO_FP:
7708 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7709 break;
7710 case ISD::STRICT_FP_TO_SINT:
7711 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7712 break;
7713 case ISD::STRICT_FP_TO_UINT:
7714 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7715 break;
7716 case ISD::STRICT_SINT_TO_FP:
7717 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7718 break;
7719 case ISD::STRICT_UINT_TO_FP:
7720 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7721 break;
7722 }
7723
7724 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7725 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7726 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7727 "Expected same element count");
7728
7729 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7730
7731 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7732 if (IsStrict) {
7733 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7734 Op.getOperand(0), Src, Mask, VL);
7735 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7736 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7737 }
7738 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7739 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7740 }
7741 case ISD::FP_TO_SINT_SAT:
7742 case ISD::FP_TO_UINT_SAT:
7743 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7744 case ISD::FP_TO_BF16: {
7745 // Custom lower to ensure the libcall return is passed in an FPR on hard
7746 // float ABIs.
7747 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7748 SDLoc DL(Op);
7749 MakeLibCallOptions CallOptions;
7750 RTLIB::Libcall LC =
7751 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7752 SDValue Res =
7753 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7754 if (Subtarget.is64Bit())
7755 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7756 return DAG.getBitcast(MVT::i32, Res);
7757 }
7758 case ISD::BF16_TO_FP: {
7759 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7760 MVT VT = Op.getSimpleValueType();
7761 SDLoc DL(Op);
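 // bf16 has the same sign/exponent layout as the upper 16 bits of f32, so
 // shifting the raw bits left by 16 and reinterpreting the result as f32
 // gives an exact extension.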
7762 Op = DAG.getNode(
7763 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7764 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7765 SDValue Res = Subtarget.is64Bit()
7766 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7767 : DAG.getBitcast(MVT::f32, Op);
7768 // fp_extend if the target VT is bigger than f32.
7769 if (VT != MVT::f32)
7770 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7771 return Res;
7772 }
7773 case ISD::STRICT_FP_TO_FP16:
7774 case ISD::FP_TO_FP16: {
7775 // Custom lower to ensure the libcall return is passed in an FPR on hard
7776 // float ABIs.
7777 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7778 SDLoc DL(Op);
7779 MakeLibCallOptions CallOptions;
7780 bool IsStrict = Op->isStrictFPOpcode();
7781 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7782 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7783 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7784 SDValue Res;
7785 std::tie(Res, Chain) =
7786 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7787 if (Subtarget.is64Bit())
7788 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7789 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7790 if (IsStrict)
7791 return DAG.getMergeValues({Result, Chain}, DL);
7792 return Result;
7793 }
7794 case ISD::STRICT_FP16_TO_FP:
7795 case ISD::FP16_TO_FP: {
7796 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7797 // float ABIs.
7798 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7799 SDLoc DL(Op);
7800 MakeLibCallOptions CallOptions;
7801 bool IsStrict = Op->isStrictFPOpcode();
7802 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7803 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7804 SDValue Arg = Subtarget.is64Bit()
7805 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7806 : DAG.getBitcast(MVT::f32, Op0);
7807 SDValue Res;
7808 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7809 CallOptions, DL, Chain);
7810 if (IsStrict)
7811 return DAG.getMergeValues({Res, Chain}, DL);
7812 return Res;
7813 }
7814 case ISD::FTRUNC:
7815 case ISD::FCEIL:
7816 case ISD::FFLOOR:
7817 case ISD::FNEARBYINT:
7818 case ISD::FRINT:
7819 case ISD::FROUND:
7820 case ISD::FROUNDEVEN:
7821 if (isPromotedOpNeedingSplit(Op, Subtarget))
7822 return SplitVectorOp(Op, DAG);
7823 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7824 case ISD::LRINT:
7825 case ISD::LLRINT:
7826 case ISD::LROUND:
7827 case ISD::LLROUND: {
7828 if (Op.getValueType().isVector())
7829 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
7830 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7831 "Unexpected custom legalisation");
7832 SDLoc DL(Op);
7833 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7834 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7835 }
7836 case ISD::STRICT_LRINT:
7837 case ISD::STRICT_LLRINT:
7838 case ISD::STRICT_LROUND:
7839 case ISD::STRICT_LLROUND: {
7840 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7841 "Unexpected custom legalisation");
7842 SDLoc DL(Op);
7843 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7844 {Op.getOperand(0), Op.getOperand(1)});
7845 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7846 {Ext.getValue(1), Ext.getValue(0)});
7847 }
7848 case ISD::VECREDUCE_ADD:
7849 case ISD::VECREDUCE_UMAX:
7850 case ISD::VECREDUCE_SMAX:
7851 case ISD::VECREDUCE_UMIN:
7852 case ISD::VECREDUCE_SMIN:
7853 return lowerVECREDUCE(Op, DAG);
7854 case ISD::VECREDUCE_AND:
7855 case ISD::VECREDUCE_OR:
7856 case ISD::VECREDUCE_XOR:
7857 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7858 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7859 return lowerVECREDUCE(Op, DAG);
7860 case ISD::VECREDUCE_FADD:
7861 case ISD::VECREDUCE_SEQ_FADD:
7862 case ISD::VECREDUCE_FMIN:
7863 case ISD::VECREDUCE_FMAX:
7864 case ISD::VECREDUCE_FMAXIMUM:
7865 case ISD::VECREDUCE_FMINIMUM:
7866 return lowerFPVECREDUCE(Op, DAG);
7867 case ISD::VP_REDUCE_ADD:
7868 case ISD::VP_REDUCE_UMAX:
7869 case ISD::VP_REDUCE_SMAX:
7870 case ISD::VP_REDUCE_UMIN:
7871 case ISD::VP_REDUCE_SMIN:
7872 case ISD::VP_REDUCE_FADD:
7873 case ISD::VP_REDUCE_SEQ_FADD:
7874 case ISD::VP_REDUCE_FMIN:
7875 case ISD::VP_REDUCE_FMAX:
7876 case ISD::VP_REDUCE_FMINIMUM:
7877 case ISD::VP_REDUCE_FMAXIMUM:
7878 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7879 return SplitVectorReductionOp(Op, DAG);
7880 return lowerVPREDUCE(Op, DAG);
7881 case ISD::VP_REDUCE_AND:
7882 case ISD::VP_REDUCE_OR:
7883 case ISD::VP_REDUCE_XOR:
7884 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7885 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7886 return lowerVPREDUCE(Op, DAG);
7887 case ISD::VP_CTTZ_ELTS:
7888 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7889 return lowerVPCttzElements(Op, DAG);
7890 case ISD::UNDEF: {
7891 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7892 return convertFromScalableVector(Op.getSimpleValueType(),
7893 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7894 }
7895 case ISD::INSERT_SUBVECTOR:
7896 return lowerINSERT_SUBVECTOR(Op, DAG);
7897 case ISD::EXTRACT_SUBVECTOR:
7898 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7899 case ISD::VECTOR_DEINTERLEAVE:
7900 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7901 case ISD::VECTOR_INTERLEAVE:
7902 return lowerVECTOR_INTERLEAVE(Op, DAG);
7903 case ISD::STEP_VECTOR:
7904 return lowerSTEP_VECTOR(Op, DAG);
7905 case ISD::VECTOR_REVERSE:
7906 return lowerVECTOR_REVERSE(Op, DAG);
7907 case ISD::VECTOR_SPLICE:
7908 return lowerVECTOR_SPLICE(Op, DAG);
7909 case ISD::BUILD_VECTOR: {
7910 MVT VT = Op.getSimpleValueType();
7911 MVT EltVT = VT.getVectorElementType();
7912 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
7913 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
7914 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7915 }
7916 case ISD::SPLAT_VECTOR: {
7917 MVT VT = Op.getSimpleValueType();
7918 MVT EltVT = VT.getVectorElementType();
7919 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7920 EltVT == MVT::bf16) {
7921 SDLoc DL(Op);
7922 SDValue Elt;
7923 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7924 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7925 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7926 Op.getOperand(0));
7927 else
7928 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7929 MVT IVT = VT.changeVectorElementType(MVT::i16);
7930 return DAG.getNode(ISD::BITCAST, DL, VT,
7931 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7932 }
7933
7934 if (EltVT == MVT::i1)
7935 return lowerVectorMaskSplat(Op, DAG);
7936 return SDValue();
7937 }
7938 case ISD::VECTOR_SHUFFLE:
7939 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7940 case ISD::CONCAT_VECTORS: {
7941 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7942 // better than going through the stack, as the default expansion does.
7943 SDLoc DL(Op);
7944 MVT VT = Op.getSimpleValueType();
7945 MVT ContainerVT = VT;
7946 if (VT.isFixedLengthVector())
7947 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7948
7949 // Recursively split concat_vectors with more than 2 operands:
7950 //
7951 // concat_vector op1, op2, op3, op4
7952 // ->
7953 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7954 //
7955 // This reduces the length of the chain of vslideups and allows us to
7956 // perform the vslideups at a smaller LMUL, limited to MF2.
7957 if (Op.getNumOperands() > 2 &&
7958 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
7959 MVT HalfVT = VT.getHalfNumVectorElementsVT();
7961 size_t HalfNumOps = Op.getNumOperands() / 2;
7962 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7963 Op->ops().take_front(HalfNumOps));
7964 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7965 Op->ops().drop_front(HalfNumOps));
7966 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7967 }
7968
7969 unsigned NumOpElts =
7970 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7971 SDValue Vec = DAG.getUNDEF(VT);
7972 for (const auto &OpIdx : enumerate(Op->ops())) {
7973 SDValue SubVec = OpIdx.value();
7974 // Don't insert undef subvectors.
7975 if (SubVec.isUndef())
7976 continue;
7977 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
7978 }
7979 return Vec;
7980 }
7981 case ISD::LOAD: {
7982 auto *Load = cast<LoadSDNode>(Op);
7983 EVT VT = Load->getValueType(0);
7984 if (VT == MVT::f64) {
7985 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
7986 !Subtarget.is64Bit() && "Unexpected custom legalisation");
7987
7988 // Replace a double precision load with two i32 loads and a BuildPairF64.
7989 SDLoc DL(Op);
7990 SDValue BasePtr = Load->getBasePtr();
7991 SDValue Chain = Load->getChain();
7992
7993 SDValue Lo =
7994 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
7995 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
7996 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
7997 SDValue Hi = DAG.getLoad(
7998 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
7999 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8000 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8001 Hi.getValue(1));
8002
8003 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8004 return DAG.getMergeValues({Pair, Chain}, DL);
8005 }
8006
8007 if (VT == MVT::bf16)
8008 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8009
8010 // Handle normal vector tuple load.
8011 if (VT.isRISCVVectorTuple()) {
8012 SDLoc DL(Op);
8013 MVT XLenVT = Subtarget.getXLenVT();
8014 unsigned NF = VT.getRISCVVectorTupleNumFields();
8015 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8016 unsigned NumElts = Sz / (NF * 8);
8017 int Log2LMUL = Log2_64(NumElts) - 3;
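 // For example, a tuple of four LMUL=1 fields has NF == 4 and
 // Sz == 4 * 64 known-min bits, giving NumElts == 8 (<vscale x 8 x i8> per
 // field) and Log2LMUL == 0, so consecutive fields are VLENB bytes apart.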
8018
8019 auto Flag = SDNodeFlags();
8020 Flag.setNoUnsignedWrap(true);
8021 SDValue Ret = DAG.getUNDEF(VT);
8022 SDValue BasePtr = Load->getBasePtr();
8023 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8024 VROffset =
8025 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8026 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8027 SmallVector<SDValue, 8> OutChains;
8028
8029 // Load NF vector registers and combine them to a vector tuple.
8030 for (unsigned i = 0; i < NF; ++i) {
8031 SDValue LoadVal = DAG.getLoad(
8032 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8033 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8034 OutChains.push_back(LoadVal.getValue(1));
8035 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8036 DAG.getTargetConstant(i, DL, MVT::i32));
8037 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8038 }
8039 return DAG.getMergeValues(
8040 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8041 }
8042
8043 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8044 return V;
8045 if (Op.getValueType().isFixedLengthVector())
8046 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8047 return Op;
8048 }
8049 case ISD::STORE: {
8050 auto *Store = cast<StoreSDNode>(Op);
8051 SDValue StoredVal = Store->getValue();
8052 EVT VT = StoredVal.getValueType();
8053 if (VT == MVT::f64) {
8054 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8055 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8056
8057 // Replace a double precision store with a SplitF64 and i32 stores.
8058 SDLoc DL(Op);
8059 SDValue BasePtr = Store->getBasePtr();
8060 SDValue Chain = Store->getChain();
8061 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8062 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8063
8064 SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
8065 Store->getPointerInfo(), Store->getBaseAlign(),
8066 Store->getMemOperand()->getFlags());
8067 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8068 SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
8069 Store->getPointerInfo().getWithOffset(4),
8070 Store->getBaseAlign(),
8071 Store->getMemOperand()->getFlags());
8072 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8073 }
8074 if (VT == MVT::i64) {
8075 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8076 "Unexpected custom legalisation");
8077 if (Store->isTruncatingStore())
8078 return SDValue();
8079
8080 if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
8081 return SDValue();
8082
8083 SDLoc DL(Op);
8084 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8085 DAG.getTargetConstant(0, DL, MVT::i32));
8086 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8087 DAG.getTargetConstant(1, DL, MVT::i32));
8088
8089 return DAG.getMemIntrinsicNode(
8090 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8091 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8092 Store->getMemOperand());
8093 }
8094
8095 if (VT == MVT::bf16)
8096 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8097
8098 // Handle normal vector tuple store.
8099 if (VT.isRISCVVectorTuple()) {
8100 SDLoc DL(Op);
8101 MVT XLenVT = Subtarget.getXLenVT();
8102 unsigned NF = VT.getRISCVVectorTupleNumFields();
8103 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8104 unsigned NumElts = Sz / (NF * 8);
8105 int Log2LMUL = Log2_64(NumElts) - 3;
8106
8107 auto Flag = SDNodeFlags();
8108 Flag.setNoUnsignedWrap(true);
8109 SDValue Ret;
8110 SDValue Chain = Store->getChain();
8111 SDValue BasePtr = Store->getBasePtr();
8112 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8113 VROffset =
8114 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8115 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8116
8117 // Extract subregisters in a vector tuple and store them individually.
8118 for (unsigned i = 0; i < NF; ++i) {
8119 auto Extract =
8120 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8121 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8122 DAG.getTargetConstant(i, DL, MVT::i32));
8123 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8124 MachinePointerInfo(Store->getAddressSpace()),
8125 Store->getBaseAlign(),
8126 Store->getMemOperand()->getFlags());
8127 Chain = Ret.getValue(0);
8128 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8129 }
8130 return Ret;
8131 }
8132
8133 if (auto V = expandUnalignedRVVStore(Op, DAG))
8134 return V;
8135 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8136 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8137 return Op;
8138 }
8139 case ISD::MLOAD:
8140 case ISD::VP_LOAD:
8141 return lowerMaskedLoad(Op, DAG);
8142 case ISD::VP_LOAD_FF:
8143 return lowerLoadFF(Op, DAG);
8144 case ISD::MSTORE:
8145 case ISD::VP_STORE:
8146 return lowerMaskedStore(Op, DAG);
8147 case ISD::VECTOR_COMPRESS:
8148 return lowerVectorCompress(Op, DAG);
8149 case ISD::SELECT_CC: {
8150 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8151 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8152 // into separate SETCC+SELECT just like LegalizeDAG.
8153 SDValue Tmp1 = Op.getOperand(0);
8154 SDValue Tmp2 = Op.getOperand(1);
8155 SDValue True = Op.getOperand(2);
8156 SDValue False = Op.getOperand(3);
8157 EVT VT = Op.getValueType();
8158 SDValue CC = Op.getOperand(4);
8159 EVT CmpVT = Tmp1.getValueType();
8160 EVT CCVT =
8161 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8162 SDLoc DL(Op);
8163 SDValue Cond =
8164 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8165 return DAG.getSelect(DL, VT, Cond, True, False);
8166 }
8167 case ISD::SETCC: {
8168 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8169 if (OpVT.isScalarInteger()) {
8170 MVT VT = Op.getSimpleValueType();
8171 SDValue LHS = Op.getOperand(0);
8172 SDValue RHS = Op.getOperand(1);
8173 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8174 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8175 "Unexpected CondCode");
8176
8177 SDLoc DL(Op);
8178
8179 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8180 // convert this to the equivalent of (set(u)ge X, C+1) by using
8181 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8182 // in a register.
8183 if (isa<ConstantSDNode>(RHS)) {
8184 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8185 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8186 // If this is an unsigned compare and the constant is -1, incrementing
8187 // the constant would change behavior. The result should be false.
8188 if (CCVal == ISD::SETUGT && Imm == -1)
8189 return DAG.getConstant(0, DL, VT);
8190 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8191 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8192 SDValue SetCC = DAG.getSetCC(
8193 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8194 return DAG.getLogicalNOT(DL, SetCC, VT);
8195 }
8196 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
8197 if (CCVal == ISD::SETUGT && Imm == 2047) {
8198 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8199 DAG.getShiftAmountConstant(11, OpVT, DL));
8200 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8201 ISD::SETNE);
8202 }
8203 }
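      // For example, (setgt X, 5) becomes (xori (slti X, 6), 1) and
      // (setugt X, 2047) becomes (setne (srl X, 11), 0); neither form needs
      // the original constant in a register.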
8204
8205 // Not a constant we could handle, swap the operands and condition code to
8206 // SETLT/SETULT.
8207 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8208 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8209 }
8210
8211 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8212 return SplitVectorOp(Op, DAG);
8213
8214 return lowerToScalableOp(Op, DAG);
8215 }
8216 case ISD::ADD:
8217 case ISD::SUB:
8218 case ISD::MUL:
8219 case ISD::MULHS:
8220 case ISD::MULHU:
8221 case ISD::AND:
8222 case ISD::OR:
8223 case ISD::XOR:
8224 case ISD::SDIV:
8225 case ISD::SREM:
8226 case ISD::UDIV:
8227 case ISD::UREM:
8228 case ISD::BSWAP:
8229 case ISD::CTPOP:
8230 case ISD::VSELECT:
8231 return lowerToScalableOp(Op, DAG);
8232 case ISD::SHL:
8233 case ISD::SRA:
8234 case ISD::SRL:
8235 if (Op.getSimpleValueType().isFixedLengthVector())
8236 return lowerToScalableOp(Op, DAG);
8237 // This can be called for an i32 shift amount that needs to be promoted.
8238 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8239 "Unexpected custom legalisation");
8240 return SDValue();
8241 case ISD::FABS:
8242 case ISD::FNEG:
8243 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8244 return lowerFABSorFNEG(Op, DAG, Subtarget);
8245 [[fallthrough]];
8246 case ISD::FADD:
8247 case ISD::FSUB:
8248 case ISD::FMUL:
8249 case ISD::FDIV:
8250 case ISD::FSQRT:
8251 case ISD::FMA:
8252 case ISD::FMINNUM:
8253 case ISD::FMAXNUM:
8254 case ISD::FMINIMUMNUM:
8255 case ISD::FMAXIMUMNUM:
8256 if (isPromotedOpNeedingSplit(Op, Subtarget))
8257 return SplitVectorOp(Op, DAG);
8258 [[fallthrough]];
8259 case ISD::AVGFLOORS:
8260 case ISD::AVGFLOORU:
8261 case ISD::AVGCEILS:
8262 case ISD::AVGCEILU:
8263 case ISD::SMIN:
8264 case ISD::SMAX:
8265 case ISD::UMIN:
8266 case ISD::UMAX:
8267 case ISD::UADDSAT:
8268 case ISD::USUBSAT:
8269 case ISD::SADDSAT:
8270 case ISD::SSUBSAT:
8271 return lowerToScalableOp(Op, DAG);
8272 case ISD::ABDS:
8273 case ISD::ABDU: {
8274 SDLoc dl(Op);
8275 EVT VT = Op->getValueType(0);
8276 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8277 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8278 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8279
8280 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8281 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
8282 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8283 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8284 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8285 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8286 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8287 }
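  // e.g. abds(3, -7) = smax(3, -7) - smin(3, -7) = 3 - (-7) = 10.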
8288 case ISD::ABS:
8289 case ISD::VP_ABS:
8290 return lowerABS(Op, DAG);
8291 case ISD::CTLZ:
8292 case ISD::CTLZ_ZERO_UNDEF:
8293 case ISD::CTTZ:
8294 case ISD::CTTZ_ZERO_UNDEF:
8295 if (Subtarget.hasStdExtZvbb())
8296 return lowerToScalableOp(Op, DAG);
8297 assert(Op.getOpcode() != ISD::CTTZ);
8298 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8299 case ISD::FCOPYSIGN:
8300 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8301 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8302 if (isPromotedOpNeedingSplit(Op, Subtarget))
8303 return SplitVectorOp(Op, DAG);
8304 return lowerToScalableOp(Op, DAG);
8305 case ISD::STRICT_FADD:
8306 case ISD::STRICT_FSUB:
8307 case ISD::STRICT_FMUL:
8308 case ISD::STRICT_FDIV:
8309 case ISD::STRICT_FSQRT:
8310 case ISD::STRICT_FMA:
8311 if (isPromotedOpNeedingSplit(Op, Subtarget))
8312 return SplitStrictFPVectorOp(Op, DAG);
8313 return lowerToScalableOp(Op, DAG);
8314 case ISD::STRICT_FSETCC:
8315 case ISD::STRICT_FSETCCS:
8316 return lowerVectorStrictFSetcc(Op, DAG);
8317 case ISD::STRICT_FCEIL:
8318 case ISD::STRICT_FRINT:
8319 case ISD::STRICT_FFLOOR:
8320 case ISD::STRICT_FTRUNC:
8321 case ISD::STRICT_FNEARBYINT:
8322 case ISD::STRICT_FROUND:
8323 case ISD::STRICT_FROUNDEVEN:
8324 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8325 case ISD::MGATHER:
8326 case ISD::VP_GATHER:
8327 return lowerMaskedGather(Op, DAG);
8328 case ISD::MSCATTER:
8329 case ISD::VP_SCATTER:
8330 return lowerMaskedScatter(Op, DAG);
8331 case ISD::GET_ROUNDING:
8332 return lowerGET_ROUNDING(Op, DAG);
8333 case ISD::SET_ROUNDING:
8334 return lowerSET_ROUNDING(Op, DAG);
8335 case ISD::GET_FPENV:
8336 return lowerGET_FPENV(Op, DAG);
8337 case ISD::SET_FPENV:
8338 return lowerSET_FPENV(Op, DAG);
8339 case ISD::RESET_FPENV:
8340 return lowerRESET_FPENV(Op, DAG);
8341 case ISD::GET_FPMODE:
8342 return lowerGET_FPMODE(Op, DAG);
8343 case ISD::SET_FPMODE:
8344 return lowerSET_FPMODE(Op, DAG);
8345 case ISD::RESET_FPMODE:
8346 return lowerRESET_FPMODE(Op, DAG);
8347 case ISD::EH_DWARF_CFA:
8348 return lowerEH_DWARF_CFA(Op, DAG);
8349 case ISD::VP_MERGE:
8350 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
8351 return lowerVPMergeMask(Op, DAG);
8352 [[fallthrough]];
8353 case ISD::VP_SELECT:
8354 case ISD::VP_ADD:
8355 case ISD::VP_SUB:
8356 case ISD::VP_MUL:
8357 case ISD::VP_SDIV:
8358 case ISD::VP_UDIV:
8359 case ISD::VP_SREM:
8360 case ISD::VP_UREM:
8361 case ISD::VP_UADDSAT:
8362 case ISD::VP_USUBSAT:
8363 case ISD::VP_SADDSAT:
8364 case ISD::VP_SSUBSAT:
8365 case ISD::VP_LRINT:
8366 case ISD::VP_LLRINT:
8367 return lowerVPOp(Op, DAG);
8368 case ISD::VP_AND:
8369 case ISD::VP_OR:
8370 case ISD::VP_XOR:
8371 return lowerLogicVPOp(Op, DAG);
8372 case ISD::VP_FADD:
8373 case ISD::VP_FSUB:
8374 case ISD::VP_FMUL:
8375 case ISD::VP_FDIV:
8376 case ISD::VP_FNEG:
8377 case ISD::VP_FABS:
8378 case ISD::VP_SQRT:
8379 case ISD::VP_FMA:
8380 case ISD::VP_FMINNUM:
8381 case ISD::VP_FMAXNUM:
8382 case ISD::VP_FCOPYSIGN:
8383 if (isPromotedOpNeedingSplit(Op, Subtarget))
8384 return SplitVPOp(Op, DAG);
8385 [[fallthrough]];
8386 case ISD::VP_SRA:
8387 case ISD::VP_SRL:
8388 case ISD::VP_SHL:
8389 return lowerVPOp(Op, DAG);
8390 case ISD::VP_IS_FPCLASS:
8391 return LowerIS_FPCLASS(Op, DAG);
8392 case ISD::VP_SIGN_EXTEND:
8393 case ISD::VP_ZERO_EXTEND:
8394 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8395 return lowerVPExtMaskOp(Op, DAG);
8396 return lowerVPOp(Op, DAG);
8397 case ISD::VP_TRUNCATE:
8398 return lowerVectorTruncLike(Op, DAG);
8399 case ISD::VP_FP_EXTEND:
8400 case ISD::VP_FP_ROUND:
8401 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8402 case ISD::VP_SINT_TO_FP:
8403 case ISD::VP_UINT_TO_FP:
8404 if (Op.getValueType().isVector() &&
8405 ((Op.getValueType().getScalarType() == MVT::f16 &&
8406 (Subtarget.hasVInstructionsF16Minimal() &&
8407 !Subtarget.hasVInstructionsF16())) ||
8408 Op.getValueType().getScalarType() == MVT::bf16)) {
8409 if (isPromotedOpNeedingSplit(Op, Subtarget))
8410 return SplitVectorOp(Op, DAG);
8411 // int -> f32
8412 SDLoc DL(Op);
8413 MVT NVT =
8414 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8415 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8416 // f32 -> [b]f16
8417 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8418 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8419 }
8420 [[fallthrough]];
8421 case ISD::VP_FP_TO_SINT:
8422 case ISD::VP_FP_TO_UINT:
8423 if (SDValue Op1 = Op.getOperand(0);
8424 Op1.getValueType().isVector() &&
8425 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8426 (Subtarget.hasVInstructionsF16Minimal() &&
8427 !Subtarget.hasVInstructionsF16())) ||
8428 Op1.getValueType().getScalarType() == MVT::bf16)) {
8429 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8430 return SplitVectorOp(Op, DAG);
8431 // [b]f16 -> f32
8432 SDLoc DL(Op);
8433 MVT NVT = MVT::getVectorVT(MVT::f32,
8434 Op1.getValueType().getVectorElementCount());
8435 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8436 // f32 -> int
8437 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
8438 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
8439 }
8440 return lowerVPFPIntConvOp(Op, DAG);
8441 case ISD::VP_SETCC:
8442 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8443 return SplitVPOp(Op, DAG);
8444 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8445 return lowerVPSetCCMaskOp(Op, DAG);
8446 [[fallthrough]];
8447 case ISD::VP_SMIN:
8448 case ISD::VP_SMAX:
8449 case ISD::VP_UMIN:
8450 case ISD::VP_UMAX:
8451 case ISD::VP_BITREVERSE:
8452 case ISD::VP_BSWAP:
8453 return lowerVPOp(Op, DAG);
8454 case ISD::VP_CTLZ:
8455 case ISD::VP_CTLZ_ZERO_UNDEF:
8456 if (Subtarget.hasStdExtZvbb())
8457 return lowerVPOp(Op, DAG);
8458 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8459 case ISD::VP_CTTZ:
8460 case ISD::VP_CTTZ_ZERO_UNDEF:
8461 if (Subtarget.hasStdExtZvbb())
8462 return lowerVPOp(Op, DAG);
8463 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8464 case ISD::VP_CTPOP:
8465 return lowerVPOp(Op, DAG);
8466 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8467 return lowerVPStridedLoad(Op, DAG);
8468 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8469 return lowerVPStridedStore(Op, DAG);
8470 case ISD::VP_FCEIL:
8471 case ISD::VP_FFLOOR:
8472 case ISD::VP_FRINT:
8473 case ISD::VP_FNEARBYINT:
8474 case ISD::VP_FROUND:
8475 case ISD::VP_FROUNDEVEN:
8476 case ISD::VP_FROUNDTOZERO:
8477 if (isPromotedOpNeedingSplit(Op, Subtarget))
8478 return SplitVPOp(Op, DAG);
8479 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8480 case ISD::VP_FMAXIMUM:
8481 case ISD::VP_FMINIMUM:
8482 if (isPromotedOpNeedingSplit(Op, Subtarget))
8483 return SplitVPOp(Op, DAG);
8484 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8485 case ISD::EXPERIMENTAL_VP_SPLICE:
8486 return lowerVPSpliceExperimental(Op, DAG);
8487 case ISD::EXPERIMENTAL_VP_REVERSE:
8488 return lowerVPReverseExperimental(Op, DAG);
8489 case ISD::EXPERIMENTAL_VP_SPLAT:
8490 return lowerVPSplatExperimental(Op, DAG);
8491 case ISD::CLEAR_CACHE: {
8492 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
8493 "llvm.clear_cache only needs custom lower on Linux targets");
8494 SDLoc DL(Op);
8495 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8496 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
8497 Op.getOperand(2), Flags, DL);
8498 }
8499 case ISD::DYNAMIC_STACKALLOC:
8500 return lowerDYNAMIC_STACKALLOC(Op, DAG);
8501 case ISD::INIT_TRAMPOLINE:
8502 return lowerINIT_TRAMPOLINE(Op, DAG);
8503 case ISD::ADJUST_TRAMPOLINE:
8504 return lowerADJUST_TRAMPOLINE(Op, DAG);
8505 case ISD::PARTIAL_REDUCE_SMLA:
8506 case ISD::PARTIAL_REDUCE_UMLA:
8507 case ISD::PARTIAL_REDUCE_SUMLA:
8508 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
8509 }
8510}
8511
8512SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
8513 SDValue Start, SDValue End,
8514 SDValue Flags, SDLoc DL) const {
8515 MakeLibCallOptions CallOptions;
8516 std::pair<SDValue, SDValue> CallResult =
8517 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
8518 {Start, End, Flags}, CallOptions, DL, InChain);
8519
8520 // This function returns void so only the out chain matters.
8521 return CallResult.second;
8522}
8523
8524SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
8525 SelectionDAG &DAG) const {
8526 if (!Subtarget.is64Bit())
8527 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8528
8529 // Create an MCCodeEmitter to encode instructions.
8530 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
8531 assert(TLO);
8532 MCContext &MCCtx = TLO->getContext();
8533
8534 std::unique_ptr<MCCodeEmitter> CodeEmitter(
8535 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
8536
8537 SDValue Root = Op.getOperand(0);
8538 SDValue Trmp = Op.getOperand(1); // trampoline
8539 SDLoc dl(Op);
8540
8541 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
8542
8543 // We store in the trampoline buffer the following instructions and data.
8544 // Offset:
8545 // 0: auipc t2, 0
8546 // 4: ld t0, 24(t2)
8547 // 8: ld t2, 16(t2)
8548 // 12: jalr t0
8549 // 16: <StaticChainOffset>
8550 // 24: <FunctionAddressOffset>
8551 // 32:
8552 // Offset with branch control flow protection enabled:
8553 // 0: lpad <imm20>
8554 // 4: auipc t3, 0
8555 // 8: ld t2, 28(t3)
8556 // 12: ld t3, 20(t3)
8557 // 16: jalr t2
8558 // 20: <StaticChainOffset>
8559 // 28: <FunctionAddressOffset>
8560 // 36:
8561
8562 const bool HasCFBranch =
8563 Subtarget.hasStdExtZicfilp() &&
8565 "cf-protection-branch");
8566 const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
8567 const unsigned StaticChainOffset = StaticChainIdx * 4;
8568 const unsigned FunctionAddressOffset = StaticChainOffset + 8;
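  // Without cf-protection-branch this gives StaticChainOffset = 16 and
  // FunctionAddressOffset = 24; with the lpad prologue it gives 20 and 28,
  // matching the two layouts sketched above.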
8569
8570 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
8571 assert(STI);
8572 auto GetEncoding = [&](const MCInst &MC) {
8573 SmallVector<char, 32> CB;
8574 SmallVector<MCFixup> Fixups;
8575 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
8576 uint32_t Encoding = support::endian::read32le(CB.data());
8577 return Encoding;
8578 };
8579
8580 SmallVector<SDValue> OutChains;
8581
8582 SmallVector<uint32_t> Encodings;
8583 if (!HasCFBranch) {
8584 Encodings.append(
8585 {// auipc t2, 0
8586 // Loads the current PC into t2.
8587 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
8588 // ld t0, 24(t2)
8589 // Loads the function address into t0. Note that we are using offsets
8590 // pc-relative to the first instruction of the trampoline.
8591 GetEncoding(MCInstBuilder(RISCV::LD)
8592 .addReg(RISCV::X5)
8593 .addReg(RISCV::X7)
8594 .addImm(FunctionAddressOffset)),
8595 // ld t2, 16(t2)
8596 // Load the value of the static chain.
8597 GetEncoding(MCInstBuilder(RISCV::LD)
8598 .addReg(RISCV::X7)
8599 .addReg(RISCV::X7)
8600 .addImm(StaticChainOffset)),
8601 // jalr t0
8602 // Jump to the function.
8603 GetEncoding(MCInstBuilder(RISCV::JALR)
8604 .addReg(RISCV::X0)
8605 .addReg(RISCV::X5)
8606 .addImm(0))});
8607 } else {
8608 Encodings.append(
8609 {// auipc x0, <imm20> (lpad <imm20>)
8610 // Landing pad.
8611 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
8612 // auipc t3, 0
8613 // Loads the current PC into t3.
8614 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
8615 // ld t2, (FunctionAddressOffset - 4)(t3)
8616 // Loads the function address into t2. Note that we are using offsets
8617 // pc-relative to the SECOND instruction of the trampoline.
8618 GetEncoding(MCInstBuilder(RISCV::LD)
8619 .addReg(RISCV::X7)
8620 .addReg(RISCV::X28)
8621 .addImm(FunctionAddressOffset - 4)),
8622 // ld t3, (StaticChainOffset - 4)(t3)
8623 // Load the value of the static chain.
8624 GetEncoding(MCInstBuilder(RISCV::LD)
8625 .addReg(RISCV::X28)
8626 .addReg(RISCV::X28)
8627 .addImm(StaticChainOffset - 4)),
8628 // jalr t2
8629 // Software-guarded jump to the function.
8630 GetEncoding(MCInstBuilder(RISCV::JALR)
8631 .addReg(RISCV::X0)
8632 .addReg(RISCV::X7)
8633 .addImm(0))});
8634 }
8635
8636 // Store encoded instructions.
8637 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
8638 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8639 DAG.getConstant(Idx * 4, dl, MVT::i64))
8640 : Trmp;
8641 OutChains.push_back(DAG.getTruncStore(
8642 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
8643 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
8644 }
8645
8646 // Now store the variable part of the trampoline.
8647 SDValue FunctionAddress = Op.getOperand(2);
8648 SDValue StaticChain = Op.getOperand(3);
8649
8650 // Store the given static chain and function pointer in the trampoline buffer.
8651 struct OffsetValuePair {
8652 const unsigned Offset;
8653 const SDValue Value;
8654 SDValue Addr = SDValue(); // Used to cache the address.
8655 } OffsetValues[] = {
8656 {StaticChainOffset, StaticChain},
8657 {FunctionAddressOffset, FunctionAddress},
8658 };
8659 for (auto &OffsetValue : OffsetValues) {
8660 SDValue Addr =
8661 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8662 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
8663 OffsetValue.Addr = Addr;
8664 OutChains.push_back(
8665 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
8666 MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
8667 }
8668
8669 assert(OutChains.size() == StaticChainIdx + 2 &&
8670 "Size of OutChains mismatch");
8671 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
8672
8673 // The end of the trampoline's instructions is the same as the static chain
8674 // address that we computed earlier.
8675 SDValue EndOfTrmp = OffsetValues[0].Addr;
8676
8677 // Call clear cache on the trampoline instructions.
8678 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
8679 Trmp, EndOfTrmp);
8680
8681 return Chain;
8682}
8683
8684SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
8685 SelectionDAG &DAG) const {
8686 if (!Subtarget.is64Bit())
8687 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8688
8689 return Op.getOperand(0);
8690}
8691
8692SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
8693 SelectionDAG &DAG) const {
8694 // Currently, only the vqdot and vqdotu case (from zvqdotq) should be legal.
8695 // TODO: There are many other sub-cases we could potentially lower; are
8696 // any of them worthwhile? E.g., via vredsum, vwredsum, vwwmaccu, etc.
8697 SDLoc DL(Op);
8698 MVT VT = Op.getSimpleValueType();
8699 SDValue Accum = Op.getOperand(0);
8700 assert(Accum.getSimpleValueType() == VT &&
8701 VT.getVectorElementType() == MVT::i32);
8702 SDValue A = Op.getOperand(1);
8703 SDValue B = Op.getOperand(2);
8704 MVT ArgVT = A.getSimpleValueType();
8705 assert(ArgVT == B.getSimpleValueType() &&
8706 ArgVT.getVectorElementType() == MVT::i8);
8707 (void)ArgVT;
8708
8709 // The zvqdotq pseudos are defined with sources and destination both
8710 // being i32. This cast is needed for correctness to avoid incorrect
8711 // .vx matching of i8 splats.
8712 A = DAG.getBitcast(VT, A);
8713 B = DAG.getBitcast(VT, B);
8714
8715 MVT ContainerVT = VT;
8716 if (VT.isFixedLengthVector()) {
8717 ContainerVT = getContainerForFixedLengthVector(VT);
8718 Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
8719 A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
8720 B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
8721 }
8722
8723 unsigned Opc;
8724 switch (Op.getOpcode()) {
8725 case ISD::PARTIAL_REDUCE_SMLA:
8726 Opc = RISCVISD::VQDOT_VL;
8727 break;
8728 case ISD::PARTIAL_REDUCE_UMLA:
8729 Opc = RISCVISD::VQDOTU_VL;
8730 break;
8731 case ISD::PARTIAL_REDUCE_SUMLA:
8732 Opc = RISCVISD::VQDOTSU_VL;
8733 break;
8734 default:
8735 llvm_unreachable("Unexpected opcode");
8736 }
8737 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8738 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
8739 if (VT.isFixedLengthVector())
8740 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
8741 return Res;
8742}
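// Each i32 lane of the result accumulates a four-element i8 dot product of A
// and B into Accum, which maps onto the Zvqdotq vqdot/vqdotu/vqdotsu
// instructions per lane.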
8743
8744 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
8745 SelectionDAG &DAG, unsigned Flags) {
8746 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
8747}
8748
8749 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
8750 SelectionDAG &DAG, unsigned Flags) {
8751 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
8752 Flags);
8753}
8754
8755 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
8756 SelectionDAG &DAG, unsigned Flags) {
8757 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
8758 N->getOffset(), Flags);
8759}
8760
8761 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
8762 SelectionDAG &DAG, unsigned Flags) {
8763 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
8764}
8765
8766 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
8767 EVT Ty, SelectionDAG &DAG) {
8768 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
8769 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8770 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8771 return DAG.getLoad(
8772 Ty, DL, DAG.getEntryNode(), LC,
8774}
8775
8776 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
8777 EVT Ty, SelectionDAG &DAG) {
8778 RISCVConstantPoolValue *CPV =
8779 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
8780 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8781 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8782 return DAG.getLoad(
8783 Ty, DL, DAG.getEntryNode(), LC,
8785}
8786
8787template <class NodeTy>
8788SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
8789 bool IsLocal, bool IsExternWeak) const {
8790 SDLoc DL(N);
8791 EVT Ty = getPointerTy(DAG.getDataLayout());
8792
8793 // When HWASAN is used and tagging of global variables is enabled
8794 // they should be accessed via the GOT, since the tagged address of a global
8795 // is incompatible with existing code models. This also applies to non-pic
8796 // mode.
8797 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
8798 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8799 if (IsLocal && !Subtarget.allowTaggedGlobals())
8800 // Use PC-relative addressing to access the symbol. This generates the
8801 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
8802 // %pcrel_lo(auipc)).
8803 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8804
8805 // Use PC-relative addressing to access the GOT for this symbol, then load
8806 // the address from the GOT. This generates the pattern (PseudoLGA sym),
8807 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8808 SDValue Load =
8809 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8810 MachineFunction &MF = DAG.getMachineFunction();
8811 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8812 MachinePointerInfo::getGOT(MF),
8813 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
8814 MachineMemOperand::MODereferenceable,
8815 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8816 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8817 return Load;
8818 }
8819
8820 switch (getTargetMachine().getCodeModel()) {
8821 default:
8822 reportFatalUsageError("Unsupported code model for lowering");
8823 case CodeModel::Small: {
8824 // Generate a sequence for accessing addresses within the first 2 GiB of
8825 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
8826 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
8827 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
8828 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8829 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
8830 }
8831 case CodeModel::Medium: {
8832 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8833 if (IsExternWeak) {
8834 // An extern weak symbol may be undefined, i.e. have value 0, which may
8835 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
8836 // symbol. This generates the pattern (PseudoLGA sym), which expands to
8837 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8838 SDValue Load =
8839 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8840 MachineFunction &MF = DAG.getMachineFunction();
8841 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8842 MachinePointerInfo::getGOT(MF),
8843 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
8844 MachineMemOperand::MODereferenceable,
8845 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8846 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8847 return Load;
8848 }
8849
8850 // Generate a sequence for accessing addresses within any 2GiB range within
8851 // the address space. This generates the pattern (PseudoLLA sym), which
8852 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
8853 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8854 }
8855 case CodeModel::Large: {
8856 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8857 return getLargeGlobalAddress(G, DL, Ty, DAG);
8858
8859 // Use PC-relative addressing for other node types.
8860 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8861 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8862 }
8863 }
8864}
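// Summary of the cases above: CodeModel::Small uses lui+addi (%hi/%lo),
// CodeModel::Medium uses auipc+addi via PseudoLLA, and PIC, HWASAN-tagged, or
// extern-weak symbols load their address from the GOT via PseudoLGA.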
8865
8866SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8867 SelectionDAG &DAG) const {
8868 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8869 assert(N->getOffset() == 0 && "unexpected offset in global node");
8870 const GlobalValue *GV = N->getGlobal();
8871 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8872}
8873
8874SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8875 SelectionDAG &DAG) const {
8876 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8877
8878 return getAddr(N, DAG);
8879}
8880
8881SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8882 SelectionDAG &DAG) const {
8883 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8884
8885 return getAddr(N, DAG);
8886}
8887
8888SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8889 SelectionDAG &DAG) const {
8890 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8891
8892 return getAddr(N, DAG);
8893}
8894
8895SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8896 SelectionDAG &DAG,
8897 bool UseGOT) const {
8898 SDLoc DL(N);
8899 EVT Ty = getPointerTy(DAG.getDataLayout());
8900 const GlobalValue *GV = N->getGlobal();
8901 MVT XLenVT = Subtarget.getXLenVT();
8902
8903 if (UseGOT) {
8904 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8905 // load the address from the GOT and add the thread pointer. This generates
8906 // the pattern (PseudoLA_TLS_IE sym), which expands to
8907 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
8908 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8909 SDValue Load =
8910 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
8911 MachineFunction &MF = DAG.getMachineFunction();
8912 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8913 MachinePointerInfo::getGOT(MF),
8914 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
8915 MachineMemOperand::MODereferenceable,
8916 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8917 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8918
8919 // Add the thread pointer.
8920 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8921 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
8922 }
8923
8924 // Generate a sequence for accessing the address relative to the thread
8925 // pointer, with the appropriate adjustment for the thread pointer offset.
8926 // This generates the pattern
8927 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
8928 SDValue AddrHi =
8929 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
8930 SDValue AddrAdd =
8931 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
8932 SDValue AddrLo =
8933 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
8934
8935 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8936 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8937 SDValue MNAdd =
8938 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
8939 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
8940}
8941
8942SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
8943 SelectionDAG &DAG) const {
8944 SDLoc DL(N);
8945 EVT Ty = getPointerTy(DAG.getDataLayout());
8946 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
8947 const GlobalValue *GV = N->getGlobal();
8948
8949 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8950 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
8951 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
8952 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8953 SDValue Load =
8954 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
8955
8956 // Prepare argument list to generate call.
8957 ArgListTy Args;
8958 Args.emplace_back(Load, CallTy);
8959
8960 // Setup call to __tls_get_addr.
8961 TargetLowering::CallLoweringInfo CLI(DAG);
8962 CLI.setDebugLoc(DL)
8963 .setChain(DAG.getEntryNode())
8964 .setLibCallee(CallingConv::C, CallTy,
8965 DAG.getExternalSymbol("__tls_get_addr", Ty),
8966 std::move(Args));
8967
8968 return LowerCallTo(CLI).first;
8969}
8970
8971SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8972 SelectionDAG &DAG) const {
8973 SDLoc DL(N);
8974 EVT Ty = getPointerTy(DAG.getDataLayout());
8975 const GlobalValue *GV = N->getGlobal();
8976
8977 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8978 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8979 //
8980 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
8981 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8982 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
8983 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
8984 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8985 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
8986}
8987
8988SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
8989 SelectionDAG &DAG) const {
8990 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8991 assert(N->getOffset() == 0 && "unexpected offset in global node");
8992
8993 if (DAG.getTarget().useEmulatedTLS())
8994 return LowerToTLSEmulatedModel(N, DAG);
8995
8996 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
8997
8998 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
8999 CallingConv::GHC)
9000 reportFatalUsageError("In GHC calling convention TLS is not supported");
9001
9002 SDValue Addr;
9003 switch (Model) {
9004 case TLSModel::LocalExec:
9005 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9006 break;
9007 case TLSModel::InitialExec:
9008 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9009 break;
9010 case TLSModel::LocalDynamic:
9011 case TLSModel::GeneralDynamic:
9012 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9013 : getDynamicTLSAddr(N, DAG);
9014 break;
9015 }
9016
9017 return Addr;
9018}
9019
9020// Return true if Val is equal to (setcc LHS, RHS, CC).
9021// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9022// Otherwise, return std::nullopt.
9023static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9024 ISD::CondCode CC, SDValue Val) {
9025 assert(Val->getOpcode() == ISD::SETCC);
9026 SDValue LHS2 = Val.getOperand(0);
9027 SDValue RHS2 = Val.getOperand(1);
9028 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9029
9030 if (LHS == LHS2 && RHS == RHS2) {
9031 if (CC == CC2)
9032 return true;
9033 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9034 return false;
9035 } else if (LHS == RHS2 && RHS == LHS2) {
9036 CC2 = ISD::getSetCCSwappedOperands(CC2);
9037 if (CC == CC2)
9038 return true;
9039 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9040 return false;
9041 }
9042
9043 return std::nullopt;
9044}
9045
9046 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
9047 const RISCVSubtarget &Subtarget) {
9048 SDValue CondV = N->getOperand(0);
9049 SDValue TrueV = N->getOperand(1);
9050 SDValue FalseV = N->getOperand(2);
9051 MVT VT = N->getSimpleValueType(0);
9052 SDLoc DL(N);
9053
9054 if (!Subtarget.hasConditionalMoveFusion()) {
9055 // (select c, -1, y) -> -c | y
9056 if (isAllOnesConstant(TrueV)) {
9057 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9058 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9059 }
9060 // (select c, y, -1) -> (c-1) | y
9061 if (isAllOnesConstant(FalseV)) {
9062 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9063 DAG.getAllOnesConstant(DL, VT));
9064 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9065 }
9066
9067 // (select c, 0, y) -> (c-1) & y
9068 if (isNullConstant(TrueV)) {
9069 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9070 DAG.getAllOnesConstant(DL, VT));
9071 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9072 }
9073 // (select c, y, 0) -> -c & y
9074 if (isNullConstant(FalseV)) {
9075 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9076 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9077 }
9078 }
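  // These folds rely on the scalar condition being 0 or 1 (an i1 promoted to
  // XLenVT): e.g. for (select c, -1, y), -c is all-ones when c == 1 and zero
  // when c == 0, so (-c | y) yields -1 or y respectively.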
9079
9080 // select c, ~x, x --> xor -c, x
9081 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9082 const APInt &TrueVal = TrueV->getAsAPIntVal();
9083 const APInt &FalseVal = FalseV->getAsAPIntVal();
9084 if (~TrueVal == FalseVal) {
9085 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9086 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9087 }
9088 }
9089
9090 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9091 // when both truev and falsev are also setcc.
9092 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9093 FalseV.getOpcode() == ISD::SETCC) {
9094 SDValue LHS = CondV.getOperand(0);
9095 SDValue RHS = CondV.getOperand(1);
9096 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9097
9098 // (select x, x, y) -> x | y
9099 // (select !x, x, y) -> x & y
9100 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9101 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9102 DAG.getFreeze(FalseV));
9103 }
9104 // (select x, y, x) -> x & y
9105 // (select !x, y, x) -> x | y
9106 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9107 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9108 DAG.getFreeze(TrueV), FalseV);
9109 }
9110 }
9111
9112 return SDValue();
9113}
9114
9115// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9116// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9117// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
9118// being `0` or `-1`. In such cases we can replace `select` with `and`.
9119// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9120// than `c0`?
9122 static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
9123 const RISCVSubtarget &Subtarget) {
9124 if (Subtarget.hasShortForwardBranchOpt())
9125 return SDValue();
9126
9127 unsigned SelOpNo = 0;
9128 SDValue Sel = BO->getOperand(0);
9129 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9130 SelOpNo = 1;
9131 Sel = BO->getOperand(1);
9132 }
9133
9134 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9135 return SDValue();
9136
9137 unsigned ConstSelOpNo = 1;
9138 unsigned OtherSelOpNo = 2;
9139 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9140 ConstSelOpNo = 2;
9141 OtherSelOpNo = 1;
9142 }
9143 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9144 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9145 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9146 return SDValue();
9147
9148 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9149 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9150 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9151 return SDValue();
9152
9153 SDLoc DL(Sel);
9154 EVT VT = BO->getValueType(0);
9155
9156 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9157 if (SelOpNo == 1)
9158 std::swap(NewConstOps[0], NewConstOps[1]);
9159
9160 SDValue NewConstOp =
9161 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9162 if (!NewConstOp)
9163 return SDValue();
9164
9165 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9166 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9167 return SDValue();
9168
9169 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9170 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9171 if (SelOpNo == 1)
9172 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9173 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9174
9175 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9176 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9177 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9178}
9179
9180SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9181 SDValue CondV = Op.getOperand(0);
9182 SDValue TrueV = Op.getOperand(1);
9183 SDValue FalseV = Op.getOperand(2);
9184 SDLoc DL(Op);
9185 MVT VT = Op.getSimpleValueType();
9186 MVT XLenVT = Subtarget.getXLenVT();
9187
9188 // Lower vector SELECTs to VSELECTs by splatting the condition.
9189 if (VT.isVector()) {
9190 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9191 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9192 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9193 }
9194
9195 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9196 // nodes to implement the SELECT. Performing the lowering here allows for
9197 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9198 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9199 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
9200 VT.isScalarInteger()) {
9201 // (select c, t, 0) -> (czero_eqz t, c)
9202 if (isNullConstant(FalseV))
9203 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9204 // (select c, 0, f) -> (czero_nez f, c)
9205 if (isNullConstant(TrueV))
9206 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9207
9208 // Check to see if a given operation is a 'NOT', if so return the negated
9209 // operand
9210 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9211 using namespace llvm::SDPatternMatch;
9212 SDValue Xor;
9213 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9214 return Xor;
9215 }
9216 return std::nullopt;
9217 };
9218 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9219 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9220 if (TrueV.getOpcode() == ISD::AND &&
9221 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9222 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9223 ? getNotOperand(TrueV.getOperand(1))
9224 : getNotOperand(TrueV.getOperand(0));
9225 if (NotOperand) {
9226 SDValue CMOV =
9227 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9228 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9229 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
9230 }
9231 return DAG.getNode(
9232 ISD::OR, DL, VT, TrueV,
9233 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
9234 }
9235
9236 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
9237 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
9238 if (FalseV.getOpcode() == ISD::AND &&
9239 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
9240 auto NotOperand = (FalseV.getOperand(0) == TrueV)
9241 ? getNotOperand(FalseV.getOperand(1))
9242 : getNotOperand(FalseV.getOperand(0));
9243 if (NotOperand) {
9244 SDValue CMOV =
9245 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
9246 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9247 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
9248 }
9249 return DAG.getNode(
9250 ISD::OR, DL, VT, FalseV,
9251 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
9252 }
9253
9254 // Try some other optimizations before falling back to generic lowering.
9255 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
9256 return V;
9257
9258 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
9259 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
9260 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9261 const APInt &TrueVal = TrueV->getAsAPIntVal();
9262 const APInt &FalseVal = FalseV->getAsAPIntVal();
9263
9264 // Prefer these over Zicond to avoid materializing an immediate:
9265 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
9266 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
9267 if (CondV.getOpcode() == ISD::SETCC &&
9268 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
9269 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9270 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
9271 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
9272 int64_t TrueImm = TrueVal.getSExtValue();
9273 int64_t FalseImm = FalseVal.getSExtValue();
9274 if (CCVal == ISD::SETGT)
9275 std::swap(TrueImm, FalseImm);
9276 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
9277 isInt<12>(TrueImm - FalseImm)) {
9278 SDValue SRA =
9279 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
9280 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
9281 SDValue AND =
9282 DAG.getNode(ISD::AND, DL, VT, SRA,
9283 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
9284 return DAG.getNode(ISD::ADD, DL, VT, AND,
9285 DAG.getSignedConstant(FalseImm, DL, VT));
9286 }
9287 }
9288 }
9289
9290 // Use SHL/ADDI (and possibly XORI) to avoid having to materialize
9291 // a constant in a register.
9292 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
9293 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
9294 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9295 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
9296 }
9297 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
9298 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
9299 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
9300 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9301 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
9302 }
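    // Example: (select c, 9, 1) has TrueVal - FalseVal = 8 = 2^3, so it lowers
    // to (add 1, (shl c, 3)), giving 9 when c is 1 and 1 when c is 0.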
9303
9304 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
9305 const int DeltaCost = RISCVMatInt::getIntMatCost(
9306 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9307 // Does the addend fold into an ADDI
9308 if (Addend.isSignedIntN(12))
9309 return DeltaCost;
9310 const int AddendCost = RISCVMatInt::getIntMatCost(
9311 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9312 return AddendCost + DeltaCost;
9313 };
9314 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
9315 getCost(TrueVal - FalseVal, FalseVal);
9316 SDValue LHSVal = DAG.getConstant(
9317 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
9318 SDValue CMOV =
9319 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9320 DL, VT, LHSVal, CondV);
9321 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
9322 }
9323
9324 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
9325 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
9326 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
9327 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
9328 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
9329 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
9330 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
9331 // Fall back to XORI if Const == -0x800
9332 if (RawConstVal == -0x800) {
9333 SDValue XorOp = DAG.getNode(ISD::XOR, DL, VT, RegV, ConstVal);
9334 SDValue CMOV =
9335 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9336 DL, VT, XorOp, CondV);
9337 return DAG.getNode(ISD::XOR, DL, VT, CMOV, ConstVal);
9338 }
9339 // Efficient only if the constant and its negation fit into `ADDI`
9340 // Prefer Add/Sub over Xor since they can be compressed for small immediates.
9341 if (isInt<12>(RawConstVal)) {
9342 SDValue SubOp = DAG.getNode(ISD::SUB, DL, VT, RegV, ConstVal);
9343 SDValue CMOV =
9344 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9345 DL, VT, SubOp, CondV);
9346 return DAG.getNode(ISD::ADD, DL, VT, CMOV, ConstVal);
9347 }
9348 }
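    // Example: (select c, 5, x) takes the CZERO_NEZ form
    // (add (czero_nez (sub x, 5), c), 5): a nonzero c zeroes the subtraction
    // and leaves 5, while c == 0 restores x.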
9349
9350 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
9351 // Unless we have the short forward branch optimization.
9352 if (!Subtarget.hasConditionalMoveFusion())
9353 return DAG.getNode(
9354 ISD::OR, DL, VT,
9355 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
9356 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
9357 Op->getFlags());
9358 }
9359
9360 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
9361 return V;
9362
9363 if (Op.hasOneUse()) {
9364 unsigned UseOpc = Op->user_begin()->getOpcode();
9365 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
9366 SDNode *BinOp = *Op->user_begin();
9367 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
9368 DAG, Subtarget)) {
9369 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
9370 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
9371 // may return a constant node and cause a crash in lowerSELECT.
9372 if (NewSel.getOpcode() == ISD::SELECT)
9373 return lowerSELECT(NewSel, DAG);
9374 return NewSel;
9375 }
9376 }
9377 }
9378
9379 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
9380 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
9381 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
9382 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
9383 if (FPTV && FPFV) {
9384 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
9385 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
9386 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
9387 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
9388 DAG.getConstant(1, DL, XLenVT));
9389 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
9390 }
9391 }
9392
9393 // If the condition is not an integer SETCC which operates on XLenVT, we need
9394 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
9395 // (select condv, truev, falsev)
9396 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
9397 if (CondV.getOpcode() != ISD::SETCC ||
9398 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
9399 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9400 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
9401
9402 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
9403
9404 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9405 }
9406
9407 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
9408 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
9409 // advantage of the integer compare+branch instructions. i.e.:
9410 // (select (setcc lhs, rhs, cc), truev, falsev)
9411 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
9412 SDValue LHS = CondV.getOperand(0);
9413 SDValue RHS = CondV.getOperand(1);
9414 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9415
9416 // Special case for a select of 2 constants that have a difference of 1.
9417 // Normally this is done by DAGCombine, but if the select is introduced by
9418 // type legalization or op legalization, we miss it. Restricting to SETLT
9419 // case for now because that is what signed saturating add/sub need.
9420 // FIXME: We don't need the condition to be SETLT or even a SETCC,
9421 // but we would probably want to swap the true/false values if the condition
9422 // is SETGE/SETLE to avoid an XORI.
9423 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
9424 CCVal == ISD::SETLT) {
9425 const APInt &TrueVal = TrueV->getAsAPIntVal();
9426 const APInt &FalseVal = FalseV->getAsAPIntVal();
9427 if (TrueVal - 1 == FalseVal)
9428 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
9429 if (TrueVal + 1 == FalseVal)
9430 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
9431 }
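  // For example, (select (setlt a, b), 1, 0) is simply the setcc itself, and
  // (select (setlt a, b), 0, 1) becomes (sub 1, (setlt a, b)).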
9432
9433 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9434 // 1 < x ? x : 1 -> 0 < x ? x : 1
9435 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
9436 RHS == TrueV && LHS == FalseV) {
9437 LHS = DAG.getConstant(0, DL, VT);
9438 // 0 <u x is the same as x != 0.
9439 if (CCVal == ISD::SETULT) {
9440 std::swap(LHS, RHS);
9441 CCVal = ISD::SETNE;
9442 }
9443 }
9444
9445 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
9446 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
9447 RHS == FalseV) {
9448 RHS = DAG.getConstant(0, DL, VT);
9449 }
9450
9451 SDValue TargetCC = DAG.getCondCode(CCVal);
9452
9453 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
9454 // (select (setcc lhs, rhs, CC), constant, falsev)
9455 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
9456 std::swap(TrueV, FalseV);
9457 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
9458 }
9459
9460 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
9461 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9462}
9463
9464SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
9465 SDValue CondV = Op.getOperand(1);
9466 SDLoc DL(Op);
9467 MVT XLenVT = Subtarget.getXLenVT();
9468
9469 if (CondV.getOpcode() == ISD::SETCC &&
9470 CondV.getOperand(0).getValueType() == XLenVT) {
9471 SDValue LHS = CondV.getOperand(0);
9472 SDValue RHS = CondV.getOperand(1);
9473 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9474
9475 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9476
9477 SDValue TargetCC = DAG.getCondCode(CCVal);
9478 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9479 LHS, RHS, TargetCC, Op.getOperand(2));
9480 }
9481
9482 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9483 CondV, DAG.getConstant(0, DL, XLenVT),
9484 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
9485}
9486
9487SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
9488 MachineFunction &MF = DAG.getMachineFunction();
9489 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
9490
9491 SDLoc DL(Op);
9492 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
9493 getPointerTy(MF.getDataLayout()));
9494
9495 // vastart just stores the address of the VarArgsFrameIndex slot into the
9496 // memory location argument.
9497 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
9498 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
9499 MachinePointerInfo(SV));
9500}
9501
9502SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
9503 SelectionDAG &DAG) const {
9504 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9505 MachineFunction &MF = DAG.getMachineFunction();
9506 MachineFrameInfo &MFI = MF.getFrameInfo();
9507 MFI.setFrameAddressIsTaken(true);
9508 Register FrameReg = RI.getFrameRegister(MF);
9509 int XLenInBytes = Subtarget.getXLen() / 8;
9510
9511 EVT VT = Op.getValueType();
9512 SDLoc DL(Op);
9513 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
9514 unsigned Depth = Op.getConstantOperandVal(0);
9515 while (Depth--) {
9516 int Offset = -(XLenInBytes * 2);
9517 SDValue Ptr = DAG.getNode(
9518 ISD::ADD, DL, VT, FrameAddr,
9519 DAG.getSignedConstant(Offset, DL, VT));
9520 FrameAddr =
9521 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
9522 }
9523 return FrameAddr;
9524}
9525
9526SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
9527 SelectionDAG &DAG) const {
9528 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9529 MachineFunction &MF = DAG.getMachineFunction();
9530 MachineFrameInfo &MFI = MF.getFrameInfo();
9531 MFI.setReturnAddressIsTaken(true);
9532 MVT XLenVT = Subtarget.getXLenVT();
9533 int XLenInBytes = Subtarget.getXLen() / 8;
9534
9535 EVT VT = Op.getValueType();
9536 SDLoc DL(Op);
9537 unsigned Depth = Op.getConstantOperandVal(0);
9538 if (Depth) {
9539 int Off = -XLenInBytes;
9540 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
9541 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
9542 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
9543 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
9544 MachinePointerInfo());
9545 }
9546
9547 // Return the value of the return address register, marking it an implicit
9548 // live-in.
9549 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
9550 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
9551}
9552
9553SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
9554 SelectionDAG &DAG) const {
9555 SDLoc DL(Op);
9556 SDValue Lo = Op.getOperand(0);
9557 SDValue Hi = Op.getOperand(1);
9558 SDValue Shamt = Op.getOperand(2);
9559 EVT VT = Lo.getValueType();
9560
9561 // if Shamt-XLEN < 0: // Shamt < XLEN
9562 // Lo = Lo << Shamt
9563 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
9564 // else:
9565 // Lo = 0
9566 // Hi = Lo << (Shamt-XLEN)
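  // Example with XLEN = 32: Shamt = 4 gives Lo = Lo << 4 and
  // Hi = (Hi << 4) | (Lo >>u 28), while Shamt = 40 gives Lo = 0 and
  // Hi = Lo << 8.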
9567
9568 SDValue Zero = DAG.getConstant(0, DL, VT);
9569 SDValue One = DAG.getConstant(1, DL, VT);
9570 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9571 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9572 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9573 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9574
9575 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
9576 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
9577 SDValue ShiftRightLo =
9578 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
9579 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
9580 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
9581 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
9582
9583 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9584
9585 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
9586 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9587
9588 SDValue Parts[2] = {Lo, Hi};
9589 return DAG.getMergeValues(Parts, DL);
9590}
9591
9592SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
9593 bool IsSRA) const {
9594 SDLoc DL(Op);
9595 SDValue Lo = Op.getOperand(0);
9596 SDValue Hi = Op.getOperand(1);
9597 SDValue Shamt = Op.getOperand(2);
9598 EVT VT = Lo.getValueType();
9599
9600 // SRA expansion:
9601 // if Shamt-XLEN < 0: // Shamt < XLEN
9602 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9603 // Hi = Hi >>s Shamt
9604 // else:
9605 // Lo = Hi >>s (Shamt-XLEN);
9606 // Hi = Hi >>s (XLEN-1)
9607 //
9608 // SRL expansion:
9609 // if Shamt-XLEN < 0: // Shamt < XLEN
9610 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9611 // Hi = Hi >>u Shamt
9612 // else:
9613 // Lo = Hi >>u (Shamt-XLEN);
9614 // Hi = 0;
9615
9616 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
9617
9618 SDValue Zero = DAG.getConstant(0, DL, VT);
9619 SDValue One = DAG.getConstant(1, DL, VT);
9620 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9621 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9622 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9623 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9624
9625 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
9626 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
9627 SDValue ShiftLeftHi =
9628 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
9629 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
9630 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
9631 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
9632 SDValue HiFalse =
9633 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
9634
9635 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9636
9637 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
9638 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9639
9640 SDValue Parts[2] = {Lo, Hi};
9641 return DAG.getMergeValues(Parts, DL);
9642}
9643
9644// Lower splats of i1 types to SETCC. For each mask vector type, we have a
9645// legal equivalently-sized i8 type, so we can use that as a go-between.
9646SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
9647 SelectionDAG &DAG) const {
9648 SDLoc DL(Op);
9649 MVT VT = Op.getSimpleValueType();
9650 SDValue SplatVal = Op.getOperand(0);
9651 // All-zeros or all-ones splats are handled specially.
9652 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
9653 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9654 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
9655 }
9656 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
9657 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9658 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
9659 }
9660 MVT InterVT = VT.changeVectorElementType(MVT::i8);
9661 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
9662 DAG.getConstant(1, DL, SplatVal.getValueType()));
9663 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
9664 SDValue Zero = DAG.getConstant(0, DL, InterVT);
9665 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
9666}
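// For example, splatting a constant true becomes a single vmset.m, while
// splatting a variable i1 value x lowers to (setcc (and (splat x), 1), 0, ne)
// through the equally sized i8 vector type.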
9667
9668// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
9669// illegal (currently only vXi64 RV32).
9670// FIXME: We could also catch non-constant sign-extended i32 values and lower
9671// them to VMV_V_X_VL.
9672SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
9673 SelectionDAG &DAG) const {
9674 SDLoc DL(Op);
9675 MVT VecVT = Op.getSimpleValueType();
9676 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
9677 "Unexpected SPLAT_VECTOR_PARTS lowering");
9678
9679 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
9680 SDValue Lo = Op.getOperand(0);
9681 SDValue Hi = Op.getOperand(1);
9682
9683 MVT ContainerVT = VecVT;
9684 if (VecVT.isFixedLengthVector())
9685 ContainerVT = getContainerForFixedLengthVector(VecVT);
9686
9687 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9688
9689 SDValue Res =
9690 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
9691
9692 if (VecVT.isFixedLengthVector())
9693 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
9694
9695 return Res;
9696}
9697
9698// Custom-lower extensions from mask vectors by using a vselect either with 1
9699// for zero/any-extension or -1 for sign-extension:
9700// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
9701// Note that any-extension is lowered identically to zero-extension.
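// Illustrative example: (vXi32 = sext vXi1 %vm) becomes
// (vselect %vm, splat(-1), splat(0)), which typically selects to a vmv.v.i
// of 0 followed by a vmerge.vim with -1.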
9702SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
9703 int64_t ExtTrueVal) const {
9704 SDLoc DL(Op);
9705 MVT VecVT = Op.getSimpleValueType();
9706 SDValue Src = Op.getOperand(0);
9707 // Only custom-lower extensions from mask types
9708 assert(Src.getValueType().isVector() &&
9709 Src.getValueType().getVectorElementType() == MVT::i1);
9710
9711 if (VecVT.isScalableVector()) {
9712 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
9713 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
9714 if (Src.getOpcode() == ISD::XOR &&
9715 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
9716 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
9717 SplatTrueVal);
9718 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
9719 }
9720
9721 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
9722 MVT I1ContainerVT =
9723 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
9724
9725 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
9726
9727 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9728
9729 MVT XLenVT = Subtarget.getXLenVT();
9730 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
9731 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
9732
9733 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9734 SDValue Xor = Src.getOperand(0);
9735 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
9736 SDValue ScalableOnes = Xor.getOperand(1);
9737 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
9738 ScalableOnes.getOperand(0).isUndef() &&
9740              ScalableOnes.getOperand(1).getNode())) {
9739          ISD::isConstantSplatVectorAllOnes(
9741 CC = Xor.getOperand(0);
9742 std::swap(SplatZero, SplatTrueVal);
9743 }
9744 }
9745 }
9746
9747 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9748 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9749 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9750 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
9751 SDValue Select =
9752 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
9753 SplatZero, DAG.getUNDEF(ContainerVT), VL);
9754
9755 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
9756}
9757
9758// Custom-lower truncations from vectors to mask vectors by using a mask and a
9759// setcc operation:
9760// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
9761SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
9762 SelectionDAG &DAG) const {
9763 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
9764 SDLoc DL(Op);
9765 EVT MaskVT = Op.getValueType();
9766 // Only expect to custom-lower truncations to mask types
9767 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
9768 "Unexpected type for vector mask lowering");
9769 SDValue Src = Op.getOperand(0);
9770 MVT VecVT = Src.getSimpleValueType();
9771 SDValue Mask, VL;
9772 if (IsVPTrunc) {
9773 Mask = Op.getOperand(1);
9774 VL = Op.getOperand(2);
9775 }
9776 // If this is a fixed vector, we need to convert it to a scalable vector.
9777 MVT ContainerVT = VecVT;
9778
9779 if (VecVT.isFixedLengthVector()) {
9780 ContainerVT = getContainerForFixedLengthVector(VecVT);
9781 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9782 if (IsVPTrunc) {
9783 MVT MaskContainerVT =
9784 getContainerForFixedLengthVector(Mask.getSimpleValueType());
9785 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
9786 }
9787 }
9788
9789 if (!IsVPTrunc) {
9790 std::tie(Mask, VL) =
9791 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9792 }
9793
9794 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
9795 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
9796
9797 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9798 DAG.getUNDEF(ContainerVT), SplatOne, VL);
9799 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9800 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9801
9802 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
9803 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
9804 DAG.getUNDEF(ContainerVT), Mask, VL);
9805 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
9806 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
9807 DAG.getUNDEF(MaskContainerVT), Mask, VL});
9808 if (MaskVT.isFixedLengthVector())
9809 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
9810 return Trunc;
9811}
9812
9813SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
9814 SelectionDAG &DAG) const {
9815 unsigned Opc = Op.getOpcode();
9816 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
9817 SDLoc DL(Op);
9818
9819 MVT VT = Op.getSimpleValueType();
9820 // Only custom-lower vector truncates
9821 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9822
9823 // Truncates to mask types are handled differently
9824 if (VT.getVectorElementType() == MVT::i1)
9825 return lowerVectorMaskTruncLike(Op, DAG);
9826
9827 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
9828 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
9829 // truncate by one power of two at a time.
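// For example, a vXi64 -> vXi8 truncate is emitted as the chain
//   i64 -> i32 -> i16 -> i8
// i.e. three TRUNCATE_VECTOR_VL nodes, each typically selecting to a
// narrowing shift (vnsrl.wi with a zero shift amount).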
9830 MVT DstEltVT = VT.getVectorElementType();
9831
9832 SDValue Src = Op.getOperand(0);
9833 MVT SrcVT = Src.getSimpleValueType();
9834 MVT SrcEltVT = SrcVT.getVectorElementType();
9835
9836 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
9837 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
9838 "Unexpected vector truncate lowering");
9839
9840 MVT ContainerVT = SrcVT;
9841 SDValue Mask, VL;
9842 if (IsVPTrunc) {
9843 Mask = Op.getOperand(1);
9844 VL = Op.getOperand(2);
9845 }
9846 if (SrcVT.isFixedLengthVector()) {
9847 ContainerVT = getContainerForFixedLengthVector(SrcVT);
9848 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9849 if (IsVPTrunc) {
9850 MVT MaskVT = getMaskTypeFor(ContainerVT);
9851 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9852 }
9853 }
9854
9855 SDValue Result = Src;
9856 if (!IsVPTrunc) {
9857 std::tie(Mask, VL) =
9858 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9859 }
9860
9861 unsigned NewOpc;
9862  if (Opc == ISD::TRUNCATE_SSAT_S)
9863    NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
9864 else if (Opc == ISD::TRUNCATE_USAT_U)
9865 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
9866 else
9867 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
9868
9869 do {
9870 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
9871 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
9872 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
9873 } while (SrcEltVT != DstEltVT);
9874
9875 if (SrcVT.isFixedLengthVector())
9876 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9877
9878 return Result;
9879}
9880
9881SDValue
9882RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
9883 SelectionDAG &DAG) const {
9884 SDLoc DL(Op);
9885 SDValue Chain = Op.getOperand(0);
9886 SDValue Src = Op.getOperand(1);
9887 MVT VT = Op.getSimpleValueType();
9888 MVT SrcVT = Src.getSimpleValueType();
9889 MVT ContainerVT = VT;
9890 if (VT.isFixedLengthVector()) {
9891 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9892 ContainerVT =
9893 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9894 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9895 }
9896
9897 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9898
9899  // RVV can only widen/truncate fp to types double/half the size of the source.
9900 if ((VT.getVectorElementType() == MVT::f64 &&
9901 (SrcVT.getVectorElementType() == MVT::f16 ||
9902 SrcVT.getVectorElementType() == MVT::bf16)) ||
9903 ((VT.getVectorElementType() == MVT::f16 ||
9904 VT.getVectorElementType() == MVT::bf16) &&
9905 SrcVT.getVectorElementType() == MVT::f64)) {
9906    // To avoid double rounding, the intermediate rounding should be round-to-odd.
9907 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
9908 ? RISCVISD::STRICT_FP_EXTEND_VL
9909 : RISCVISD::STRICT_VFNCVT_ROD_VL;
9910 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9911 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
9912 Chain, Src, Mask, VL);
9913 Chain = Src.getValue(1);
9914 }
9915
9916 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
9917 ? RISCVISD::STRICT_FP_EXTEND_VL
9918 : RISCVISD::STRICT_FP_ROUND_VL;
9919 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
9920 Chain, Src, Mask, VL);
9921 if (VT.isFixedLengthVector()) {
9922    // StrictFP operations have two result values. Their lowered result should
9923    // have the same number of result values.
9924 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
9925 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
9926 }
9927 return Res;
9928}
9929
9930SDValue
9931RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
9932 SelectionDAG &DAG) const {
9933 bool IsVP =
9934 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
9935 bool IsExtend =
9936 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
9937  // RVV can only truncate fp to types half the size of the source. We
9938 // custom-lower f64->f16 rounds via RVV's round-to-odd float
9939 // conversion instruction.
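  // Illustrative example: an f64 -> f16 round is emitted as
  //   f64 --(VFNCVT_ROD, round-to-odd)--> f32 --(FP_ROUND_VL)--> f16
  // so that only the final narrowing step performs the user-visible rounding.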
9940 SDLoc DL(Op);
9941 MVT VT = Op.getSimpleValueType();
9942
9943 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9944
9945 SDValue Src = Op.getOperand(0);
9946 MVT SrcVT = Src.getSimpleValueType();
9947
9948 bool IsDirectExtend =
9949 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
9950 (SrcVT.getVectorElementType() != MVT::f16 &&
9951 SrcVT.getVectorElementType() != MVT::bf16));
9952 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
9953 VT.getVectorElementType() != MVT::bf16) ||
9954 SrcVT.getVectorElementType() != MVT::f64);
9955
9956 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
9957
9958 // We have regular SD node patterns for direct non-VL extends.
9959 if (VT.isScalableVector() && IsDirectConv && !IsVP)
9960 return Op;
9961
9962 // Prepare any fixed-length vector operands.
9963 MVT ContainerVT = VT;
9964 SDValue Mask, VL;
9965 if (IsVP) {
9966 Mask = Op.getOperand(1);
9967 VL = Op.getOperand(2);
9968 }
9969 if (VT.isFixedLengthVector()) {
9970 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9971 ContainerVT =
9972 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9973 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9974 if (IsVP) {
9975 MVT MaskVT = getMaskTypeFor(ContainerVT);
9976 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9977 }
9978 }
9979
9980 if (!IsVP)
9981 std::tie(Mask, VL) =
9982 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9983
9984 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
9985
9986 if (IsDirectConv) {
9987 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
9988 if (VT.isFixedLengthVector())
9989 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
9990 return Src;
9991 }
9992
9993 unsigned InterConvOpc =
9994 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
9995
9996 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9997 SDValue IntermediateConv =
9998 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
9999 SDValue Result =
10000 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
10001 if (VT.isFixedLengthVector())
10002 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10003 return Result;
10004}
10005
10006// Given a scalable vector type and an index into it, returns the type for the
10007// smallest subvector that the index fits in. This can be used to reduce LMUL
10008// for operations like vslidedown.
10009//
10010// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10011static std::optional<MVT>
10012getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10013 const RISCVSubtarget &Subtarget) {
10014 assert(VecVT.isScalableVector());
10015 const unsigned EltSize = VecVT.getScalarSizeInBits();
10016 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10017 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10018 MVT SmallerVT;
10019 if (MaxIdx < MinVLMAX)
10020 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10021 else if (MaxIdx < MinVLMAX * 2)
10022    SmallerVT =
10023        RISCVTargetLowering::getM1VT(VecVT).getDoubleNumVectorElementsVT();
10024  else if (MaxIdx < MinVLMAX * 4)
10025    SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10026                    .getDoubleNumVectorElementsVT()
10027                    .getDoubleNumVectorElementsVT();
10028 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10029 return std::nullopt;
10030 return SmallerVT;
10031}
10032
10033 static bool isValidVisniInsertExtractIndex(SDValue Idx) {
10034   auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10035 if (!IdxC || isNullConstant(Idx))
10036 return false;
10037 return isUInt<5>(IdxC->getZExtValue());
10038}
10039
10040// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10041// first position of a vector, and that vector is slid up to the insert index.
10042// By limiting the active vector length to index+1 and merging with the
10043// original vector (with an undisturbed tail policy for elements >= VL), we
10044// achieve the desired result of leaving all elements untouched except the one
10045// at VL-1, which is replaced with the desired value.
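// Illustrative sketch (assuming a simple integer case; the exact sequence
// depends on type and index): inserting a0 at index 2 of a v4i32 value is
// roughly
//   vsetivli zero, 3, e32, m1, tu, ma   ; VL = idx + 1, tail undisturbed
//   vmv.s.x  v9, a0                     ; value at element 0 of a temporary
//   vslideup.vi v8, v9, 2               ; slide it up to the insert index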
10046SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10047 SelectionDAG &DAG) const {
10048 SDLoc DL(Op);
10049 MVT VecVT = Op.getSimpleValueType();
10050 MVT XLenVT = Subtarget.getXLenVT();
10051 SDValue Vec = Op.getOperand(0);
10052 SDValue Val = Op.getOperand(1);
10053 MVT ValVT = Val.getSimpleValueType();
10054 SDValue Idx = Op.getOperand(2);
10055
10056 if (VecVT.getVectorElementType() == MVT::i1) {
10057 // FIXME: For now we just promote to an i8 vector and insert into that,
10058 // but this is probably not optimal.
10059 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10060 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10061 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10062 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10063 }
10064
10065 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10066 ValVT == MVT::bf16) {
10067 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10068 MVT IntVT = VecVT.changeTypeToInteger();
10069 SDValue IntInsert = DAG.getNode(
10070 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10071 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10072 return DAG.getBitcast(VecVT, IntInsert);
10073 }
10074
10075 MVT ContainerVT = VecVT;
10076 // If the operand is a fixed-length vector, convert to a scalable one.
10077 if (VecVT.isFixedLengthVector()) {
10078 ContainerVT = getContainerForFixedLengthVector(VecVT);
10079 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10080 }
10081
10082 // If we know the index we're going to insert at, we can shrink Vec so that
10083 // we're performing the scalar inserts and slideup on a smaller LMUL.
10084 SDValue OrigVec = Vec;
10085 std::optional<unsigned> AlignedIdx;
10086 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10087 const unsigned OrigIdx = IdxC->getZExtValue();
10088 // Do we know an upper bound on LMUL?
10089 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10090 DL, DAG, Subtarget)) {
10091 ContainerVT = *ShrunkVT;
10092 AlignedIdx = 0;
10093 }
10094
10095 // If we're compiling for an exact VLEN value, we can always perform
10096 // the insert in m1 as we can determine the register corresponding to
10097 // the index in the register group.
10098 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10099 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10100 EVT ElemVT = VecVT.getVectorElementType();
10101 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10102 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10103 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10104 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10105 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10106 ContainerVT = M1VT;
10107 }
10108
10109 if (AlignedIdx)
10110 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
10111 }
10112
10113 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
10114 // Even i64-element vectors on RV32 can be lowered without scalar
10115 // legalization if the most-significant 32 bits of the value are not affected
10116 // by the sign-extension of the lower 32 bits.
10117 // TODO: We could also catch sign extensions of a 32-bit value.
10118 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
10119 const auto *CVal = cast<ConstantSDNode>(Val);
10120 if (isInt<32>(CVal->getSExtValue())) {
10121 IsLegalInsert = true;
10122 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
10123 }
10124 }
10125
10126 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10127
10128 SDValue ValInVec;
10129
10130 if (IsLegalInsert) {
10131 unsigned Opc =
10132 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
10133 if (isNullConstant(Idx)) {
10134 if (!VecVT.isFloatingPoint())
10135 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10136 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
10137
10138 if (AlignedIdx)
10139 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10140 if (!VecVT.isFixedLengthVector())
10141 return Vec;
10142 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10143 }
10144
10145 // Use ri.vinsert.v.x if available.
10146 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
10147      isValidVisniInsertExtractIndex(Idx)) {
10148    // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
10149 SDValue PolicyOp =
10150        DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
10151    Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
10152 VL, PolicyOp);
10153 if (AlignedIdx)
10154 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10155 if (!VecVT.isFixedLengthVector())
10156 return Vec;
10157 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10158 }
10159
10160 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
10161 } else {
10162 // On RV32, i64-element vectors must be specially handled to place the
10163 // value at element 0, by using two vslide1down instructions in sequence on
10164 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
10165 // this.
10166 SDValue ValLo, ValHi;
10167 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
10168 MVT I32ContainerVT =
10169 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
10170 SDValue I32Mask =
10171 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
10172 // Limit the active VL to two.
10173 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
10174 // If the Idx is 0 we can insert directly into the vector.
10175 if (isNullConstant(Idx)) {
10176 // First slide in the lo value, then the hi in above it. We use slide1down
10177 // to avoid the register group overlap constraint of vslide1up.
10178 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10179 Vec, Vec, ValLo, I32Mask, InsertI64VL);
10180 // If the source vector is undef don't pass along the tail elements from
10181 // the previous slide1down.
10182 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
10183 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10184 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
10185 // Bitcast back to the right container type.
10186 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10187
10188 if (AlignedIdx)
10189 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
10190 if (!VecVT.isFixedLengthVector())
10191 return ValInVec;
10192 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
10193 }
10194
10195 // First slide in the lo value, then the hi in above it. We use slide1down
10196 // to avoid the register group overlap constraint of vslide1up.
10197 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10198 DAG.getUNDEF(I32ContainerVT),
10199 DAG.getUNDEF(I32ContainerVT), ValLo,
10200 I32Mask, InsertI64VL);
10201 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10202 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
10203 I32Mask, InsertI64VL);
10204 // Bitcast back to the right container type.
10205 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10206 }
10207
10208 // Now that the value is in a vector, slide it into position.
10209 SDValue InsertVL =
10210 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
10211
10212 // Use tail agnostic policy if Idx is the last index of Vec.
10213  unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10214  if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
10215      Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
10216    Policy = RISCVVType::TAIL_AGNOSTIC;
10217  SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
10218 Idx, Mask, InsertVL, Policy);
10219
10220 if (AlignedIdx)
10221 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
10222 if (!VecVT.isFixedLengthVector())
10223 return Slideup;
10224 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
10225}
10226
10227// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
10228// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
10229// types this is done using VMV_X_S to allow us to glean information about the
10230// sign bits of the result.
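// Illustrative sketch: extracting element 5 of a v8i32 value is roughly
//   vsetivli zero, 1, e32, m1, ta, ma   ; VL = 1
//   vslidedown.vi v8, v8, 5
//   vmv.x.s a0, v8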
10231SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
10232 SelectionDAG &DAG) const {
10233 SDLoc DL(Op);
10234 SDValue Idx = Op.getOperand(1);
10235 SDValue Vec = Op.getOperand(0);
10236 EVT EltVT = Op.getValueType();
10237 MVT VecVT = Vec.getSimpleValueType();
10238 MVT XLenVT = Subtarget.getXLenVT();
10239
10240 if (VecVT.getVectorElementType() == MVT::i1) {
10241 // Use vfirst.m to extract the first bit.
10242 if (isNullConstant(Idx)) {
10243 MVT ContainerVT = VecVT;
10244 if (VecVT.isFixedLengthVector()) {
10245 ContainerVT = getContainerForFixedLengthVector(VecVT);
10246 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10247 }
10248 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10249 SDValue Vfirst =
10250 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
10251 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
10252 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10253 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10254 }
10255 if (VecVT.isFixedLengthVector()) {
10256 unsigned NumElts = VecVT.getVectorNumElements();
10257 if (NumElts >= 8) {
10258 MVT WideEltVT;
10259 unsigned WidenVecLen;
10260 SDValue ExtractElementIdx;
10261 SDValue ExtractBitIdx;
10262 unsigned MaxEEW = Subtarget.getELen();
10263 MVT LargestEltVT = MVT::getIntegerVT(
10264 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
10265 if (NumElts <= LargestEltVT.getSizeInBits()) {
10266 assert(isPowerOf2_32(NumElts) &&
10267 "the number of elements should be power of 2");
10268 WideEltVT = MVT::getIntegerVT(NumElts);
10269 WidenVecLen = 1;
10270 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
10271 ExtractBitIdx = Idx;
10272 } else {
10273 WideEltVT = LargestEltVT;
10274 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
10275 // extract element index = index / element width
10276 ExtractElementIdx = DAG.getNode(
10277 ISD::SRL, DL, XLenVT, Idx,
10278 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
10279 // mask bit index = index % element width
10280 ExtractBitIdx = DAG.getNode(
10281 ISD::AND, DL, XLenVT, Idx,
10282 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
10283 }
10284 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
10285 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
10286 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
10287 Vec, ExtractElementIdx);
10288 // Extract the bit from GPR.
10289 SDValue ShiftRight =
10290 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
10291 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
10292 DAG.getConstant(1, DL, XLenVT));
10293 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10294 }
10295 }
10296 // Otherwise, promote to an i8 vector and extract from that.
10297 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10298 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10299 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
10300 }
10301
10302 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10303 EltVT == MVT::bf16) {
10304 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
10305 MVT IntVT = VecVT.changeTypeToInteger();
10306 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
10307 SDValue IntExtract =
10308 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
10309 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
10310 }
10311
10312 // If this is a fixed vector, we need to convert it to a scalable vector.
10313 MVT ContainerVT = VecVT;
10314 if (VecVT.isFixedLengthVector()) {
10315 ContainerVT = getContainerForFixedLengthVector(VecVT);
10316 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10317 }
10318
10319 // If we're compiling for an exact VLEN value and we have a known
10320 // constant index, we can always perform the extract in m1 (or
10321 // smaller) as we can determine the register corresponding to
10322 // the index in the register group.
10323 const auto VLen = Subtarget.getRealVLen();
10324 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10325 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
10326 MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10327 unsigned OrigIdx = IdxC->getZExtValue();
10328 EVT ElemVT = VecVT.getVectorElementType();
10329 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
10330 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10331 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10332 unsigned ExtractIdx =
10333 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10334 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
10335 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10336 ContainerVT = M1VT;
10337 }
10338
10339 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
10340 // contains our index.
10341 std::optional<uint64_t> MaxIdx;
10342 if (VecVT.isFixedLengthVector())
10343 MaxIdx = VecVT.getVectorNumElements() - 1;
10344 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
10345 MaxIdx = IdxC->getZExtValue();
10346 if (MaxIdx) {
10347 if (auto SmallerVT =
10348 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
10349 ContainerVT = *SmallerVT;
10350 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
10351 }
10352 }
10353
10354 // Use ri.vextract.x.v if available.
10355 // TODO: Avoid index 0 and just use the vmv.x.s
10356 if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
10357      isValidVisniInsertExtractIndex(Idx)) {
10358    SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
10359 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
10360 }
10361
10362 // If after narrowing, the required slide is still greater than LMUL2,
10363  // fall back to generic expansion and go through the stack. This is done
10364 // for a subtle reason: extracting *all* elements out of a vector is
10365 // widely expected to be linear in vector size, but because vslidedown
10366 // is linear in LMUL, performing N extracts using vslidedown becomes
10367 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
10368 // seems to have the same problem (the store is linear in LMUL), but the
10369 // generic expansion *memoizes* the store, and thus for many extracts of
10370 // the same vector we end up with one store and a bunch of loads.
10371 // TODO: We don't have the same code for insert_vector_elt because we
10372 // have BUILD_VECTOR and handle the degenerate case there. Should we
10373 // consider adding an inverse BUILD_VECTOR node?
10374 MVT LMUL2VT =
10375      RISCVTargetLowering::getM1VT(ContainerVT).getDoubleNumVectorElementsVT();
10376  if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
10377 return SDValue();
10378
10379 // If the index is 0, the vector is already in the right position.
10380 if (!isNullConstant(Idx)) {
10381 // Use a VL of 1 to avoid processing more elements than we need.
10382 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10383 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10384 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10385 }
10386
10387 if (!EltVT.isInteger()) {
10388 // Floating-point extracts are handled in TableGen.
10389 return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
10390 }
10391
10392 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10393 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
10394}
10395
10396// Some RVV intrinsics may claim that they want an integer operand to be
10397// promoted or expanded.
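// Illustrative example (intrinsic names abbreviated): an i8 scalar operand of
// a vadd.vx intrinsic is sign- or any-extended to XLenVT here so that isel
// can still match vadd.vx/vadd.vi, while an i64 scalar on RV32 (SEW > XLEN)
// is either truncated (if known sign-extended) or rebuilt through a splat,
// or a pair of SEW=32 slides for the vslide1up/vslide1down intrinsics.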
10398 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
10399                                            const RISCVSubtarget &Subtarget) {
10400 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
10401 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
10402 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
10403 "Unexpected opcode");
10404
10405 if (!Subtarget.hasVInstructions())
10406 return SDValue();
10407
10408 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10409 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10410 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10411
10412 SDLoc DL(Op);
10413
10414  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10415      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10416 if (!II || !II->hasScalarOperand())
10417 return SDValue();
10418
10419 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
10420 assert(SplatOp < Op.getNumOperands());
10421
10422  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
10423  SDValue &ScalarOp = Operands[SplatOp];
10424 MVT OpVT = ScalarOp.getSimpleValueType();
10425 MVT XLenVT = Subtarget.getXLenVT();
10426
10427 // If this isn't a scalar, or its type is XLenVT we're done.
10428 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10429 return SDValue();
10430
10431 // Simplest case is that the operand needs to be promoted to XLenVT.
10432 if (OpVT.bitsLT(XLenVT)) {
10433 // If the operand is a constant, sign extend to increase our chances
10434 // of being able to use a .vi instruction. ANY_EXTEND would become a
10435    // of being able to use a .vi instruction. ANY_EXTEND would become
10436    // a zero extend and the simm5 check in isel would fail.
10437 unsigned ExtOpc =
10438 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10439 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10440 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10441 }
10442
10443 // Use the previous operand to get the vXi64 VT. The result might be a mask
10444 // VT for compares. Using the previous operand assumes that the previous
10445 // operand will never have a smaller element size than a scalar operand and
10446 // that a widening operation never uses SEW=64.
10447 // NOTE: If this fails the below assert, we can probably just find the
10448 // element count from any operand or result and use it to construct the VT.
10449 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
10450 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
10451
10452 // The more complex case is when the scalar is larger than XLenVT.
10453 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
10454 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
10455
10456 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
10457 // instruction to sign-extend since SEW>XLEN.
10458 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
10459 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
10460 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10461 }
10462
10463 switch (IntNo) {
10464 case Intrinsic::riscv_vslide1up:
10465 case Intrinsic::riscv_vslide1down:
10466 case Intrinsic::riscv_vslide1up_mask:
10467 case Intrinsic::riscv_vslide1down_mask: {
10468 // We need to special case these when the scalar is larger than XLen.
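    // Sketch of the approach: reinterpret the i64-element vector as an
    // i32-element vector with double the VL, split the scalar into lo/hi
    // halves, and emit two SEW=32 slide1up/slide1down operations (hi then lo
    // for slide1up, lo then hi for slide1down).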
10469 unsigned NumOps = Op.getNumOperands();
10470 bool IsMasked = NumOps == 7;
10471
10472 // Convert the vector source to the equivalent nxvXi32 vector.
10473 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
10474 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
10475 SDValue ScalarLo, ScalarHi;
10476 std::tie(ScalarLo, ScalarHi) =
10477 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
10478
10479 // Double the VL since we halved SEW.
10480 SDValue AVL = getVLOperand(Op);
10481 SDValue I32VL;
10482
10483 // Optimize for constant AVL
10484 if (isa<ConstantSDNode>(AVL)) {
10485 const auto [MinVLMAX, MaxVLMAX] =
10486          RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
10487
10488 uint64_t AVLInt = AVL->getAsZExtVal();
10489 if (AVLInt <= MinVLMAX) {
10490 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
10491 } else if (AVLInt >= 2 * MaxVLMAX) {
10492 // Just set vl to VLMAX in this situation
10493 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
10494 } else {
10495        // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working VL
10496        // depends on the hardware implementation, so let the code below
10497        // handle it.
10498 }
10499 }
10500 if (!I32VL) {
10501      RISCVVType::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
10502      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
10503 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
10504 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
10505 SDValue SETVL =
10506 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
10507      // Use a vsetvli instruction to get the actually-used length, which
10508      // depends on the hardware implementation.
10509 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
10510 SEW, LMUL);
10511 I32VL =
10512 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
10513 }
10514
10515 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
10516
10517 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
10518 // instructions.
10519 SDValue Passthru;
10520 if (IsMasked)
10521 Passthru = DAG.getUNDEF(I32VT);
10522 else
10523 Passthru = DAG.getBitcast(I32VT, Operands[1]);
10524
10525 if (IntNo == Intrinsic::riscv_vslide1up ||
10526 IntNo == Intrinsic::riscv_vslide1up_mask) {
10527 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10528 ScalarHi, I32Mask, I32VL);
10529 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10530 ScalarLo, I32Mask, I32VL);
10531 } else {
10532 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10533 ScalarLo, I32Mask, I32VL);
10534 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10535 ScalarHi, I32Mask, I32VL);
10536 }
10537
10538 // Convert back to nxvXi64.
10539 Vec = DAG.getBitcast(VT, Vec);
10540
10541 if (!IsMasked)
10542 return Vec;
10543 // Apply mask after the operation.
10544 SDValue Mask = Operands[NumOps - 3];
10545 SDValue MaskedOff = Operands[1];
10546 // Assume Policy operand is the last operand.
10547 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
10548 // We don't need to select maskedoff if it's undef.
10549 if (MaskedOff.isUndef())
10550 return Vec;
10551 // TAMU
10552 if (Policy == RISCVVType::TAIL_AGNOSTIC)
10553 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10554 DAG.getUNDEF(VT), AVL);
10555    // TUMA or TUMU: currently we always emit a TUMU policy regardless of TUMA.
10556    // This is fine because vmerge does not care about the mask policy.
10557 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10558 MaskedOff, AVL);
10559 }
10560 }
10561
10562 // We need to convert the scalar to a splat vector.
10563 SDValue VL = getVLOperand(Op);
10564 assert(VL.getValueType() == XLenVT);
10565 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
10566 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10567}
10568
10569// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
10570// scalable vector llvm.get.vector.length for now.
10571//
10572// We need to convert from a scalable VF to a vsetvli with VLMax equal to
10573// (vscale * VF). The vscale and VF are independent of element width. We use
10574// SEW=8 for the vsetvli because it is the only element width that supports all
10575// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
10576 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
10577// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
10578// SEW and LMUL are better for the surrounding vector instructions.
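// Worked example (with RVVBitsPerBlock = 64): for
// llvm.experimental.get.vector.length(%avl, 2, /*scalable=*/true), VF=2 is
// below the LMUL-1 VF of 8 for SEW=8, so LMUL=mf4 is chosen and
// VLMax(e8, mf4) = VLEN/32 = vscale * 2 as required.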
10579 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
10580                                  const RISCVSubtarget &Subtarget) {
10581 MVT XLenVT = Subtarget.getXLenVT();
10582
10583 // The smallest LMUL is only valid for the smallest element width.
10584 const unsigned ElementWidth = 8;
10585
10586 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
10587 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
10588 // We don't support VF==1 with ELEN==32.
10589 [[maybe_unused]] unsigned MinVF =
10590 RISCV::RVVBitsPerBlock / Subtarget.getELen();
10591
10592 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
10593 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
10594 "Unexpected VF");
10595
10596 bool Fractional = VF < LMul1VF;
10597 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
10598 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
10599 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
10600
10601 SDLoc DL(N);
10602
10603 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
10604 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
10605
10606 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
10607
10608 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
10609 SDValue Res =
10610 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
10611 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
10612}
10613
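// Lower llvm.experimental.cttz.elts to a vfirst.m operation. vfirst returns
// -1 when no mask bit is set; when the intrinsic's zero-is-poison flag is not
// set, that -1 is converted to the element count (VL) below.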
10614 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
10615                               const RISCVSubtarget &Subtarget) {
10616 SDValue Op0 = N->getOperand(1);
10617 MVT OpVT = Op0.getSimpleValueType();
10618 MVT ContainerVT = OpVT;
10619 if (OpVT.isFixedLengthVector()) {
10620 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
10621 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
10622 }
10623 MVT XLenVT = Subtarget.getXLenVT();
10624 SDLoc DL(N);
10625 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
10626 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
10627 if (isOneConstant(N->getOperand(2)))
10628 return Res;
10629
10630 // Convert -1 to VL.
10631 SDValue Setcc =
10632 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
10633 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
10634 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
10635}
10636
10637static inline void promoteVCIXScalar(const SDValue &Op,
10638                                      SmallVectorImpl<SDValue> &Operands,
10639                                      SelectionDAG &DAG) {
10640 const RISCVSubtarget &Subtarget =
10641      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10642
10643 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10644 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10645 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10646 SDLoc DL(Op);
10647
10648  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10649      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10650 if (!II || !II->hasScalarOperand())
10651 return;
10652
10653 unsigned SplatOp = II->ScalarOperand + 1;
10654 assert(SplatOp < Op.getNumOperands());
10655
10656 SDValue &ScalarOp = Operands[SplatOp];
10657 MVT OpVT = ScalarOp.getSimpleValueType();
10658 MVT XLenVT = Subtarget.getXLenVT();
10659
10660 // The code below is partially copied from lowerVectorIntrinsicScalars.
10661 // If this isn't a scalar, or its type is XLenVT we're done.
10662 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10663 return;
10664
10665  // Manually emit the promote operation for the scalar operand.
10666 if (OpVT.bitsLT(XLenVT)) {
10667 unsigned ExtOpc =
10668 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10669 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10670 }
10671}
10672
10673static void processVCIXOperands(SDValue &OrigOp,
10674                                 SmallVectorImpl<SDValue> &Operands,
10675                                 SelectionDAG &DAG) {
10676 promoteVCIXScalar(OrigOp, Operands, DAG);
10677 const RISCVSubtarget &Subtarget =
10678      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10679  for (SDValue &V : Operands) {
10680 EVT ValType = V.getValueType();
10681 if (ValType.isVector() && ValType.isFloatingPoint()) {
10682 MVT InterimIVT =
10683 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
10684 ValType.getVectorElementCount());
10685 V = DAG.getBitcast(InterimIVT, V);
10686 }
10687 if (ValType.isFixedLengthVector()) {
10688 MVT OpContainerVT = getContainerForFixedLengthVector(
10689 DAG, V.getSimpleValueType(), Subtarget);
10690 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
10691 }
10692 }
10693}
10694
10695// LMUL * VLEN should be greater than or equal to EGS * SEW
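// For example (illustrative): with Zvl128b (VLEN >= 128) and EGS = 4, an
// nxv4i32 operand (LMUL = 2) satisfies 128 * 2 >= 4 * 32, whereas nxv1i32
// (LMUL = 1/2) does not.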
10696static inline bool isValidEGW(int EGS, EVT VT,
10697 const RISCVSubtarget &Subtarget) {
10698 return (Subtarget.getRealMinVLen() *
10699              VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
10700         EGS * VT.getScalarSizeInBits();
10701}
10702
10703SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10704 SelectionDAG &DAG) const {
10705 unsigned IntNo = Op.getConstantOperandVal(0);
10706 SDLoc DL(Op);
10707 MVT XLenVT = Subtarget.getXLenVT();
10708
10709 switch (IntNo) {
10710 default:
10711 break; // Don't custom lower most intrinsics.
10712 case Intrinsic::riscv_tuple_insert: {
10713 SDValue Vec = Op.getOperand(1);
10714 SDValue SubVec = Op.getOperand(2);
10715 SDValue Index = Op.getOperand(3);
10716
10717 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
10718 SubVec, Index);
10719 }
10720 case Intrinsic::riscv_tuple_extract: {
10721 SDValue Vec = Op.getOperand(1);
10722 SDValue Index = Op.getOperand(2);
10723
10724 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
10725 Index);
10726 }
10727 case Intrinsic::thread_pointer: {
10728 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10729 return DAG.getRegister(RISCV::X4, PtrVT);
10730 }
10731 case Intrinsic::riscv_orc_b:
10732 case Intrinsic::riscv_brev8:
10733 case Intrinsic::riscv_sha256sig0:
10734 case Intrinsic::riscv_sha256sig1:
10735 case Intrinsic::riscv_sha256sum0:
10736 case Intrinsic::riscv_sha256sum1:
10737 case Intrinsic::riscv_sm3p0:
10738 case Intrinsic::riscv_sm3p1: {
10739 unsigned Opc;
10740 switch (IntNo) {
10741 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
10742 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
10743 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
10744 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
10745 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
10746 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
10747 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
10748 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
10749 }
10750
10751 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10752 }
10753 case Intrinsic::riscv_sm4ks:
10754 case Intrinsic::riscv_sm4ed: {
10755 unsigned Opc =
10756 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
10757
10758 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
10759 Op.getOperand(3));
10760 }
10761 case Intrinsic::riscv_zip:
10762 case Intrinsic::riscv_unzip: {
10763 unsigned Opc =
10764 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
10765 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10766 }
10767 case Intrinsic::riscv_mopr:
10768 return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
10769 Op.getOperand(2));
10770
10771 case Intrinsic::riscv_moprr: {
10772 return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
10773 Op.getOperand(2), Op.getOperand(3));
10774 }
10775 case Intrinsic::riscv_clmul:
10776 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
10777 Op.getOperand(2));
10778 case Intrinsic::riscv_clmulh:
10779 case Intrinsic::riscv_clmulr: {
10780 unsigned Opc =
10781 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
10782 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
10783 }
10784 case Intrinsic::experimental_get_vector_length:
10785 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
10786 case Intrinsic::experimental_cttz_elts:
10787 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
10788 case Intrinsic::riscv_vmv_x_s: {
10789 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
10790 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
10791 }
10792 case Intrinsic::riscv_vfmv_f_s:
10793 return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
10794 case Intrinsic::riscv_vmv_v_x:
10795 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
10796 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
10797 Subtarget);
10798 case Intrinsic::riscv_vfmv_v_f:
10799 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
10800 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10801 case Intrinsic::riscv_vmv_s_x: {
10802 SDValue Scalar = Op.getOperand(2);
10803
10804 if (Scalar.getValueType().bitsLE(XLenVT)) {
10805 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
10806 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
10807 Op.getOperand(1), Scalar, Op.getOperand(3));
10808 }
10809
10810 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
10811
10812 // This is an i64 value that lives in two scalar registers. We have to
10813    // insert this in a convoluted way. First we build a vXi64 splat containing
10814 // the two values that we assemble using some bit math. Next we'll use
10815 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
10816 // to merge element 0 from our splat into the source vector.
10817 // FIXME: This is probably not the best way to do this, but it is
10818 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
10819 // point.
10820 // sw lo, (a0)
10821 // sw hi, 4(a0)
10822 // vlse vX, (a0)
10823 //
10824 // vid.v vVid
10825 // vmseq.vx mMask, vVid, 0
10826 // vmerge.vvm vDest, vSrc, vVal, mMask
10827 MVT VT = Op.getSimpleValueType();
10828 SDValue Vec = Op.getOperand(1);
10829 SDValue VL = getVLOperand(Op);
10830
10831 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
10832 if (Op.getOperand(1).isUndef())
10833 return SplattedVal;
10834 SDValue SplattedIdx =
10835 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10836 DAG.getConstant(0, DL, MVT::i32), VL);
10837
10838 MVT MaskVT = getMaskTypeFor(VT);
10839 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
10840 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10841 SDValue SelectCond =
10842 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10843 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
10844 DAG.getUNDEF(MaskVT), Mask, VL});
10845 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
10846 Vec, DAG.getUNDEF(VT), VL);
10847 }
10848 case Intrinsic::riscv_vfmv_s_f:
10849 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
10850 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10851 // EGS * EEW >= 128 bits
10852 case Intrinsic::riscv_vaesdf_vv:
10853 case Intrinsic::riscv_vaesdf_vs:
10854 case Intrinsic::riscv_vaesdm_vv:
10855 case Intrinsic::riscv_vaesdm_vs:
10856 case Intrinsic::riscv_vaesef_vv:
10857 case Intrinsic::riscv_vaesef_vs:
10858 case Intrinsic::riscv_vaesem_vv:
10859 case Intrinsic::riscv_vaesem_vs:
10860 case Intrinsic::riscv_vaeskf1:
10861 case Intrinsic::riscv_vaeskf2:
10862 case Intrinsic::riscv_vaesz_vs:
10863 case Intrinsic::riscv_vsm4k:
10864 case Intrinsic::riscv_vsm4r_vv:
10865 case Intrinsic::riscv_vsm4r_vs: {
10866 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10867 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10868 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10869 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10870 return Op;
10871 }
10872 // EGS * EEW >= 256 bits
10873 case Intrinsic::riscv_vsm3c:
10874 case Intrinsic::riscv_vsm3me: {
10875 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
10876 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
10877 reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
10878 return Op;
10879 }
10880 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
10881 case Intrinsic::riscv_vsha2ch:
10882 case Intrinsic::riscv_vsha2cl:
10883 case Intrinsic::riscv_vsha2ms: {
10884 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
10885 !Subtarget.hasStdExtZvknhb())
10886 reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
10887 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10888 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10889 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10890 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10891 return Op;
10892 }
10893 case Intrinsic::riscv_sf_vc_v_x:
10894 case Intrinsic::riscv_sf_vc_v_i:
10895 case Intrinsic::riscv_sf_vc_v_xv:
10896 case Intrinsic::riscv_sf_vc_v_iv:
10897 case Intrinsic::riscv_sf_vc_v_vv:
10898 case Intrinsic::riscv_sf_vc_v_fv:
10899 case Intrinsic::riscv_sf_vc_v_xvv:
10900 case Intrinsic::riscv_sf_vc_v_ivv:
10901 case Intrinsic::riscv_sf_vc_v_vvv:
10902 case Intrinsic::riscv_sf_vc_v_fvv:
10903 case Intrinsic::riscv_sf_vc_v_xvw:
10904 case Intrinsic::riscv_sf_vc_v_ivw:
10905 case Intrinsic::riscv_sf_vc_v_vvw:
10906 case Intrinsic::riscv_sf_vc_v_fvw: {
10907 MVT VT = Op.getSimpleValueType();
10908
10909 SmallVector<SDValue> Operands{Op->op_values()};
10910    processVCIXOperands(Op, Operands, DAG);
10911
10912 MVT RetVT = VT;
10913    if (VT.isFixedLengthVector())
10914      RetVT = getContainerForFixedLengthVector(VT);
10915    else if (VT.isFloatingPoint())
10916      RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
10917                               VT.getVectorElementCount());
10918
10919 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
10920
10921 if (VT.isFixedLengthVector())
10922 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
10923 else if (VT.isFloatingPoint())
10924 NewNode = DAG.getBitcast(VT, NewNode);
10925
10926 if (Op == NewNode)
10927 break;
10928
10929 return NewNode;
10930 }
10931 }
10932
10933 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10934}
10935
10936 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
10937                                     unsigned Type) {
10938 SDLoc DL(Op);
10939 SmallVector<SDValue> Operands{Op->op_values()};
10940 Operands.erase(Operands.begin() + 1);
10941
10942 const RISCVSubtarget &Subtarget =
10943      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10944  MVT VT = Op.getSimpleValueType();
10945 MVT RetVT = VT;
10946 MVT FloatVT = VT;
10947
10948 if (VT.isFloatingPoint()) {
10949 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
10950                             VT.getVectorElementCount());
10951    FloatVT = RetVT;
10952 }
10953 if (VT.isFixedLengthVector())
10954    RetVT = getContainerForFixedLengthVector(DAG, RetVT,
10955                                             Subtarget);
10956
10957  processVCIXOperands(Op, Operands, DAG);
10958
10959 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
10960 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
10961 SDValue Chain = NewNode.getValue(1);
10962
10963 if (VT.isFixedLengthVector())
10964 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
10965 if (VT.isFloatingPoint())
10966 NewNode = DAG.getBitcast(VT, NewNode);
10967
10968 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
10969
10970 return NewNode;
10971}
10972
10973 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
10974                                   unsigned Type) {
10975 SmallVector<SDValue> Operands{Op->op_values()};
10976 Operands.erase(Operands.begin() + 1);
10977  processVCIXOperands(Op, Operands, DAG);
10978
10979 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
10980}
10981
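// Lower the fixed-length riscv.seg<N>.load.mask and riscv.sseg<N>.load.mask
// intrinsics to the corresponding scalable vlseg<N>/vlsseg<N> masked
// intrinsics: a single vector-tuple load is emitted, and the NF fixed-length
// results are recovered with TUPLE_EXTRACT plus conversion back from the
// scalable container type.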
10982static SDValue
10983 lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op,
10984                                  const RISCVSubtarget &Subtarget,
10985 SelectionDAG &DAG) {
10986 bool IsStrided;
10987 switch (IntNo) {
10988 case Intrinsic::riscv_seg2_load_mask:
10989 case Intrinsic::riscv_seg3_load_mask:
10990 case Intrinsic::riscv_seg4_load_mask:
10991 case Intrinsic::riscv_seg5_load_mask:
10992 case Intrinsic::riscv_seg6_load_mask:
10993 case Intrinsic::riscv_seg7_load_mask:
10994 case Intrinsic::riscv_seg8_load_mask:
10995 IsStrided = false;
10996 break;
10997 case Intrinsic::riscv_sseg2_load_mask:
10998 case Intrinsic::riscv_sseg3_load_mask:
10999 case Intrinsic::riscv_sseg4_load_mask:
11000 case Intrinsic::riscv_sseg5_load_mask:
11001 case Intrinsic::riscv_sseg6_load_mask:
11002 case Intrinsic::riscv_sseg7_load_mask:
11003 case Intrinsic::riscv_sseg8_load_mask:
11004 IsStrided = true;
11005 break;
11006 default:
11007 llvm_unreachable("unexpected intrinsic ID");
11008  }
11009
11010 static const Intrinsic::ID VlsegInts[7] = {
11011 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11012 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11013 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11014 Intrinsic::riscv_vlseg8_mask};
11015 static const Intrinsic::ID VlssegInts[7] = {
11016 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11017 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11018 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11019 Intrinsic::riscv_vlsseg8_mask};
11020
11021 SDLoc DL(Op);
11022 unsigned NF = Op->getNumValues() - 1;
11023 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11024 MVT XLenVT = Subtarget.getXLenVT();
11025 MVT VT = Op->getSimpleValueType(0);
11026 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11027 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11028 ContainerVT.getScalarSizeInBits();
11029 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11030
11031 // Operands: (chain, int_id, pointer, mask, vl) or
11032 // (chain, int_id, pointer, offset, mask, vl)
11033 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11034 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11035 MVT MaskVT = Mask.getSimpleValueType();
11036 MVT MaskContainerVT =
11037 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11038 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11039
11040 SDValue IntID = DAG.getTargetConstant(
11041 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11042 auto *Load = cast<MemIntrinsicSDNode>(Op);
11043
11044 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11045  SmallVector<SDValue, 9> Ops = {
11046      Load->getChain(),
11047 IntID,
11048 DAG.getUNDEF(VecTupTy),
11049 Op.getOperand(2),
11050 Mask,
11051 VL,
11052      DAG.getTargetConstant(
11053          RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
11054      DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11055 // Insert the stride operand.
11056 if (IsStrided)
11057 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11058
11059 SDValue Result =
11060      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11061                              Load->getMemoryVT(), Load->getMemOperand());
11062  SmallVector<SDValue, 9> Results;
11063  for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11064 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11065 Result.getValue(0),
11066 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11067 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11068 }
11069 Results.push_back(Result.getValue(1));
11070 return DAG.getMergeValues(Results, DL);
11071}
11072
11073SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11074 SelectionDAG &DAG) const {
11075 unsigned IntNo = Op.getConstantOperandVal(1);
11076 switch (IntNo) {
11077 default:
11078 break;
11079 case Intrinsic::riscv_seg2_load_mask:
11080 case Intrinsic::riscv_seg3_load_mask:
11081 case Intrinsic::riscv_seg4_load_mask:
11082 case Intrinsic::riscv_seg5_load_mask:
11083 case Intrinsic::riscv_seg6_load_mask:
11084 case Intrinsic::riscv_seg7_load_mask:
11085 case Intrinsic::riscv_seg8_load_mask:
11086 case Intrinsic::riscv_sseg2_load_mask:
11087 case Intrinsic::riscv_sseg3_load_mask:
11088 case Intrinsic::riscv_sseg4_load_mask:
11089 case Intrinsic::riscv_sseg5_load_mask:
11090 case Intrinsic::riscv_sseg6_load_mask:
11091 case Intrinsic::riscv_sseg7_load_mask:
11092 case Intrinsic::riscv_sseg8_load_mask:
11093 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
11094
11095 case Intrinsic::riscv_sf_vc_v_x_se:
11096 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
11097 case Intrinsic::riscv_sf_vc_v_i_se:
11098 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
11099 case Intrinsic::riscv_sf_vc_v_xv_se:
11100 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
11101 case Intrinsic::riscv_sf_vc_v_iv_se:
11102 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
11103 case Intrinsic::riscv_sf_vc_v_vv_se:
11104 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
11105 case Intrinsic::riscv_sf_vc_v_fv_se:
11106 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
11107 case Intrinsic::riscv_sf_vc_v_xvv_se:
11108 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
11109 case Intrinsic::riscv_sf_vc_v_ivv_se:
11110 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
11111 case Intrinsic::riscv_sf_vc_v_vvv_se:
11112 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
11113 case Intrinsic::riscv_sf_vc_v_fvv_se:
11114 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
11115 case Intrinsic::riscv_sf_vc_v_xvw_se:
11116 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
11117 case Intrinsic::riscv_sf_vc_v_ivw_se:
11118 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
11119 case Intrinsic::riscv_sf_vc_v_vvw_se:
11120 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
11121 case Intrinsic::riscv_sf_vc_v_fvw_se:
11122 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
11123 }
11124
11125 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11126}
11127
11128static SDValue
11129 lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
11130 const RISCVSubtarget &Subtarget,
11131 SelectionDAG &DAG) {
11132 bool IsStrided;
11133 switch (IntNo) {
11134 case Intrinsic::riscv_seg2_store_mask:
11135 case Intrinsic::riscv_seg3_store_mask:
11136 case Intrinsic::riscv_seg4_store_mask:
11137 case Intrinsic::riscv_seg5_store_mask:
11138 case Intrinsic::riscv_seg6_store_mask:
11139 case Intrinsic::riscv_seg7_store_mask:
11140 case Intrinsic::riscv_seg8_store_mask:
11141 IsStrided = false;
11142 break;
11143 case Intrinsic::riscv_sseg2_store_mask:
11144 case Intrinsic::riscv_sseg3_store_mask:
11145 case Intrinsic::riscv_sseg4_store_mask:
11146 case Intrinsic::riscv_sseg5_store_mask:
11147 case Intrinsic::riscv_sseg6_store_mask:
11148 case Intrinsic::riscv_sseg7_store_mask:
11149 case Intrinsic::riscv_sseg8_store_mask:
11150 IsStrided = true;
11151 break;
11152 default:
11153 llvm_unreachable("unexpected intrinsic ID");
11154 }
11155
11156 SDLoc DL(Op);
11157 static const Intrinsic::ID VssegInts[] = {
11158 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
11159 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
11160 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
11161 Intrinsic::riscv_vsseg8_mask};
11162 static const Intrinsic::ID VsssegInts[] = {
11163 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
11164 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
11165 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
11166 Intrinsic::riscv_vssseg8_mask};
11167
11168 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
11169 // (chain, int_id, vec*, ptr, stride, mask, vl)
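// For example, riscv_sseg4_store_mask has 10 operands (chain + int_id + 4
// vectors + ptr, stride, mask, vl), giving NF = 10 - 6 = 4.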
11170 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
11171 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11172 MVT XLenVT = Subtarget.getXLenVT();
11173 MVT VT = Op->getOperand(2).getSimpleValueType();
11174 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11175 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11176 ContainerVT.getScalarSizeInBits();
11177 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11178
11179 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11180 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11181 MVT MaskVT = Mask.getSimpleValueType();
11182 MVT MaskContainerVT =
11183 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11184 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11185
11186 SDValue IntID = DAG.getTargetConstant(
11187 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
11188 SDValue Ptr = Op->getOperand(NF + 2);
11189
11190 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
11191
11192 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
11193 for (unsigned i = 0; i < NF; i++)
11194 StoredVal = DAG.getNode(
11195 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
11196 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
11197 DAG, Subtarget),
11198 DAG.getTargetConstant(i, DL, MVT::i32));
11199
11200 SmallVector<SDValue, 8> Ops = {
11201 FixedIntrinsic->getChain(),
11202 IntID,
11203 StoredVal,
11204 Ptr,
11205 Mask,
11206 VL,
11207 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11208 // Insert the stride operand.
11209 if (IsStrided)
11210 Ops.insert(std::next(Ops.begin(), 4),
11211 Op.getOperand(Op.getNumOperands() - 3));
11212
11213 return DAG.getMemIntrinsicNode(
11214 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
11215 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
11216}
11217
11218SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11219 SelectionDAG &DAG) const {
11220 unsigned IntNo = Op.getConstantOperandVal(1);
11221 switch (IntNo) {
11222 default:
11223 break;
11224 case Intrinsic::riscv_seg2_store_mask:
11225 case Intrinsic::riscv_seg3_store_mask:
11226 case Intrinsic::riscv_seg4_store_mask:
11227 case Intrinsic::riscv_seg5_store_mask:
11228 case Intrinsic::riscv_seg6_store_mask:
11229 case Intrinsic::riscv_seg7_store_mask:
11230 case Intrinsic::riscv_seg8_store_mask:
11231 case Intrinsic::riscv_sseg2_store_mask:
11232 case Intrinsic::riscv_sseg3_store_mask:
11233 case Intrinsic::riscv_sseg4_store_mask:
11234 case Intrinsic::riscv_sseg5_store_mask:
11235 case Intrinsic::riscv_sseg6_store_mask:
11236 case Intrinsic::riscv_sseg7_store_mask:
11237 case Intrinsic::riscv_sseg8_store_mask:
11238 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
11239
11240 case Intrinsic::riscv_sf_vc_xv_se:
11241 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
11242 case Intrinsic::riscv_sf_vc_iv_se:
11243 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
11244 case Intrinsic::riscv_sf_vc_vv_se:
11245 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
11246 case Intrinsic::riscv_sf_vc_fv_se:
11247 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
11248 case Intrinsic::riscv_sf_vc_xvv_se:
11249 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
11250 case Intrinsic::riscv_sf_vc_ivv_se:
11251 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
11252 case Intrinsic::riscv_sf_vc_vvv_se:
11253 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
11254 case Intrinsic::riscv_sf_vc_fvv_se:
11255 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
11256 case Intrinsic::riscv_sf_vc_xvw_se:
11257 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
11258 case Intrinsic::riscv_sf_vc_ivw_se:
11259 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
11260 case Intrinsic::riscv_sf_vc_vvw_se:
11261 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
11262 case Intrinsic::riscv_sf_vc_fvw_se:
11263 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
11264 }
11265
11266 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11267}
11268
11269static unsigned getRVVReductionOp(unsigned ISDOpcode) {
11270 switch (ISDOpcode) {
11271 default:
11272 llvm_unreachable("Unhandled reduction");
11273 case ISD::VP_REDUCE_ADD:
11274 case ISD::VECREDUCE_ADD:
11275 return RISCVISD::VECREDUCE_ADD_VL;
11276 case ISD::VP_REDUCE_UMAX:
11277 case ISD::VECREDUCE_UMAX:
11278 return RISCVISD::VECREDUCE_UMAX_VL;
11279 case ISD::VP_REDUCE_SMAX:
11280 case ISD::VECREDUCE_SMAX:
11281 return RISCVISD::VECREDUCE_SMAX_VL;
11282 case ISD::VP_REDUCE_UMIN:
11283 case ISD::VECREDUCE_UMIN:
11284 return RISCVISD::VECREDUCE_UMIN_VL;
11285 case ISD::VP_REDUCE_SMIN:
11286 case ISD::VECREDUCE_SMIN:
11287 return RISCVISD::VECREDUCE_SMIN_VL;
11288 case ISD::VP_REDUCE_AND:
11289 case ISD::VECREDUCE_AND:
11290 return RISCVISD::VECREDUCE_AND_VL;
11291 case ISD::VP_REDUCE_OR:
11292 case ISD::VECREDUCE_OR:
11293 return RISCVISD::VECREDUCE_OR_VL;
11294 case ISD::VP_REDUCE_XOR:
11295 case ISD::VECREDUCE_XOR:
11296 return RISCVISD::VECREDUCE_XOR_VL;
11297 case ISD::VP_REDUCE_FADD:
11298 return RISCVISD::VECREDUCE_FADD_VL;
11299 case ISD::VP_REDUCE_SEQ_FADD:
11300 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
11301 case ISD::VP_REDUCE_FMAX:
11302 case ISD::VP_REDUCE_FMAXIMUM:
11303 return RISCVISD::VECREDUCE_FMAX_VL;
11304 case ISD::VP_REDUCE_FMIN:
11305 case ISD::VP_REDUCE_FMINIMUM:
11306 return RISCVISD::VECREDUCE_FMIN_VL;
11307 }
11308
11309}
11310
11311SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
11312 SelectionDAG &DAG,
11313 bool IsVP) const {
11314 SDLoc DL(Op);
11315 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
11316 MVT VecVT = Vec.getSimpleValueType();
11317 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
11318 Op.getOpcode() == ISD::VECREDUCE_OR ||
11319 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11320 Op.getOpcode() == ISD::VP_REDUCE_AND ||
11321 Op.getOpcode() == ISD::VP_REDUCE_OR ||
11322 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
11323 "Unexpected reduction lowering");
11324
11325 MVT XLenVT = Subtarget.getXLenVT();
11326
11327 MVT ContainerVT = VecVT;
11328 if (VecVT.isFixedLengthVector()) {
11329 ContainerVT = getContainerForFixedLengthVector(VecVT);
11330 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11331 }
11332
11333 SDValue Mask, VL;
11334 if (IsVP) {
11335 Mask = Op.getOperand(2);
11336 VL = Op.getOperand(3);
11337 } else {
11338 std::tie(Mask, VL) =
11339 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11340 }
11341
11342 ISD::CondCode CC;
11343 switch (Op.getOpcode()) {
11344 default:
11345 llvm_unreachable("Unhandled reduction");
11346 case ISD::VECREDUCE_AND:
11347 case ISD::VP_REDUCE_AND: {
11348 // vcpop ~x == 0
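// (i.e. all active elements are true iff the complemented vector has no
// active set bits under the mask)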
11349 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11350 if (IsVP || VecVT.isFixedLengthVector())
11351 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
11352 else
11353 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
11354 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11355 CC = ISD::SETEQ;
11356 break;
11357 }
11358 case ISD::VECREDUCE_OR:
11359 case ISD::VP_REDUCE_OR:
11360 // vcpop x != 0
11361 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11362 CC = ISD::SETNE;
11363 break;
11364 case ISD::VECREDUCE_XOR:
11365 case ISD::VP_REDUCE_XOR: {
11366 // ((vcpop x) & 1) != 0
11367 SDValue One = DAG.getConstant(1, DL, XLenVT);
11368 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11369 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
11370 CC = ISD::SETNE;
11371 break;
11372 }
11373 }
11374
11375 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11376 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
11377 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
11378
11379 if (!IsVP)
11380 return SetCC;
11381
11382 // Now include the start value in the operation.
11383 // Note that we must return the start value when no elements are operated
11384 // upon. The vcpop instructions we've emitted in each case above will return
11385 // 0 for an inactive vector, and so we've already received the neutral value:
11386 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
11387 // can simply include the start value.
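// For example, for vp.reduce.and with VL=0 the vcpop result is 0, the SETEQ
// above yields 1 (the AND identity), and ANDing that with the start value
// returns the start value as required.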
11388 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11389 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
11390}
11391
11392static bool isNonZeroAVL(SDValue AVL) {
11393 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
11394 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
11395 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
11396 (ImmAVL && ImmAVL->getZExtValue() >= 1);
11397}
11398
11399/// Helper to lower a reduction sequence of the form:
11400/// scalar = reduce_op vec, scalar_start
11401static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
11402 SDValue StartValue, SDValue Vec, SDValue Mask,
11403 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
11404 const RISCVSubtarget &Subtarget) {
11405 const MVT VecVT = Vec.getSimpleValueType();
11406 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
11407 const MVT XLenVT = Subtarget.getXLenVT();
11408 const bool NonZeroAVL = isNonZeroAVL(VL);
11409
11410 // The reduction needs an LMUL1 input; do the splat at either LMUL1
11411 // or the original VT if fractional.
11412 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
11413 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
11414 // prove it is non-zero. For the AVL=0 case, we need the scalar to
11415 // be the result of the reduction operation.
11416 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
11417 SDValue InitialValue =
11418 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
11419 if (M1VT != InnerVT)
11420 InitialValue =
11421 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
11422 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
11423 SDValue Policy = DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
11424 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
11425 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
11426 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
11427}
11428
11429SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
11430 SelectionDAG &DAG) const {
11431 SDLoc DL(Op);
11432 SDValue Vec = Op.getOperand(0);
11433 EVT VecEVT = Vec.getValueType();
11434
11435 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11436
11437 // Due to ordering in legalize types we may have a vector type that needs to
11438 // be split. Do that manually so we can get down to a legal type.
11439 while (getTypeAction(*DAG.getContext(), VecEVT) ==
11440 TargetLowering::TypeSplitVector) {
11441 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11442 VecEVT = Lo.getValueType();
11443 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
11444 }
11445
11446 // TODO: The type may need to be widened rather than split. Or widened before
11447 // it can be split.
11448 if (!isTypeLegal(VecEVT))
11449 return SDValue();
11450
11451 MVT VecVT = VecEVT.getSimpleVT();
11452 MVT VecEltVT = VecVT.getVectorElementType();
11453 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
11454
11455 MVT ContainerVT = VecVT;
11456 if (VecVT.isFixedLengthVector()) {
11457 ContainerVT = getContainerForFixedLengthVector(VecVT);
11458 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11459 }
11460
11461 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11462
11463 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
11464 switch (BaseOpc) {
11465 case ISD::AND:
11466 case ISD::OR:
11467 case ISD::UMAX:
11468 case ISD::UMIN:
11469 case ISD::SMAX:
11470 case ISD::SMIN:
11471 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
11472 }
11473 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
11474 Mask, VL, DL, DAG, Subtarget);
11475}
11476
11477// Given a reduction op, this function returns the matching reduction opcode,
11478// the vector SDValue and the scalar SDValue required to lower this to a
11479// RISCVISD node.
11480static std::tuple<unsigned, SDValue, SDValue>
11482 const RISCVSubtarget &Subtarget) {
11483 SDLoc DL(Op);
11484 auto Flags = Op->getFlags();
11485 unsigned Opcode = Op.getOpcode();
11486 switch (Opcode) {
11487 default:
11488 llvm_unreachable("Unhandled reduction");
11489 case ISD::VECREDUCE_FADD: {
11490 // Use positive zero if we can. It is cheaper to materialize.
11491 SDValue Zero =
11492 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
11493 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
11494 }
11495 case ISD::VECREDUCE_SEQ_FADD:
11496 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
11497 Op.getOperand(0));
11498 case ISD::VECREDUCE_FMINIMUM:
11499 case ISD::VECREDUCE_FMAXIMUM:
11500 case ISD::VECREDUCE_FMIN:
11501 case ISD::VECREDUCE_FMAX: {
11502 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
11503 unsigned RVVOpc =
11504 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
11505 ? RISCVISD::VECREDUCE_FMIN_VL
11506 : RISCVISD::VECREDUCE_FMAX_VL;
11507 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
11508 }
11509 }
11510}
11511
11512SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
11513 SelectionDAG &DAG) const {
11514 SDLoc DL(Op);
11515 MVT VecEltVT = Op.getSimpleValueType();
11516
11517 unsigned RVVOpcode;
11518 SDValue VectorVal, ScalarVal;
11519 std::tie(RVVOpcode, VectorVal, ScalarVal) =
11520 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
11521 MVT VecVT = VectorVal.getSimpleValueType();
11522
11523 MVT ContainerVT = VecVT;
11524 if (VecVT.isFixedLengthVector()) {
11525 ContainerVT = getContainerForFixedLengthVector(VecVT);
11526 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
11527 }
11528
11529 MVT ResVT = Op.getSimpleValueType();
11530 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11531 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
11532 VL, DL, DAG, Subtarget);
11533 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
11534 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
11535 return Res;
11536
11537 if (Op->getFlags().hasNoNaNs())
11538 return Res;
11539
11540 // Force output to NaN if any element is NaN.
11541 SDValue IsNan =
11542 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
11543 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
11544 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
11545 MVT XLenVT = Subtarget.getXLenVT();
11546 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
11547 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
11548 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11549 return DAG.getSelect(
11550 DL, ResVT, NoNaNs, Res,
11551 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11552}
11553
11554SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
11555 SelectionDAG &DAG) const {
11556 SDLoc DL(Op);
11557 unsigned Opc = Op.getOpcode();
11558 SDValue Start = Op.getOperand(0);
11559 SDValue Vec = Op.getOperand(1);
11560 EVT VecEVT = Vec.getValueType();
11561 MVT XLenVT = Subtarget.getXLenVT();
11562
11563 // TODO: The type may need to be widened rather than split. Or widened before
11564 // it can be split.
11565 if (!isTypeLegal(VecEVT))
11566 return SDValue();
11567
11568 MVT VecVT = VecEVT.getSimpleVT();
11569 unsigned RVVOpcode = getRVVReductionOp(Opc);
11570
11571 if (VecVT.isFixedLengthVector()) {
11572 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
11573 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11574 }
11575
11576 SDValue VL = Op.getOperand(3);
11577 SDValue Mask = Op.getOperand(2);
11578 SDValue Res =
11579 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
11580 Vec, Mask, VL, DL, DAG, Subtarget);
11581 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
11582 Op->getFlags().hasNoNaNs())
11583 return Res;
11584
11585 // Propagate NaNs.
11586 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
11587 // Check if any of the elements in Vec is NaN.
11588 SDValue IsNaN = DAG.getNode(
11589 RISCVISD::SETCC_VL, DL, PredVT,
11590 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
11591 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
11592 // Check if the start value is NaN.
11593 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
11594 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
11595 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
11596 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11597 MVT ResVT = Res.getSimpleValueType();
11598 return DAG.getSelect(
11599 DL, ResVT, NoNaNs, Res,
11600 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11601}
11602
11603SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
11604 SelectionDAG &DAG) const {
11605 SDValue Vec = Op.getOperand(0);
11606 SDValue SubVec = Op.getOperand(1);
11607 MVT VecVT = Vec.getSimpleValueType();
11608 MVT SubVecVT = SubVec.getSimpleValueType();
11609
11610 SDLoc DL(Op);
11611 MVT XLenVT = Subtarget.getXLenVT();
11612 unsigned OrigIdx = Op.getConstantOperandVal(2);
11613 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11614
11615 if (OrigIdx == 0 && Vec.isUndef())
11616 return Op;
11617
11618 // We don't have the ability to slide mask vectors up indexed by their i1
11619 // elements; the smallest we can do is i8. Often we are able to bitcast to
11620 // equivalent i8 vectors. Note that when inserting a fixed-length vector
11621 // into a scalable one, we might not necessarily have enough scalable
11622 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
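// For example, inserting v8i1 into nxv8i1 at index 8 can be re-expressed as
// inserting v1i8 into nxv1i8 at index 1.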
11623 if (SubVecVT.getVectorElementType() == MVT::i1) {
11624 if (VecVT.getVectorMinNumElements() >= 8 &&
11625 SubVecVT.getVectorMinNumElements() >= 8) {
11626 assert(OrigIdx % 8 == 0 && "Invalid index");
11627 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11628 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11629 "Unexpected mask vector lowering");
11630 OrigIdx /= 8;
11631 SubVecVT =
11632 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11633 SubVecVT.isScalableVector());
11634 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11635 VecVT.isScalableVector());
11636 Vec = DAG.getBitcast(VecVT, Vec);
11637 SubVec = DAG.getBitcast(SubVecVT, SubVec);
11638 } else {
11639 // We can't slide this mask vector up indexed by its i1 elements.
11640 // This poses a problem when we wish to insert a scalable vector which
11641 // can't be re-expressed as a larger type. Just choose the slow path and
11642 // extend to a larger type, then truncate back down.
11643 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11644 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11645 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11646 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
11647 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
11648 Op.getOperand(2));
11649 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
11650 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
11651 }
11652 }
11653
11654 // If the subvector is a fixed-length type and we don't know VLEN
11655 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11656 // don't know which register of a LMUL group contains the specific subvector
11657 // as we only know the minimum register size. Therefore we must slide the
11658 // vector group up the full amount.
11659 const auto VLen = Subtarget.getRealVLen();
11660 if (SubVecVT.isFixedLengthVector() && !VLen) {
11661 MVT ContainerVT = VecVT;
11662 if (VecVT.isFixedLengthVector()) {
11663 ContainerVT = getContainerForFixedLengthVector(VecVT);
11664 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11665 }
11666
11667 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
11668
11669 SDValue Mask =
11670 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11671 // Set the vector length to only the number of elements we care about. Note
11672 // that for slideup this includes the offset.
11673 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
11674 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
11675
11676 // Use tail agnostic policy if we're inserting over Vec's tail.
11677 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11678 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
11679 Policy = RISCVVType::TAIL_AGNOSTIC;
11680
11681 // If we're inserting into the lowest elements, use a tail undisturbed
11682 // vmv.v.v.
11683 if (OrigIdx == 0) {
11684 SubVec =
11685 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
11686 } else {
11687 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11688 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
11689 SlideupAmt, Mask, VL, Policy);
11690 }
11691
11692 if (VecVT.isFixedLengthVector())
11693 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11694 return DAG.getBitcast(Op.getValueType(), SubVec);
11695 }
11696
11697 MVT ContainerVecVT = VecVT;
11698 if (VecVT.isFixedLengthVector()) {
11699 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
11700 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
11701 }
11702
11703 MVT ContainerSubVecVT = SubVecVT;
11704 if (SubVecVT.isFixedLengthVector()) {
11705 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
11706 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
11707 }
11708
11709 unsigned SubRegIdx;
11710 ElementCount RemIdx;
11711 // insert_subvector scales the index by vscale if the subvector is scalable,
11712 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11713 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11714 if (SubVecVT.isFixedLengthVector()) {
11715 assert(VLen);
11716 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11717 auto Decompose =
11718 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11719 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
11720 SubRegIdx = Decompose.first;
11721 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
11722 (OrigIdx % Vscale));
11723 } else {
11724 auto Decompose =
11725 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11726 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
11727 SubRegIdx = Decompose.first;
11728 RemIdx = ElementCount::getScalable(Decompose.second);
11729 }
11730
11731 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
11732 assert(isPowerOf2_64(
11733 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
11734 bool ExactlyVecRegSized =
11735 Subtarget.expandVScale(SubVecVT.getSizeInBits())
11736 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
11737
11738 // 1. If the Idx has been completely eliminated and this subvector's size is
11739 // a vector register or a multiple thereof, or the surrounding elements are
11740 // undef, then this is a subvector insert which naturally aligns to a vector
11741 // register. These can easily be handled using subregister manipulation.
11742 // 2. If the subvector isn't an exact multiple of a valid register group size,
11743 // then the insertion must preserve the undisturbed elements of the register.
11744 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
11745 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
11746 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
11747 // of that LMUL=1 type back into the larger vector (resolving to another
11748 // subregister operation). See below for how our VSLIDEUP works. We go via a
11749 // LMUL=1 type to avoid allocating a large register group to hold our
11750 // subvector.
11751 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
11752 if (SubVecVT.isFixedLengthVector()) {
11753 // We may get NoSubRegister if inserting at index 0 and the subvec
11754 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
11755 if (SubRegIdx == RISCV::NoSubRegister) {
11756 assert(OrigIdx == 0);
11757 return Op;
11758 }
11759
11760 // Use an insert_subvector that will resolve to an insert subreg.
11761 assert(VLen);
11762 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11763 SDValue Insert =
11764 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
11765 if (VecVT.isFixedLengthVector())
11766 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
11767 return Insert;
11768 }
11769 return Op;
11770 }
11771
11772 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
11773 // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
11774 // (in our case undisturbed). This means we can set up a subvector insertion
11775 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
11776 // size of the subvector.
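// For example, inserting a 2-element subvector at element index 4 uses
// OFFSET=4 and VL=6; elements 0..3 keep their old values and, with a
// tail-undisturbed policy, so do elements 6 and above.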
11777 MVT InterSubVT = ContainerVecVT;
11778 SDValue AlignedExtract = Vec;
11779 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
11780 if (SubVecVT.isFixedLengthVector()) {
11781 assert(VLen);
11782 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
11783 }
11784 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
11785 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
11786 // Extract a subvector equal to the nearest full vector register type. This
11787 // should resolve to a EXTRACT_SUBREG instruction.
11788 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
11789 }
11790
11791 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
11792
11793 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
11794
11795 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
11796 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
11797
11798 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
11799 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11800 if (Subtarget.expandVScale(EndIndex) ==
11801 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
11802 Policy = RISCVVType::TAIL_AGNOSTIC;
11803
11804 // If we're inserting into the lowest elements, use a tail undisturbed
11805 // vmv.v.v.
11806 if (RemIdx.isZero()) {
11807 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
11808 SubVec, VL);
11809 } else {
11810 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
11811
11812 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
11813 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
11814
11815 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
11816 SlideupAmt, Mask, VL, Policy);
11817 }
11818
11819 // If required, insert this subvector back into the correct vector register.
11820 // This should resolve to an INSERT_SUBREG instruction.
11821 if (ContainerVecVT.bitsGT(InterSubVT))
11822 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
11823
11824 if (VecVT.isFixedLengthVector())
11825 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11826
11827 // We might have bitcast from a mask type: cast back to the original type if
11828 // required.
11829 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
11830}
11831
11832SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
11833 SelectionDAG &DAG) const {
11834 SDValue Vec = Op.getOperand(0);
11835 MVT SubVecVT = Op.getSimpleValueType();
11836 MVT VecVT = Vec.getSimpleValueType();
11837
11838 SDLoc DL(Op);
11839 MVT XLenVT = Subtarget.getXLenVT();
11840 unsigned OrigIdx = Op.getConstantOperandVal(1);
11841 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11842
11843 // With an index of 0 this is a cast-like subvector, which can be performed
11844 // with subregister operations.
11845 if (OrigIdx == 0)
11846 return Op;
11847
11848 // We don't have the ability to slide mask vectors down indexed by their i1
11849 // elements; the smallest we can do is i8. Often we are able to bitcast to
11850 // equivalent i8 vectors. Note that when extracting a fixed-length vector
11851 // from a scalable one, we might not necessarily have enough scalable
11852 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
11853 if (SubVecVT.getVectorElementType() == MVT::i1) {
11854 if (VecVT.getVectorMinNumElements() >= 8 &&
11855 SubVecVT.getVectorMinNumElements() >= 8) {
11856 assert(OrigIdx % 8 == 0 && "Invalid index");
11857 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11858 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11859 "Unexpected mask vector lowering");
11860 OrigIdx /= 8;
11861 SubVecVT =
11862 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11863 SubVecVT.isScalableVector());
11864 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11865 VecVT.isScalableVector());
11866 Vec = DAG.getBitcast(VecVT, Vec);
11867 } else {
11868 // We can't slide this mask vector down, indexed by its i1 elements.
11869 // This poses a problem when we wish to extract a scalable vector which
11870 // can't be re-expressed as a larger type. Just choose the slow path and
11871 // extend to a larger type, then truncate back down.
11872 // TODO: We could probably improve this when extracting certain fixed-length
11873 // vectors from other fixed-length vectors, where we can extract as i8 and
11874 // shift the correct element right to reach the desired subvector.
11875 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11876 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11877 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11878 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
11879 Op.getOperand(1));
11880 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
11881 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
11882 }
11883 }
11884
11885 const auto VLen = Subtarget.getRealVLen();
11886
11887 // If the subvector is a fixed-length type and we don't know VLEN
11888 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11889 // don't know which register of a LMUL group contains the specific subvector
11890 // as we only know the minimum register size. Therefore we must slide the
11891 // vector group down the full amount.
11892 if (SubVecVT.isFixedLengthVector() && !VLen) {
11893 MVT ContainerVT = VecVT;
11894 if (VecVT.isFixedLengthVector()) {
11895 ContainerVT = getContainerForFixedLengthVector(VecVT);
11896 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11897 }
11898
11899 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
11900 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
11901 if (auto ShrunkVT =
11902 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
11903 ContainerVT = *ShrunkVT;
11904 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
11905 }
11906
11907 SDValue Mask =
11908 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11909 // Set the vector length to only the number of elements we care about. This
11910 // avoids sliding down elements we're going to discard straight away.
11911 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
11912 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11913 SDValue Slidedown =
11914 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11915 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
11916 // Now we can use a cast-like subvector extract to get the result.
11917 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
11918 return DAG.getBitcast(Op.getValueType(), Slidedown);
11919 }
11920
11921 if (VecVT.isFixedLengthVector()) {
11922 VecVT = getContainerForFixedLengthVector(VecVT);
11923 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
11924 }
11925
11926 MVT ContainerSubVecVT = SubVecVT;
11927 if (SubVecVT.isFixedLengthVector())
11928 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
11929
11930 unsigned SubRegIdx;
11931 ElementCount RemIdx;
11932 // extract_subvector scales the index by vscale if the subvector is scalable,
11933 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11934 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11935 if (SubVecVT.isFixedLengthVector()) {
11936 assert(VLen);
11937 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11938 auto Decompose =
11939 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11940 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
11941 SubRegIdx = Decompose.first;
11942 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
11943 (OrigIdx % Vscale));
11944 } else {
11945 auto Decompose =
11946 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11947 VecVT, ContainerSubVecVT, OrigIdx, TRI);
11948 SubRegIdx = Decompose.first;
11949 RemIdx = ElementCount::getScalable(Decompose.second);
11950 }
11951
11952 // If the Idx has been completely eliminated then this is a subvector extract
11953 // which naturally aligns to a vector register. These can easily be handled
11954 // using subregister manipulation. We use an extract_subvector that will
11955 // resolve to an extract subreg.
11956 if (RemIdx.isZero()) {
11957 if (SubVecVT.isFixedLengthVector()) {
11958 assert(VLen);
11959 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11960 Vec =
11961 DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
11962 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
11963 }
11964 return Op;
11965 }
11966
11967 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
11968 // was > M1 then the index would need to be a multiple of VLMAX, and so would
11969 // divide exactly.
11970 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
11971 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
11972
11973 // If the vector type is an LMUL-group type, extract a subvector equal to the
11974 // nearest full vector register type.
11975 MVT InterSubVT = VecVT;
11976 if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
11977 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
11978 // we should have successfully decomposed the extract into a subregister.
11979 // We use an extract_subvector that will resolve to a subreg extract.
11980 assert(SubRegIdx != RISCV::NoSubRegister);
11981 (void)SubRegIdx;
11982 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
11983 if (SubVecVT.isFixedLengthVector()) {
11984 assert(VLen);
11985 Idx /= *VLen / RISCV::RVVBitsPerBlock;
11986 }
11987 InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
11988 Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
11989 }
11990
11991 // Slide this vector register down by the desired number of elements in order
11992 // to place the desired subvector starting at element 0.
11993 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
11994 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
11995 if (SubVecVT.isFixedLengthVector())
11996 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
11997 SDValue Slidedown =
11998 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
11999 Vec, SlidedownAmt, Mask, VL);
12000
12001 // Now the vector is in the right position, extract our final subvector. This
12002 // should resolve to a COPY.
12003 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12004
12005 // We might have bitcast from a mask type: cast back to the original type if
12006 // required.
12007 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12008}
12009
12010// Widen a vector's operands to i8, then truncate its results back to the
12011// original type, typically i1. All operand and result types must be the same.
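// For example, an i1 VECTOR_DEINTERLEAVE is performed on zero-extended i8
// operands, and each i8 result is compared against zero (SETNE) to recover
// the i1 values.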
12012 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
12013 SelectionDAG &DAG) {
12014 MVT VT = N.getSimpleValueType();
12015 MVT WideVT = VT.changeVectorElementType(MVT::i8);
12016 SmallVector<SDValue, 4> WideOps;
12017 for (SDValue Op : N->ops()) {
12018 assert(Op.getSimpleValueType() == VT &&
12019 "Operands and result must be same type");
12020 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12021 }
12022
12023 unsigned NumVals = N->getNumValues();
12024
12025 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
12026 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
12027 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12028 SmallVector<SDValue, 4> TruncVals;
12029 for (unsigned I = 0; I < NumVals; I++) {
12030 TruncVals.push_back(
12031 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12032 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12033 }
12034
12035 if (TruncVals.size() > 1)
12036 return DAG.getMergeValues(TruncVals, DL);
12037 return TruncVals.front();
12038}
12039
12040SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12041 SelectionDAG &DAG) const {
12042 SDLoc DL(Op);
12043 MVT VecVT = Op.getSimpleValueType();
12044
12045 const unsigned Factor = Op->getNumValues();
12046 assert(Factor <= 8);
12047
12048 // 1 bit element vectors need to be widened to e8
12049 if (VecVT.getVectorElementType() == MVT::i1)
12050 return widenVectorOpsToi8(Op, DL, DAG);
12051
12052 // Convert to scalable vectors first.
12053 if (VecVT.isFixedLengthVector()) {
12054 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12055 SmallVector<SDValue, 8> Ops(Factor);
12056 for (unsigned i = 0U; i < Factor; ++i)
12057 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12058 Subtarget);
12059
12060 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12061 SDValue NewDeinterleave =
12062 DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs, Ops);
12063
12064 SmallVector<SDValue, 8> Res(Factor);
12065 for (unsigned i = 0U; i < Factor; ++i)
12066 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12067 DAG, Subtarget);
12068 return DAG.getMergeValues(Res, DL);
12069 }
12070
12071 // If concatenating would exceed LMUL=8, we need to split.
12072 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12073 (8 * RISCV::RVVBitsPerBlock)) {
12074 SmallVector<SDValue, 8> Ops(Factor * 2);
12075 for (unsigned i = 0; i != Factor; ++i) {
12076 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12077 Ops[i * 2] = OpLo;
12078 Ops[i * 2 + 1] = OpHi;
12079 }
12080
12081 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12082
12083 SDValue Lo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
12084 ArrayRef(Ops).slice(0, Factor));
12085 SDValue Hi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
12086 ArrayRef(Ops).slice(Factor, Factor));
12087
12088 SmallVector<SDValue, 8> Res(Factor);
12089 for (unsigned i = 0; i != Factor; ++i)
12090 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
12091 Hi.getValue(i));
12092
12093 return DAG.getMergeValues(Res, DL);
12094 }
12095
12096 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
12097 MVT VT = Op->getSimpleValueType(0);
12098 SDValue V1 = Op->getOperand(0);
12099 SDValue V2 = Op->getOperand(1);
12100
12101 // For fractional LMUL, check if we can use a higher LMUL
12102 // instruction to avoid a vslidedown.
12103 if (SDValue Src = foldConcatVector(V1, V2);
12104 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
12105 EVT NewVT = VT.getDoubleNumVectorElementsVT();
12106 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
12107 // Freeze the source so we can increase its use count.
12108 Src = DAG.getFreeze(Src);
12109 SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
12110 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12111 SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
12112 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12113 Even = DAG.getExtractSubvector(DL, VT, Even, 0);
12114 Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
12115 return DAG.getMergeValues({Even, Odd}, DL);
12116 }
12117
12118 // Freeze the sources so we can increase their use count.
12119 V1 = DAG.getFreeze(V1);
12120 V2 = DAG.getFreeze(V2);
12121 SDValue Even =
12122 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12123 SDValue Odd =
12124 lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12125 return DAG.getMergeValues({Even, Odd}, DL);
12126 }
12127
12128 SmallVector<SDValue, 8> Ops(Op->op_values());
12129
12130 // Concatenate the vectors as one vector to deinterleave
12131 MVT ConcatVT =
12132 MVT::getVectorVT(VecVT.getVectorElementType(),
12133 VecVT.getVectorElementCount().multiplyCoefficientBy(
12134 PowerOf2Ceil(Factor)));
12135 if (Ops.size() < PowerOf2Ceil(Factor))
12136 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
12137 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
12138
12139 if (Factor == 2) {
12140 // We can deinterleave through vnsrl.wi if the element type is smaller than
12141 // ELEN
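// For example, with i8 elements the concatenation is viewed as pairs of
// bytes: a narrowing shift by 0 keeps the even (low) bytes and a shift by 8
// keeps the odd (high) bytes.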
12142 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12143 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
12144 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
12145 return DAG.getMergeValues({Even, Odd}, DL);
12146 }
12147
12148 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
12149 // possible mask vector, then extract the required subvector. Doing this
12150 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
12151 // creation to be rematerialized during register allocation to reduce
12152 // register pressure if needed.
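// For example, each 0b01010101 byte sets mask bits 0, 2, 4, ... after the
// bitcast, selecting the even elements of the concatenated vector, while
// 0b10101010 selects the odd elements.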
12153
12154 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
12155
12156 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
12157 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
12158 SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
12159
12160 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
12161 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
12162 SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
12163
12164 // vcompress the even and odd elements into two separate vectors
12165 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12166 EvenMask, DAG.getUNDEF(ConcatVT));
12167 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12168 OddMask, DAG.getUNDEF(ConcatVT));
12169
12170 // Extract the result half of the gather for even and odd
12171 SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
12172 SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
12173
12174 return DAG.getMergeValues({Even, Odd}, DL);
12175 }
12176
12177 // Store with a unit-stride store and load it back with a segmented load.
12178 MVT XLenVT = Subtarget.getXLenVT();
12179 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12180 SDValue Passthru = DAG.getUNDEF(ConcatVT);
12181
12182 // Allocate a stack slot.
12183 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12184 SDValue StackPtr =
12185 DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
12186 auto &MF = DAG.getMachineFunction();
12187 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12188 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12189
12190 SDValue StoreOps[] = {DAG.getEntryNode(),
12191 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
12192 Concat, StackPtr, VL};
12193
12194 SDValue Chain = DAG.getMemIntrinsicNode(
12195 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
12196 ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12197 MachineMemOperand::MOStore);
12198
12199 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
12200 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
12201 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
12202 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
12203 Intrinsic::riscv_vlseg8_mask};
12204
12205 SDValue LoadOps[] = {
12206 Chain,
12207 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
12208 Passthru,
12209 StackPtr,
12210 Mask,
12211 VL,
12212 DAG.getTargetConstant(
12213 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
12214 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
12215
12216 unsigned Sz =
12217 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12218 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12219
12220 SDValue Load = DAG.getMemIntrinsicNode(
12221 ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
12222 LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12223 MachineMemOperand::MOLoad);
12224
12225 SmallVector<SDValue, 8> Res(Factor);
12226
12227 for (unsigned i = 0U; i < Factor; ++i)
12228 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
12229 DAG.getTargetConstant(i, DL, MVT::i32));
12230
12231 return DAG.getMergeValues(Res, DL);
12232}
12233
12234SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
12235 SelectionDAG &DAG) const {
12236 SDLoc DL(Op);
12237 MVT VecVT = Op.getSimpleValueType();
12238
12239 const unsigned Factor = Op.getNumOperands();
12240 assert(Factor <= 8);
12241
12242 // i1 vectors need to be widened to i8
12243 if (VecVT.getVectorElementType() == MVT::i1)
12244 return widenVectorOpsToi8(Op, DL, DAG);
12245
12246 // Convert to scalable vectors first.
12247 if (VecVT.isFixedLengthVector()) {
12248 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12249 SmallVector<SDValue, 8> Ops(Factor);
12250 for (unsigned i = 0U; i < Factor; ++i)
12251 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12252 Subtarget);
12253
12254 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12255 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
12256
12257 SmallVector<SDValue, 8> Res(Factor);
12258 for (unsigned i = 0U; i < Factor; ++i)
12259 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
12260 Subtarget);
12261 return DAG.getMergeValues(Res, DL);
12262 }
12263
12264 MVT XLenVT = Subtarget.getXLenVT();
12265 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12266
12267 // If the VT is larger than LMUL=8, we need to split and reassemble.
12268 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12269 (8 * RISCV::RVVBitsPerBlock)) {
12270 SmallVector<SDValue, 8> Ops(Factor * 2);
12271 for (unsigned i = 0; i != Factor; ++i) {
12272 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12273 Ops[i] = OpLo;
12274 Ops[i + Factor] = OpHi;
12275 }
12276
12277 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12278
12279 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12280 ArrayRef(Ops).take_front(Factor)),
12281 DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12282 ArrayRef(Ops).drop_front(Factor))};
12283
12284 SmallVector<SDValue, 8> Concats(Factor);
12285 for (unsigned i = 0; i != Factor; ++i) {
12286 unsigned IdxLo = 2 * i;
12287 unsigned IdxHi = 2 * i + 1;
12288 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
12289 Res[IdxLo / Factor].getValue(IdxLo % Factor),
12290 Res[IdxHi / Factor].getValue(IdxHi % Factor));
12291 }
12292
12293 return DAG.getMergeValues(Concats, DL);
12294 }
12295
12296 SDValue Interleaved;
12297
12298 // Spill to the stack using a segment store for simplicity.
12299 if (Factor != 2) {
12300 EVT MemVT =
12301 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(),
12302 VecVT.getVectorElementCount() * Factor);
12303
12304 // Allocate a stack slot.
12305 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12306 SDValue StackPtr =
12307 DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12308 EVT PtrVT = StackPtr.getValueType();
12309 auto &MF = DAG.getMachineFunction();
12310 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12311 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12312
12313 static const Intrinsic::ID IntrIds[] = {
12314 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12315 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12316 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12317 Intrinsic::riscv_vsseg8_mask,
12318 };
12319
12320 unsigned Sz =
12321 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12322 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12323
12324 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12325 for (unsigned i = 0; i < Factor; i++)
12326 StoredVal =
12327 DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12328 Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
12329
12330 SDValue Ops[] = {DAG.getEntryNode(),
12331 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
12332 StoredVal,
12333 StackPtr,
12334 Mask,
12335 VL,
12336 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()),
12337 DL, XLenVT)};
12338
12339 SDValue Chain = DAG.getMemIntrinsicNode(
12340 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12341 VecVT.getVectorElementType(), PtrInfo, Alignment,
12342 MachineMemOperand::MOStore);
12343
12344 SmallVector<SDValue, 8> Loads(Factor);
12345
12346 SDValue Increment =
12347 DAG.getVScale(DL, PtrVT,
12348 APInt(PtrVT.getFixedSizeInBits(),
12349 VecVT.getStoreSize().getKnownMinValue()));
12350 for (unsigned i = 0; i != Factor; ++i) {
12351 if (i != 0)
12352 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
12353
12354 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12355 }
12356
12357 return DAG.getMergeValues(Loads, DL);
12358 }
12359
12360 // Use ri.vzip2{a,b} if available
12361 // TODO: Figure out the best lowering for the spread variants
12362 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
12363 !Op.getOperand(1).isUndef()) {
12364 // Freeze the sources so we can increase their use count.
12365 SDValue V1 = DAG.getFreeze(Op->getOperand(0));
12366 SDValue V2 = DAG.getFreeze(Op->getOperand(1));
12367 SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12368 SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12369 return DAG.getMergeValues({Lo, Hi}, DL);
12370 }
12371
12372 // If the element type is smaller than ELEN, then we can interleave with
12373 // vwaddu.vv and vwmaccu.vx
12374 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12375 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
12376 DAG, Subtarget);
12377 } else {
12378 // Otherwise, fall back to using vrgatherei16.vv
12379 MVT ConcatVT =
12380 MVT::getVectorVT(VecVT.getVectorElementType(),
12381 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
12382 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
12383 Op.getOperand(0), Op.getOperand(1));
12384
12385 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
12386
12387 // 0 1 2 3 4 5 6 7 ...
12388 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
12389
12390 // 1 1 1 1 1 1 1 1 ...
12391 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
12392
12393 // 1 0 1 0 1 0 1 0 ...
12394 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
12395 OddMask = DAG.getSetCC(
12396 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
12397 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
12398 ISD::CondCode::SETNE);
12399
12400 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
12401
12402 // Build up the index vector for interleaving the concatenated vector
12403 // 0 0 1 1 2 2 3 3 ...
12404 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
12405 // 0 n 1 n+1 2 n+2 3 n+3 ...
12406 Idx =
12407 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
12408
12409 // Then perform the interleave
12410 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
12411 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
12412 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
12413 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
12414 }
12415
12416 // Extract the two halves from the interleaved result
12417 SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
12418 SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
12419 VecVT.getVectorMinNumElements());
12420
12421 return DAG.getMergeValues({Lo, Hi}, DL);
12422}
12423
12424// Lower step_vector to the vid instruction. Any non-identity step value must
12425 // be accounted for by manual expansion.
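// For example, a step_vector with a constant step of 4 lowers to vid.v
// followed by a left shift of each element by 2.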
12426SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
12427 SelectionDAG &DAG) const {
12428 SDLoc DL(Op);
12429 MVT VT = Op.getSimpleValueType();
12430 assert(VT.isScalableVector() && "Expected scalable vector");
12431 MVT XLenVT = Subtarget.getXLenVT();
12432 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
12433 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
12434 uint64_t StepValImm = Op.getConstantOperandVal(0);
12435 if (StepValImm != 1) {
12436 if (isPowerOf2_64(StepValImm)) {
12437 SDValue StepVal =
12438 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
12439 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
12440 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
12441 } else {
12442 SDValue StepVal = lowerScalarSplat(
12443 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
12444 VL, VT, DL, DAG, Subtarget);
12445 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
12446 }
12447 }
12448 return StepVec;
12449}
12450
12451// Implement vector_reverse using vrgather.vv with indices determined by
12452// subtracting the id of each element from (VLMAX-1). This will convert
12453// the indices like so:
12454// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
12455// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12456SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
12457 SelectionDAG &DAG) const {
12458 SDLoc DL(Op);
12459 MVT VecVT = Op.getSimpleValueType();
12460 if (VecVT.getVectorElementType() == MVT::i1) {
12461 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
12462 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
12463 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
12464 return DAG.getSetCC(DL, VecVT, Op2,
12465 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
12466 }
12467
12468 MVT ContainerVT = VecVT;
12469 SDValue Vec = Op.getOperand(0);
12470 if (VecVT.isFixedLengthVector()) {
12471 ContainerVT = getContainerForFixedLengthVector(VecVT);
12472 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12473 }
12474
12475 MVT XLenVT = Subtarget.getXLenVT();
12476 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12477
12478 // On some uarchs vrgather.vv will read from every input register for each
12479 // output register, regardless of the indices. However to reverse a vector
12480 // each output register only needs to read from one register. So decompose it
12481 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
12482 // O(LMUL^2).
12483 //
12484 // vsetvli a1, zero, e64, m4, ta, ma
12485 // vrgatherei16.vv v12, v8, v16
12486 // ->
12487 // vsetvli a1, zero, e64, m1, ta, ma
12488 // vrgather.vv v15, v8, v16
12489 // vrgather.vv v14, v9, v16
12490 // vrgather.vv v13, v10, v16
12491 // vrgather.vv v12, v11, v16
12492 if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
12493 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
12494 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
12495 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
12496 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
12497 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
12498
12499 // Fixed length vectors might not fit exactly into their container, and so
12500 // leave a gap in the front of the vector after being reversed. Slide this
12501 // away.
12502 //
12503 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
12504 // 0 1 2 3 x x x x <- reverse
12505 // x x x x 0 1 2 3 <- vslidedown.vx
12506 if (VecVT.isFixedLengthVector()) {
12507 SDValue Offset = DAG.getNode(
12508 ISD::SUB, DL, XLenVT,
12509 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
12510 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
12511 Concat =
12512 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12513 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
12514 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
12515 }
12516 return Concat;
12517 }
12518
12519 unsigned EltSize = ContainerVT.getScalarSizeInBits();
12520 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
12521 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12522 unsigned MaxVLMAX =
12523 VecVT.isFixedLengthVector()
12524 ? VecVT.getVectorNumElements()
12525 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12526
12527 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12528 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
12529
12530 // If this is SEW=8 and VLMAX is potentially more than 256, we need
12531 // to use vrgatherei16.vv.
12532 if (MaxVLMAX > 256 && EltSize == 8) {
12533 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
12534 // Reverse each half, then reassemble them in reverse order.
12535 // NOTE: It's also possible that, after splitting, VLMAX no longer
12536 // requires vrgatherei16.vv.
12537 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12538 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
12539 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
12540 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12541 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12542 // Reassemble the low and high pieces reversed.
12543 // FIXME: This is a CONCAT_VECTORS.
12544 SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0);
12545 return DAG.getInsertSubvector(DL, Res, Lo,
12546 LoVT.getVectorMinNumElements());
12547 }
12548
12549 // Just promote the int type to i16 which will double the LMUL.
12550 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
12551 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12552 }
12553
12554 // At LMUL > 1, do the index computation in 16 bits to reduce register
12555 // pressure.
12556 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
12557 IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
12558 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
12559 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12560 IntVT = IntVT.changeVectorElementType(MVT::i16);
12561 }
12562
12563 // Calculate VLMAX-1 for the desired SEW.
12564 SDValue VLMinus1 = DAG.getNode(
12565 ISD::SUB, DL, XLenVT,
12566 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
12567 DAG.getConstant(1, DL, XLenVT));
12568
12569 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
12570 bool IsRV32E64 =
12571 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
12572 SDValue SplatVL;
12573 if (!IsRV32E64)
12574 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
12575 else
12576 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
12577 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
12578
12579 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
12580 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
12581 DAG.getUNDEF(IntVT), Mask, VL);
12582
12583 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
12584 DAG.getUNDEF(ContainerVT), Mask, VL);
12585 if (VecVT.isFixedLengthVector())
12586 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
12587 return Gather;
12588}
12589
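// Lower vector_splice to a vslidedown of the first operand by the splice
// offset followed by a vslideup of the second operand into the vacated tail,
// e.g. for an immediate offset of 2 the first operand is slid down by 2 and
// the second operand is slid up by VLMAX-2.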
12590SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
12591 SelectionDAG &DAG) const {
12592 SDLoc DL(Op);
12593 SDValue V1 = Op.getOperand(0);
12594 SDValue V2 = Op.getOperand(1);
12595 MVT XLenVT = Subtarget.getXLenVT();
12596 MVT VecVT = Op.getSimpleValueType();
12597
12598 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
12599
12600 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
12601 SDValue DownOffset, UpOffset;
12602 if (ImmValue >= 0) {
12603 // The operand is a TargetConstant; we need to rebuild it as a regular
12604 // constant.
12605 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12606 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
12607 } else {
12608 // The operand is a TargetConstant; we need to rebuild it as a regular
12609 // constant rather than negating the original operand.
12610 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12611 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
12612 }
12613
12614 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
12615
12616 SDValue SlideDown = getVSlidedown(
12617 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
12618 Subtarget.hasVLDependentLatency() ? UpOffset
12619 : DAG.getRegister(RISCV::X0, XLenVT));
12620 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
12621 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
12622 RISCVVType::TAIL_AGNOSTIC);
12623}
12624
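// Lower a fixed-length vector load to a vle/vlm intrinsic on the scalable
// container type with an AVL equal to the number of elements, or to a plain
// whole-register load when the exact VLEN is known and the fixed vector
// completely fills its container.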
12625SDValue
12626RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
12627 SelectionDAG &DAG) const {
12628 SDLoc DL(Op);
12629 auto *Load = cast<LoadSDNode>(Op);
12630
12631 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12632 Load->getMemoryVT(),
12633 *Load->getMemOperand()) &&
12634 "Expecting a correctly-aligned load");
12635
12636 MVT VT = Op.getSimpleValueType();
12637 MVT XLenVT = Subtarget.getXLenVT();
12638 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12639
12640 // If we know the exact VLEN and our fixed length vector completely fills
12641 // the container, use a whole register load instead.
12642 const auto [MinVLMAX, MaxVLMAX] =
12643 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12644 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12645 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12646 MachineMemOperand *MMO = Load->getMemOperand();
12647 SDValue NewLoad =
12648 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
12649 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
12650 MMO->getAAInfo(), MMO->getRanges());
12651 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12652 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12653 }
12654
12655 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12656
12657 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12658 SDValue IntID = DAG.getTargetConstant(
12659 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
12660 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
12661 if (!IsMaskOp)
12662 Ops.push_back(DAG.getUNDEF(ContainerVT));
12663 Ops.push_back(Load->getBasePtr());
12664 Ops.push_back(VL);
12665 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12666 SDValue NewLoad =
12667 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12668 Load->getMemoryVT(), Load->getMemOperand());
12669
12670 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12671 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12672}
12673
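// Lower a fixed-length vector store to a vse/vsm intrinsic on the scalable
// container type with an AVL equal to the number of elements, or to a plain
// whole-register store when the exact VLEN is known and the fixed vector
// completely fills its container.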
12674SDValue
12675RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
12676 SelectionDAG &DAG) const {
12677 SDLoc DL(Op);
12678 auto *Store = cast<StoreSDNode>(Op);
12679
12680 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12681 Store->getMemoryVT(),
12682 *Store->getMemOperand()) &&
12683 "Expecting a correctly-aligned store");
12684
12685 SDValue StoreVal = Store->getValue();
12686 MVT VT = StoreVal.getSimpleValueType();
12687 MVT XLenVT = Subtarget.getXLenVT();
12688
12689 // If the size is less than a byte, we need to pad with zeros to make a byte.
12690 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
12691 VT = MVT::v8i1;
12692 StoreVal =
12693 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
12694 }
12695
12696 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12697
12698 SDValue NewValue =
12699 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12700
12701 // If we know the exact VLEN and our fixed length vector completely fills
12702 // the container, use a whole register store instead.
12703 const auto [MinVLMAX, MaxVLMAX] =
12704 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12705 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12706 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12707 MachineMemOperand *MMO = Store->getMemOperand();
12708 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
12709 MMO->getPointerInfo(), MMO->getBaseAlign(),
12710 MMO->getFlags(), MMO->getAAInfo());
12711 }
12712
12713 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12714
12715 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12716 SDValue IntID = DAG.getTargetConstant(
12717 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
12718 return DAG.getMemIntrinsicNode(
12719 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
12720 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
12721 Store->getMemoryVT(), Store->getMemOperand());
12722}
12723
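// Lower masked.load and vp.load to the vle/vle_mask intrinsics. For an
// expanding load with a non-trivial mask, vcpop(mask) contiguous elements are
// read with an unmasked vle and then redistributed to the active lanes using
// viota.m to build the indices for a vrgather.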
12724SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
12725 SelectionDAG &DAG) const {
12726 SDLoc DL(Op);
12727 MVT VT = Op.getSimpleValueType();
12728
12729 const auto *MemSD = cast<MemSDNode>(Op);
12730 EVT MemVT = MemSD->getMemoryVT();
12731 MachineMemOperand *MMO = MemSD->getMemOperand();
12732 SDValue Chain = MemSD->getChain();
12733 SDValue BasePtr = MemSD->getBasePtr();
12734
12735 SDValue Mask, PassThru, VL;
12736 bool IsExpandingLoad = false;
12737 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
12738 Mask = VPLoad->getMask();
12739 PassThru = DAG.getUNDEF(VT);
12740 VL = VPLoad->getVectorLength();
12741 } else {
12742 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
12743 Mask = MLoad->getMask();
12744 PassThru = MLoad->getPassThru();
12745 IsExpandingLoad = MLoad->isExpandingLoad();
12746 }
12747
12748 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12749
12750 MVT XLenVT = Subtarget.getXLenVT();
12751
12752 MVT ContainerVT = VT;
12753 if (VT.isFixedLengthVector()) {
12754 ContainerVT = getContainerForFixedLengthVector(VT);
12755 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12756 if (!IsUnmasked) {
12757 MVT MaskVT = getMaskTypeFor(ContainerVT);
12758 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12759 }
12760 }
12761
12762 if (!VL)
12763 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12764
12765 SDValue ExpandingVL;
12766 if (!IsUnmasked && IsExpandingLoad) {
12767 ExpandingVL = VL;
12768 VL =
12769 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12770 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12771 }
12772
12773 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
12774 : Intrinsic::riscv_vle_mask;
12775 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12776 if (IntID == Intrinsic::riscv_vle)
12777 Ops.push_back(DAG.getUNDEF(ContainerVT));
12778 else
12779 Ops.push_back(PassThru);
12780 Ops.push_back(BasePtr);
12781 if (IntID == Intrinsic::riscv_vle_mask)
12782 Ops.push_back(Mask);
12783 Ops.push_back(VL);
12784 if (IntID == Intrinsic::riscv_vle_mask)
12785 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
12786
12787 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12788
12789 SDValue Result =
12790 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12791 Chain = Result.getValue(1);
12792 if (ExpandingVL) {
12793 MVT IndexVT = ContainerVT;
12794 if (ContainerVT.isFloatingPoint())
12795 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
12796
12797 MVT IndexEltVT = IndexVT.getVectorElementType();
12798 bool UseVRGATHEREI16 = false;
12799 // If the index vector is an i8 vector and the element count exceeds 256,
12800 // we should change the element type of the index vector to i16 to avoid
12801 // overflow.
12802 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
12803 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
12804 assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
12805 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
12806 UseVRGATHEREI16 = true;
12807 }
12808
12809 SDValue Iota =
12810 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
12811 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
12812 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
12813 Result =
12814 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
12815 : RISCVISD::VRGATHER_VV_VL,
12816 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
12817 }
12818
12819 if (VT.isFixedLengthVector())
12820 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12821
12822 return DAG.getMergeValues({Result, Chain}, DL);
12823}
12824
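// Lower vp.load.ff to the vleff_mask intrinsic. The new VL produced by the
// fault-only-first load is returned as the second result.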
12825SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
12826 SDLoc DL(Op);
12827 MVT VT = Op->getSimpleValueType(0);
12828
12829 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
12830 EVT MemVT = VPLoadFF->getMemoryVT();
12831 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
12832 SDValue Chain = VPLoadFF->getChain();
12833 SDValue BasePtr = VPLoadFF->getBasePtr();
12834
12835 SDValue Mask = VPLoadFF->getMask();
12836 SDValue VL = VPLoadFF->getVectorLength();
12837
12838 MVT XLenVT = Subtarget.getXLenVT();
12839
12840 MVT ContainerVT = VT;
12841 if (VT.isFixedLengthVector()) {
12842 ContainerVT = getContainerForFixedLengthVector(VT);
12843 MVT MaskVT = getMaskTypeFor(ContainerVT);
12844 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12845 }
12846
12847 unsigned IntID = Intrinsic::riscv_vleff_mask;
12848 SDValue Ops[] = {
12849 Chain,
12850 DAG.getTargetConstant(IntID, DL, XLenVT),
12851 DAG.getUNDEF(ContainerVT),
12852 BasePtr,
12853 Mask,
12854 VL,
12855 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)};
12856
12857 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
12858
12859 SDValue Result =
12860 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12861 SDValue OutVL = Result.getValue(1);
12862 Chain = Result.getValue(2);
12863
12864 if (VT.isFixedLengthVector())
12865 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12866
12867 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
12868}
12869
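// Lower masked.store and vp.store to the vse/vse_mask intrinsics. Compressing
// stores first pack the active elements with vcompress and then store
// vcpop(mask) elements unmasked.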
12870SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
12871 SelectionDAG &DAG) const {
12872 SDLoc DL(Op);
12873
12874 const auto *MemSD = cast<MemSDNode>(Op);
12875 EVT MemVT = MemSD->getMemoryVT();
12876 MachineMemOperand *MMO = MemSD->getMemOperand();
12877 SDValue Chain = MemSD->getChain();
12878 SDValue BasePtr = MemSD->getBasePtr();
12879 SDValue Val, Mask, VL;
12880
12881 bool IsCompressingStore = false;
12882 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
12883 Val = VPStore->getValue();
12884 Mask = VPStore->getMask();
12885 VL = VPStore->getVectorLength();
12886 } else {
12887 const auto *MStore = cast<MaskedStoreSDNode>(Op);
12888 Val = MStore->getValue();
12889 Mask = MStore->getMask();
12890 IsCompressingStore = MStore->isCompressingStore();
12891 }
12892
12893 bool IsUnmasked =
12894 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
12895
12896 MVT VT = Val.getSimpleValueType();
12897 MVT XLenVT = Subtarget.getXLenVT();
12898
12899 MVT ContainerVT = VT;
12900 if (VT.isFixedLengthVector()) {
12901 ContainerVT = getContainerForFixedLengthVector(VT);
12902
12903 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12904 if (!IsUnmasked || IsCompressingStore) {
12905 MVT MaskVT = getMaskTypeFor(ContainerVT);
12906 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12907 }
12908 }
12909
12910 if (!VL)
12911 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12912
12913 if (IsCompressingStore) {
12914 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
12915 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
12916 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
12917 VL =
12918 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12919 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12920 }
12921
12922 unsigned IntID =
12923 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
12924 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12925 Ops.push_back(Val);
12926 Ops.push_back(BasePtr);
12927 if (!IsUnmasked)
12928 Ops.push_back(Mask);
12929 Ops.push_back(VL);
12930
12931 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
12932 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12933}
12934
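// Lower vector_compress to the vcompress intrinsic, converting fixed-length
// operands to their scalable container types first.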
12935SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
12936 SelectionDAG &DAG) const {
12937 SDLoc DL(Op);
12938 SDValue Val = Op.getOperand(0);
12939 SDValue Mask = Op.getOperand(1);
12940 SDValue Passthru = Op.getOperand(2);
12941
12942 MVT VT = Val.getSimpleValueType();
12943 MVT XLenVT = Subtarget.getXLenVT();
12944 MVT ContainerVT = VT;
12945 if (VT.isFixedLengthVector()) {
12946 ContainerVT = getContainerForFixedLengthVector(VT);
12947 MVT MaskVT = getMaskTypeFor(ContainerVT);
12948 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12949 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12950 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
12951 }
12952
12953 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12954 SDValue Res =
12955 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
12956 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
12957 Passthru, Val, Mask, VL);
12958
12959 if (VT.isFixedLengthVector())
12960 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
12961
12962 return Res;
12963}
12964
12965SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
12966 SelectionDAG &DAG) const {
12967 unsigned Opc = Op.getOpcode();
12968 SDLoc DL(Op);
12969 SDValue Chain = Op.getOperand(0);
12970 SDValue Op1 = Op.getOperand(1);
12971 SDValue Op2 = Op.getOperand(2);
12972 SDValue CC = Op.getOperand(3);
12973 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
12974 MVT VT = Op.getSimpleValueType();
12975 MVT InVT = Op1.getSimpleValueType();
12976
12977 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
12978 // condition codes.
12979 if (Opc == ISD::STRICT_FSETCCS) {
12980 // Expand strict_fsetccs(x, oeq) to
12981 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
12982 SDVTList VTList = Op->getVTList();
12983 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
12984 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
12985 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
12986 Op2, OLECCVal);
12987 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
12988 Op1, OLECCVal);
12989 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
12990 Tmp1.getValue(1), Tmp2.getValue(1));
12991 // Tmp1 and Tmp2 might be the same node.
12992 if (Tmp1 != Tmp2)
12993 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
12994 return DAG.getMergeValues({Tmp1, OutChain}, DL);
12995 }
12996
12997 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
12998 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
12999 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
13000 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13001 Op2, OEQCCVal);
13002 SDValue Res = DAG.getNOT(DL, OEQ, VT);
13003 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
13004 }
13005 }
13006
13007 MVT ContainerInVT = InVT;
13008 if (InVT.isFixedLengthVector()) {
13009 ContainerInVT = getContainerForFixedLengthVector(InVT);
13010 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
13011 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
13012 }
13013 MVT MaskVT = getMaskTypeFor(ContainerInVT);
13014
13015 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
13016
13017 SDValue Res;
13018 if (Opc == ISD::STRICT_FSETCC &&
13019 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
13020 CCVal == ISD::SETOLE)) {
13021 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
13022 // is only active when both input elements are ordered.
13023 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
13024 SDValue OrderMask1 = DAG.getNode(
13025 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13026 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13027 True, VL});
13028 SDValue OrderMask2 = DAG.getNode(
13029 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13030 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13031 True, VL});
13032 Mask =
13033 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
13034 // Use Mask as the passthru operand to let the result be 0 if either of the
13035 // inputs is unordered.
13036 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
13037 DAG.getVTList(MaskVT, MVT::Other),
13038 {Chain, Op1, Op2, CC, Mask, Mask, VL});
13039 } else {
13040 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
13041 : RISCVISD::STRICT_FSETCCS_VL;
13042 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
13043 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
13044 }
13045
13046 if (VT.isFixedLengthVector()) {
13047 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
13048 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
13049 }
13050 return Res;
13051}
13052
13053// Lower vector ABS to smax(X, sub(0, X)).
13054SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13055 SDLoc DL(Op);
13056 MVT VT = Op.getSimpleValueType();
13057 SDValue X = Op.getOperand(0);
13058
13059 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13060 "Unexpected type for ISD::ABS");
13061
13062 MVT ContainerVT = VT;
13063 if (VT.isFixedLengthVector()) {
13064 ContainerVT = getContainerForFixedLengthVector(VT);
13065 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13066 }
13067
13068 SDValue Mask, VL;
13069 if (Op->getOpcode() == ISD::VP_ABS) {
13070 Mask = Op->getOperand(1);
13071 if (VT.isFixedLengthVector())
13072 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13073 Subtarget);
13074 VL = Op->getOperand(2);
13075 } else
13076 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13077
13078 SDValue SplatZero = DAG.getNode(
13079 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13080 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13081 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13082 DAG.getUNDEF(ContainerVT), Mask, VL);
13083 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13084 DAG.getUNDEF(ContainerVT), Mask, VL);
13085
13086 if (VT.isFixedLengthVector())
13087 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
13088 return Max;
13089}
13090
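// Lower a fixed-length vector operation to the corresponding RISCVISD::*_VL
// node on the scalable container type: operands are converted to scalable
// vectors and a passthru, mask and VL are appended as required by the VL
// opcode.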
13091SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
13092 SelectionDAG &DAG) const {
13093 const auto &TSInfo =
13094 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13095
13096 unsigned NewOpc = getRISCVVLOp(Op);
13097 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
13098 bool HasMask = TSInfo.hasMaskOp(NewOpc);
13099
13100 MVT VT = Op.getSimpleValueType();
13101 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13102
13103 // Create list of operands by converting existing ones to scalable types.
13104 SmallVector<SDValue, 6> Ops;
13105 for (const SDValue &V : Op->op_values()) {
13106 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13107
13108 // Pass through non-vector operands.
13109 if (!V.getValueType().isVector()) {
13110 Ops.push_back(V);
13111 continue;
13112 }
13113
13114 // "cast" fixed length vector to a scalable vector.
13115 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
13116 "Only fixed length vectors are supported!");
13117 MVT VContainerVT = ContainerVT.changeVectorElementType(
13118 V.getSimpleValueType().getVectorElementType());
13119 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
13120 }
13121
13122 SDLoc DL(Op);
13123 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13124 if (HasPassthruOp)
13125 Ops.push_back(DAG.getUNDEF(ContainerVT));
13126 if (HasMask)
13127 Ops.push_back(Mask);
13128 Ops.push_back(VL);
13129
13130 // StrictFP operations have two result values. Their lowered result should
13131 // have the same result count.
13132 if (Op->isStrictFPOpcode()) {
13133 SDValue ScalableRes =
13134 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
13135 Op->getFlags());
13136 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13137 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
13138 }
13139
13140 SDValue ScalableRes =
13141 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
13142 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13143}
13144
13145// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
13146// * Operands of each node are assumed to be in the same order.
13147// * The EVL operand is promoted from i32 to i64 on RV64.
13148// * Fixed-length vectors are converted to their scalable-vector container
13149// types.
13150SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
13151 const auto &TSInfo =
13152 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13153
13154 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13155 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
13156
13157 SDLoc DL(Op);
13158 MVT VT = Op.getSimpleValueType();
13159 SmallVector<SDValue, 16> Ops;
13160
13161 MVT ContainerVT = VT;
13162 if (VT.isFixedLengthVector())
13163 ContainerVT = getContainerForFixedLengthVector(VT);
13164
13165 for (const auto &OpIdx : enumerate(Op->ops())) {
13166 SDValue V = OpIdx.value();
13167 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13168 // Add a dummy passthru value before the mask, or before the EVL if there
13169 // isn't a mask.
13170 if (HasPassthruOp) {
13171 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
13172 if (MaskIdx) {
13173 if (*MaskIdx == OpIdx.index())
13174 Ops.push_back(DAG.getUNDEF(ContainerVT));
13175 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
13176 OpIdx.index()) {
13177 if (Op.getOpcode() == ISD::VP_MERGE) {
13178 // For VP_MERGE, copy the false operand instead of an undef value.
13179 Ops.push_back(Ops.back());
13180 } else {
13181 assert(Op.getOpcode() == ISD::VP_SELECT);
13182 // For VP_SELECT, add an undef value.
13183 Ops.push_back(DAG.getUNDEF(ContainerVT));
13184 }
13185 }
13186 }
13187 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
13188 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
13189 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
13190 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
13191 Subtarget.getXLenVT()));
13192 // Pass through operands which aren't fixed-length vectors.
13193 if (!V.getValueType().isFixedLengthVector()) {
13194 Ops.push_back(V);
13195 continue;
13196 }
13197 // "cast" fixed length vector to a scalable vector.
13198 MVT OpVT = V.getSimpleValueType();
13199 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
13200 assert(useRVVForFixedLengthVectorVT(OpVT) &&
13201 "Only fixed length vectors are supported!");
13202 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
13203 }
13204
13205 if (!VT.isFixedLengthVector())
13206 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
13207
13208 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
13209
13210 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
13211}
13212
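// Lower vp.zext/vp.sext from a mask vector by merging a splat of 1 (zext) or
// -1 (sext) over a splat of 0 under the mask operand being extended.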
13213SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
13214 SelectionDAG &DAG) const {
13215 SDLoc DL(Op);
13216 MVT VT = Op.getSimpleValueType();
13217
13218 SDValue Src = Op.getOperand(0);
13219 // NOTE: Mask is dropped.
13220 SDValue VL = Op.getOperand(2);
13221
13222 MVT ContainerVT = VT;
13223 if (VT.isFixedLengthVector()) {
13224 ContainerVT = getContainerForFixedLengthVector(VT);
13225 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
13226 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13227 }
13228
13229 MVT XLenVT = Subtarget.getXLenVT();
13230 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13231 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13232 DAG.getUNDEF(ContainerVT), Zero, VL);
13233
13234 SDValue SplatValue = DAG.getSignedConstant(
13235 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
13236 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13237 DAG.getUNDEF(ContainerVT), SplatValue, VL);
13238
13239 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
13240 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
13241 if (!VT.isFixedLengthVector())
13242 return Result;
13243 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13244}
13245
13246SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
13247 SelectionDAG &DAG) const {
13248 SDLoc DL(Op);
13249 MVT VT = Op.getSimpleValueType();
13250
13251 SDValue Op1 = Op.getOperand(0);
13252 SDValue Op2 = Op.getOperand(1);
13253 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13254 // NOTE: Mask is dropped.
13255 SDValue VL = Op.getOperand(4);
13256
13257 MVT ContainerVT = VT;
13258 if (VT.isFixedLengthVector()) {
13259 ContainerVT = getContainerForFixedLengthVector(VT);
13260 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13261 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13262 }
13263
13264 SDValue Result;
13265 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13266
13267 switch (Condition) {
13268 default:
13269 break;
13270 // X != Y --> (X^Y)
13271 case ISD::SETNE:
13272 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13273 break;
13274 // X == Y --> ~(X^Y)
13275 case ISD::SETEQ: {
13276 SDValue Temp =
13277 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13278 Result =
13279 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
13280 break;
13281 }
13282 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13283 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13284 case ISD::SETGT:
13285 case ISD::SETULT: {
13286 SDValue Temp =
13287 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13288 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
13289 break;
13290 }
13291 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13292 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13293 case ISD::SETLT:
13294 case ISD::SETUGT: {
13295 SDValue Temp =
13296 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13297 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
13298 break;
13299 }
13300 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13301 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13302 case ISD::SETGE:
13303 case ISD::SETULE: {
13304 SDValue Temp =
13305 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13306 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
13307 break;
13308 }
13309 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13310 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13311 case ISD::SETLE:
13312 case ISD::SETUGE: {
13313 SDValue Temp =
13314 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13315 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
13316 break;
13317 }
13318 }
13319
13320 if (!VT.isFixedLengthVector())
13321 return Result;
13322 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13323}
13324
13325// Lower Floating-Point/Integer Type-Convert VP SDNodes
13326SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
13327 SelectionDAG &DAG) const {
13328 SDLoc DL(Op);
13329
13330 SDValue Src = Op.getOperand(0);
13331 SDValue Mask = Op.getOperand(1);
13332 SDValue VL = Op.getOperand(2);
13333 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13334
13335 MVT DstVT = Op.getSimpleValueType();
13336 MVT SrcVT = Src.getSimpleValueType();
13337 if (DstVT.isFixedLengthVector()) {
13338 DstVT = getContainerForFixedLengthVector(DstVT);
13339 SrcVT = getContainerForFixedLengthVector(SrcVT);
13340 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13341 MVT MaskVT = getMaskTypeFor(DstVT);
13342 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13343 }
13344
13345 unsigned DstEltSize = DstVT.getScalarSizeInBits();
13346 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13347
13348 SDValue Result;
13349 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
13350 if (SrcVT.isInteger()) {
13351 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13352
13353 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
13354 ? RISCVISD::VSEXT_VL
13355 : RISCVISD::VZEXT_VL;
13356
13357 // Do we need to do any pre-widening before converting?
13358 if (SrcEltSize == 1) {
13359 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
13360 MVT XLenVT = Subtarget.getXLenVT();
13361 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13362 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13363 DAG.getUNDEF(IntVT), Zero, VL);
13364 SDValue One = DAG.getSignedConstant(
13365 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
13366 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13367 DAG.getUNDEF(IntVT), One, VL);
13368 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
13369 ZeroSplat, DAG.getUNDEF(IntVT), VL);
13370 } else if (DstEltSize > (2 * SrcEltSize)) {
13371 // Widen before converting.
13372 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
13373 DstVT.getVectorElementCount());
13374 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
13375 }
13376
13377 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13378 } else {
13379 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13380 "Wrong input/output vector types");
13381
13382 // Convert f16 to f32 then convert f32 to i64.
13383 if (DstEltSize > (2 * SrcEltSize)) {
13384 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13385 MVT InterimFVT =
13386 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13387 Src =
13388 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
13389 }
13390
13391 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13392 }
13393 } else { // Narrowing + Conversion
13394 if (SrcVT.isInteger()) {
13395 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13396 // First do a narrowing convert to an FP type half the size, then round
13397 // the FP type to a small FP type if needed.
13398
13399 MVT InterimFVT = DstVT;
13400 if (SrcEltSize > (2 * DstEltSize)) {
13401 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
13402 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13403 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13404 }
13405
13406 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
13407
13408 if (InterimFVT != DstVT) {
13409 Src = Result;
13410 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
13411 }
13412 } else {
13413 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13414 "Wrong input/output vector types");
13415 // First do a narrowing conversion to an integer half the size, then
13416 // truncate if needed.
13417
13418 if (DstEltSize == 1) {
13419 // First convert to the same size integer, then convert to mask using
13420 // setcc.
13421 assert(SrcEltSize >= 16 && "Unexpected FP type!");
13422 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
13423 DstVT.getVectorElementCount());
13424 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13425
13426 // Compare the integer result to 0. The integer should be 0 or 1/-1,
13427 // otherwise the conversion was undefined.
13428 MVT XLenVT = Subtarget.getXLenVT();
13429 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
13430 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
13431 DAG.getUNDEF(InterimIVT), SplatZero, VL);
13432 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
13433 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
13434 DAG.getUNDEF(DstVT), Mask, VL});
13435 } else {
13436 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13437 DstVT.getVectorElementCount());
13438
13439 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13440
13441 while (InterimIVT != DstVT) {
13442 SrcEltSize /= 2;
13443 Src = Result;
13444 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13445 DstVT.getVectorElementCount());
13446 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
13447 Src, Mask, VL);
13448 }
13449 }
13450 }
13451 }
13452
13453 MVT VT = Op.getSimpleValueType();
13454 if (!VT.isFixedLengthVector())
13455 return Result;
13456 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13457}
13458
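// Lower vp.merge on mask vectors by promoting the i1 operands to i8 vectors
// of 0/1, performing the merge there, and comparing the result against zero
// to recover a mask.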
13459SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
13460 SelectionDAG &DAG) const {
13461 SDLoc DL(Op);
13462 MVT VT = Op.getSimpleValueType();
13463 MVT XLenVT = Subtarget.getXLenVT();
13464
13465 SDValue Mask = Op.getOperand(0);
13466 SDValue TrueVal = Op.getOperand(1);
13467 SDValue FalseVal = Op.getOperand(2);
13468 SDValue VL = Op.getOperand(3);
13469
13470 // Use default legalization if a vector of EVL type would be legal.
13471 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
13472 VT.getVectorElementCount());
13473 if (isTypeLegal(EVLVecVT))
13474 return SDValue();
13475
13476 MVT ContainerVT = VT;
13477 if (VT.isFixedLengthVector()) {
13478 ContainerVT = getContainerForFixedLengthVector(VT);
13479 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
13480 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
13481 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
13482 }
13483
13484 // Promote to a vector of i8.
13485 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
13486
13487 // Promote TrueVal and FalseVal using VLMax.
13488 // FIXME: Is there a better way to do this?
13489 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
13490 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13491 DAG.getUNDEF(PromotedVT),
13492 DAG.getConstant(1, DL, XLenVT), VLMax);
13493 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13494 DAG.getUNDEF(PromotedVT),
13495 DAG.getConstant(0, DL, XLenVT), VLMax);
13496 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
13497 SplatZero, DAG.getUNDEF(PromotedVT), VL);
13498 // Any element past VL uses FalseVal, so use VLMax
13499 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
13500 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
13501
13502 // VP_MERGE the two promoted values.
13503 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
13504 TrueVal, FalseVal, FalseVal, VL);
13505
13506 // Convert back to mask.
13507 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13508 SDValue Result = DAG.getNode(
13509 RISCVISD::SETCC_VL, DL, ContainerVT,
13510 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
13511 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
13512
13513 if (VT.isFixedLengthVector())
13514 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13515 return Result;
13516}
13517
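// Lower vp.splice with the same slidedown + slideup sequence as
// vector_splice, but using the EVL operands to compute the offsets. Mask
// vectors are first promoted to i8 vectors of 0/1 and truncated back to a
// mask afterwards.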
13518SDValue
13519RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
13520 SelectionDAG &DAG) const {
13521 using namespace SDPatternMatch;
13522
13523 SDLoc DL(Op);
13524
13525 SDValue Op1 = Op.getOperand(0);
13526 SDValue Op2 = Op.getOperand(1);
13527 SDValue Offset = Op.getOperand(2);
13528 SDValue Mask = Op.getOperand(3);
13529 SDValue EVL1 = Op.getOperand(4);
13530 SDValue EVL2 = Op.getOperand(5);
13531
13532 const MVT XLenVT = Subtarget.getXLenVT();
13533 MVT VT = Op.getSimpleValueType();
13534 MVT ContainerVT = VT;
13535 if (VT.isFixedLengthVector()) {
13536 ContainerVT = getContainerForFixedLengthVector(VT);
13537 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13538 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13539 MVT MaskVT = getMaskTypeFor(ContainerVT);
13540 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13541 }
13542
13543 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
13544 if (IsMaskVector) {
13545 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
13546
13547 // Expand input operands
13548 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13549 DAG.getUNDEF(ContainerVT),
13550 DAG.getConstant(1, DL, XLenVT), EVL1);
13551 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13552 DAG.getUNDEF(ContainerVT),
13553 DAG.getConstant(0, DL, XLenVT), EVL1);
13554 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
13555 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
13556
13557 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13558 DAG.getUNDEF(ContainerVT),
13559 DAG.getConstant(1, DL, XLenVT), EVL2);
13560 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13561 DAG.getUNDEF(ContainerVT),
13562 DAG.getConstant(0, DL, XLenVT), EVL2);
13563 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
13564 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
13565 }
13566
13567 auto getVectorFirstEle = [](SDValue Vec) {
13568 SDValue FirstEle;
13569 if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
13570 return FirstEle;
13571
13572 if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
13573 Vec.getOpcode() == ISD::BUILD_VECTOR)
13574 return Vec.getOperand(0);
13575
13576 return SDValue();
13577 };
13578
13579 if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
13580 if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
13581 MVT EltVT = ContainerVT.getVectorElementType();
13582 SDValue Result;
13583 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
13584 EltVT == MVT::bf16) {
13585 EltVT = EltVT.changeTypeToInteger();
13586 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
13587 Op2 = DAG.getBitcast(ContainerVT, Op2);
13588 FirstEle =
13589 DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
13590 }
13591 Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13592 : RISCVISD::VSLIDE1UP_VL,
13593 DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
13594 FirstEle, Mask, EVL2);
13595 Result = DAG.getBitcast(
13596 VT.isFixedLengthVector() ? getContainerForFixedLengthVector(VT) : VT,
13597 Result);
13598 return VT.isFixedLengthVector()
13599 ? convertFromScalableVector(VT, Result, DAG, Subtarget)
13600 : Result;
13601 }
13602
13603 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
13604 SDValue DownOffset, UpOffset;
13605 if (ImmValue >= 0) {
13606 // The operand is a TargetConstant; we need to rebuild it as a regular
13607 // constant.
13608 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
13609 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
13610 } else {
13611 // The operand is a TargetConstant; we need to rebuild it as a regular
13612 // constant rather than negating the original operand.
13613 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
13614 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
13615 }
13616
13617 if (ImmValue != 0)
13618 Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13619 DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
13620 Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
13621 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
13622 UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
13623
13624 if (IsMaskVector) {
13625 // Truncate Result back to a mask vector (Result has same EVL as Op2)
13626 Result = DAG.getNode(
13627 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
13628 {Result, DAG.getConstant(0, DL, ContainerVT),
13629 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
13630 Mask, EVL2});
13631 }
13632
13633 if (!VT.isFixedLengthVector())
13634 return Result;
13635 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13636}
13637
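// Lower vp.splat to a scalar splat of the value. Constant i1 splats become
// vmset/vmclr; non-constant i1 splats are widened to i8 and compared against
// zero.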
13638SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
13639 SelectionDAG &DAG) const {
13640 SDLoc DL(Op);
13641 SDValue Val = Op.getOperand(0);
13642 SDValue Mask = Op.getOperand(1);
13643 SDValue VL = Op.getOperand(2);
13644 MVT VT = Op.getSimpleValueType();
13645
13646 MVT ContainerVT = VT;
13647 if (VT.isFixedLengthVector()) {
13648 ContainerVT = getContainerForFixedLengthVector(VT);
13649 MVT MaskVT = getMaskTypeFor(ContainerVT);
13650 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13651 }
13652
13653 SDValue Result;
13654 if (VT.getScalarType() == MVT::i1) {
13655 if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
13656 Result =
13657 DAG.getNode(C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
13658 ContainerVT, VL);
13659 } else {
13660 MVT WidenVT = ContainerVT.changeVectorElementType(MVT::i8);
13661 SDValue LHS =
13662 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, WidenVT, DAG.getUNDEF(WidenVT),
13663 DAG.getZExtOrTrunc(Val, DL, Subtarget.getXLenVT()), VL);
13664 SDValue RHS = DAG.getConstant(0, DL, WidenVT);
13665 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13666 {LHS, RHS, DAG.getCondCode(ISD::SETNE),
13667 DAG.getUNDEF(ContainerVT), Mask, VL});
13668 }
13669 } else {
13670 Result =
13671 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
13672 }
13673
13674 if (!VT.isFixedLengthVector())
13675 return Result;
13676 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13677}
13678
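// Lower vp.reverse to a vrgather whose indices are (EVL-1) - vid, promoting
// the index type to i16 when SEW=8 and VLMAX may exceed 256.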
13679SDValue
13680RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
13681 SelectionDAG &DAG) const {
13682 SDLoc DL(Op);
13683 MVT VT = Op.getSimpleValueType();
13684 MVT XLenVT = Subtarget.getXLenVT();
13685
13686 SDValue Op1 = Op.getOperand(0);
13687 SDValue Mask = Op.getOperand(1);
13688 SDValue EVL = Op.getOperand(2);
13689
13690 MVT ContainerVT = VT;
13691 if (VT.isFixedLengthVector()) {
13692 ContainerVT = getContainerForFixedLengthVector(VT);
13693 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13694 MVT MaskVT = getMaskTypeFor(ContainerVT);
13695 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13696 }
13697
13698 MVT GatherVT = ContainerVT;
13699 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
13700 // Check if we are working with mask vectors
13701 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
13702 if (IsMaskVector) {
13703 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
13704
13705 // Expand input operand
13706 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13707 DAG.getUNDEF(IndicesVT),
13708 DAG.getConstant(1, DL, XLenVT), EVL);
13709 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13710 DAG.getUNDEF(IndicesVT),
13711 DAG.getConstant(0, DL, XLenVT), EVL);
13712 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
13713 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
13714 }
13715
13716 unsigned EltSize = GatherVT.getScalarSizeInBits();
13717 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
13718 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
13719 unsigned MaxVLMAX =
13720 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
13721
13722 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
13723 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
13724 // to use vrgatherei16.vv.
13725 // TODO: It's also possible to use vrgatherei16.vv for other types to
13726 // decrease register width for the index calculation.
13727 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
13728 if (MaxVLMAX > 256 && EltSize == 8) {
13729 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
13730 // Split the vector in half and reverse each half using a full register
13731 // reverse.
13732 // Swap the halves and concatenate them.
13733 // Slide the concatenated result by (VLMax - VL).
13734 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
13735 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
13736 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
13737
13738 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
13739 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
13740
13741 // Reassemble the low and high pieces reversed.
13742 // NOTE: this Result is unmasked (because we do not need masks for
13743 // shuffles). If in the future this has to change, we can use a SELECT_VL
13744 // between Result and UNDEF using the mask originally passed to VP_REVERSE
13745 SDValue Result =
13746 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
13747
13748 // Slide off any elements from past EVL that were reversed into the low
13749 // elements.
13750 unsigned MinElts = GatherVT.getVectorMinNumElements();
13751 SDValue VLMax =
13752 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
13753 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
13754
13755 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
13756 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
13757
13758 if (IsMaskVector) {
13759 // Truncate Result back to a mask vector
13760 Result =
13761 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13762 {Result, DAG.getConstant(0, DL, GatherVT),
13763 DAG.getCondCode(ISD::SETNE),
13764 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13765 }
13766
13767 if (!VT.isFixedLengthVector())
13768 return Result;
13769 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13770 }
13771
13772 // Just promote the int type to i16 which will double the LMUL.
13773 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
13774 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13775 }
13776
13777 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
13778 SDValue VecLen =
13779 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
13780 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13781 DAG.getUNDEF(IndicesVT), VecLen, EVL);
13782 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
13783 DAG.getUNDEF(IndicesVT), Mask, EVL);
13784 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
13785 DAG.getUNDEF(GatherVT), Mask, EVL);
13786
13787 if (IsMaskVector) {
13788 // Truncate Result back to a mask vector
13789 Result = DAG.getNode(
13790 RISCVISD::SETCC_VL, DL, ContainerVT,
13791 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
13792 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13793 }
13794
13795 if (!VT.isFixedLengthVector())
13796 return Result;
13797 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13798}
13799
13800SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
13801 SelectionDAG &DAG) const {
13802 MVT VT = Op.getSimpleValueType();
13803 if (VT.getVectorElementType() != MVT::i1)
13804 return lowerVPOp(Op, DAG);
13805
13806 // It is safe to drop the mask parameter as masked-off elements are undef.
13807 SDValue Op1 = Op->getOperand(0);
13808 SDValue Op2 = Op->getOperand(1);
13809 SDValue VL = Op->getOperand(3);
13810
13811 MVT ContainerVT = VT;
13812 const bool IsFixed = VT.isFixedLengthVector();
13813 if (IsFixed) {
13814 ContainerVT = getContainerForFixedLengthVector(VT);
13815 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13816 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13817 }
13818
13819 SDLoc DL(Op);
13820 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
13821 if (!IsFixed)
13822 return Val;
13823 return convertFromScalableVector(VT, Val, DAG, Subtarget);
13824}
13825
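// Lower vp.strided.load to the vlse/vlse_mask intrinsics.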
13826SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
13827 SelectionDAG &DAG) const {
13828 SDLoc DL(Op);
13829 MVT XLenVT = Subtarget.getXLenVT();
13830 MVT VT = Op.getSimpleValueType();
13831 MVT ContainerVT = VT;
13832 if (VT.isFixedLengthVector())
13833 ContainerVT = getContainerForFixedLengthVector(VT);
13834
13835 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13836
13837 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
13838 // Check if the mask is known to be all ones
13839 SDValue Mask = VPNode->getMask();
13840 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13841
13842 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
13843 : Intrinsic::riscv_vlse_mask,
13844 DL, XLenVT);
13845 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
13846 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
13847 VPNode->getStride()};
13848 if (!IsUnmasked) {
13849 if (VT.isFixedLengthVector()) {
13850 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13851 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13852 }
13853 Ops.push_back(Mask);
13854 }
13855 Ops.push_back(VPNode->getVectorLength());
13856 if (!IsUnmasked) {
13857 SDValue Policy =
13858 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
13859 Ops.push_back(Policy);
13860 }
13861
13862 SDValue Result =
13863 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
13864 VPNode->getMemoryVT(), VPNode->getMemOperand());
13865 SDValue Chain = Result.getValue(1);
13866
13867 if (VT.isFixedLengthVector())
13868 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13869
13870 return DAG.getMergeValues({Result, Chain}, DL);
13871}
13872
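// Lower vp.strided.store to the vsse/vsse_mask intrinsics.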
13873SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
13874 SelectionDAG &DAG) const {
13875 SDLoc DL(Op);
13876 MVT XLenVT = Subtarget.getXLenVT();
13877
13878 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
13879 SDValue StoreVal = VPNode->getValue();
13880 MVT VT = StoreVal.getSimpleValueType();
13881 MVT ContainerVT = VT;
13882 if (VT.isFixedLengthVector()) {
13883 ContainerVT = getContainerForFixedLengthVector(VT);
13884 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
13885 }
13886
13887 // Check if the mask is known to be all ones
13888 SDValue Mask = VPNode->getMask();
13889 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13890
13891 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
13892 : Intrinsic::riscv_vsse_mask,
13893 DL, XLenVT);
13894 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
13895 VPNode->getBasePtr(), VPNode->getStride()};
13896 if (!IsUnmasked) {
13897 if (VT.isFixedLengthVector()) {
13898 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13899 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13900 }
13901 Ops.push_back(Mask);
13902 }
13903 Ops.push_back(VPNode->getVectorLength());
13904
13905 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
13906 Ops, VPNode->getMemoryVT(),
13907 VPNode->getMemOperand());
13908}
13909
13910// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
13911// matched to a RVV indexed load. The RVV indexed load instructions only
13912// support the "unsigned unscaled" addressing mode; indices are implicitly
13913// zero-extended or truncated to XLEN and are treated as byte offsets. Any
13914// signed or scaled indexing is extended to the XLEN value type and scaled
13915// accordingly.
13916SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
13917 SelectionDAG &DAG) const {
13918 SDLoc DL(Op);
13919 MVT VT = Op.getSimpleValueType();
13920
13921 const auto *MemSD = cast<MemSDNode>(Op.getNode());
13922 EVT MemVT = MemSD->getMemoryVT();
13923 MachineMemOperand *MMO = MemSD->getMemOperand();
13924 SDValue Chain = MemSD->getChain();
13925 SDValue BasePtr = MemSD->getBasePtr();
13926
13927 [[maybe_unused]] ISD::LoadExtType LoadExtType;
13928 SDValue Index, Mask, PassThru, VL;
13929
13930 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
13931 Index = VPGN->getIndex();
13932 Mask = VPGN->getMask();
13933 PassThru = DAG.getUNDEF(VT);
13934 VL = VPGN->getVectorLength();
13935 // VP doesn't support extending loads.
13936     LoadExtType = ISD::NON_EXTLOAD;
13937   } else {
13938 // Else it must be a MGATHER.
13939 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
13940 Index = MGN->getIndex();
13941 Mask = MGN->getMask();
13942 PassThru = MGN->getPassThru();
13943 LoadExtType = MGN->getExtensionType();
13944 }
13945
13946 MVT IndexVT = Index.getSimpleValueType();
13947 MVT XLenVT = Subtarget.getXLenVT();
13948 
13949   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
13950          "Unexpected VTs!");
13951 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
13952 // Targets have to explicitly opt-in for extending vector loads.
13953 assert(LoadExtType == ISD::NON_EXTLOAD &&
13954 "Unexpected extending MGATHER/VP_GATHER");
13955
13956 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
13957 // the selection of the masked intrinsics doesn't do this for us.
13958 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13959
13960 MVT ContainerVT = VT;
13961 if (VT.isFixedLengthVector()) {
13962 ContainerVT = getContainerForFixedLengthVector(VT);
13963 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
13964 ContainerVT.getVectorElementCount());
13965
13966 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
13967
13968 if (!IsUnmasked) {
13969 MVT MaskVT = getMaskTypeFor(ContainerVT);
13970 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13971 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
13972 }
13973 }
13974
13975 if (!VL)
13976 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13977
13978 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
13979 IndexVT = IndexVT.changeVectorElementType(XLenVT);
13980 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
13981 }
13982
13983 unsigned IntID =
13984 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
13985 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13986 if (IsUnmasked)
13987 Ops.push_back(DAG.getUNDEF(ContainerVT));
13988 else
13989 Ops.push_back(PassThru);
13990 Ops.push_back(BasePtr);
13991 Ops.push_back(Index);
13992 if (!IsUnmasked)
13993 Ops.push_back(Mask);
13994 Ops.push_back(VL);
13995   if (!IsUnmasked)
13996     Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
13997 
13998 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13999 SDValue Result =
14000 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14001 Chain = Result.getValue(1);
14002
14003 if (VT.isFixedLengthVector())
14004 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14005
14006 return DAG.getMergeValues({Result, Chain}, DL);
14007}
14008
14009// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14010// matched to a RVV indexed store. The RVV indexed store instructions only
14011// support the "unsigned unscaled" addressing mode; indices are implicitly
14012// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14013// signed or scaled indexing is extended to the XLEN value type and scaled
14014// accordingly.
14015SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14016 SelectionDAG &DAG) const {
14017 SDLoc DL(Op);
14018 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14019 EVT MemVT = MemSD->getMemoryVT();
14020 MachineMemOperand *MMO = MemSD->getMemOperand();
14021 SDValue Chain = MemSD->getChain();
14022 SDValue BasePtr = MemSD->getBasePtr();
14023
14024 [[maybe_unused]] bool IsTruncatingStore = false;
14025 SDValue Index, Mask, Val, VL;
14026
14027 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14028 Index = VPSN->getIndex();
14029 Mask = VPSN->getMask();
14030 Val = VPSN->getValue();
14031 VL = VPSN->getVectorLength();
14032 // VP doesn't support truncating stores.
14033 IsTruncatingStore = false;
14034 } else {
14035 // Else it must be a MSCATTER.
14036 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14037 Index = MSN->getIndex();
14038 Mask = MSN->getMask();
14039 Val = MSN->getValue();
14040 IsTruncatingStore = MSN->isTruncatingStore();
14041 }
14042
14043 MVT VT = Val.getSimpleValueType();
14044 MVT IndexVT = Index.getSimpleValueType();
14045 MVT XLenVT = Subtarget.getXLenVT();
14046 
14047   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
14048          "Unexpected VTs!");
14049 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14050 // Targets have to explicitly opt-in for extending vector loads and
14051 // truncating vector stores.
14052 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14053
14054 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14055 // the selection of the masked intrinsics doesn't do this for us.
14056 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14057
14058 MVT ContainerVT = VT;
14059 if (VT.isFixedLengthVector()) {
14060 ContainerVT = getContainerForFixedLengthVector(VT);
14061 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14062 ContainerVT.getVectorElementCount());
14063
14064 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14065 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14066
14067 if (!IsUnmasked) {
14068 MVT MaskVT = getMaskTypeFor(ContainerVT);
14069 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14070 }
14071 }
14072
14073 if (!VL)
14074 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14075
14076 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14077 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14078 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14079 }
14080
14081 unsigned IntID =
14082 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14083 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14084 Ops.push_back(Val);
14085 Ops.push_back(BasePtr);
14086 Ops.push_back(Index);
14087 if (!IsUnmasked)
14088 Ops.push_back(Mask);
14089 Ops.push_back(VL);
14090 
14091   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
14092                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14093}
14094
14095SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14096 SelectionDAG &DAG) const {
14097 const MVT XLenVT = Subtarget.getXLenVT();
14098 SDLoc DL(Op);
14099 SDValue Chain = Op->getOperand(0);
14100 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14101 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14102 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14103
14104   // The encoding used for the rounding mode in RISC-V differs from that used
14105   // in FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index
14106   // into a table, which consists of a sequence of 4-bit fields, each
14107   // representing the corresponding FLT_ROUNDS mode.
14108   static const int Table =
14109       (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
14110       (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
14111       (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
14112       (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
14113       (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
14114 
14115 SDValue Shift =
14116 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14117 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14118 DAG.getConstant(Table, DL, XLenVT), Shift);
14119 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14120 DAG.getConstant(7, DL, XLenVT));
14121
14122 return DAG.getMergeValues({Masked, Chain}, DL);
14123}
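// Illustrative note (not part of the upstream source): a worked example of the
// table lookup above, assuming the usual enum values RNE=0/RTZ=1/RDN=2/RUP=3/
// RMM=4 for frm and TowardZero=0/NearestTiesToEven=1/TowardPositive=2/
// TowardNegative=3/NearestTiesToAway=4 for RoundingMode:
//
//   Table = (1 << 0) | (0 << 4) | (3 << 8) | (2 << 12) | (4 << 16) = 0x42301
//   frm = RDN (2)  =>  (0x42301 >> (2 * 4)) & 7 = 0x423 & 7 = 3 = TowardNegative
//
// i.e. each 4-bit field of Table holds the FLT_ROUNDS value for one frm value.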
14124
14125SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14126 SelectionDAG &DAG) const {
14127 const MVT XLenVT = Subtarget.getXLenVT();
14128 SDLoc DL(Op);
14129 SDValue Chain = Op->getOperand(0);
14130 SDValue RMValue = Op->getOperand(1);
14131 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14132
14133   // The encoding used for the rounding mode in RISC-V differs from that used
14134   // in FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
14135   // a table, which consists of a sequence of 4-bit fields, each representing
14136   // the corresponding RISC-V mode.
14137   static const unsigned Table =
14138       (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
14139       (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
14140       (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
14141       (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
14142       (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
14143 
14144 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
14145
14146 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
14147 DAG.getConstant(2, DL, XLenVT));
14148 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14149 DAG.getConstant(Table, DL, XLenVT), Shift);
14150 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14151 DAG.getConstant(0x7, DL, XLenVT));
14152 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14153 RMValue);
14154}
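// Illustrative note (not part of the upstream source): this table is the
// inverse of the one in lowerGET_ROUNDING. Under the same enum assumptions as
// the note above, an incoming FLT_ROUNDS value of 2 (TowardPositive) selects
// the 4-bit field at bit 8, i.e. (Table >> 8) & 7 == RISCVFPRndMode::RUP (3),
// which is then written to the frm CSR.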
14155
14156SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
14157 SelectionDAG &DAG) const {
14158 const MVT XLenVT = Subtarget.getXLenVT();
14159 SDLoc DL(Op);
14160 SDValue Chain = Op->getOperand(0);
14161 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14162 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14163 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14164}
14165
14166SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
14167 SelectionDAG &DAG) const {
14168 const MVT XLenVT = Subtarget.getXLenVT();
14169 SDLoc DL(Op);
14170 SDValue Chain = Op->getOperand(0);
14171 SDValue EnvValue = Op->getOperand(1);
14172 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14173
14174 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14175 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14176 EnvValue);
14177}
14178
14179SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
14180 SelectionDAG &DAG) const {
14181 const MVT XLenVT = Subtarget.getXLenVT();
14182 SDLoc DL(Op);
14183 SDValue Chain = Op->getOperand(0);
14184 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
14185 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14186
14187 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14188 EnvValue);
14189}
14190
14191const uint64_t ModeMask64 = ~RISCVExceptFlags::ALL;
14192const uint32_t ModeMask32 = ~RISCVExceptFlags::ALL;
14193
14194SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
14195 SelectionDAG &DAG) const {
14196 const MVT XLenVT = Subtarget.getXLenVT();
14197 SDLoc DL(Op);
14198 SDValue Chain = Op->getOperand(0);
14199 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14200 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14201 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14202 Chain = Result.getValue(1);
14203 return DAG.getMergeValues({Result, Chain}, DL);
14204}
14205
14206SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
14207 SelectionDAG &DAG) const {
14208 const MVT XLenVT = Subtarget.getXLenVT();
14209 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14210 SDLoc DL(Op);
14211 SDValue Chain = Op->getOperand(0);
14212 SDValue EnvValue = Op->getOperand(1);
14213 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14214 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14215
14216 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14217 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
14218 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14219 ModeMask);
14220 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
14221 EnvValue);
14222}
14223
14224SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
14225 SelectionDAG &DAG) const {
14226 const MVT XLenVT = Subtarget.getXLenVT();
14227 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14228 SDLoc DL(Op);
14229 SDValue Chain = Op->getOperand(0);
14230 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14231 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14232
14233 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14234 ModeMask);
14235}
14236
14237SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
14238 SelectionDAG &DAG) const {
14239   MachineFunction &MF = DAG.getMachineFunction();
14240 
14241 bool isRISCV64 = Subtarget.is64Bit();
14242 EVT PtrVT = getPointerTy(DAG.getDataLayout());
14243
14244 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
14245 return DAG.getFrameIndex(FI, PtrVT);
14246}
14247
14248// Returns the opcode of the target-specific SDNode that implements the 32-bit
14249// form of the given Opcode.
14250static unsigned getRISCVWOpcode(unsigned Opcode) {
14251 switch (Opcode) {
14252 default:
14253 llvm_unreachable("Unexpected opcode");
14254 case ISD::SHL:
14255 return RISCVISD::SLLW;
14256 case ISD::SRA:
14257 return RISCVISD::SRAW;
14258 case ISD::SRL:
14259 return RISCVISD::SRLW;
14260 case ISD::SDIV:
14261 return RISCVISD::DIVW;
14262 case ISD::UDIV:
14263 return RISCVISD::DIVUW;
14264 case ISD::UREM:
14265 return RISCVISD::REMUW;
14266 case ISD::ROTL:
14267 return RISCVISD::ROLW;
14268 case ISD::ROTR:
14269 return RISCVISD::RORW;
14270 }
14271}
14272
14273// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
14274// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
14275// otherwise be promoted to i64, making it difficult to select the
14276 // SLLW/DIVUW/.../*W later because the fact that the operation was originally of
14277 // type i8/i16/i32 is lost.
14278 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
14279                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
14280 SDLoc DL(N);
14281 unsigned WOpcode = getRISCVWOpcode(N->getOpcode());
14282 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
14283 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
14284 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
14285 // ReplaceNodeResults requires we maintain the same type for the return value.
14286 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
14287}
14288
14289 // Converts the given 32-bit operation to an i64 operation with sign-extension
14290 // semantics so that the number of sign-extension instructions is reduced.
14291 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
14292   SDLoc DL(N);
14293 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14294 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14295 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
14296 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14297 DAG.getValueType(MVT::i32));
14298 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
14299}
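// Illustrative note (not part of the upstream source): for an i32 add on RV64
// this helper produces a DAG shaped roughly like
//   (truncate (sign_extend_inreg (add (any_extend a), (any_extend b)), i32))
// which instruction selection can match to ADDW; since ADDW already produces a
// sign-extended 64-bit result, the sign_extend_inreg becomes free.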
14300 
14301 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
14302                                              SmallVectorImpl<SDValue> &Results,
14303                                              SelectionDAG &DAG) const {
14304 SDLoc DL(N);
14305 switch (N->getOpcode()) {
14306 default:
14307     llvm_unreachable("Don't know how to custom type legalize this operation!");
14308   case ISD::STRICT_FP_TO_SINT:
14309   case ISD::STRICT_FP_TO_UINT:
14310   case ISD::FP_TO_SINT:
14311 case ISD::FP_TO_UINT: {
14312 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14313 "Unexpected custom legalisation");
14314 bool IsStrict = N->isStrictFPOpcode();
14315 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
14316 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
14317 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
14318     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14319         TargetLowering::TypeSoftenFloat) {
14320       if (!isTypeLegal(Op0.getValueType()))
14321 return;
14322 if (IsStrict) {
14323 SDValue Chain = N->getOperand(0);
14324 // In absence of Zfh, promote f16 to f32, then convert.
14325 if (Op0.getValueType() == MVT::f16 &&
14326 !Subtarget.hasStdExtZfhOrZhinx()) {
14327 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
14328 {Chain, Op0});
14329 Chain = Op0.getValue(1);
14330 }
14331 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
14332 : RISCVISD::STRICT_FCVT_WU_RV64;
14333 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14334 SDValue Res = DAG.getNode(
14335 Opc, DL, VTs, Chain, Op0,
14336 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14337 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14338 Results.push_back(Res.getValue(1));
14339 return;
14340 }
14341 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
14342 // convert.
14343 if ((Op0.getValueType() == MVT::f16 &&
14344 !Subtarget.hasStdExtZfhOrZhinx()) ||
14345 Op0.getValueType() == MVT::bf16)
14346 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14347
14348 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14349 SDValue Res =
14350 DAG.getNode(Opc, DL, MVT::i64, Op0,
14351 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14352 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14353 return;
14354 }
14355 // If the FP type needs to be softened, emit a library call using the 'si'
14356 // version. If we left it to default legalization we'd end up with 'di'. If
14357 // the FP type doesn't need to be softened just let generic type
14358 // legalization promote the result type.
14359 RTLIB::Libcall LC;
14360 if (IsSigned)
14361 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
14362 else
14363 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
14364 MakeLibCallOptions CallOptions;
14365 EVT OpVT = Op0.getValueType();
14366 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
14367 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
14368 SDValue Result;
14369 std::tie(Result, Chain) =
14370 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
14371 Results.push_back(Result);
14372 if (IsStrict)
14373 Results.push_back(Chain);
14374 break;
14375 }
14376 case ISD::LROUND: {
14377 SDValue Op0 = N->getOperand(0);
14378 EVT Op0VT = Op0.getValueType();
14379     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14380         TargetLowering::TypeSoftenFloat) {
14381       if (!isTypeLegal(Op0VT))
14382 return;
14383
14384 // In absence of Zfh, promote f16 to f32, then convert.
14385 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
14386 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14387
14388 SDValue Res =
14389 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
14390 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
14391 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14392 return;
14393 }
14394 // If the FP type needs to be softened, emit a library call to lround. We'll
14395 // need to truncate the result. We assume any value that doesn't fit in i32
14396 // is allowed to return an unspecified value.
14397 RTLIB::Libcall LC =
14398 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
14399 MakeLibCallOptions CallOptions;
14400 EVT OpVT = Op0.getValueType();
14401 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
14402 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
14403 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
14404 Results.push_back(Result);
14405 break;
14406 }
14407   case ISD::READCYCLECOUNTER:
14408   case ISD::READSTEADYCOUNTER: {
14409     assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
14410 "has custom type legalization on riscv32");
14411
14412 SDValue LoCounter, HiCounter;
14413 MVT XLenVT = Subtarget.getXLenVT();
14414 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
14415 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
14416 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
14417 } else {
14418 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
14419 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
14420 }
14421 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
14422 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
14423 N->getOperand(0), LoCounter, HiCounter);
14424
14425 Results.push_back(
14426 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
14427 Results.push_back(RCW.getValue(2));
14428 break;
14429 }
14430 case ISD::LOAD: {
14431 if (!ISD::isNON_EXTLoad(N))
14432 return;
14433
14434 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
14435 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
14436 LoadSDNode *Ld = cast<LoadSDNode>(N);
14437
14438 if (N->getValueType(0) == MVT::i64) {
14439 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
14440 "Unexpected custom legalisation");
14441
14442 if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
14443 return;
14444
14445 SDLoc DL(N);
14446 SDValue Result = DAG.getMemIntrinsicNode(
14447 RISCVISD::LD_RV32, DL,
14448 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
14449 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
14450 SDValue Lo = Result.getValue(0);
14451 SDValue Hi = Result.getValue(1);
14452 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
14453 Results.append({Pair, Result.getValue(2)});
14454 return;
14455 }
14456
14457 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14458 "Unexpected custom legalisation");
14459
14460 SDLoc dl(N);
14461 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
14462 Ld->getBasePtr(), Ld->getMemoryVT(),
14463 Ld->getMemOperand());
14464 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
14465 Results.push_back(Res.getValue(1));
14466 return;
14467 }
14468 case ISD::MUL: {
14469 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
14470 unsigned XLen = Subtarget.getXLen();
14471 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
14472 if (Size > XLen) {
14473 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
14474 SDValue LHS = N->getOperand(0);
14475 SDValue RHS = N->getOperand(1);
14476 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
14477
14478 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
14479 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
14480 // We need exactly one side to be unsigned.
14481 if (LHSIsU == RHSIsU)
14482 return;
14483
14484 auto MakeMULPair = [&](SDValue S, SDValue U) {
14485 MVT XLenVT = Subtarget.getXLenVT();
14486 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
14487 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
14488 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
14489 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
14490 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
14491 };
14492
14493 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
14494 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
14495
14496 // The other operand should be signed, but still prefer MULH when
14497 // possible.
14498 if (RHSIsU && LHSIsS && !RHSIsS)
14499 Results.push_back(MakeMULPair(LHS, RHS));
14500 else if (LHSIsU && RHSIsS && !LHSIsS)
14501 Results.push_back(MakeMULPair(RHS, LHS));
14502
14503 return;
14504 }
14505 [[fallthrough]];
14506 }
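// Illustrative note (not part of the upstream source): the MULHSU+MUL path
// above applies to a 2*XLEN-wide multiply where one operand is known to be
// sign-extended from XLEN bits and the other zero-extended. The full product
// is then formed as
//   lo = MUL(s, u)      // low XLEN bits
//   hi = MULHSU(s, u)   // high XLEN bits of the signed*unsigned product
// e.g. on RV32 an i64 multiply meeting this condition becomes one MUL plus one
// MULHSU instead of the more general expansion.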
14507 case ISD::ADD:
14508 case ISD::SUB:
14509 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14510 "Unexpected custom legalisation");
14511 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
14512 break;
14513 case ISD::SHL:
14514 case ISD::SRA:
14515 case ISD::SRL:
14516 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14517 "Unexpected custom legalisation");
14518 if (N->getOperand(1).getOpcode() != ISD::Constant) {
14519 // If we can use a BSET instruction, allow default promotion to apply.
14520 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
14521 isOneConstant(N->getOperand(0)))
14522 break;
14523 Results.push_back(customLegalizeToWOp(N, DAG));
14524 break;
14525 }
14526
14527 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
14528 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
14529 // shift amount.
14530 if (N->getOpcode() == ISD::SHL) {
14531 SDLoc DL(N);
14532 SDValue NewOp0 =
14533 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14534 SDValue NewOp1 =
14535 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
14536 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
14537 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14538 DAG.getValueType(MVT::i32));
14539 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14540 }
14541
14542 break;
14543 case ISD::ROTL:
14544 case ISD::ROTR:
14545 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14546 "Unexpected custom legalisation");
14547 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
14548 Subtarget.hasVendorXTHeadBb()) &&
14549 "Unexpected custom legalization");
14550 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
14551 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
14552 return;
14553 Results.push_back(customLegalizeToWOp(N, DAG));
14554 break;
14555   case ISD::CTTZ:
14556   case ISD::CTTZ_ZERO_UNDEF:
14557   case ISD::CTLZ:
14558 case ISD::CTLZ_ZERO_UNDEF: {
14559 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14560 "Unexpected custom legalisation");
14561
14562 SDValue NewOp0 =
14563 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14564 bool IsCTZ =
14565 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
14566 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
14567 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
14568 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14569 return;
14570 }
14571 case ISD::SDIV:
14572 case ISD::UDIV:
14573 case ISD::UREM: {
14574 MVT VT = N->getSimpleValueType(0);
14575 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
14576 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
14577 "Unexpected custom legalisation");
14578 // Don't promote division/remainder by constant since we should expand those
14579     // to multiply by magic constant.
14580     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
14581     if (N->getOperand(1).getOpcode() == ISD::Constant &&
14582 !isIntDivCheap(N->getValueType(0), Attr))
14583 return;
14584
14585 // If the input is i32, use ANY_EXTEND since the W instructions don't read
14586 // the upper 32 bits. For other types we need to sign or zero extend
14587 // based on the opcode.
14588 unsigned ExtOpc = ISD::ANY_EXTEND;
14589 if (VT != MVT::i32)
14590       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
14591                                            : ISD::ZERO_EXTEND;
14592 
14593 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
14594 break;
14595 }
14596 case ISD::SADDO: {
14597 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14598 "Unexpected custom legalisation");
14599
14600 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
14601 // use the default legalization.
14602 if (!isa<ConstantSDNode>(N->getOperand(1)))
14603 return;
14604
14605 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14606 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
14607 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
14608 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14609 DAG.getValueType(MVT::i32));
14610
14611 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14612
14613 // For an addition, the result should be less than one of the operands (LHS)
14614 // if and only if the other operand (RHS) is negative, otherwise there will
14615 // be overflow.
14616 // For a subtraction, the result should be less than one of the operands
14617 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
14618 // otherwise there will be overflow.
14619 EVT OType = N->getValueType(1);
14620 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
14621 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
14622
14623 SDValue Overflow =
14624 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
14625 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14626 Results.push_back(Overflow);
14627 return;
14628 }
14629 case ISD::UADDO:
14630 case ISD::USUBO: {
14631 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14632 "Unexpected custom legalisation");
14633 bool IsAdd = N->getOpcode() == ISD::UADDO;
14634 // Create an ADDW or SUBW.
14635 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14636 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14637 SDValue Res =
14638 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
14639 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14640 DAG.getValueType(MVT::i32));
14641
14642 SDValue Overflow;
14643 if (IsAdd && isOneConstant(RHS)) {
14644 // Special case uaddo X, 1 overflowed if the addition result is 0.
14645 // The general case (X + C) < C is not necessarily beneficial. Although we
14646 // reduce the live range of X, we may introduce the materialization of
14647       // constant C, especially when the setcc result is used by a branch. We
14648       // have no compare-with-constant-and-branch instructions.
14649 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
14650 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
14651 } else if (IsAdd && isAllOnesConstant(RHS)) {
14652 // Special case uaddo X, -1 overflowed if X != 0.
14653 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
14654 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
14655 } else {
14656 // Sign extend the LHS and perform an unsigned compare with the ADDW
14657 // result. Since the inputs are sign extended from i32, this is equivalent
14658 // to comparing the lower 32 bits.
14659 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14660 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
14661 IsAdd ? ISD::SETULT : ISD::SETUGT);
14662 }
14663
14664 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14665 Results.push_back(Overflow);
14666 return;
14667 }
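// Illustrative note (not part of the upstream source): for the uaddo X, 1
// special case above, the unsigned add of 1 wraps exactly when X was the
// all-ones value, in which case the 32-bit result is 0; hence
//   overflow = (Res == 0)
// and no extra constant needs to be materialized for the comparison.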
14668 case ISD::UADDSAT:
14669 case ISD::USUBSAT: {
14670 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14671 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
14672 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
14673 // promotion for UADDO/USUBO.
14674 Results.push_back(expandAddSubSat(N, DAG));
14675 return;
14676 }
14677 case ISD::SADDSAT:
14678 case ISD::SSUBSAT: {
14679 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14680 "Unexpected custom legalisation");
14681 Results.push_back(expandAddSubSat(N, DAG));
14682 return;
14683 }
14684 case ISD::ABS: {
14685 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14686 "Unexpected custom legalisation");
14687
14688 if (Subtarget.hasStdExtZbb()) {
14689 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
14690 // This allows us to remember that the result is sign extended. Expanding
14691 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
14692 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
14693 N->getOperand(0));
14694 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
14695 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
14696 return;
14697 }
14698
14699 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
14700 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14701
14702     // Freeze the source so we can increase its use count.
14703 Src = DAG.getFreeze(Src);
14704
14705 // Copy sign bit to all bits using the sraiw pattern.
14706 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
14707 DAG.getValueType(MVT::i32));
14708 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
14709 DAG.getConstant(31, DL, MVT::i64));
14710
14711 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
14712 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
14713
14714 // NOTE: The result is only required to be anyextended, but sext is
14715 // consistent with type legalization of sub.
14716 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
14717 DAG.getValueType(MVT::i32));
14718 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14719 return;
14720 }
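// Illustrative note (not part of the upstream source): a worked example of the
// sraiw-based abs expansion above for X = -5 (i32):
//   SignFill = X >> 31           = -1 (all ones)
//   X ^ SignFill                 =  4 (bitwise not of -5)
//   (X ^ SignFill) - SignFill    =  4 - (-1) = 5 = |X|
// For non-negative X, SignFill is 0 and both steps are no-ops.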
14721 case ISD::BITCAST: {
14722 EVT VT = N->getValueType(0);
14723 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
14724 SDValue Op0 = N->getOperand(0);
14725 EVT Op0VT = Op0.getValueType();
14726 MVT XLenVT = Subtarget.getXLenVT();
14727 if (VT == MVT::i16 &&
14728 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
14729 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
14730 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
14731 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
14732 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
14733 Subtarget.hasStdExtFOrZfinx()) {
14734 SDValue FPConv =
14735 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
14736 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
14737 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
14738 Subtarget.hasStdExtDOrZdinx()) {
14739 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
14740 DAG.getVTList(MVT::i32, MVT::i32), Op0);
14741 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
14742 NewReg.getValue(0), NewReg.getValue(1));
14743 Results.push_back(RetReg);
14744 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
14745 isTypeLegal(Op0VT)) {
14746 // Custom-legalize bitcasts from fixed-length vector types to illegal
14747 // scalar types in order to improve codegen. Bitcast the vector to a
14748 // one-element vector type whose element type is the same as the result
14749 // type, and extract the first element.
14750 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
14751 if (isTypeLegal(BVT)) {
14752 SDValue BVec = DAG.getBitcast(BVT, Op0);
14753 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
14754 }
14755 }
14756 break;
14757 }
14758 case ISD::BITREVERSE: {
14759 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
14760 "Unexpected custom legalisation");
14761 MVT XLenVT = Subtarget.getXLenVT();
14762 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14763 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
14764 // ReplaceNodeResults requires we maintain the same type for the return
14765 // value.
14766 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
14767 break;
14768 }
14769 case RISCVISD::BREV8:
14770 case RISCVISD::ORC_B: {
14771 MVT VT = N->getSimpleValueType(0);
14772 MVT XLenVT = Subtarget.getXLenVT();
14773 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
14774 "Unexpected custom legalisation");
14775 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
14776 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
14777 "Unexpected extension");
14778 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14779 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
14780 // ReplaceNodeResults requires we maintain the same type for the return
14781 // value.
14782 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
14783 break;
14784 }
14785   case ISD::EXTRACT_VECTOR_ELT: {
14786     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
14787 // type is illegal (currently only vXi64 RV32).
14788 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
14789 // transferred to the destination register. We issue two of these from the
14790 // upper- and lower- halves of the SEW-bit vector element, slid down to the
14791 // first element.
14792 SDValue Vec = N->getOperand(0);
14793 SDValue Idx = N->getOperand(1);
14794
14795 // The vector type hasn't been legalized yet so we can't issue target
14796 // specific nodes if it needs legalization.
14797 // FIXME: We would manually legalize if it's important.
14798 if (!isTypeLegal(Vec.getValueType()))
14799 return;
14800
14801 MVT VecVT = Vec.getSimpleValueType();
14802
14803 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
14804 VecVT.getVectorElementType() == MVT::i64 &&
14805 "Unexpected EXTRACT_VECTOR_ELT legalization");
14806
14807 // If this is a fixed vector, we need to convert it to a scalable vector.
14808 MVT ContainerVT = VecVT;
14809 if (VecVT.isFixedLengthVector()) {
14810 ContainerVT = getContainerForFixedLengthVector(VecVT);
14811 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
14812 }
14813
14814 MVT XLenVT = Subtarget.getXLenVT();
14815
14816 // Use a VL of 1 to avoid processing more elements than we need.
14817 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
14818
14819 // Unless the index is known to be 0, we must slide the vector down to get
14820 // the desired element into index 0.
14821 if (!isNullConstant(Idx)) {
14822 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
14823 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
14824 }
14825
14826 // Extract the lower XLEN bits of the correct vector element.
14827 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
14828
14829 // To extract the upper XLEN bits of the vector element, shift the first
14830 // element right by 32 bits and re-extract the lower XLEN bits.
14831 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14832 DAG.getUNDEF(ContainerVT),
14833 DAG.getConstant(32, DL, XLenVT), VL);
14834 SDValue LShr32 =
14835 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
14836 DAG.getUNDEF(ContainerVT), Mask, VL);
14837
14838 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
14839
14840 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
14841 break;
14842 }
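// Illustrative note (not part of the upstream source): extracting element i of
// a vXi64 vector on RV32 therefore becomes, roughly,
//   v'  = vslidedown.vx v, i      ; desired element now at index 0
//   lo  = vmv.x.s v'              ; low 32 bits
//   v'' = vsrl.vx v', 32
//   hi  = vmv.x.s v''             ; high 32 bits
// and the two GPRs are paired back into an i64 with BUILD_PAIR.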
14843   case ISD::INTRINSIC_WO_CHAIN: {
14844     unsigned IntNo = N->getConstantOperandVal(0);
14845 switch (IntNo) {
14846     default:
14847       llvm_unreachable(
14848           "Don't know how to custom type legalize this intrinsic!");
14849 case Intrinsic::experimental_get_vector_length: {
14850 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
14851 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14852 return;
14853 }
14854 case Intrinsic::experimental_cttz_elts: {
14855 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
14856 Results.push_back(
14857 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
14858 return;
14859 }
14860 case Intrinsic::riscv_orc_b:
14861 case Intrinsic::riscv_brev8:
14862 case Intrinsic::riscv_sha256sig0:
14863 case Intrinsic::riscv_sha256sig1:
14864 case Intrinsic::riscv_sha256sum0:
14865 case Intrinsic::riscv_sha256sum1:
14866 case Intrinsic::riscv_sm3p0:
14867 case Intrinsic::riscv_sm3p1: {
14868 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14869 return;
14870 unsigned Opc;
14871 switch (IntNo) {
14872 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
14873 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
14874 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
14875 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
14876 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
14877 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
14878 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
14879 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
14880 }
14881
14882 SDValue NewOp =
14883 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14884 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
14885 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14886 return;
14887 }
14888 case Intrinsic::riscv_sm4ks:
14889 case Intrinsic::riscv_sm4ed: {
14890 unsigned Opc =
14891 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
14892 SDValue NewOp0 =
14893 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14894 SDValue NewOp1 =
14895 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
14896 SDValue Res =
14897 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
14898 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14899 return;
14900 }
14901 case Intrinsic::riscv_mopr: {
14902 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14903 return;
14904 SDValue NewOp =
14905 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14906 SDValue Res = DAG.getNode(
14907 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
14908 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
14909 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14910 return;
14911 }
14912 case Intrinsic::riscv_moprr: {
14913 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14914 return;
14915 SDValue NewOp0 =
14916 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14917 SDValue NewOp1 =
14918 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
14919 SDValue Res = DAG.getNode(
14920 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
14921 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
14922 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14923 return;
14924 }
14925 case Intrinsic::riscv_clmul: {
14926 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14927 return;
14928
14929 SDValue NewOp0 =
14930 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14931 SDValue NewOp1 =
14932 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
14933 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
14934 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14935 return;
14936 }
14937 case Intrinsic::riscv_clmulh:
14938 case Intrinsic::riscv_clmulr: {
14939 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14940 return;
14941
14942 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
14943 // to the full 128-bit clmul result of multiplying two xlen values.
14944 // Perform clmulr or clmulh on the shifted values. Finally, extract the
14945 // upper 32 bits.
14946 //
14947 // The alternative is to mask the inputs to 32 bits and use clmul, but
14948 // that requires two shifts to mask each input without zext.w.
14949 // FIXME: If the inputs are known zero extended or could be freely
14950 // zero extended, the mask form would be better.
14951 SDValue NewOp0 =
14952 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14953 SDValue NewOp1 =
14954 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
14955 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
14956 DAG.getConstant(32, DL, MVT::i64));
14957 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
14958 DAG.getConstant(32, DL, MVT::i64));
14959 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
14960 : RISCVISD::CLMULR;
14961 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
14962 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
14963 DAG.getConstant(32, DL, MVT::i64));
14964 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14965 return;
14966 }
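// Illustrative note (not part of the upstream source): the shift-by-32 trick
// above relies on  clmul(a << 32, b << 32)  being  clmul(a, b) << 64  in the
// (conceptual) 128-bit carry-less product. The 64-bit CLMULH/CLMULR of the
// shifted operands therefore contains the bits the corresponding 32-bit
// operation would produce, and the final logical shift right by 32 moves them
// into the low 32 bits of the result.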
14967 case Intrinsic::riscv_vmv_x_s: {
14968 EVT VT = N->getValueType(0);
14969 MVT XLenVT = Subtarget.getXLenVT();
14970 if (VT.bitsLT(XLenVT)) {
14971 // Simple case just extract using vmv.x.s and truncate.
14972 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
14973 Subtarget.getXLenVT(), N->getOperand(1));
14974 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
14975 return;
14976 }
14977
14978 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
14979 "Unexpected custom legalization");
14980
14981 // We need to do the move in two steps.
14982 SDValue Vec = N->getOperand(1);
14983 MVT VecVT = Vec.getSimpleValueType();
14984
14985 // First extract the lower XLEN bits of the element.
14986 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
14987
14988 // To extract the upper XLEN bits of the vector element, shift the first
14989 // element right by 32 bits and re-extract the lower XLEN bits.
14990 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
14991
14992 SDValue ThirtyTwoV =
14993 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
14994 DAG.getConstant(32, DL, XLenVT), VL);
14995 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
14996 DAG.getUNDEF(VecVT), Mask, VL);
14997 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
14998
14999 Results.push_back(
15000 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15001 break;
15002 }
15003 }
15004 break;
15005 }
15006 case ISD::VECREDUCE_ADD:
15007 case ISD::VECREDUCE_AND:
15008 case ISD::VECREDUCE_OR:
15009 case ISD::VECREDUCE_XOR:
15010   case ISD::VECREDUCE_SMAX:
15011   case ISD::VECREDUCE_UMAX:
15012   case ISD::VECREDUCE_SMIN:
15013   case ISD::VECREDUCE_UMIN:
15014     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
15015 Results.push_back(V);
15016 break;
15017 case ISD::VP_REDUCE_ADD:
15018 case ISD::VP_REDUCE_AND:
15019 case ISD::VP_REDUCE_OR:
15020 case ISD::VP_REDUCE_XOR:
15021 case ISD::VP_REDUCE_SMAX:
15022 case ISD::VP_REDUCE_UMAX:
15023 case ISD::VP_REDUCE_SMIN:
15024 case ISD::VP_REDUCE_UMIN:
15025 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
15026 Results.push_back(V);
15027 break;
15028 case ISD::GET_ROUNDING: {
15029 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
15030 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
15031 Results.push_back(Res.getValue(0));
15032 Results.push_back(Res.getValue(1));
15033 break;
15034 }
15035 }
15036}
15037
15038/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
15039/// which corresponds to it.
15040static unsigned getVecReduceOpcode(unsigned Opc) {
15041 switch (Opc) {
15042 default:
15043 llvm_unreachable("Unhandled binary to transform reduction");
15044 case ISD::ADD:
15045 return ISD::VECREDUCE_ADD;
15046 case ISD::UMAX:
15047 return ISD::VECREDUCE_UMAX;
15048 case ISD::SMAX:
15049 return ISD::VECREDUCE_SMAX;
15050 case ISD::UMIN:
15051 return ISD::VECREDUCE_UMIN;
15052 case ISD::SMIN:
15053 return ISD::VECREDUCE_SMIN;
15054 case ISD::AND:
15055 return ISD::VECREDUCE_AND;
15056 case ISD::OR:
15057 return ISD::VECREDUCE_OR;
15058 case ISD::XOR:
15059 return ISD::VECREDUCE_XOR;
15060 case ISD::FADD:
15061 // Note: This is the associative form of the generic reduction opcode.
15062 return ISD::VECREDUCE_FADD;
15063 }
15064}
15065
15066/// Perform two related transforms whose purpose is to incrementally recognize
15067/// an explode_vector followed by scalar reduction as a vector reduction node.
15068/// This exists to recover from a deficiency in SLP which can't handle
15069/// forests with multiple roots sharing common nodes. In some cases, one
15070/// of the trees will be vectorized, and the other will remain (unprofitably)
15071/// scalarized.
15072static SDValue
15073 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
15074                                   const RISCVSubtarget &Subtarget) {
15075
15076   // This transform needs to run before all integer types have been legalized
15077 // to i64 (so that the vector element type matches the add type), and while
15078 // it's safe to introduce odd sized vector types.
15079   if (DAG.NewNodesMustHaveLegalTypes)
15080     return SDValue();
15081
15082 // Without V, this transform isn't useful. We could form the (illegal)
15083 // operations and let them be scalarized again, but there's really no point.
15084 if (!Subtarget.hasVInstructions())
15085 return SDValue();
15086
15087 const SDLoc DL(N);
15088 const EVT VT = N->getValueType(0);
15089 const unsigned Opc = N->getOpcode();
15090
15091 // For FADD, we only handle the case with reassociation allowed. We
15092 // could handle strict reduction order, but at the moment, there's no
15093 // known reason to, and the complexity isn't worth it.
15094 // TODO: Handle fminnum and fmaxnum here
15095 if (!VT.isInteger() &&
15096 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
15097 return SDValue();
15098
15099 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
15100 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
15101 "Inconsistent mappings");
15102 SDValue LHS = N->getOperand(0);
15103 SDValue RHS = N->getOperand(1);
15104
15105 if (!LHS.hasOneUse() || !RHS.hasOneUse())
15106 return SDValue();
15107
15108 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15109 std::swap(LHS, RHS);
15110
15111 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15112 !isa<ConstantSDNode>(RHS.getOperand(1)))
15113 return SDValue();
15114
15115 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
15116 SDValue SrcVec = RHS.getOperand(0);
15117 EVT SrcVecVT = SrcVec.getValueType();
15118 assert(SrcVecVT.getVectorElementType() == VT);
15119 if (SrcVecVT.isScalableVector())
15120 return SDValue();
15121
15122 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
15123 return SDValue();
15124
15125 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
15126 // reduce_op (extract_subvector [2 x VT] from V). This will form the
15127 // root of our reduction tree. TODO: We could extend this to any two
15128 // adjacent aligned constant indices if desired.
15129 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15130 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
15131 uint64_t LHSIdx =
15132 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
15133 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
15134 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
15135 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15136 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
15137 }
15138 }
15139
15140 // Match (binop (reduce (extract_subvector V, 0),
15141 // (extract_vector_elt V, sizeof(SubVec))))
15142 // into a reduction of one more element from the original vector V.
15143 if (LHS.getOpcode() != ReduceOpc)
15144 return SDValue();
15145
15146 SDValue ReduceVec = LHS.getOperand(0);
15147 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15148 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
15149 isNullConstant(ReduceVec.getOperand(1)) &&
15150 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
15151 // For illegal types (e.g. 3xi32), most will be combined again into a
15152 // wider (hopefully legal) type. If this is a terminal state, we are
15153 // relying on type legalization here to produce something reasonable
15154 // and this lowering quality could probably be improved. (TODO)
15155 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
15156 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15157 return DAG.getNode(ReduceOpc, DL, VT, Vec,
15158 ReduceVec->getFlags() & N->getFlags());
15159 }
15160
15161 return SDValue();
15162}
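// Illustrative note (not part of the upstream source): the two patterns above
// let a chain such as
//   add (add (extractelt v, 0), (extractelt v, 1)), (extractelt v, 2)
// be rebuilt incrementally as
//   add (vecreduce_add (extract_subvector v, [0,2))), (extractelt v, 2)
//   -> vecreduce_add (extract_subvector v, [0,3))
// growing the reduced prefix by one element per combine step.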
15163
15164
15165// Try to fold (<bop> x, (reduction.<bop> vec, start))
15166 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
15167                                     const RISCVSubtarget &Subtarget) {
15168 auto BinOpToRVVReduce = [](unsigned Opc) {
15169 switch (Opc) {
15170 default:
15171 llvm_unreachable("Unhandled binary to transform reduction");
15172 case ISD::ADD:
15173 return RISCVISD::VECREDUCE_ADD_VL;
15174 case ISD::UMAX:
15175 return RISCVISD::VECREDUCE_UMAX_VL;
15176 case ISD::SMAX:
15177 return RISCVISD::VECREDUCE_SMAX_VL;
15178 case ISD::UMIN:
15179 return RISCVISD::VECREDUCE_UMIN_VL;
15180 case ISD::SMIN:
15181 return RISCVISD::VECREDUCE_SMIN_VL;
15182 case ISD::AND:
15183 return RISCVISD::VECREDUCE_AND_VL;
15184 case ISD::OR:
15185 return RISCVISD::VECREDUCE_OR_VL;
15186 case ISD::XOR:
15187 return RISCVISD::VECREDUCE_XOR_VL;
15188 case ISD::FADD:
15189 return RISCVISD::VECREDUCE_FADD_VL;
15190 case ISD::FMAXNUM:
15191 return RISCVISD::VECREDUCE_FMAX_VL;
15192 case ISD::FMINNUM:
15193 return RISCVISD::VECREDUCE_FMIN_VL;
15194 }
15195 };
15196
15197 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
15198 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15199 isNullConstant(V.getOperand(1)) &&
15200 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
15201 };
15202
15203 unsigned Opc = N->getOpcode();
15204 unsigned ReduceIdx;
15205 if (IsReduction(N->getOperand(0), Opc))
15206 ReduceIdx = 0;
15207 else if (IsReduction(N->getOperand(1), Opc))
15208 ReduceIdx = 1;
15209 else
15210 return SDValue();
15211
15212   // Skip if FADD disallows reassociation but the combiner needs it.
15213 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
15214 return SDValue();
15215
15216 SDValue Extract = N->getOperand(ReduceIdx);
15217 SDValue Reduce = Extract.getOperand(0);
15218 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
15219 return SDValue();
15220
15221 SDValue ScalarV = Reduce.getOperand(2);
15222 EVT ScalarVT = ScalarV.getValueType();
15223 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
15224 ScalarV.getOperand(0)->isUndef() &&
15225 isNullConstant(ScalarV.getOperand(2)))
15226 ScalarV = ScalarV.getOperand(1);
15227
15228 // Make sure that ScalarV is a splat with VL=1.
15229 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
15230 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
15231 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
15232 return SDValue();
15233
15234 if (!isNonZeroAVL(ScalarV.getOperand(2)))
15235 return SDValue();
15236
15237   // Check that the scalar operand of ScalarV is the neutral element.
15238   // TODO: Deal with values other than the neutral element.
15239 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
15240 0))
15241 return SDValue();
15242
15243 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
15244 // FIXME: We might be able to improve this if operand 0 is undef.
15245 if (!isNonZeroAVL(Reduce.getOperand(5)))
15246 return SDValue();
15247
15248 SDValue NewStart = N->getOperand(1 - ReduceIdx);
15249
15250 SDLoc DL(N);
15251 SDValue NewScalarV =
15252 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
15253 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
15254
15255 // If we looked through an INSERT_SUBVECTOR we need to restore it.
15256 if (ScalarVT != ScalarV.getValueType())
15257 NewScalarV =
15258 DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);
15259
15260 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
15261 NewScalarV, Reduce.getOperand(3),
15262 Reduce.getOperand(4), Reduce.getOperand(5)};
15263 SDValue NewReduce =
15264 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
15265 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
15266 Extract.getOperand(1));
15267}
15268
15269// Optimize (add (shl x, c0), (shl y, c1)) ->
15270// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
15271// or
15272// (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <=31.
15273 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
15274                                   const RISCVSubtarget &Subtarget) {
15275 const bool HasStdExtZba = Subtarget.hasStdExtZba();
15276 const bool HasVendorXAndesPerf = Subtarget.hasVendorXAndesPerf();
15277 const bool HasVendorXqciac = Subtarget.hasVendorXqciac();
15278 // Perform this optimization only in the zba/xandesperf/xqciac extension.
15279 if (!HasStdExtZba && !HasVendorXAndesPerf && !HasVendorXqciac)
15280 return SDValue();
15281
15282 // Skip for vector types and larger types.
15283 EVT VT = N->getValueType(0);
15284 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15285 return SDValue();
15286
15287 // The two operand nodes must be SHL and have no other use.
15288 SDValue N0 = N->getOperand(0);
15289 SDValue N1 = N->getOperand(1);
15290 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
15291 !N0->hasOneUse() || !N1->hasOneUse())
15292 return SDValue();
15293
15294 // Check c0 and c1.
15295 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15296 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
15297 if (!N0C || !N1C)
15298 return SDValue();
15299 int64_t C0 = N0C->getSExtValue();
15300 int64_t C1 = N1C->getSExtValue();
15301 if (C0 <= 0 || C1 <= 0)
15302 return SDValue();
15303
15304 int64_t Diff = std::abs(C0 - C1);
15305 bool IsShXaddDiff = Diff == 1 || Diff == 2 || Diff == 3;
15306 bool HasShXadd = HasStdExtZba || HasVendorXAndesPerf;
15307
15308 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
15309 if ((!IsShXaddDiff && HasShXadd && !HasVendorXqciac) ||
15310 (IsShXaddDiff && !HasShXadd && HasVendorXqciac))
15311 return SDValue();
15312
15313 // Skip if QC_SHLADD is not applicable.
15314 if (Diff == 0 || Diff > 31)
15315 return SDValue();
15316
15317 // Build nodes.
15318 SDLoc DL(N);
15319 int64_t Bits = std::min(C0, C1);
15320 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
15321 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
15322 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
15323 DAG.getConstant(Diff, DL, VT), NS);
15324 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
15325}
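// Illustrative note (not part of the upstream source): with Zba, for example,
//   (add (shl x, 1), (shl y, 3))      ; c0 = 1, c1 = 3, Diff = 2
// becomes
//   (shl (SH2ADD y, x), 1)            ; ((y << 2) + x) << 1
// which selects to sh2add followed by slli instead of two shifts and an add.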
15326
15327// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
15328// or 3.
15329 static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
15330                                      SelectionDAG &DAG) {
15331 using namespace llvm::SDPatternMatch;
15332
15333 // Looking for a reg-reg add and not an addi.
15334 if (isa<ConstantSDNode>(N->getOperand(1)))
15335 return SDValue();
15336
15337 // Based on testing it seems that performance degrades if the ADDI has
15338 // more than 2 uses.
15339 if (AddI->use_size() > 2)
15340 return SDValue();
15341
15342 APInt AddVal;
15343 SDValue SHLVal;
15344 if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
15345 return SDValue();
15346
15347 APInt VShift;
15348 if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
15349 return SDValue();
15350
15351 if (VShift.slt(1) || VShift.sgt(3))
15352 return SDValue();
15353
15354 SDLoc DL(N);
15355 EVT VT = N->getValueType(0);
15356 // The shift must be positive but the add can be signed.
15357 uint64_t ShlConst = VShift.getZExtValue();
15358 int64_t AddConst = AddVal.getSExtValue();
15359
15360 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
15361 DAG.getConstant(ShlConst, DL, VT), Other);
15362 return DAG.getNode(ISD::ADD, DL, VT, SHADD,
15363 DAG.getSignedConstant(AddConst, DL, VT));
15364}
15365
15366// Optimize (add (add (shl x, c0), c1), y) ->
15367// (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3].
15368 static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
15369 const RISCVSubtarget &Subtarget) {
15370 // Perform this optimization only in the zba extension.
15371 if (!ReassocShlAddiAdd || !Subtarget.hasStdExtZba())
15372 return SDValue();
15373
15374 // Skip for vector types and larger types.
15375 EVT VT = N->getValueType(0);
15376 if (VT != Subtarget.getXLenVT())
15377 return SDValue();
15378
15379 SDValue AddI = N->getOperand(0);
15380 SDValue Other = N->getOperand(1);
15381 if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
15382 return V;
15383 if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
15384 return V;
15385 return SDValue();
15386}
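// Illustrative worked example of the reassociation above: with Zba,
//   (add (add (shl x, 2), 100), y) -> (ADDI (SH2ADD y, x), 100)
// since ((x << 2) + 100) + y == ((x << 2) + y) + 100, and SH2ADD computes
// (x << 2) + y in a single instruction.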
15387
15388// Combine a constant select operand into its use:
15389//
15390// (and (select cond, -1, c), x)
15391// -> (select cond, x, (and x, c)) [AllOnes=1]
15392// (or (select cond, 0, c), x)
15393// -> (select cond, x, (or x, c)) [AllOnes=0]
15394// (xor (select cond, 0, c), x)
15395// -> (select cond, x, (xor x, c)) [AllOnes=0]
15396// (add (select cond, 0, c), x)
15397// -> (select cond, x, (add x, c)) [AllOnes=0]
15398// (sub x, (select cond, 0, c))
15399// -> (select cond, x, (sub x, c)) [AllOnes=0]
15400 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
15401 SelectionDAG &DAG, bool AllOnes,
15402 const RISCVSubtarget &Subtarget) {
15403 EVT VT = N->getValueType(0);
15404
15405 // Skip vectors.
15406 if (VT.isVector())
15407 return SDValue();
15408
15409 if (!Subtarget.hasConditionalMoveFusion()) {
15410 // (select cond, x, (and x, c)) has custom lowering with Zicond.
15411 if ((!Subtarget.hasStdExtZicond() &&
15412 !Subtarget.hasVendorXVentanaCondOps()) ||
15413 N->getOpcode() != ISD::AND)
15414 return SDValue();
15415
15416 // Maybe harmful when the condition code has multiple uses.
15417 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
15418 return SDValue();
15419
15420 // Maybe harmful when VT is wider than XLen.
15421 if (VT.getSizeInBits() > Subtarget.getXLen())
15422 return SDValue();
15423 }
15424
15425 if ((Slct.getOpcode() != ISD::SELECT &&
15426 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
15427 !Slct.hasOneUse())
15428 return SDValue();
15429
15430 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
15431 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
15432 };
15433
15434 bool SwapSelectOps;
15435 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
15436 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
15437 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
15438 SDValue NonConstantVal;
15439 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
15440 SwapSelectOps = false;
15441 NonConstantVal = FalseVal;
15442 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
15443 SwapSelectOps = true;
15444 NonConstantVal = TrueVal;
15445 } else
15446 return SDValue();
15447
15448 // Slct is now known to be the desired identity constant when CC is true.
15449 TrueVal = OtherOp;
15450 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
15451 // Unless SwapSelectOps says the condition should be false.
15452 if (SwapSelectOps)
15453 std::swap(TrueVal, FalseVal);
15454
15455 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
15456 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
15457 {Slct.getOperand(0), Slct.getOperand(1),
15458 Slct.getOperand(2), TrueVal, FalseVal});
15459
15460 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
15461 {Slct.getOperand(0), TrueVal, FalseVal});
15462}
15463
15464// Attempt combineSelectAndUse on each operand of a commutative operator N.
15465 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
15466 bool AllOnes,
15467 const RISCVSubtarget &Subtarget) {
15468 SDValue N0 = N->getOperand(0);
15469 SDValue N1 = N->getOperand(1);
15470 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
15471 return Result;
15472 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
15473 return Result;
15474 return SDValue();
15475}
15476
15477// Transform (add (mul x, c0), c1) ->
15478// (add (mul (add x, c1/c0), c0), c1%c0).
15479// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
15480// that should be excluded is when c0*(c1/c0) is simm12, which will lead
15481// to an infinite loop in DAGCombine if transformed.
15482// Or transform (add (mul x, c0), c1) ->
15483// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
15484// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
15485// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
15486// lead to an infinite loop in DAGCombine if transformed.
15487// Or transform (add (mul x, c0), c1) ->
15488// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
15489// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
15490// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
15491// lead to an infinite loop in DAGCombine if transformed.
15492// Or transform (add (mul x, c0), c1) ->
15493// (mul (add x, c1/c0), c0).
15494// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
15495 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
15496 const RISCVSubtarget &Subtarget) {
15497 // Skip for vector types and larger types.
15498 EVT VT = N->getValueType(0);
15499 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15500 return SDValue();
15501 // The first operand node must be a MUL and have no other use.
15502 SDValue N0 = N->getOperand(0);
15503 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
15504 return SDValue();
15505 // Check if c0 and c1 match above conditions.
15506 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15507 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15508 if (!N0C || !N1C)
15509 return SDValue();
15510 // If N0C has multiple uses it's possible one of the cases in
15511 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
15512 // in an infinite loop.
15513 if (!N0C->hasOneUse())
15514 return SDValue();
15515 int64_t C0 = N0C->getSExtValue();
15516 int64_t C1 = N1C->getSExtValue();
15517 int64_t CA, CB;
15518 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
15519 return SDValue();
15520 // Search for proper CA (non-zero) and CB that both are simm12.
15521 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
15522 !isInt<12>(C0 * (C1 / C0))) {
15523 CA = C1 / C0;
15524 CB = C1 % C0;
15525 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
15526 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
15527 CA = C1 / C0 + 1;
15528 CB = C1 % C0 - C0;
15529 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
15530 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
15531 CA = C1 / C0 - 1;
15532 CB = C1 % C0 + C0;
15533 } else
15534 return SDValue();
15535 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
15536 SDLoc DL(N);
15537 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
15538 DAG.getSignedConstant(CA, DL, VT));
15539 SDValue New1 =
15540 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
15541 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
15542}
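// Illustrative worked example of the transform above: c0 = 100, c1 = 4100.
// c1 is not a simm12 (4100 > 2047), but c1/c0 = 41 and c1%c0 = 0 both are,
// and c0*(c1/c0) = 4100 is not, so
//   (add (mul x, 100), 4100) -> (add (mul (add x, 41), 100), 0)
// i.e. 100*x + 4100 == 100*(x + 41), trading a non-simm12 constant
// materialization for a single ADDI by 41.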
15543
15544// add (zext, zext) -> zext (add (zext, zext))
15545// sub (zext, zext) -> sext (sub (zext, zext))
15546// mul (zext, zext) -> zext (mul (zext, zext))
15547// sdiv (zext, zext) -> zext (sdiv (zext, zext))
15548// udiv (zext, zext) -> zext (udiv (zext, zext))
15549// srem (zext, zext) -> zext (srem (zext, zext))
15550// urem (zext, zext) -> zext (urem (zext, zext))
15551//
15552 // where the sum of the extend widths match, and the range of the bin op
15553// fits inside the width of the narrower bin op. (For profitability on rvv, we
15554// use a power of two for both inner and outer extend.)
15555 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
15556
15557 EVT VT = N->getValueType(0);
15558 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
15559 return SDValue();
15560
15561 SDValue N0 = N->getOperand(0);
15562 SDValue N1 = N->getOperand(1);
15563 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
15564 return SDValue();
15565 if (!N0.hasOneUse() || !N1.hasOneUse())
15566 return SDValue();
15567
15568 SDValue Src0 = N0.getOperand(0);
15569 SDValue Src1 = N1.getOperand(0);
15570 EVT SrcVT = Src0.getValueType();
15571 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
15572 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
15573 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
15574 return SDValue();
15575
15576 LLVMContext &C = *DAG.getContext();
15577 EVT ElemVT = EVT::getIntegerVT(C, VT.getScalarSizeInBits() / 2);
15578 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
15579
15580 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
15581 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
15582
15583 // Src0 and Src1 are zero extended, so they're always positive if signed.
15584 //
15585 // sub can produce a negative from two positive operands, so it needs sign
15586 // extended. Other nodes produce a positive from two positive operands, so
15587 // zero extend instead.
15588 unsigned OuterExtend =
15589 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
15590
15591 return DAG.getNode(
15592 OuterExtend, SDLoc(N), VT,
15593 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
15594}
15595
15596 // Try to turn (add (xor bool, 1), -1) into (neg bool).
15597 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
15598 SDValue N0 = N->getOperand(0);
15599 SDValue N1 = N->getOperand(1);
15600 EVT VT = N->getValueType(0);
15601 SDLoc DL(N);
15602
15603 // RHS should be -1.
15604 if (!isAllOnesConstant(N1))
15605 return SDValue();
15606
15607 // Look for (xor X, 1).
15608 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
15609 return SDValue();
15610
15611 // First xor input should be 0 or 1.
15612 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15613 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
15614 return SDValue();
15615
15616 // Emit a negate of the setcc.
15617 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
15618 N0.getOperand(0));
15619}
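// Illustrative check of the fold above for a boolean b in {0, 1}:
//   (xor b, 1) + (-1) == (1 - b) - 1 == -b == (neg b),
// which is why the combine only fires when the first xor input is known to
// have all bits above bit 0 clear.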
15620
15621 static SDValue performADDCombine(SDNode *N,
15622 TargetLowering::DAGCombinerInfo &DCI,
15623 const RISCVSubtarget &Subtarget) {
15624 SelectionDAG &DAG = DCI.DAG;
15625 if (SDValue V = combineAddOfBooleanXor(N, DAG))
15626 return V;
15627 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
15628 return V;
15629 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
15630 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
15631 return V;
15632 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
15633 return V;
15634 }
15635 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15636 return V;
15637 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15638 return V;
15639 if (SDValue V = combineBinOpOfZExt(N, DAG))
15640 return V;
15641
15642 // fold (add (select lhs, rhs, cc, 0, y), x) ->
15643 // (select lhs, rhs, cc, x, (add x, y))
15644 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
15645}
15646
15647// Try to turn a sub boolean RHS and constant LHS into an addi.
15648 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
15649 SDValue N0 = N->getOperand(0);
15650 SDValue N1 = N->getOperand(1);
15651 EVT VT = N->getValueType(0);
15652 SDLoc DL(N);
15653
15654 // Require a constant LHS.
15655 auto *N0C = dyn_cast<ConstantSDNode>(N0);
15656 if (!N0C)
15657 return SDValue();
15658
15659 // All our optimizations involve subtracting 1 from the immediate and forming
15660 // an ADDI. Make sure the new immediate is valid for an ADDI.
15661 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
15662 if (!ImmValMinus1.isSignedIntN(12))
15663 return SDValue();
15664
15665 SDValue NewLHS;
15666 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
15667 // (sub constant, (setcc x, y, eq/neq)) ->
15668 // (add (setcc x, y, neq/eq), constant - 1)
15669 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15670 EVT SetCCOpVT = N1.getOperand(0).getValueType();
15671 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
15672 return SDValue();
15673 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15674 NewLHS =
15675 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
15676 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
15677 N1.getOperand(0).getOpcode() == ISD::SETCC) {
15678 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
15679 // Since setcc returns a bool the xor is equivalent to 1-setcc.
15680 NewLHS = N1.getOperand(0);
15681 } else
15682 return SDValue();
15683
15684 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
15685 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
15686}
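// Illustrative worked example of the fold above with C = 5:
//   (sub 5, (setcc x, y, eq)) -> (add (setcc x, y, ne), 4)
// If x == y: 5 - 1 == 4 and 0 + 4 == 4. If x != y: 5 - 0 == 5 and 1 + 4 == 5.
// The new immediate (C - 1) fits in a simm12, so the result is a single ADDI.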
15687
15688// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
15689// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
15690// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
15691// valid with Y=3, while 0b0000_1000_0000_0100 is not.
15692 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
15693 const RISCVSubtarget &Subtarget) {
15694 if (!Subtarget.hasStdExtZbb())
15695 return SDValue();
15696
15697 EVT VT = N->getValueType(0);
15698
15699 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
15700 return SDValue();
15701
15702 SDValue N0 = N->getOperand(0);
15703 SDValue N1 = N->getOperand(1);
15704
15705 if (N0->getOpcode() != ISD::SHL)
15706 return SDValue();
15707
15708 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
15709 if (!ShAmtCLeft)
15710 return SDValue();
15711 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
15712
15713 if (ShiftedAmount >= 8)
15714 return SDValue();
15715
15716 SDValue LeftShiftOperand = N0->getOperand(0);
15717 SDValue RightShiftOperand = N1;
15718
15719 if (ShiftedAmount != 0) { // Right operand must be a right shift.
15720 if (N1->getOpcode() != ISD::SRL)
15721 return SDValue();
15722 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
15723 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
15724 return SDValue();
15725 RightShiftOperand = N1.getOperand(0);
15726 }
15727
15728 // At least one shift should have a single use.
15729 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
15730 return SDValue();
15731
15732 if (LeftShiftOperand != RightShiftOperand)
15733 return SDValue();
15734
15735 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
15736 Mask <<= ShiftedAmount;
15737 // Check that X has indeed the right shape (only the Y-th bit can be set in
15738 // every byte).
15739 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
15740 return SDValue();
15741
15742 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
15743}
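// Illustrative worked example of the orc.b fold above on an i16 value with
// Y = 3, X = 0b0000_1000_0000_1000 (only bit 3 of each byte may be set):
//   (shl X, 5) = 0x0100 (truncated to 16 bits)
//   (srl X, 3) = 0x0101
//   difference = 0x0100 - 0x0101 = 0xFFFF == (orc.b X)
// Each byte with its Y-th bit set contributes 2^(8k+8) - 2^(8k) = 0xFF << 8k.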
15744
15745 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
15746 const RISCVSubtarget &Subtarget) {
15747 if (SDValue V = combineSubOfBoolean(N, DAG))
15748 return V;
15749
15750 EVT VT = N->getValueType(0);
15751 SDValue N0 = N->getOperand(0);
15752 SDValue N1 = N->getOperand(1);
15753 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
15754 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
15755 isNullConstant(N1.getOperand(1))) {
15756 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15757 if (CCVal == ISD::SETLT) {
15758 SDLoc DL(N);
15759 unsigned ShAmt = N0.getValueSizeInBits() - 1;
15760 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
15761 DAG.getConstant(ShAmt, DL, VT));
15762 }
15763 }
15764
15765 if (SDValue V = combineBinOpOfZExt(N, DAG))
15766 return V;
15767 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
15768 return V;
15769
15770 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
15771 // (select lhs, rhs, cc, x, (sub x, y))
15772 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
15773}
15774
15775// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
15776// Legalizing setcc can introduce xors like this. Doing this transform reduces
15777// the number of xors and may allow the xor to fold into a branch condition.
15778 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
15779 SDValue N0 = N->getOperand(0);
15780 SDValue N1 = N->getOperand(1);
15781 bool IsAnd = N->getOpcode() == ISD::AND;
15782
15783 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
15784 return SDValue();
15785
15786 if (!N0.hasOneUse() || !N1.hasOneUse())
15787 return SDValue();
15788
15789 SDValue N01 = N0.getOperand(1);
15790 SDValue N11 = N1.getOperand(1);
15791
15792 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
15793 // (xor X, -1) based on the upper bits of the other operand being 0. If the
15794 // operation is And, allow one of the Xors to use -1.
15795 if (isOneConstant(N01)) {
15796 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
15797 return SDValue();
15798 } else if (isOneConstant(N11)) {
15799 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
15800 if (!(IsAnd && isAllOnesConstant(N01)))
15801 return SDValue();
15802 } else
15803 return SDValue();
15804
15805 EVT VT = N->getValueType(0);
15806
15807 SDValue N00 = N0.getOperand(0);
15808 SDValue N10 = N1.getOperand(0);
15809
15810 // The LHS of the xors needs to be 0/1.
15811 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15812 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
15813 return SDValue();
15814
15815 // Invert the opcode and insert a new xor.
15816 SDLoc DL(N);
15817 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15818 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
15819 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
15820}
15821
15822// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
15823// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
15824 // value to an unsigned value. This will be lowered to vmax and a series of
15825 // vnclipu instructions later. This can be extended to truncated types other
15826 // than i8 by replacing 256 and 255 with the equivalent constants for the
15827// type.
15828 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
15829 EVT VT = N->getValueType(0);
15830 SDValue N0 = N->getOperand(0);
15831 EVT SrcVT = N0.getValueType();
15832
15833 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15834 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
15835 return SDValue();
15836
15837 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
15838 return SDValue();
15839
15840 SDValue Cond = N0.getOperand(0);
15841 SDValue True = N0.getOperand(1);
15842 SDValue False = N0.getOperand(2);
15843
15844 if (Cond.getOpcode() != ISD::SETCC)
15845 return SDValue();
15846
15847 // FIXME: Support the version of this pattern with the select operands
15848 // swapped.
15849 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15850 if (CCVal != ISD::SETULT)
15851 return SDValue();
15852
15853 SDValue CondLHS = Cond.getOperand(0);
15854 SDValue CondRHS = Cond.getOperand(1);
15855
15856 if (CondLHS != True)
15857 return SDValue();
15858
15859 unsigned ScalarBits = VT.getScalarSizeInBits();
15860
15861 // FIXME: Support other constants.
15862 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
15863 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
15864 return SDValue();
15865
15866 if (False.getOpcode() != ISD::SIGN_EXTEND)
15867 return SDValue();
15868
15869 False = False.getOperand(0);
15870
15871 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
15872 return SDValue();
15873
15874 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
15875 if (!FalseRHSC || !FalseRHSC->isZero())
15876 return SDValue();
15877
15878 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
15879 if (CCVal2 != ISD::SETGT)
15880 return SDValue();
15881
15882 // Emit the signed to unsigned saturation pattern.
15883 SDLoc DL(N);
15884 SDValue Max =
15885 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
15886 SDValue Min =
15887 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
15888 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
15889 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
15890}
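// Illustrative worked example of the saturation rewrite above for an i16
// element x truncated to i8:
//   x = 300 : 300 u< 256 is false, sext(300 > 0) = -1, trunc -> 255;
//             smin(smax(300, 0), 255) = 255
//   x = -5  : 65531 u< 256 is false, sext(-5 > 0) = 0, trunc -> 0;
//             smin(smax(-5, 0), 255) = 0
//   x = 100 : 100 u< 256 is true, trunc -> 100; smin(smax(100, 0), 255) = 100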
15891
15892 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
15893 const RISCVSubtarget &Subtarget) {
15894 SDValue N0 = N->getOperand(0);
15895 EVT VT = N->getValueType(0);
15896
15897 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
15898 // extending X. This is safe since we only need the LSB after the shift and
15899 // shift amounts larger than 31 would produce poison. If we wait until
15900 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
15901 // to use a BEXT instruction.
15902 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
15903 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
15904 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
15905 SDLoc DL(N0);
15906 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
15907 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
15908 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
15909 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
15910 }
15911
15912 return combineTruncSelectToSMaxUSat(N, DAG);
15913}
15914
15915// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
15916// truncation. But RVV doesn't have truncation instructions for more than twice
15917// the bitwidth.
15918//
15919// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
15920//
15921// vsetvli a0, zero, e32, m2, ta, ma
15922// vnsrl.wi v12, v8, 0
15923// vsetvli zero, zero, e16, m1, ta, ma
15924// vnsrl.wi v8, v12, 0
15925// vsetvli zero, zero, e8, mf2, ta, ma
15926// vnsrl.wi v8, v8, 0
15927//
15928 // So reverse the combine so we generate a vmseq/vmsne again:
15929//
15930// and (lshr (trunc X), ShAmt), 1
15931// -->
15932// zext (icmp ne (and X, (1 << ShAmt)), 0)
15933//
15934// and (lshr (not (trunc X)), ShAmt), 1
15935// -->
15936// zext (icmp eq (and X, (1 << ShAmt)), 0)
15937 static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
15938 const RISCVSubtarget &Subtarget) {
15939 using namespace SDPatternMatch;
15940 SDLoc DL(N);
15941
15942 if (!Subtarget.hasVInstructions())
15943 return SDValue();
15944
15945 EVT VT = N->getValueType(0);
15946 if (!VT.isVector())
15947 return SDValue();
15948
15949 APInt ShAmt;
15950 SDValue Inner;
15951 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
15952 m_One())))
15953 return SDValue();
15954
15955 SDValue X;
15956 bool IsNot;
15957 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
15958 IsNot = true;
15959 else if (sd_match(Inner, m_Trunc(m_Value(X))))
15960 IsNot = false;
15961 else
15962 return SDValue();
15963
15964 EVT WideVT = X.getValueType();
15965 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
15966 return SDValue();
15967
15968 SDValue Res =
15969 DAG.getNode(ISD::AND, DL, WideVT, X,
15970 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
15971 Res = DAG.getSetCC(DL,
15972 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
15973 WideVT.getVectorElementCount()),
15974 Res, DAG.getConstant(0, DL, WideVT),
15975 IsNot ? ISD::SETEQ : ISD::SETNE);
15976 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
15977}
15978
15979 static SDValue reduceANDOfAtomicLoad(SDNode *N,
15980 TargetLowering::DAGCombinerInfo &DCI) {
15981 SelectionDAG &DAG = DCI.DAG;
15982 if (N->getOpcode() != ISD::AND)
15983 return SDValue();
15984
15985 SDValue N0 = N->getOperand(0);
15986 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
15987 return SDValue();
15988 if (!N0.hasOneUse())
15989 return SDValue();
15990
15991 AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
15993 return SDValue();
15994
15995 EVT LoadedVT = ALoad->getMemoryVT();
15996 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
15997 if (!MaskConst)
15998 return SDValue();
15999 uint64_t Mask = MaskConst->getZExtValue();
16000 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
16001 if (Mask != ExpectedMask)
16002 return SDValue();
16003
16004 SDValue ZextLoad = DAG.getAtomicLoad(
16005 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
16006 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
16007 DCI.CombineTo(N, ZextLoad);
16008 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
16009 DCI.recursivelyDeleteUnusedNodes(N0.getNode());
16010 return SDValue(N, 0);
16011}
16012
16013 // Combines two comparison operations and a logic operation into one selection
16014 // operation (min, max) and logic operation. Returns the newly constructed node
16015 // if the conditions for the optimization are satisfied.
16016 static SDValue performANDCombine(SDNode *N,
16017 TargetLowering::DAGCombinerInfo &DCI,
16018 const RISCVSubtarget &Subtarget) {
16019 SelectionDAG &DAG = DCI.DAG;
16020
16021 SDValue N0 = N->getOperand(0);
16022 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
16023 // extending X. This is safe since we only need the LSB after the shift and
16024 // shift amounts larger than 31 would produce poison. If we wait until
16025 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16026 // to use a BEXT instruction.
16027 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16028 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
16029 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
16030 N0.hasOneUse()) {
16031 SDLoc DL(N);
16032 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16033 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16034 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16035 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
16036 DAG.getConstant(1, DL, MVT::i64));
16037 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16038 }
16039
16040 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
16041 return V;
16042
16043 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16044 return V;
16045 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16046 return V;
16047 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
16048 return V;
16049
16050 if (DCI.isAfterLegalizeDAG())
16051 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16052 return V;
16053
16054 // fold (and (select lhs, rhs, cc, -1, y), x) ->
16055 // (select lhs, rhs, cc, x, (and x, y))
16056 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
16057}
16058
16059// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
16060// FIXME: Generalize to other binary operators with same operand.
16061 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
16062 SelectionDAG &DAG) {
16063 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
16064
16065 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
16066 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
16067 !N0.hasOneUse() || !N1.hasOneUse())
16068 return SDValue();
16069
16070 // Should have the same condition.
16071 SDValue Cond = N0.getOperand(1);
16072 if (Cond != N1.getOperand(1))
16073 return SDValue();
16074
16075 SDValue TrueV = N0.getOperand(0);
16076 SDValue FalseV = N1.getOperand(0);
16077
16078 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
16079 TrueV.getOperand(1) != FalseV.getOperand(1) ||
16080 !isOneConstant(TrueV.getOperand(1)) ||
16081 !TrueV.hasOneUse() || !FalseV.hasOneUse())
16082 return SDValue();
16083
16084 EVT VT = N->getValueType(0);
16085 SDLoc DL(N);
16086
16087 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
16088 Cond);
16089 SDValue NewN1 =
16090 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
16091 SDValue NewOr =
16092 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
16093 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
16094}
16095
16096 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16097 const RISCVSubtarget &Subtarget) {
16098 SelectionDAG &DAG = DCI.DAG;
16099
16100 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16101 return V;
16102 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16103 return V;
16104
16105 if (DCI.isAfterLegalizeDAG())
16106 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16107 return V;
16108
16109 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
16110 // We may be able to pull a common operation out of the true and false value.
16111 SDValue N0 = N->getOperand(0);
16112 SDValue N1 = N->getOperand(1);
16113 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
16114 return V;
16115 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
16116 return V;
16117
16118 // fold (or (select cond, 0, y), x) ->
16119 // (select cond, x, (or x, y))
16120 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16121}
16122
16123 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
16124 const RISCVSubtarget &Subtarget) {
16125 SDValue N0 = N->getOperand(0);
16126 SDValue N1 = N->getOperand(1);
16127
16128 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
16129 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
16130 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
16131 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16132 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
16133 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
16134 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16135 SDLoc DL(N);
16136 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16137 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16138 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
16139 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
16140 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16141 }
16142
16143 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
16144 // NOTE: Assumes ROL being legal means ROLW is legal.
16145 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16146 if (N0.getOpcode() == RISCVISD::SLLW &&
16147 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
16148 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
16149 SDLoc DL(N);
16150 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
16151 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
16152 }
16153
16154 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
16155 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
16156 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
16157 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
16158 if (ConstN00 && CC == ISD::SETLT) {
16159 EVT VT = N0.getValueType();
16160 SDLoc DL(N0);
16161 const APInt &Imm = ConstN00->getAPIntValue();
16162 if ((Imm + 1).isSignedIntN(12))
16163 return DAG.getSetCC(DL, VT, N0.getOperand(1),
16164 DAG.getConstant(Imm + 1, DL, VT), CC);
16165 }
16166 }
16167
16168 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16169 return V;
16170 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16171 return V;
16172
16173 // fold (xor (select cond, 0, y), x) ->
16174 // (select cond, x, (xor x, y))
16175 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16176}
16177
16178// Try to expand a multiply to a sequence of shifts and add/subs,
16179// for a machine without native mul instruction.
16180 static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
16181 uint64_t MulAmt) {
16182 SDLoc DL(N);
16183 EVT VT = N->getValueType(0);
16184 const uint64_t BitWidth = VT.getFixedSizeInBits();
16185
16186 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
16187 SDValue N0 = N->getOperand(0);
16188
16189 // Find the Non-adjacent form of the multiplier.
16190 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
16191 if (E & 1) {
16192 bool IsAdd = (E & 3) == 1;
16193 E -= IsAdd ? 1 : -1;
16194 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
16195 DAG.getShiftAmountConstant(I, VT, DL));
16196 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
16197 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
16198 }
16199 }
16200
16201 return Result;
16202}
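// Illustrative trace of the NAF expansion above for MulAmt = 7:
//   I=0: E=7, E&3 == 3 so IsAdd is false, E becomes 8, Result = 0 - (X << 0)
//   I=3: E=1, E&3 == 1 so IsAdd is true, E becomes 0, Result = -X + (X << 3)
// giving 8*X - X == 7*X with one shift, one subtract and no multiply.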
16203
16204// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
16205 static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
16206 uint64_t MulAmt) {
16207 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
16208 ISD::NodeType Op;
16209 uint64_t ShiftAmt1;
16210 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
16211 Op = ISD::SUB;
16212 ShiftAmt1 = MulAmt + MulAmtLowBit;
16213 } else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
16214 Op = ISD::ADD;
16215 ShiftAmt1 = MulAmt - MulAmtLowBit;
16216 } else {
16217 return SDValue();
16218 }
16219 EVT VT = N->getValueType(0);
16220 SDLoc DL(N);
16221 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16222 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
16223 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16224 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
16225 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
16226}
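// Illustrative worked examples of the decomposition above:
//   MulAmt = 20: low bit 4, 20 - 4 = 16 is a power of two, so
//     X * 20 -> (add (shl X, 4), (shl X, 2))
//   MulAmt = 14: low bit 2, 14 + 2 = 16 is a power of two, so
//     X * 14 -> (sub (shl X, 4), (shl X, 1))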
16227
16228// Try to expand a scalar multiply to a faster sequence.
16229 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
16230 TargetLowering::DAGCombinerInfo &DCI,
16231 const RISCVSubtarget &Subtarget) {
16232
16233 EVT VT = N->getValueType(0);
16234
16235 // LI + MUL is usually smaller than the alternative sequence.
16236 if (DAG.getMachineFunction().getFunction().hasMinSize())
16237 return SDValue();
16238
16239 if (VT != Subtarget.getXLenVT())
16240 return SDValue();
16241
16242 bool ShouldExpandMul =
16243 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
16244 !Subtarget.hasStdExtZmmul();
16245 if (!ShouldExpandMul)
16246 return SDValue();
16247
16248 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
16249 if (!CNode)
16250 return SDValue();
16251 uint64_t MulAmt = CNode->getZExtValue();
16252
16253 // Don't do this if the Xqciac extension is enabled and MulAmt is a simm12.
16254 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
16255 return SDValue();
16256
16257 const bool HasShlAdd = Subtarget.hasStdExtZba() ||
16258 Subtarget.hasVendorXTHeadBa() ||
16259 Subtarget.hasVendorXAndesPerf();
16260
16261 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
16262 // We're adding additional uses of X here, and in principle, we should be freezing
16263 // X before doing so. However, adding freeze here causes real regressions, and no
16264 // other target properly freezes X in these cases either.
16265 SDValue X = N->getOperand(0);
16266
16267 if (HasShlAdd) {
16268 for (uint64_t Divisor : {3, 5, 9}) {
16269 if (MulAmt % Divisor != 0)
16270 continue;
16271 uint64_t MulAmt2 = MulAmt / Divisor;
16272 // 3/5/9 * 2^N -> shl (shXadd X, X), N
16273 if (isPowerOf2_64(MulAmt2)) {
16274 SDLoc DL(N);
16275 SDValue X = N->getOperand(0);
16276 // Put the shift first if we can fold a zext into the
16277 // shift forming a slli.uw.
16278 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
16279 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
16280 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
16281 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16282 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
16283 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
16284 Shl);
16285 }
16286 // Otherwise, put the shl second so that it can fold with following
16287 // instructions (e.g. sext or add).
16288 SDValue Mul359 =
16289 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16290 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16291 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
16292 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16293 }
16294
16295 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
16296 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
16297 SDLoc DL(N);
16298 SDValue Mul359 =
16299 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16300 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16301 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16302 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
16303 Mul359);
16304 }
16305 }
16306
16307 // If this is a power 2 + 2/4/8, we can use a shift followed by a single
16308 // shXadd. First check if this is a sum of two powers of 2 because that's
16309 // easy. Then count how many zeros are up to the first bit.
16310 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
16311 unsigned ScaleShift = llvm::countr_zero(MulAmt);
16312 if (ScaleShift >= 1 && ScaleShift < 4) {
16313 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
16314 SDLoc DL(N);
16315 SDValue Shift1 =
16316 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16317 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16318 DAG.getConstant(ScaleShift, DL, VT), Shift1);
16319 }
16320 }
16321
16322 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
16323 // This is the two instruction form, there are also three instruction
16324 // variants we could implement. e.g.
16325 // (2^(1,2,3) * 3,5,9 + 1) << C2
16326 // 2^(C1>3) * 3,5,9 +/- 1
16327 for (uint64_t Divisor : {3, 5, 9}) {
16328 uint64_t C = MulAmt - 1;
16329 if (C <= Divisor)
16330 continue;
16331 unsigned TZ = llvm::countr_zero(C);
16332 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
16333 SDLoc DL(N);
16334 SDValue Mul359 =
16335 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16336 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16337 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16338 DAG.getConstant(TZ, DL, VT), X);
16339 }
16340 }
16341
16342 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
16343 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
16344 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
16345 if (ScaleShift >= 1 && ScaleShift < 4) {
16346 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
16347 SDLoc DL(N);
16348 SDValue Shift1 =
16349 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16350 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
16351 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16352 DAG.getConstant(ScaleShift, DL, VT), X));
16353 }
16354 }
16355
16356 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
16357 for (uint64_t Offset : {3, 5, 9}) {
16358 if (isPowerOf2_64(MulAmt + Offset)) {
16359 unsigned ShAmt = Log2_64(MulAmt + Offset);
16360 if (ShAmt >= VT.getSizeInBits())
16361 continue;
16362 SDLoc DL(N);
16363 SDValue Shift1 =
16364 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
16365 SDValue Mul359 =
16366 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16367 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
16368 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
16369 }
16370 }
16371
16372 for (uint64_t Divisor : {3, 5, 9}) {
16373 if (MulAmt % Divisor != 0)
16374 continue;
16375 uint64_t MulAmt2 = MulAmt / Divisor;
16376 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
16377 // of 25 which happen to be quite common.
16378 for (uint64_t Divisor2 : {3, 5, 9}) {
16379 if (MulAmt2 % Divisor2 != 0)
16380 continue;
16381 uint64_t MulAmt3 = MulAmt2 / Divisor2;
16382 if (isPowerOf2_64(MulAmt3)) {
16383 SDLoc DL(N);
16384 SDValue Mul359A =
16385 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16386 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16387 SDValue Mul359B = DAG.getNode(
16388 RISCVISD::SHL_ADD, DL, VT, Mul359A,
16389 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
16390 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
16391 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
16392 }
16393 }
16394 }
16395 }
16396
16397 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
16398 return V;
16399
16400 if (!Subtarget.hasStdExtZmmul())
16401 return expandMulToNAFSequence(N, DAG, MulAmt);
16402
16403 return SDValue();
16404}
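// Illustrative worked example of the shXadd decompositions above for
// MulAmt = 45 with Zba: Divisor = 5 gives MulAmt2 = 9, so
//   Mul359 = (SH2ADD X, X) = 5*X
//   result = (SH3ADD Mul359, Mul359) = 8*(5*X) + 5*X = 45*X
// i.e. two shift-add instructions instead of a constant materialization plus
// a multiply.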
16405
16406// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
16407// (bitcast (sra (v2Xi16 (bitcast X)), 15))
16408// Same for other equivalent types with other equivalent constants.
16409 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
16410 EVT VT = N->getValueType(0);
16411 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16412
16413 // Do this for legal vectors unless they are i1 or i8 vectors.
16414 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
16415 return SDValue();
16416
16417 if (N->getOperand(0).getOpcode() != ISD::AND ||
16418 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
16419 return SDValue();
16420
16421 SDValue And = N->getOperand(0);
16422 SDValue Srl = And.getOperand(0);
16423
16424 APInt V1, V2, V3;
16425 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
16426 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
16427 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
16428 return SDValue();
16429
16430 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
16431 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
16432 V3 != (HalfSize - 1))
16433 return SDValue();
16434
16435 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
16436 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
16437 VT.getVectorElementCount() * 2);
16438 SDLoc DL(N);
16439 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
16440 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
16441 DAG.getConstant(HalfSize - 1, DL, HalfVT));
16442 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
16443}
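// Illustrative worked example of the combine above for one i32 lane holding
// the i16 halves [hi, lo]:
//   (lshr X, 15) & 0x10001 keeps bit 15 of lo in bit 0 and bit 15 of hi in
//   bit 16; multiplying by 0xffff turns each kept bit into 0xffff within its
//   own half, which is exactly (sra <2 x i16> X, 15) viewed as an i32 lane.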
16444
16445 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
16446 TargetLowering::DAGCombinerInfo &DCI,
16447 const RISCVSubtarget &Subtarget) {
16448 EVT VT = N->getValueType(0);
16449 if (!VT.isVector())
16450 return expandMul(N, DAG, DCI, Subtarget);
16451
16452 SDLoc DL(N);
16453 SDValue N0 = N->getOperand(0);
16454 SDValue N1 = N->getOperand(1);
16455 SDValue MulOper;
16456 unsigned AddSubOpc;
16457
16458 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
16459 // (mul x, add (y, 1)) -> (add x, (mul x, y))
16460 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
16461 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
16462 auto IsAddSubWith1 = [&](SDValue V) -> bool {
16463 AddSubOpc = V->getOpcode();
16464 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
16465 SDValue Opnd = V->getOperand(1);
16466 MulOper = V->getOperand(0);
16467 if (AddSubOpc == ISD::SUB)
16468 std::swap(Opnd, MulOper);
16469 if (isOneOrOneSplat(Opnd))
16470 return true;
16471 }
16472 return false;
16473 };
16474
16475 if (IsAddSubWith1(N0)) {
16476 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
16477 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
16478 }
16479
16480 if (IsAddSubWith1(N1)) {
16481 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
16482 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
16483 }
16484
16485 if (SDValue V = combineBinOpOfZExt(N, DAG))
16486 return V;
16487
16488 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
16489 return V;
16490
16491 return SDValue();
16492}
16493
16494/// According to the property that indexed load/store instructions zero-extend
16495/// their indices, try to narrow the type of index operand.
16496static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
16497 if (isIndexTypeSigned(IndexType))
16498 return false;
16499
16500 if (!N->hasOneUse())
16501 return false;
16502
16503 EVT VT = N.getValueType();
16504 SDLoc DL(N);
16505
16506 // In general, what we're doing here is seeing if we can sink a truncate to
16507 // a smaller element type into the expression tree building our index.
16508 // TODO: We can generalize this and handle a bunch more cases if useful.
16509
16510 // Narrow a buildvector to the narrowest element type. This requires less
16511 // work and less register pressure at high LMUL, and creates smaller constants
16512 // which may be cheaper to materialize.
16513 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
16514 KnownBits Known = DAG.computeKnownBits(N);
16515 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
16516 LLVMContext &C = *DAG.getContext();
16517 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
16518 if (ResultVT.bitsLT(VT.getVectorElementType())) {
16519 N = DAG.getNode(ISD::TRUNCATE, DL,
16520 VT.changeVectorElementType(ResultVT), N);
16521 return true;
16522 }
16523 }
16524
16525 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
16526 if (N.getOpcode() != ISD::SHL)
16527 return false;
16528
16529 SDValue N0 = N.getOperand(0);
16530 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
16531 N0.getOpcode() != RISCVISD::VZEXT_VL)
16532 return false;
16533 if (!N0->hasOneUse())
16534 return false;
16535
16536 APInt ShAmt;
16537 SDValue N1 = N.getOperand(1);
16538 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
16539 return false;
16540
16541 SDValue Src = N0.getOperand(0);
16542 EVT SrcVT = Src.getValueType();
16543 unsigned SrcElen = SrcVT.getScalarSizeInBits();
16544 unsigned ShAmtV = ShAmt.getZExtValue();
16545 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
16546 NewElen = std::max(NewElen, 8U);
16547
16548 // Skip if NewElen is not narrower than the original extended type.
16549 if (NewElen >= N0.getValueType().getScalarSizeInBits())
16550 return false;
16551
16552 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
16553 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
16554
16555 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
16556 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
16557 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
16558 return true;
16559}
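// Illustrative worked example of the narrowing above: an index expression
//   (shl (zext <N x i8> x to <N x i64>), splat 2)
// has SrcElen = 8 and ShAmtV = 2, so NewElen = PowerOf2Ceil(10) = 16 and the
// index is rebuilt as (shl (zext x to <N x i16>), splat 2); the shifted-out
// bits were already known zero, and the indexed access zero-extends anyway.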
16560
16561/// Try to map an integer comparison with size > XLEN to vector instructions
16562/// before type legalization splits it up into chunks.
16563static SDValue
16564 combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
16565 const SDLoc &DL, SelectionDAG &DAG,
16566 const RISCVSubtarget &Subtarget) {
16567 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
16568
16569 if (!Subtarget.hasVInstructions())
16570 return SDValue();
16571
16572 MVT XLenVT = Subtarget.getXLenVT();
16573 EVT OpVT = X.getValueType();
16574 // We're looking for an oversized integer equality comparison.
16575 if (!OpVT.isScalarInteger())
16576 return SDValue();
16577
16578 unsigned OpSize = OpVT.getSizeInBits();
16579 // The size should be larger than XLen and smaller than the maximum vector
16580 // size.
16581 if (OpSize <= Subtarget.getXLen() ||
16582 OpSize > Subtarget.getRealMinVLen() *
16583 Subtarget.getMaxLMULForFixedLengthVectors())
16584 return SDValue();
16585
16586 // Don't perform this combine if constructing the vector will be expensive.
16587 auto IsVectorBitCastCheap = [](SDValue X) {
16588 X = peekThroughBitcasts(X);
16589 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
16590 X.getOpcode() == ISD::LOAD;
16591 };
16592 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
16593 return SDValue();
16594
16595 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
16596 Attribute::NoImplicitFloat))
16597 return SDValue();
16598
16599 // Bail out for non-byte-sized types.
16600 if (!OpVT.isByteSized())
16601 return SDValue();
16602
16603 unsigned VecSize = OpSize / 8;
16604 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
16605 EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
16606
16607 SDValue VecX = DAG.getBitcast(VecVT, X);
16608 SDValue VecY = DAG.getBitcast(VecVT, Y);
16609 SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
16610 SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
16611
16612 SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
16613 DAG.getCondCode(ISD::SETNE), Mask, VL);
16614 return DAG.getSetCC(DL, VT,
16615 DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
16616 DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
16617 VL),
16618 DAG.getConstant(0, DL, XLenVT), CC);
16619}
16620
16621// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
16622// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
16623// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
16624// can become a sext.w instead of a shift pair.
16625 static SDValue performSETCCCombine(SDNode *N,
16626 TargetLowering::DAGCombinerInfo &DCI,
16627 const RISCVSubtarget &Subtarget) {
16628 SelectionDAG &DAG = DCI.DAG;
16629 SDLoc dl(N);
16630 SDValue N0 = N->getOperand(0);
16631 SDValue N1 = N->getOperand(1);
16632 EVT VT = N->getValueType(0);
16633 EVT OpVT = N0.getValueType();
16634
16635 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
16636 // Looking for an equality compare.
16637 if (!isIntEqualitySetCC(Cond))
16638 return SDValue();
16639
16640 if (SDValue V =
16641 combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
16642 return V;
16643
16644 // (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI.
16645 if (DCI.isAfterLegalizeDAG() && isNullConstant(N1) &&
16646 N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
16647 isa<ConstantSDNode>(N0.getOperand(1))) {
16648 const APInt &AndRHSC =
16649 cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
16650 if (!isInt<12>(AndRHSC.getSExtValue()) && AndRHSC.isNegatedPowerOf2()) {
16651 unsigned ShiftBits = AndRHSC.countr_zero();
16652 SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0),
16653 DAG.getConstant(ShiftBits, dl, VT));
16654 return DAG.getSetCC(dl, VT, Shift, N1, Cond);
16655 }
16656 }
16657
16658 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
16659 return SDValue();
16660
16661 // RHS needs to be a constant.
16662 auto *N1C = dyn_cast<ConstantSDNode>(N1);
16663 if (!N1C)
16664 return SDValue();
16665
16666 // LHS needs to be (and X, 0xffffffff).
16667 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
16668 !isa<ConstantSDNode>(N0.getOperand(1)) ||
16669 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
16670 return SDValue();
16671
16672 // Don't do this if the sign bit is provably zero, it will be turned back into
16673 // an AND.
16674 APInt SignMask = APInt::getOneBitSet(64, 31);
16675 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
16676 return SDValue();
16677
16678 const APInt &C1 = N1C->getAPIntValue();
16679
16680 // If the constant is larger than 2^32 - 1 it is impossible for both sides
16681 // to be equal.
16682 if (C1.getActiveBits() > 32)
16683 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
16684
16685 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
16686 N0.getOperand(0), DAG.getValueType(MVT::i32));
16687 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
16688 dl, OpVT), Cond);
16689}
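// Illustrative worked example of the sext_inreg rewrite above on RV64:
//   (seteq (and X, 0xffffffff), 0x80000000)
//     -> (seteq (sext_inreg X, i32), 0xffffffff80000000)
// The new constant is C1 sign extended from bit 31 and is built with a single
// LUI, while the sext_inreg becomes a sext.w instead of a shift pair.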
16690
16691static SDValue
16692 performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16693 const RISCVSubtarget &Subtarget) {
16694 SelectionDAG &DAG = DCI.DAG;
16695 SDValue Src = N->getOperand(0);
16696 EVT VT = N->getValueType(0);
16697 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16698 unsigned Opc = Src.getOpcode();
16699 SDLoc DL(N);
16700
16701 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
16702 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
16703 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
16704 Subtarget.hasStdExtZfhmin())
16705 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
16706
16707 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
16708 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
16709 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
16710 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
16711 return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
16712 Src.getOperand(1));
16713
16714 // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
16715 if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
16716 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
16717
16718 // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
16719 if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
16720 isAllOnesConstant(Src.getOperand(1)) &&
16721 Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
16722 return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
16723 DAG.getAllOnesConstant(DL, VT));
16724
16725 return SDValue();
16726}
16727
16728namespace {
16729// Forward declaration of the structure holding the necessary information to
16730// apply a combine.
16731struct CombineResult;
16732
16733enum ExtKind : uint8_t {
16734 ZExt = 1 << 0,
16735 SExt = 1 << 1,
16736 FPExt = 1 << 2,
16737 BF16Ext = 1 << 3
16738};
16739/// Helper class for folding sign/zero extensions.
16740/// In particular, this class is used for the following combines:
16741/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
16742/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
16743/// mul | mul_vl -> vwmul(u) | vwmul_su
16744/// shl | shl_vl -> vwsll
16745/// fadd -> vfwadd | vfwadd_w
16746/// fsub -> vfwsub | vfwsub_w
16747/// fmul -> vfwmul
16748/// An object of this class represents an operand of the operation we want to
16749/// combine.
16750/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
16751/// NodeExtensionHelper for `a` and one for `b`.
16752///
16753/// This class abstracts away how the extension is materialized and
16754/// how its number of users affect the combines.
16755///
16756/// In particular:
16757/// - VWADD_W is conceptually == add(op0, sext(op1))
16758/// - VWADDU_W == add(op0, zext(op1))
16759/// - VWSUB_W == sub(op0, sext(op1))
16760/// - VWSUBU_W == sub(op0, zext(op1))
16761/// - VFWADD_W == fadd(op0, fpext(op1))
16762/// - VFWSUB_W == fsub(op0, fpext(op1))
16763/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
16764/// zext|sext(smaller_value).
16765struct NodeExtensionHelper {
16766 /// Records if this operand is like being zero extended.
16767 bool SupportsZExt;
16768 /// Records if this operand is like being sign extended.
16769 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
16770 /// instance, a splat constant (e.g., 3), would support being both sign and
16771 /// zero extended.
16772 bool SupportsSExt;
16773 /// Records if this operand is like being floating point extended.
16774 bool SupportsFPExt;
16775 /// Records if this operand is extended from bf16.
16776 bool SupportsBF16Ext;
16777 /// This boolean captures whether we care if this operand would still be
16778 /// around after the folding happens.
16779 bool EnforceOneUse;
16780 /// Original value that this NodeExtensionHelper represents.
16781 SDValue OrigOperand;
16782
16783 /// Get the value feeding the extension or the value itself.
16784 /// E.g., for zext(a), this would return a.
16785 SDValue getSource() const {
16786 switch (OrigOperand.getOpcode()) {
16787 case ISD::ZERO_EXTEND:
16788 case ISD::SIGN_EXTEND:
16789 case RISCVISD::VSEXT_VL:
16790 case RISCVISD::VZEXT_VL:
16791 case RISCVISD::FP_EXTEND_VL:
16792 return OrigOperand.getOperand(0);
16793 default:
16794 return OrigOperand;
16795 }
16796 }
16797
16798 /// Check if this instance represents a splat.
16799 bool isSplat() const {
16800 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
16801 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
16802 }
16803
16804 /// Get the extended opcode.
16805 unsigned getExtOpc(ExtKind SupportsExt) const {
16806 switch (SupportsExt) {
16807 case ExtKind::SExt:
16808 return RISCVISD::VSEXT_VL;
16809 case ExtKind::ZExt:
16810 return RISCVISD::VZEXT_VL;
16811 case ExtKind::FPExt:
16812 case ExtKind::BF16Ext:
16813 return RISCVISD::FP_EXTEND_VL;
16814 }
16815 llvm_unreachable("Unknown ExtKind enum");
16816 }
16817
16818 /// Get or create a value that can feed \p Root with the given extension \p
16819 /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
16820 /// operand. \see ::getSource().
16821 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
16822 const RISCVSubtarget &Subtarget,
16823 std::optional<ExtKind> SupportsExt) const {
16824 if (!SupportsExt.has_value())
16825 return OrigOperand;
16826
16827 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
16828
16829 SDValue Source = getSource();
16830 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
16831 if (Source.getValueType() == NarrowVT)
16832 return Source;
16833
16834 unsigned ExtOpc = getExtOpc(*SupportsExt);
16835
16836 // If we need an extension, we should be changing the type.
16837 SDLoc DL(OrigOperand);
16838 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
16839 switch (OrigOperand.getOpcode()) {
16840 case ISD::ZERO_EXTEND:
16841 case ISD::SIGN_EXTEND:
16842 case RISCVISD::VSEXT_VL:
16843 case RISCVISD::VZEXT_VL:
16844 case RISCVISD::FP_EXTEND_VL:
16845 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
16846 case ISD::SPLAT_VECTOR:
16847 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
16848 case RISCVISD::VMV_V_X_VL:
16849 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
16850 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
16851 case RISCVISD::VFMV_V_F_VL:
16852 Source = Source.getOperand(1);
16853 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
16854 Source = Source.getOperand(0);
16855 assert(Source.getValueType() == NarrowVT.getVectorElementType());
16856 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
16857 DAG.getUNDEF(NarrowVT), Source, VL);
16858 default:
16859 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
16860 // and that operand should already have the right NarrowVT so no
16861 // extension should be required at this point.
16862 llvm_unreachable("Unsupported opcode");
16863 }
16864 }
16865
16866 /// Helper function to get the narrow type for \p Root.
16867 /// The narrow type is the type of \p Root where we divided the size of each
16868 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
16869 /// \pre Both the narrow type and the original type should be legal.
16870 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
16871 MVT VT = Root->getSimpleValueType(0);
16872
16873 // Determine the narrow size.
16874 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
16875
16876 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
16877 : SupportsExt == ExtKind::FPExt
16878 ? MVT::getFloatingPointVT(NarrowSize)
16879 : MVT::getIntegerVT(NarrowSize);
16880
16881 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
16882 "Trying to extend something we can't represent");
16883 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
16884 return NarrowVT;
16885 }
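// For example (illustrative): with a Root of type nxv4i32, SExt/ZExt give a
// narrow type of nxv4i16; with an FP Root of type nxv4f32, FPExt gives
// nxv4f16 and BF16Ext gives nxv4bf16.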
16886
16887 /// Get the opcode to materialize:
16888 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
16889 static unsigned getSExtOpcode(unsigned Opcode) {
16890 switch (Opcode) {
16891 case ISD::ADD:
16892 case RISCVISD::ADD_VL:
16893 case RISCVISD::VWADD_W_VL:
16894 case RISCVISD::VWADDU_W_VL:
16895 case ISD::OR:
16896 case RISCVISD::OR_VL:
16897 return RISCVISD::VWADD_VL;
16898 case ISD::SUB:
16899 case RISCVISD::SUB_VL:
16900 case RISCVISD::VWSUB_W_VL:
16901 case RISCVISD::VWSUBU_W_VL:
16902 return RISCVISD::VWSUB_VL;
16903 case ISD::MUL:
16904 case RISCVISD::MUL_VL:
16905 return RISCVISD::VWMUL_VL;
16906 default:
16907 llvm_unreachable("Unexpected opcode");
16908 }
16909 }
16910
16911 /// Get the opcode to materialize:
16912 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
16913 static unsigned getZExtOpcode(unsigned Opcode) {
16914 switch (Opcode) {
16915 case ISD::ADD:
16916 case RISCVISD::ADD_VL:
16917 case RISCVISD::VWADD_W_VL:
16918 case RISCVISD::VWADDU_W_VL:
16919 case ISD::OR:
16920 case RISCVISD::OR_VL:
16921 return RISCVISD::VWADDU_VL;
16922 case ISD::SUB:
16923 case RISCVISD::SUB_VL:
16924 case RISCVISD::VWSUB_W_VL:
16925 case RISCVISD::VWSUBU_W_VL:
16926 return RISCVISD::VWSUBU_VL;
16927 case ISD::MUL:
16928 case RISCVISD::MUL_VL:
16929 return RISCVISD::VWMULU_VL;
16930 case ISD::SHL:
16931 case RISCVISD::SHL_VL:
16932 return RISCVISD::VWSLL_VL;
16933 default:
16934 llvm_unreachable("Unexpected opcode");
16935 }
16936 }
16937
16938 /// Get the opcode to materialize:
16939 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
16940 static unsigned getFPExtOpcode(unsigned Opcode) {
16941 switch (Opcode) {
16942 case RISCVISD::FADD_VL:
16943 case RISCVISD::VFWADD_W_VL:
16944 return RISCVISD::VFWADD_VL;
16945 case RISCVISD::FSUB_VL:
16946 case RISCVISD::VFWSUB_W_VL:
16947 return RISCVISD::VFWSUB_VL;
16948 case RISCVISD::FMUL_VL:
16949 return RISCVISD::VFWMUL_VL;
16950 case RISCVISD::VFMADD_VL:
16951 return RISCVISD::VFWMADD_VL;
16952 case RISCVISD::VFMSUB_VL:
16953 return RISCVISD::VFWMSUB_VL;
16954 case RISCVISD::VFNMADD_VL:
16955 return RISCVISD::VFWNMADD_VL;
16956 case RISCVISD::VFNMSUB_VL:
16957 return RISCVISD::VFWNMSUB_VL;
16958 default:
16959 llvm_unreachable("Unexpected opcode");
16960 }
16961 }
16962
16963 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
16964 /// newOpcode(a, b).
16965 static unsigned getSUOpcode(unsigned Opcode) {
16966 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
16967 "SU is only supported for MUL");
16968 return RISCVISD::VWMULSU_VL;
16969 }
16970
16971 /// Get the opcode to materialize
16972 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
16973 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
16974 switch (Opcode) {
16975 case ISD::ADD:
16976 case RISCVISD::ADD_VL:
16977 case ISD::OR:
16978 case RISCVISD::OR_VL:
16979 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
16980 : RISCVISD::VWADDU_W_VL;
16981 case ISD::SUB:
16982 case RISCVISD::SUB_VL:
16983 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
16984 : RISCVISD::VWSUBU_W_VL;
16985 case RISCVISD::FADD_VL:
16986 return RISCVISD::VFWADD_W_VL;
16987 case RISCVISD::FSUB_VL:
16988 return RISCVISD::VFWSUB_W_VL;
16989 default:
16990 llvm_unreachable("Unexpected opcode");
16991 }
16992 }
16993
16994 using CombineToTry = std::function<std::optional<CombineResult>(
16995 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
16996 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
16997 const RISCVSubtarget &)>;
16998
16999 /// Check if this node needs to be fully folded or extended for all users.
17000 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
17001
17002 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
17003 const RISCVSubtarget &Subtarget) {
17004 unsigned Opc = OrigOperand.getOpcode();
17005 MVT VT = OrigOperand.getSimpleValueType();
17006
17007 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
17008 "Unexpected Opcode");
17009
17010 // The passthru must be undef for tail agnostic.
17011 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
17012 return;
17013
17014 // Get the scalar value.
17015 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
17016 : OrigOperand.getOperand(1);
17017
17018 // See if we have enough sign bits or zero bits in the scalar to use a
17019 // widening opcode by splatting to smaller element size.
17020 unsigned EltBits = VT.getScalarSizeInBits();
17021 unsigned ScalarBits = Op.getValueSizeInBits();
17022 // If we're not getting all bits from the element, we need special handling.
17023 if (ScalarBits < EltBits) {
17024 // This should only occur on RV32.
17025 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
17026 !Subtarget.is64Bit() && "Unexpected splat");
17027 // vmv.v.x sign extends narrow inputs.
17028 SupportsSExt = true;
17029
17030 // If the input is positive, then sign extend is also zero extend.
17031 if (DAG.SignBitIsZero(Op))
17032 SupportsZExt = true;
17033
17034 EnforceOneUse = false;
17035 return;
17036 }
17037
17038 unsigned NarrowSize = EltBits / 2;
17039 // If the narrow type cannot be expressed with a legal VMV,
17040 // this is not a valid candidate.
17041 if (NarrowSize < 8)
17042 return;
17043
17044 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
17045 SupportsSExt = true;
17046
17047 if (DAG.MaskedValueIsZero(Op,
17048 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
17049 SupportsZExt = true;
17050
17051 EnforceOneUse = false;
17052 }
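// For example (illustrative): a VMV_V_X_VL splat of the constant 7 into
// nxv2i64 has only a few significant bits, so both SupportsSExt and
// SupportsZExt are set and the splat can later be rebuilt at the narrow
// element type.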
17053
17054 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17055 return (NarrowEltVT == MVT::f32 ||
17056 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
17057 }
17058
17059 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17060 return NarrowEltVT == MVT::bf16 && Subtarget.hasStdExtZvfbfwma();
17061 }
17062
17063 /// Helper method to set the various fields of this struct based on the
17064 /// type of \p Root.
17065 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
17066 const RISCVSubtarget &Subtarget) {
17067 SupportsZExt = false;
17068 SupportsSExt = false;
17069 SupportsFPExt = false;
17070 SupportsBF16Ext = false;
17071 EnforceOneUse = true;
17072 unsigned Opc = OrigOperand.getOpcode();
17073 // For the nodes we handle below, we end up using their inputs directly: see
17074 // getSource(). However since they either don't have a passthru or we check
17075 // that their passthru is undef, we can safely ignore their mask and VL.
17076 switch (Opc) {
17077 case ISD::ZERO_EXTEND:
17078 case ISD::SIGN_EXTEND: {
17079 MVT VT = OrigOperand.getSimpleValueType();
17080 if (!VT.isVector())
17081 break;
17082
17083 SDValue NarrowElt = OrigOperand.getOperand(0);
17084 MVT NarrowVT = NarrowElt.getSimpleValueType();
17085 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
17086 if (NarrowVT.getVectorElementType() == MVT::i1)
17087 break;
17088
17089 SupportsZExt = Opc == ISD::ZERO_EXTEND;
17090 SupportsSExt = Opc == ISD::SIGN_EXTEND;
17091 break;
17092 }
17093 case RISCVISD::VZEXT_VL:
17094 SupportsZExt = true;
17095 break;
17096 case RISCVISD::VSEXT_VL:
17097 SupportsSExt = true;
17098 break;
17099 case RISCVISD::FP_EXTEND_VL: {
17100 MVT NarrowEltVT =
17101 OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
17102 if (isSupportedFPExtend(NarrowEltVT, Subtarget))
17103 SupportsFPExt = true;
17104 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
17105 SupportsBF16Ext = true;
17106
17107 break;
17108 }
17109 case ISD::SPLAT_VECTOR:
17110 case RISCVISD::VMV_V_X_VL:
17111 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
17112 break;
17113 case RISCVISD::VFMV_V_F_VL: {
17114 MVT VT = OrigOperand.getSimpleValueType();
17115
17116 if (!OrigOperand.getOperand(0).isUndef())
17117 break;
17118
17119 SDValue Op = OrigOperand.getOperand(1);
17120 if (Op.getOpcode() != ISD::FP_EXTEND)
17121 break;
17122
17123 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17124 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
17125 if (NarrowSize != ScalarBits)
17126 break;
17127
17128 if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
17129 SupportsFPExt = true;
17130 if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
17131 Subtarget))
17132 SupportsBF16Ext = true;
17133 break;
17134 }
17135 default:
17136 break;
17137 }
17138 }
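// For example (illustrative): for OrigOperand = (zero_extend nxv4i16 to
// nxv4i32) only SupportsZExt is set, while an FP_EXTEND_VL from an f16
// source only sets SupportsFPExt when the subtarget has vector f16 support
// (see isSupportedFPExtend above).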
17139
17140 /// Check if \p Root supports any extension folding combines.
17141 static bool isSupportedRoot(const SDNode *Root,
17142 const RISCVSubtarget &Subtarget) {
17143 switch (Root->getOpcode()) {
17144 case ISD::ADD:
17145 case ISD::SUB:
17146 case ISD::MUL: {
17147 return Root->getValueType(0).isScalableVector();
17148 }
17149 case ISD::OR: {
17150 return Root->getValueType(0).isScalableVector() &&
17151 Root->getFlags().hasDisjoint();
17152 }
17153 // Vector Widening Integer Add/Sub/Mul Instructions
17154 case RISCVISD::ADD_VL:
17155 case RISCVISD::MUL_VL:
17156 case RISCVISD::VWADD_W_VL:
17157 case RISCVISD::VWADDU_W_VL:
17158 case RISCVISD::SUB_VL:
17159 case RISCVISD::VWSUB_W_VL:
17160 case RISCVISD::VWSUBU_W_VL:
17161 // Vector Widening Floating-Point Add/Sub/Mul Instructions
17162 case RISCVISD::FADD_VL:
17163 case RISCVISD::FSUB_VL:
17164 case RISCVISD::FMUL_VL:
17165 case RISCVISD::VFWADD_W_VL:
17166 case RISCVISD::VFWSUB_W_VL:
17167 return true;
17168 case RISCVISD::OR_VL:
17169 return Root->getFlags().hasDisjoint();
17170 case ISD::SHL:
17171 return Root->getValueType(0).isScalableVector() &&
17172 Subtarget.hasStdExtZvbb();
17173 case RISCVISD::SHL_VL:
17174 return Subtarget.hasStdExtZvbb();
17175 case RISCVISD::VFMADD_VL:
17176 case RISCVISD::VFNMSUB_VL:
17177 case RISCVISD::VFNMADD_VL:
17178 case RISCVISD::VFMSUB_VL:
17179 return true;
17180 default:
17181 return false;
17182 }
17183 }
17184
17185 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
17186 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
17187 const RISCVSubtarget &Subtarget) {
17188 assert(isSupportedRoot(Root, Subtarget) &&
17189 "Trying to build an helper with an "
17190 "unsupported root");
17191 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
17193 OrigOperand = Root->getOperand(OperandIdx);
17194
17195 unsigned Opc = Root->getOpcode();
17196 switch (Opc) {
17197 // We consider
17198 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
17199 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
17200 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
17201 case RISCVISD::VWADD_W_VL:
17202 case RISCVISD::VWADDU_W_VL:
17203 case RISCVISD::VWSUB_W_VL:
17204 case RISCVISD::VWSUBU_W_VL:
17205 case RISCVISD::VFWADD_W_VL:
17206 case RISCVISD::VFWSUB_W_VL:
17207 if (OperandIdx == 1) {
17208 SupportsZExt =
17209 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
17210 SupportsSExt =
17211 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
17212 SupportsFPExt =
17213 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
17214 // There's no existing extension here, so we don't have to worry about
17215 // making sure it gets removed.
17216 EnforceOneUse = false;
17217 break;
17218 }
17219 [[fallthrough]];
17220 default:
17221 fillUpExtensionSupport(Root, DAG, Subtarget);
17222 break;
17223 }
17224 }
17225
17226 /// Helper function to get the Mask and VL from \p Root.
17227 static std::pair<SDValue, SDValue>
17228 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
17229 const RISCVSubtarget &Subtarget) {
17230 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
17231 switch (Root->getOpcode()) {
17232 case ISD::ADD:
17233 case ISD::SUB:
17234 case ISD::MUL:
17235 case ISD::OR:
17236 case ISD::SHL: {
17237 SDLoc DL(Root);
17238 MVT VT = Root->getSimpleValueType(0);
17239 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
17240 }
17241 default:
17242 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
17243 }
17244 }
17245
17246 /// Helper function to check if \p N is commutative with respect to the
17247 /// foldings that are supported by this class.
17248 static bool isCommutative(const SDNode *N) {
17249 switch (N->getOpcode()) {
17250 case ISD::ADD:
17251 case ISD::MUL:
17252 case ISD::OR:
17253 case RISCVISD::ADD_VL:
17254 case RISCVISD::MUL_VL:
17255 case RISCVISD::OR_VL:
17256 case RISCVISD::VWADD_W_VL:
17257 case RISCVISD::VWADDU_W_VL:
17258 case RISCVISD::FADD_VL:
17259 case RISCVISD::FMUL_VL:
17260 case RISCVISD::VFWADD_W_VL:
17261 case RISCVISD::VFMADD_VL:
17262 case RISCVISD::VFNMSUB_VL:
17263 case RISCVISD::VFNMADD_VL:
17264 case RISCVISD::VFMSUB_VL:
17265 return true;
17266 case ISD::SUB:
17267 case RISCVISD::SUB_VL:
17268 case RISCVISD::VWSUB_W_VL:
17269 case RISCVISD::VWSUBU_W_VL:
17270 case RISCVISD::FSUB_VL:
17271 case RISCVISD::VFWSUB_W_VL:
17272 case ISD::SHL:
17273 case RISCVISD::SHL_VL:
17274 return false;
17275 default:
17276 llvm_unreachable("Unexpected opcode");
17277 }
17278 }
17279
17280 /// Get a list of combine to try for folding extensions in \p Root.
17281 /// Note that each returned CombineToTry function doesn't actually modify
17282 /// anything. Instead they produce an optional CombineResult that, if not
17283 /// std::nullopt, needs to be materialized for the combine to be applied.
17284 /// \see CombineResult::materialize.
17285 /// If the related CombineToTry function returns std::nullopt, that means the
17286 /// combine didn't match.
17287 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
17288};
17289
17290/// Helper structure that holds all the necessary information to materialize a
17291/// combine that does some extension folding.
17292struct CombineResult {
17293 /// Opcode to be generated when materializing the combine.
17294 unsigned TargetOpcode;
17295 // No value means no extension is needed.
17296 std::optional<ExtKind> LHSExt;
17297 std::optional<ExtKind> RHSExt;
17298 /// Root of the combine.
17299 SDNode *Root;
17300 /// LHS of the TargetOpcode.
17301 NodeExtensionHelper LHS;
17302 /// RHS of the TargetOpcode.
17303 NodeExtensionHelper RHS;
17304
17305 CombineResult(unsigned TargetOpcode, SDNode *Root,
17306 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
17307 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
17308 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
17309 LHS(LHS), RHS(RHS) {}
17310
17311 /// Return a value that uses TargetOpcode and that can be used to replace
17312 /// Root.
17313 /// The actual replacement is *not* done in that method.
17314 SDValue materialize(SelectionDAG &DAG,
17315 const RISCVSubtarget &Subtarget) const {
17316 SDValue Mask, VL, Passthru;
17317 std::tie(Mask, VL) =
17318 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
17319 switch (Root->getOpcode()) {
17320 default:
17321 Passthru = Root->getOperand(2);
17322 break;
17323 case ISD::ADD:
17324 case ISD::SUB:
17325 case ISD::MUL:
17326 case ISD::OR:
17327 case ISD::SHL:
17328 Passthru = DAG.getUNDEF(Root->getValueType(0));
17329 break;
17330 }
17331 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
17332 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
17333 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
17334 Passthru, Mask, VL);
17335 }
17336};
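// For example (illustrative): for a Root of RISCVISD::ADD_VL with
// LHSExt = RHSExt = ExtKind::SExt, materialize() builds
// VWADD_VL(narrowed LHS, narrowed RHS, passthru, mask, VL), reusing the
// passthru of the original ADD_VL.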
17337
17338/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17339/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17340/// are zext) and LHS and RHS can be folded into Root.
17341 /// AllowExtMask defines which form `ext` can take in this pattern.
17342///
17343/// \note If the pattern can match with both zext and sext, the returned
17344/// CombineResult will feature the zext result.
17345///
17346/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17347/// can be used to apply the pattern.
17348static std::optional<CombineResult>
17349canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
17350 const NodeExtensionHelper &RHS,
17351 uint8_t AllowExtMask, SelectionDAG &DAG,
17352 const RISCVSubtarget &Subtarget) {
17353 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
17354 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17355 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17356 /*RHSExt=*/{ExtKind::ZExt});
17357 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
17358 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17359 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17360 /*RHSExt=*/{ExtKind::SExt});
17361 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
17362 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17363 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17364 /*RHSExt=*/{ExtKind::FPExt});
17365 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
17366 RHS.SupportsBF16Ext)
17367 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17368 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
17369 /*RHSExt=*/{ExtKind::BF16Ext});
17370 return std::nullopt;
17371}
17372
17373/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17374/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17375/// are zext) and LHS and RHS can be folded into Root.
17376///
17377/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17378/// can be used to apply the pattern.
17379static std::optional<CombineResult>
17380canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
17381 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17382 const RISCVSubtarget &Subtarget) {
17383 return canFoldToVWWithSameExtensionImpl(
17384 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
17385 Subtarget);
17386}
17387
17388/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
17389///
17390/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17391/// can be used to apply the pattern.
17392static std::optional<CombineResult>
17393canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
17394 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17395 const RISCVSubtarget &Subtarget) {
17396 if (RHS.SupportsFPExt)
17397 return CombineResult(
17398 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
17399 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
17400
17401 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
17402 // sext/zext?
17403 // Control this behavior behind an option (AllowSplatInVW_W) for testing
17404 // purposes.
17405 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
17406 return CombineResult(
17407 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
17408 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
17409 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
17410 return CombineResult(
17411 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
17412 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
17413 return std::nullopt;
17414}
17415
17416/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
17417///
17418/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17419/// can be used to apply the pattern.
17420static std::optional<CombineResult>
17421canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17422 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17423 const RISCVSubtarget &Subtarget) {
17424 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
17425 Subtarget);
17426}
17427
17428/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
17429///
17430/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17431/// can be used to apply the pattern.
17432static std::optional<CombineResult>
17433canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17434 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17435 const RISCVSubtarget &Subtarget) {
17436 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
17437 Subtarget);
17438}
17439
17440/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
17441///
17442/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17443/// can be used to apply the pattern.
17444static std::optional<CombineResult>
17445canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17446 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17447 const RISCVSubtarget &Subtarget) {
17448 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
17449 Subtarget);
17450}
17451
17452/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
17453///
17454/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17455/// can be used to apply the pattern.
17456static std::optional<CombineResult>
17457canFoldToVWWithBF16EXT(SDNode *Root, const NodeExtensionHelper &LHS,
17458 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17459 const RISCVSubtarget &Subtarget) {
17460 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
17461 Subtarget);
17462}
17463
17464/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
17465///
17466/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17467/// can be used to apply the pattern.
17468static std::optional<CombineResult>
17469canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
17470 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17471 const RISCVSubtarget &Subtarget) {
17472
17473 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
17474 return std::nullopt;
17475 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
17476 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17477 /*RHSExt=*/{ExtKind::ZExt});
17478}
17479
17479
17480 SmallVector<NodeExtensionHelper::CombineToTry>
17481 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
17482 SmallVector<CombineToTry> Strategies;
17483 switch (Root->getOpcode()) {
17484 case ISD::ADD:
17485 case ISD::SUB:
17486 case ISD::OR:
17487 case RISCVISD::ADD_VL:
17488 case RISCVISD::SUB_VL:
17489 case RISCVISD::OR_VL:
17490 case RISCVISD::FADD_VL:
17491 case RISCVISD::FSUB_VL:
17492 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
17493 Strategies.push_back(canFoldToVWWithSameExtension);
17494 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
17495 Strategies.push_back(canFoldToVW_W);
17496 break;
17497 case RISCVISD::FMUL_VL:
17498 case RISCVISD::VFMADD_VL:
17499 case RISCVISD::VFMSUB_VL:
17500 case RISCVISD::VFNMADD_VL:
17501 case RISCVISD::VFNMSUB_VL:
17502 Strategies.push_back(canFoldToVWWithSameExtension);
17503 if (Root->getOpcode() == RISCVISD::VFMADD_VL)
17504 Strategies.push_back(canFoldToVWWithBF16EXT);
17505 break;
17506 case ISD::MUL:
17507 case RISCVISD::MUL_VL:
17508 // mul -> vwmul(u)
17509 Strategies.push_back(canFoldToVWWithSameExtension);
17510 // mul -> vwmulsu
17511 Strategies.push_back(canFoldToVW_SU);
17512 break;
17513 case ISD::SHL:
17514 case RISCVISD::SHL_VL:
17515 // shl -> vwsll
17516 Strategies.push_back(canFoldToVWWithZEXT);
17517 break;
17518 case RISCVISD::VWADD_W_VL:
17519 case RISCVISD::VWSUB_W_VL:
17520 // vwadd_w|vwsub_w -> vwadd|vwsub
17521 Strategies.push_back(canFoldToVWWithSEXT);
17522 break;
17523 case RISCVISD::VWADDU_W_VL:
17524 case RISCVISD::VWSUBU_W_VL:
17525 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
17526 Strategies.push_back(canFoldToVWWithZEXT);
17527 break;
17528 case RISCVISD::VFWADD_W_VL:
17529 case RISCVISD::VFWSUB_W_VL:
17530 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
17531 Strategies.push_back(canFoldToVWWithFPEXT);
17532 break;
17533 default:
17534 llvm_unreachable("Unexpected opcode");
17535 }
17536 return Strategies;
17537}
17538} // End anonymous namespace.
17539
17541 // TODO: Extend this to other binops using generic identity logic
17542 assert(N->getOpcode() == RISCVISD::ADD_VL);
17543 SDValue A = N->getOperand(0);
17544 SDValue B = N->getOperand(1);
17545 SDValue Passthru = N->getOperand(2);
17546 if (!Passthru.isUndef())
17547 // TODO: This could be a vmerge instead
17548 return SDValue();
17549 ;
17550 if (ISD::isConstantSplatVectorAllZeros(B.getNode()))
17551 return A;
17552 // Peek through fixed to scalable
17553 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
17554 ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
17555 return A;
17556 return SDValue();
17557}
17558
17559/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
17560/// The supported combines are:
17561/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
17562/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17563/// mul | mul_vl -> vwmul(u) | vwmul_su
17564/// shl | shl_vl -> vwsll
17565/// fadd_vl -> vfwadd | vfwadd_w
17566/// fsub_vl -> vfwsub | vfwsub_w
17567/// fmul_vl -> vfwmul
17568 /// vwadd(u)_w -> vwadd(u)
17569 /// vwsub(u)_w -> vwsub(u)
17570/// vfwadd_w -> vfwadd
17571/// vfwsub_w -> vfwsub
17572 static SDValue combineOp_VLToVWOp_VL(SDNode *N,
17573 TargetLowering::DAGCombinerInfo &DCI,
17574 const RISCVSubtarget &Subtarget) {
17575 SelectionDAG &DAG = DCI.DAG;
17576 if (DCI.isBeforeLegalize())
17577 return SDValue();
17578
17579 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
17580 return SDValue();
17581
17582 SmallVector<SDNode *> Worklist;
17583 SmallPtrSet<SDNode *, 8> Inserted;
17584 Worklist.push_back(N);
17585 Inserted.insert(N);
17586 SmallVector<CombineResult> CombinesToApply;
17587
17588 while (!Worklist.empty()) {
17589 SDNode *Root = Worklist.pop_back_val();
17590
17591 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
17592 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
17593 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
17594 &Inserted](const NodeExtensionHelper &Op) {
17595 if (Op.needToPromoteOtherUsers()) {
17596 for (SDUse &Use : Op.OrigOperand->uses()) {
17597 SDNode *TheUser = Use.getUser();
17598 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
17599 return false;
17600 // We only support the first 2 operands of FMA.
17601 if (Use.getOperandNo() >= 2)
17602 return false;
17603 if (Inserted.insert(TheUser).second)
17604 Worklist.push_back(TheUser);
17605 }
17606 }
17607 return true;
17608 };
17609
17610 // Control the compile time by limiting the number of nodes we look at in
17611 // total.
17612 if (Inserted.size() > ExtensionMaxWebSize)
17613 return SDValue();
17614
17615 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
17616 NodeExtensionHelper::getSupportedFoldings(Root);
17617
17618 assert(!FoldingStrategies.empty() && "Nothing to be folded");
17619 bool Matched = false;
17620 for (int Attempt = 0;
17621 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
17622 ++Attempt) {
17623
17624 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
17625 FoldingStrategies) {
17626 std::optional<CombineResult> Res =
17627 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
17628 if (Res) {
17629 Matched = true;
17630 CombinesToApply.push_back(*Res);
17631 // All the inputs that are extended need to be folded, otherwise
17632 // we would be leaving the old input (since it may still be used),
17633 // and the new one.
17634 if (Res->LHSExt.has_value())
17635 if (!AppendUsersIfNeeded(LHS))
17636 return SDValue();
17637 if (Res->RHSExt.has_value())
17638 if (!AppendUsersIfNeeded(RHS))
17639 return SDValue();
17640 break;
17641 }
17642 }
17643 std::swap(LHS, RHS);
17644 }
17645 // Right now we do an all-or-nothing approach.
17646 if (!Matched)
17647 return SDValue();
17648 }
17649 // Store the value for the replacement of the input node separately.
17650 SDValue InputRootReplacement;
17651 // We do the RAUW after we materialize all the combines, because some replaced
17652 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
17653 // some of these nodes may appear in the NodeExtensionHelpers of some of the
17654 // yet-to-be-visited CombinesToApply roots.
17655 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
17656 ValuesToReplace.reserve(CombinesToApply.size());
17657 for (CombineResult Res : CombinesToApply) {
17658 SDValue NewValue = Res.materialize(DAG, Subtarget);
17659 if (!InputRootReplacement) {
17660 assert(Res.Root == N &&
17661 "First element is expected to be the current node");
17662 InputRootReplacement = NewValue;
17663 } else {
17664 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
17665 }
17666 }
17667 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
17668 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
17669 DCI.AddToWorklist(OldNewValues.second.getNode());
17670 }
17671 return InputRootReplacement;
17672}
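// For example (illustrative):
//   (add_vl (vsext_vl a, m, vl), (vsext_vl b, m, vl), passthru, m, vl)
// is rewritten to
//   (vwadd_vl a, b, passthru, m, vl)
// provided every other user of each extension can be rewritten as well,
// since the combine above is applied all-or-nothing over the collected web.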
17673
17674// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
17675// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
17676// y will be the Passthru and cond will be the Mask.
17677 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
17678 unsigned Opc = N->getOpcode();
17679 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17680 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17681
17682 SDValue Y = N->getOperand(0);
17683 SDValue MergeOp = N->getOperand(1);
17684 unsigned MergeOpc = MergeOp.getOpcode();
17685
17686 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
17687 return SDValue();
17688
17689 SDValue X = MergeOp->getOperand(1);
17690
17691 if (!MergeOp.hasOneUse())
17692 return SDValue();
17693
17694 // Passthru should be undef
17695 SDValue Passthru = N->getOperand(2);
17696 if (!Passthru.isUndef())
17697 return SDValue();
17698
17699 // Mask should be all ones
17700 SDValue Mask = N->getOperand(3);
17701 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
17702 return SDValue();
17703
17704 // False value of MergeOp should be all zeros
17705 SDValue Z = MergeOp->getOperand(2);
17706
17707 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
17708 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
17709 Z = Z.getOperand(1);
17710
17711 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
17712 return SDValue();
17713
17714 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
17715 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
17716 N->getFlags());
17717}
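// For example (illustrative):
//   (vwaddu_w_vl y, (vmerge_vl cond, x, 0), undef, allones, vl)
// becomes
//   (vwaddu_w_vl y, x, /*passthru=*/y, /*mask=*/cond, vl)
// so lanes where cond is false keep the value of y, matching y + 0.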
17718
17719 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
17720 TargetLowering::DAGCombinerInfo &DCI,
17721 const RISCVSubtarget &Subtarget) {
17722 [[maybe_unused]] unsigned Opc = N->getOpcode();
17723 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17724 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17725
17726 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17727 return V;
17728
17729 return combineVWADDSUBWSelect(N, DCI.DAG);
17730}
17731
17732// Helper function for performMemPairCombine.
17733// Try to combine the memory loads/stores LSNode1 and LSNode2
17734// into a single memory pair operation.
17735 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
17736 LSBaseSDNode *LSNode2, SDValue BasePtr,
17737 uint64_t Imm) {
17738 SmallPtrSet<const SDNode *, 32> Visited;
17739 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
17740
17741 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
17742 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
17743 return SDValue();
17744
17745 MachineFunction &MF = DAG.getMachineFunction();
17746 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17747
17748 // The new operation has twice the width.
17749 MVT XLenVT = Subtarget.getXLenVT();
17750 EVT MemVT = LSNode1->getMemoryVT();
17751 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
17752 MachineMemOperand *MMO = LSNode1->getMemOperand();
17753 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
17754 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
17755
17756 if (LSNode1->getOpcode() == ISD::LOAD) {
17757 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
17758 unsigned Opcode;
17759 if (MemVT == MVT::i32)
17760 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
17761 else
17762 Opcode = RISCVISD::TH_LDD;
17763
17764 SDValue Res = DAG.getMemIntrinsicNode(
17765 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
17766 {LSNode1->getChain(), BasePtr,
17767 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
17768 NewMemVT, NewMMO);
17769
17770 SDValue Node1 =
17771 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
17772 SDValue Node2 =
17773 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
17774
17775 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
17776 return Node1;
17777 } else {
17778 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
17779
17780 SDValue Res = DAG.getMemIntrinsicNode(
17781 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
17782 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
17783 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
17784 NewMemVT, NewMMO);
17785
17786 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
17787 return Res;
17788 }
17789}
17790
17791// Try to combine two adjacent loads/stores to a single pair instruction from
17792// the XTHeadMemPair vendor extension.
17793 static SDValue performMemPairCombine(SDNode *N,
17794 TargetLowering::DAGCombinerInfo &DCI) {
17795 SelectionDAG &DAG = DCI.DAG;
17796 MachineFunction &MF = DAG.getMachineFunction();
17797 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17798
17799 // Target does not support load/store pair.
17800 if (!Subtarget.hasVendorXTHeadMemPair())
17801 return SDValue();
17802
17803 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
17804 EVT MemVT = LSNode1->getMemoryVT();
17805 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
17806
17807 // No volatile, indexed or atomic loads/stores.
17808 if (!LSNode1->isSimple() || LSNode1->isIndexed())
17809 return SDValue();
17810
17811 // Function to get a base + constant representation from a memory value.
17812 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
17813 if (Ptr->getOpcode() == ISD::ADD)
17814 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
17815 return {Ptr->getOperand(0), C1->getZExtValue()};
17816 return {Ptr, 0};
17817 };
17818
17819 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
17820
17821 SDValue Chain = N->getOperand(0);
17822 for (SDUse &Use : Chain->uses()) {
17823 if (Use.getUser() != N && Use.getResNo() == 0 &&
17824 Use.getUser()->getOpcode() == N->getOpcode()) {
17825 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
17826
17827 // No volatile, indexed or atomic loads/stores.
17828 if (!LSNode2->isSimple() || LSNode2->isIndexed())
17829 continue;
17830
17831 // Check if LSNode1 and LSNode2 have the same type and extension.
17832 if (LSNode1->getOpcode() == ISD::LOAD)
17833 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
17834 cast<LoadSDNode>(LSNode1)->getExtensionType())
17835 continue;
17836
17837 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
17838 continue;
17839
17840 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
17841
17842 // Check if the base pointer is the same for both instructions.
17843 if (Base1 != Base2)
17844 continue;
17845
17846 // Check if the offsets match the XTHeadMemPair encoding constraints.
17847 bool Valid = false;
17848 if (MemVT == MVT::i32) {
17849 // Check for adjacent i32 values and a 2-bit index.
17850 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
17851 Valid = true;
17852 } else if (MemVT == MVT::i64) {
17853 // Check for adjacent i64 values and a 2-bit index.
17854 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
17855 Valid = true;
17856 }
17857
17858 if (!Valid)
17859 continue;
17860
17861 // Try to combine.
17862 if (SDValue Res =
17863 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
17864 return Res;
17865 }
17866 }
17867
17868 return SDValue();
17869}
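// For example (illustrative): two adjacent i64 loads at Base+16 and Base+24
// pass the isShiftedUInt<2, 4> check (16 == 1 << 4 with a 2-bit index), so
// they can be fused into a single TH_LDD returning both values and a chain.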
17870
17871// Fold
17872// (fp_to_int (froundeven X)) -> fcvt X, rne
17873// (fp_to_int (ftrunc X)) -> fcvt X, rtz
17874// (fp_to_int (ffloor X)) -> fcvt X, rdn
17875// (fp_to_int (fceil X)) -> fcvt X, rup
17876// (fp_to_int (fround X)) -> fcvt X, rmm
17877// (fp_to_int (frint X)) -> fcvt X
17878 static SDValue performFP_TO_INTCombine(SDNode *N,
17879 TargetLowering::DAGCombinerInfo &DCI,
17880 const RISCVSubtarget &Subtarget) {
17881 SelectionDAG &DAG = DCI.DAG;
17882 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17883 MVT XLenVT = Subtarget.getXLenVT();
17884
17885 SDValue Src = N->getOperand(0);
17886
17887 // Don't do this for strict-fp Src.
17888 if (Src->isStrictFPOpcode())
17889 return SDValue();
17890
17891 // Ensure the FP type is legal.
17892 if (!TLI.isTypeLegal(Src.getValueType()))
17893 return SDValue();
17894
17895 // Don't do this for f16 with Zfhmin and not Zfh.
17896 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
17897 return SDValue();
17898
17899 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
17900 // If the result is invalid, we didn't find a foldable instruction.
17901 if (FRM == RISCVFPRndMode::Invalid)
17902 return SDValue();
17903
17904 SDLoc DL(N);
17905 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
17906 EVT VT = N->getValueType(0);
17907
17908 if (VT.isVector() && TLI.isTypeLegal(VT)) {
17909 MVT SrcVT = Src.getSimpleValueType();
17910 MVT SrcContainerVT = SrcVT;
17911 MVT ContainerVT = VT.getSimpleVT();
17912 SDValue XVal = Src.getOperand(0);
17913
17914 // For widening and narrowing conversions we just combine it into a
17915 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
17916 // end up getting lowered to their appropriate pseudo instructions based on
17917 // their operand types
17918 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
17919 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
17920 return SDValue();
17921
17922 // Make fixed-length vectors scalable first
17923 if (SrcVT.isFixedLengthVector()) {
17924 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
17925 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
17926 ContainerVT =
17927 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
17928 }
17929
17930 auto [Mask, VL] =
17931 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
17932
17933 SDValue FpToInt;
17934 if (FRM == RISCVFPRndMode::RTZ) {
17935 // Use the dedicated trunc static rounding mode if we're truncating so we
17936 // don't need to generate calls to fsrmi/fsrm
17937 unsigned Opc =
17938 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
17939 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
17940 } else {
17941 unsigned Opc =
17942 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
17943 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
17944 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
17945 }
17946
17947 // If converted from fixed-length to scalable, convert back
17948 if (VT.isFixedLengthVector())
17949 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
17950
17951 return FpToInt;
17952 }
17953
17954 // Only handle XLen or i32 types. Other types narrower than XLen will
17955 // eventually be legalized to XLenVT.
17956 if (VT != MVT::i32 && VT != XLenVT)
17957 return SDValue();
17958
17959 unsigned Opc;
17960 if (VT == XLenVT)
17961 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
17962 else
17963 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
17964
17965 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
17966 DAG.getTargetConstant(FRM, DL, XLenVT));
17967 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
17968}
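// For example (illustrative): on RV64, (i64 (fp_to_sint (ffloor X:f64)))
// becomes (FCVT_X X, rdn), i.e. a single fcvt.l.d with the static rounding
// mode rdn instead of a separate floor followed by a conversion.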
17969
17970// Fold
17971// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
17972// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
17973// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
17974// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
17975// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
17976// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
17977 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
17978 TargetLowering::DAGCombinerInfo &DCI,
17979 const RISCVSubtarget &Subtarget) {
17980 SelectionDAG &DAG = DCI.DAG;
17981 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17982 MVT XLenVT = Subtarget.getXLenVT();
17983
17984 // Only handle XLen types. Other types narrower than XLen will eventually be
17985 // legalized to XLenVT.
17986 EVT DstVT = N->getValueType(0);
17987 if (DstVT != XLenVT)
17988 return SDValue();
17989
17990 SDValue Src = N->getOperand(0);
17991
17992 // Don't do this for strict-fp Src.
17993 if (Src->isStrictFPOpcode())
17994 return SDValue();
17995
17996 // Ensure the FP type is also legal.
17997 if (!TLI.isTypeLegal(Src.getValueType()))
17998 return SDValue();
17999
18000 // Don't do this for f16 with Zfhmin and not Zfh.
18001 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18002 return SDValue();
18003
18004 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18005
18006 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18007 if (FRM == RISCVFPRndMode::Invalid)
18008 return SDValue();
18009
18010 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
18011
18012 unsigned Opc;
18013 if (SatVT == DstVT)
18014 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18015 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
18016 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18017 else
18018 return SDValue();
18019 // FIXME: Support other SatVTs by clamping before or after the conversion.
18020
18021 Src = Src.getOperand(0);
18022
18023 SDLoc DL(N);
18024 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
18025 DAG.getTargetConstant(FRM, DL, XLenVT));
18026
18027 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
18028 // extend.
18029 if (Opc == RISCVISD::FCVT_WU_RV64)
18030 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
18031
18032 // RISC-V FP-to-int conversions saturate to the destination register size, but
18033 // don't produce 0 for nan.
18034 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
18035 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
18036}
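// For example (illustrative): (i64 (fp_to_sint_sat (ftrunc X:f64))) becomes
// select (X uno X), 0, (FCVT_X X, rtz); the fcvt already saturates on
// overflow and the unordered compare supplies the 0 result required for NaN.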
18037
18038// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
18039// smaller than XLenVT.
18040 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
18041 const RISCVSubtarget &Subtarget) {
18042 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
18043
18044 SDValue Src = N->getOperand(0);
18045 if (Src.getOpcode() != ISD::BSWAP)
18046 return SDValue();
18047
18048 EVT VT = N->getValueType(0);
18049 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
18050 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
18051 return SDValue();
18052
18053 SDLoc DL(N);
18054 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
18055}
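// For example (illustrative): for an i16 value on RV64, (bitreverse (bswap X))
// reverses the bits within each byte while keeping the byte order, which is
// exactly what the BREV8 node above computes.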
18056
18057 static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
18058 const RISCVSubtarget &Subtarget) {
18059 // Fold:
18060 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
18061
18062 // Check if its first operand is a vp.load.
18063 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
18064 if (!VPLoad)
18065 return SDValue();
18066
18067 EVT LoadVT = VPLoad->getValueType(0);
18068 // We do not have a strided_load version for masks, and the evl of vp.reverse
18069 // and vp.load should always be the same.
18070 if (!LoadVT.getVectorElementType().isByteSized() ||
18071 N->getOperand(2) != VPLoad->getVectorLength() ||
18072 !N->getOperand(0).hasOneUse())
18073 return SDValue();
18074
18075 // Check if the mask of outer vp.reverse are all 1's.
18076 if (!isOneOrOneSplat(N->getOperand(1)))
18077 return SDValue();
18078
18079 SDValue LoadMask = VPLoad->getMask();
18080 // If Mask is all ones, then the load is unmasked and can be reversed.
18081 if (!isOneOrOneSplat(LoadMask)) {
18082 // If the mask is not all ones, we can reverse the load if the mask was also
18083 // reversed by an unmasked vp.reverse with the same EVL.
18084 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18085 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
18086 LoadMask.getOperand(2) != VPLoad->getVectorLength())
18087 return SDValue();
18088 LoadMask = LoadMask.getOperand(0);
18089 }
18090
18091 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
18092 SDLoc DL(N);
18093 MVT XLenVT = Subtarget.getXLenVT();
18094 SDValue NumElem = VPLoad->getVectorLength();
18095 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
18096
18097 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18098 DAG.getConstant(1, DL, XLenVT));
18099 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18100 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18101 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
18102 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18103
18104 MachineFunction &MF = DAG.getMachineFunction();
18105 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
18106 MachineMemOperand *MMO = MF.getMachineMemOperand(
18107 PtrInfo, VPLoad->getMemOperand()->getFlags(),
18108 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
18109
18110 SDValue Ret = DAG.getStridedLoadVP(
18111 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
18112 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
18113
18114 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
18115
18116 return Ret;
18117}
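// For example (illustrative): reversing a vp.load of nxv4i32 from ADDR with
// EVL 8 becomes a strided load from ADDR + 7 * 4 with stride -4, so lane 0 of
// the result is the last element the original load would have produced.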
18118
18119 static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG,
18120 const RISCVSubtarget &Subtarget) {
18121 // Fold:
18122 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
18123 // -1, MASK)
18124 auto *VPStore = cast<VPStoreSDNode>(N);
18125
18126 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
18127 return SDValue();
18128
18129 SDValue VPReverse = VPStore->getValue();
18130 EVT ReverseVT = VPReverse->getValueType(0);
18131
18132 // We do not have a strided_store version for masks, and the evl of vp.reverse
18133 // and vp.store should always be the same.
18134 if (!ReverseVT.getVectorElementType().isByteSized() ||
18135 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
18136 !VPReverse.hasOneUse())
18137 return SDValue();
18138
18139 SDValue StoreMask = VPStore->getMask();
18140 // If Mask is all ones, then the store is unmasked and can be reversed.
18141 if (!isOneOrOneSplat(StoreMask)) {
18142 // If the mask is not all ones, we can reverse the store if the mask was
18143 // also reversed by an unmasked vp.reverse with the same EVL.
18144 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18145 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
18146 StoreMask.getOperand(2) != VPStore->getVectorLength())
18147 return SDValue();
18148 StoreMask = StoreMask.getOperand(0);
18149 }
18150
18151 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
18152 SDLoc DL(N);
18153 MVT XLenVT = Subtarget.getXLenVT();
18154 SDValue NumElem = VPStore->getVectorLength();
18155 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
18156
18157 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18158 DAG.getConstant(1, DL, XLenVT));
18159 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18160 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18161 SDValue Base =
18162 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
18163 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18164
18165 MachineFunction &MF = DAG.getMachineFunction();
18166 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
18167 MachineMemOperand *MMO = MF.getMachineMemOperand(
18168 PtrInfo, VPStore->getMemOperand()->getFlags(),
18169 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
18170
18171 return DAG.getStridedStoreVP(
18172 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
18173 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
18174 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
18175 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
18176}
18177
18178// Peephole avgceil pattern.
18179// %1 = zext <N x i8> %a to <N x i32>
18180// %2 = zext <N x i8> %b to <N x i32>
18181// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
18182// %4 = add nuw nsw <N x i32> %3, %2
18183// %5 = lshr <N x i32> %4, splat (i32 1)
18184// %6 = trunc <N x i32> %5 to <N x i8>
18186 const RISCVSubtarget &Subtarget) {
18187 EVT VT = N->getValueType(0);
18188
18189 // Ignore fixed vectors.
18190 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18191 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
18192 return SDValue();
18193
18194 SDValue In = N->getOperand(0);
18195 SDValue Mask = N->getOperand(1);
18196 SDValue VL = N->getOperand(2);
18197
18198 // Input should be a vp_srl with same mask and VL.
18199 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
18200 In.getOperand(3) != VL)
18201 return SDValue();
18202
18203 // Shift amount should be 1.
18204 if (!isOneOrOneSplat(In.getOperand(1)))
18205 return SDValue();
18206
18207 // Shifted value should be a vp_add with same mask and VL.
18208 SDValue LHS = In.getOperand(0);
18209 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
18210 LHS.getOperand(3) != VL)
18211 return SDValue();
18212
18213 SDValue Operands[3];
18214
18215 // Matches another VP_ADD with same VL and Mask.
18216 auto FindAdd = [&](SDValue V, SDValue Other) {
18217 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
18218 V.getOperand(3) != VL)
18219 return false;
18220
18221 Operands[0] = Other;
18222 Operands[1] = V.getOperand(1);
18223 Operands[2] = V.getOperand(0);
18224 return true;
18225 };
18226
18227 // We need to find another VP_ADD in one of the operands.
18228 SDValue LHS0 = LHS.getOperand(0);
18229 SDValue LHS1 = LHS.getOperand(1);
18230 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
18231 return SDValue();
18232
18233 // Now we have three operands of two additions. Check that one of them is a
18234 // constant vector with ones.
18235 auto I = llvm::find_if(Operands,
18236 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
18237 if (I == std::end(Operands))
18238 return SDValue();
18239 // We found a vector with ones; move it to the end of the Operands array.
18240 std::swap(*I, Operands[2]);
18241
18242 // Make sure the other 2 operands can be promoted from the result type.
18243 for (SDValue Op : drop_end(Operands)) {
18244 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
18245 Op.getOperand(2) != VL)
18246 return SDValue();
18247 // Input must be the same size or smaller than our result.
18248 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
18249 return SDValue();
18250 }
18251
18252 // Pattern is detected.
18253 // Rebuild the zero extends in case the inputs are smaller than our result.
18254 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
18255 Operands[0].getOperand(0), Mask, VL);
18256 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
18257 Operands[1].getOperand(0), Mask, VL);
18258 // Build an AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
18259 // mode.
18260 SDLoc DL(N);
18261 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
18262 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
18263}
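// For example (illustrative): with a = 5 and b = 6 the matched pattern
// computes (5 + 6 + 1) >> 1 = 6, the rounded-up average; AVGCEILU_VL is
// selected as vaaddu with rounding mode rnu, which yields the same result
// without widening to i32.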
18264
18265// Convert from one FMA opcode to another based on whether we are negating the
18266// multiply result and/or the accumulator.
18267// NOTE: Only supports RVV operations with VL.
18268static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
18269 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
18270 if (NegMul) {
18271 // clang-format off
18272 switch (Opcode) {
18273 default: llvm_unreachable("Unexpected opcode");
18274 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18275 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18276 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18277 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18278 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18279 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18280 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18281 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18282 }
18283 // clang-format on
18284 }
18285
18286 // Negating the accumulator changes ADD<->SUB.
18287 if (NegAcc) {
18288 // clang-format off
18289 switch (Opcode) {
18290 default: llvm_unreachable("Unexpected opcode");
18291 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18292 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18293 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18294 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18295 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18296 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18297 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18298 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18299 }
18300 // clang-format on
18301 }
18302
18303 return Opcode;
18304}
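// For example (illustrative): negateFMAOpcode(RISCVISD::VFMADD_VL,
// /*NegMul=*/true, /*NegAcc=*/true) first flips VFMADD_VL to VFNMSUB_VL and
// then to VFNMADD_VL, matching -(a * b) - c.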
18305
18306 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
18307 // Fold FNEG_VL into FMA opcodes.
18308 // The first operand of strict-fp is chain.
18309 bool IsStrict =
18310 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
18311 unsigned Offset = IsStrict ? 1 : 0;
18312 SDValue A = N->getOperand(0 + Offset);
18313 SDValue B = N->getOperand(1 + Offset);
18314 SDValue C = N->getOperand(2 + Offset);
18315 SDValue Mask = N->getOperand(3 + Offset);
18316 SDValue VL = N->getOperand(4 + Offset);
18317
18318 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
18319 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
18320 V.getOperand(2) == VL) {
18321 // Return the negated input.
18322 V = V.getOperand(0);
18323 return true;
18324 }
18325
18326 return false;
18327 };
18328
18329 bool NegA = invertIfNegative(A);
18330 bool NegB = invertIfNegative(B);
18331 bool NegC = invertIfNegative(C);
18332
18333 // If no operands are negated, we're done.
18334 if (!NegA && !NegB && !NegC)
18335 return SDValue();
18336
18337 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
18338 if (IsStrict)
18339 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
18340 {N->getOperand(0), A, B, C, Mask, VL});
18341 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
18342 VL);
18343}
18344
18345 static SDValue performVFMADD_VLCombine(SDNode *N,
18346 TargetLowering::DAGCombinerInfo &DCI,
18347 const RISCVSubtarget &Subtarget) {
18348 SelectionDAG &DAG = DCI.DAG;
18349
18350 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
18351 return V;
18352
18353 // FIXME: Ignore strict opcodes for now.
18354 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
18355 return SDValue();
18356
18357 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18358}
18359
18360 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
18361 const RISCVSubtarget &Subtarget) {
18362 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
18363
18364 EVT VT = N->getValueType(0);
18365
18366 if (VT != Subtarget.getXLenVT())
18367 return SDValue();
18368
18369 if (!isa<ConstantSDNode>(N->getOperand(1)))
18370 return SDValue();
18371 uint64_t ShAmt = N->getConstantOperandVal(1);
18372
18373 SDValue N0 = N->getOperand(0);
18374
18375 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
18376 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
18377 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
18378 unsigned ExtSize =
18379 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
18380 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
18381 N0.getOperand(0).hasOneUse() &&
18382 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
18383 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
18384 if (LShAmt < ExtSize) {
18385 unsigned Size = VT.getSizeInBits();
18386 SDLoc ShlDL(N0.getOperand(0));
18387 SDValue Shl =
18388 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
18389 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
18390 SDLoc DL(N);
18391 return DAG.getNode(ISD::SRA, DL, VT, Shl,
18392 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
18393 }
18394 }
18395 }
18396
18397 if (ShAmt > 32 || VT != MVT::i64)
18398 return SDValue();
18399
18400 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
18401 // FIXME: Should this be a generic combine? There's a similar combine on X86.
18402 //
18403 // Also try these folds where an add or sub is in the middle.
18404 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
18405 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
18406 SDValue Shl;
18407 ConstantSDNode *AddC = nullptr;
18408
18409 // We might have an ADD or SUB between the SRA and SHL.
18410 bool IsAdd = N0.getOpcode() == ISD::ADD;
18411 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
18412 // Other operand needs to be a constant we can modify.
18413 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
18414 if (!AddC)
18415 return SDValue();
18416
18417 // AddC needs to have at least 32 trailing zeros.
18418 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
18419 return SDValue();
18420
18421 // All users should be a shift by constant less than or equal to 32. This
18422 // ensures we'll do this optimization for each of them to produce an
18423 // add/sub+sext_inreg they can all share.
18424 for (SDNode *U : N0->users()) {
18425 if (U->getOpcode() != ISD::SRA ||
18426 !isa<ConstantSDNode>(U->getOperand(1)) ||
18427 U->getConstantOperandVal(1) > 32)
18428 return SDValue();
18429 }
18430
18431 Shl = N0.getOperand(IsAdd ? 0 : 1);
18432 } else {
18433 // Not an ADD or SUB.
18434 Shl = N0;
18435 }
18436
18437 // Look for a shift left by 32.
18438 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
18439 Shl.getConstantOperandVal(1) != 32)
18440 return SDValue();
18441
18442 // If we didn't look through an add/sub, then the shl should have one use.
18443 // If we did look through an add/sub, the sext_inreg we create is free so
18444 // we're only creating 2 new instructions. It's enough to only remove the
18445 // original sra+add/sub.
18446 if (!AddC && !Shl.hasOneUse())
18447 return SDValue();
18448
18449 SDLoc DL(N);
18450 SDValue In = Shl.getOperand(0);
18451
18452 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
18453 // constant.
18454 if (AddC) {
18455 SDValue ShiftedAddC =
18456 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
18457 if (IsAdd)
18458 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
18459 else
18460 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
18461 }
18462
18463 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
18464 DAG.getValueType(MVT::i32));
18465 if (ShAmt == 32)
18466 return SExt;
18467
18468 return DAG.getNode(
18469 ISD::SHL, DL, MVT::i64, SExt,
18470 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
18471}
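// Worked example of the (sra (shl X, 32), 32 - C) fold above, assuming
// ShAmt == 27 (so C == 5): (sra (shl X, 32), 27) is rewritten to
// (shl (sext_inreg X, i32), 5). The sext_inreg typically selects to SEXT.W
// (or may fold away entirely), and when an add/sub was looked through it can
// be shared by all of the shift users checked above.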
18472
18473// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
18474// the result is used as the condition of a br_cc or select_cc we can invert,
18475// inverting the setcc is free, and Z is 0/1. Caller will invert the
18476// br_cc/select_cc.
18478 bool IsAnd = Cond.getOpcode() == ISD::AND;
18479 if (!IsAnd && Cond.getOpcode() != ISD::OR)
18480 return SDValue();
18481
18482 if (!Cond.hasOneUse())
18483 return SDValue();
18484
18485 SDValue Setcc = Cond.getOperand(0);
18486 SDValue Xor = Cond.getOperand(1);
18487 // Canonicalize setcc to LHS.
18488 if (Setcc.getOpcode() != ISD::SETCC)
18489 std::swap(Setcc, Xor);
18490 // LHS should be a setcc and RHS should be an xor.
18491 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
18492 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
18493 return SDValue();
18494
18495 // If the condition is an And, SimplifyDemandedBits may have changed
18496 // (xor Z, 1) to (not Z).
18497 SDValue Xor1 = Xor.getOperand(1);
18498 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
18499 return SDValue();
18500
18501 EVT VT = Cond.getValueType();
18502 SDValue Xor0 = Xor.getOperand(0);
18503
18504 // The LHS of the xor needs to be 0/1.
18506 if (!DAG.MaskedValueIsZero(Xor0, Mask))
18507 return SDValue();
18508
18509 // We can only invert integer setccs.
18510 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
18511 if (!SetCCOpVT.isScalarInteger())
18512 return SDValue();
18513
18514 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
18515 if (ISD::isIntEqualitySetCC(CCVal)) {
18516 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
18517 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
18518 Setcc.getOperand(1), CCVal);
18519 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
18520 // Invert (setlt 0, X) by converting to (setlt X, 1).
18521 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
18522 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
18523 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
18524 // Invert (setlt X, 1) by converting to (setlt 0, X).
18525 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
18526 DAG.getConstant(0, SDLoc(Setcc), VT),
18527 Setcc.getOperand(0), CCVal);
18528 } else
18529 return SDValue();
18530
18531 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
18532 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
18533}
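// Illustrative example, assuming Z is known to be 0 or 1: a condition of the
// form (and (setcc X, Y, eq), (xor Z, 1)) is rewritten here to
// (or (setcc X, Y, ne), Z). The caller then inverts the br_cc/select_cc that
// consumes it, so the overall behavior is unchanged but the xor with 1
// disappears.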
18534
18535// Perform common combines for BR_CC and SELECT_CC conditions.
18536static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
18537 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
18538 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
18539
18540 // Since an arithmetic right shift always preserves the sign bit, the
18541 // shift can be omitted.
18542 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
18543 // setge (sra X, N), 0 -> setge X, 0
18544 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
18545 LHS.getOpcode() == ISD::SRA) {
18546 LHS = LHS.getOperand(0);
18547 return true;
18548 }
18549
18550 if (!ISD::isIntEqualitySetCC(CCVal))
18551 return false;
18552
18553 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
18554 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
18555 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
18556 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
18557 // If we're looking for eq 0 instead of ne 0, we need to invert the
18558 // condition.
18559 bool Invert = CCVal == ISD::SETEQ;
18560 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
18561 if (Invert)
18562 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18563
18564 RHS = LHS.getOperand(1);
18565 LHS = LHS.getOperand(0);
18566 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
18567
18568 CC = DAG.getCondCode(CCVal);
18569 return true;
18570 }
18571
18572 // If XOR is reused and has an immediate that will fit in XORI,
18573 // do not fold.
18574 auto isXorImmediate = [](const SDValue &Op) -> bool {
18575 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
18576 return isInt<12>(XorCnst->getSExtValue());
18577 return false;
18578 };
18579 // Fold (X(i1) ^ 1) == 0 -> X != 0
18580 auto singleBitOp = [&DAG](const SDValue &VarOp,
18581 const SDValue &ConstOp) -> bool {
18582 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
18583 const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
18584 return (XorCnst->getSExtValue() == 1) &&
18585 DAG.MaskedValueIsZero(VarOp, Mask);
18586 }
18587 return false;
18588 };
18589 auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
18590 for (const SDNode *UserNode : Op->users()) {
18591 const unsigned Opcode = UserNode->getOpcode();
18592 if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
18593 return false;
18594 }
18595 return true;
18596 };
18597 auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
18598 const SDValue &LHS, const SDValue &RHS) -> bool {
18599 return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
18600 (!isXorImmediate(LHS.getOperand(1)) ||
18601 singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
18602 onlyUsedBySelectOrBR(LHS));
18603 };
18604 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
18605 if (isFoldableXorEq(LHS, RHS)) {
18606 RHS = LHS.getOperand(1);
18607 LHS = LHS.getOperand(0);
18608 return true;
18609 }
18610 // Fold ((sext (xor X, C)), 0, eq/ne) -> (sext(X), C, eq/ne)
18611 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
18612 const SDValue LHS0 = LHS.getOperand(0);
18613 if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
18614 // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y))
18615 RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18616 LHS0.getOperand(1), LHS.getOperand(1));
18617 LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18618 LHS0.getOperand(0), LHS.getOperand(1));
18619 return true;
18620 }
18621 }
18622
18623 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
18624 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
18625 LHS.getOperand(1).getOpcode() == ISD::Constant) {
18626 SDValue LHS0 = LHS.getOperand(0);
18627 if (LHS0.getOpcode() == ISD::AND &&
18628 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
18629 uint64_t Mask = LHS0.getConstantOperandVal(1);
18630 uint64_t ShAmt = LHS.getConstantOperandVal(1);
18631 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
18632 // XAndesPerf supports branch on test bit.
18633 if (Subtarget.hasVendorXAndesPerf()) {
18634 LHS =
18635 DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
18636 DAG.getConstant(Mask, DL, LHS.getValueType()));
18637 return true;
18638 }
18639
18640 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
18641 CC = DAG.getCondCode(CCVal);
18642
18643 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
18644 LHS = LHS0.getOperand(0);
18645 if (ShAmt != 0)
18646 LHS =
18647 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
18648 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
18649 return true;
18650 }
18651 }
18652 }
18653
18654 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
18655 // This can occur when legalizing some floating point comparisons.
18656 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
18657 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
18658 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18659 CC = DAG.getCondCode(CCVal);
18660 RHS = DAG.getConstant(0, DL, LHS.getValueType());
18661 return true;
18662 }
18663
18664 if (isNullConstant(RHS)) {
18665 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
18666 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18667 CC = DAG.getCondCode(CCVal);
18668 LHS = NewCond;
18669 return true;
18670 }
18671 }
18672
18673 return false;
18674}
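// Two small examples of the equality folds above: a br_cc condition of the
// form ((xor X, Y), 0, ne) becomes (X, Y, ne), so a BNE can compare X and Y
// directly; and ((srl (and X, 8), 3), 0, ne) becomes
// ((shl X, XLen-4), 0, lt), turning the single-bit test into a shift plus a
// sign check (on XAndesPerf the AND is kept so its branch-on-test-bit
// instruction can be used instead).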
18675
18676// Fold
18677// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
18678// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
18679// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
18680// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
18681// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
18682// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
18684 SDValue TrueVal, SDValue FalseVal,
18685 bool Swapped) {
18686 bool Commutative = true;
18687 unsigned Opc = TrueVal.getOpcode();
18688 switch (Opc) {
18689 default:
18690 return SDValue();
18691 case ISD::SHL:
18692 case ISD::SRA:
18693 case ISD::SRL:
18694 case ISD::SUB:
18695 case ISD::ROTL:
18696 case ISD::ROTR:
18697 Commutative = false;
18698 break;
18699 case ISD::ADD:
18700 case ISD::OR:
18701 case ISD::XOR:
18702 break;
18703 }
18704
18705 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
18706 return SDValue();
18707
18708 unsigned OpToFold;
18709 if (FalseVal == TrueVal.getOperand(0))
18710 OpToFold = 0;
18711 else if (Commutative && FalseVal == TrueVal.getOperand(1))
18712 OpToFold = 1;
18713 else
18714 return SDValue();
18715
18716 EVT VT = N->getValueType(0);
18717 SDLoc DL(N);
18718 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
18719 EVT OtherOpVT = OtherOp.getValueType();
18720 SDValue IdentityOperand =
18721 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
18722 if (!Commutative)
18723 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
18724 assert(IdentityOperand && "No identity operand!");
18725
18726 if (Swapped)
18727 std::swap(OtherOp, IdentityOperand);
18728 SDValue NewSel =
18729 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
18730 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
18731}
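// Concrete instance of the folds listed above:
// (select C, (add Y, X), Y) matches with OpToFold == 0, so it is rewritten
// to (add Y, (select C, X, 0)); when C is false the inner select produces
// the identity value 0 and the result is simply Y.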
18732
18733// This tries to get rid of `select` and `icmp` that are being used to handle
18734// targets that do not support `cttz(0)`/`ctlz(0)`.
18736 SDValue Cond = N->getOperand(0);
18737
18738 // This represents either CTTZ or CTLZ instruction.
18739 SDValue CountZeroes;
18740
18741 SDValue ValOnZero;
18742
18743 if (Cond.getOpcode() != ISD::SETCC)
18744 return SDValue();
18745
18746 if (!isNullConstant(Cond->getOperand(1)))
18747 return SDValue();
18748
18749 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
18750 if (CCVal == ISD::CondCode::SETEQ) {
18751 CountZeroes = N->getOperand(2);
18752 ValOnZero = N->getOperand(1);
18753 } else if (CCVal == ISD::CondCode::SETNE) {
18754 CountZeroes = N->getOperand(1);
18755 ValOnZero = N->getOperand(2);
18756 } else {
18757 return SDValue();
18758 }
18759
18760 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
18761 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
18762 CountZeroes = CountZeroes.getOperand(0);
18763
18764 if (CountZeroes.getOpcode() != ISD::CTTZ &&
18765 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
18766 CountZeroes.getOpcode() != ISD::CTLZ &&
18767 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
18768 return SDValue();
18769
18770 if (!isNullConstant(ValOnZero))
18771 return SDValue();
18772
18773 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
18774 if (Cond->getOperand(0) != CountZeroesArgument)
18775 return SDValue();
18776
18777 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
18778 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
18779 CountZeroes.getValueType(), CountZeroesArgument);
18780 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
18781 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
18782 CountZeroes.getValueType(), CountZeroesArgument);
18783 }
18784
18785 unsigned BitWidth = CountZeroes.getValueSizeInBits();
18786 SDValue BitWidthMinusOne =
18787 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
18788
18789 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
18790 CountZeroes, BitWidthMinusOne);
18791 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
18792}
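// Example of the rewrite above for a 32-bit X:
// (select (setcc X, 0, eq), 0, (cttz X)) becomes (and (cttz X), 31).
// Since ISD::CTTZ is defined to return the bit width for a zero input, the
// mask with BitWidth-1 maps that case back to 0 without a select.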
18793
18795 const RISCVSubtarget &Subtarget) {
18796 SDValue Cond = N->getOperand(0);
18797 SDValue True = N->getOperand(1);
18798 SDValue False = N->getOperand(2);
18799 SDLoc DL(N);
18800 EVT VT = N->getValueType(0);
18801 EVT CondVT = Cond.getValueType();
18802
18803 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
18804 return SDValue();
18805
18806 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
18807 // BEXTI, where C is power of 2.
18808 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
18809 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
18810 SDValue LHS = Cond.getOperand(0);
18811 SDValue RHS = Cond.getOperand(1);
18812 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18813 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
18814 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
18815 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
18816 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
18817 return DAG.getSelect(DL, VT,
18818 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
18819 False, True);
18820 }
18821 }
18822 return SDValue();
18823}
18824
18825static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
18826 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
18827 return false;
18828
18829 SwapCC = false;
18830 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
18831 std::swap(TrueVal, FalseVal);
18832 SwapCC = true;
18833 }
18834
18835 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
18836 return false;
18837
18838 SDValue A = FalseVal.getOperand(0);
18839 SDValue B = FalseVal.getOperand(1);
18840 // Add is commutative, so check both orders
18841 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
18842 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
18843}
18844
18845/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
18846/// This allows us to match a vadd.vv fed by a masked vrsub, which reduces
18847/// register pressure over the add followed by masked vsub sequence.
18849 SDLoc DL(N);
18850 EVT VT = N->getValueType(0);
18851 SDValue CC = N->getOperand(0);
18852 SDValue TrueVal = N->getOperand(1);
18853 SDValue FalseVal = N->getOperand(2);
18854
18855 bool SwapCC;
18856 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
18857 return SDValue();
18858
18859 SDValue Sub = SwapCC ? TrueVal : FalseVal;
18860 SDValue A = Sub.getOperand(0);
18861 SDValue B = Sub.getOperand(1);
18862
18863 // Arrange the select such that we can match a masked
18864 // vrsub.vi to perform the conditional negate
18865 SDValue NegB = DAG.getNegative(B, DL, VT);
18866 if (!SwapCC)
18867 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
18868 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
18869 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
18870}
18871
18873 const RISCVSubtarget &Subtarget) {
18874 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
18875 return Folded;
18876
18877 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
18878 return V;
18879
18880 if (Subtarget.hasConditionalMoveFusion())
18881 return SDValue();
18882
18883 SDValue TrueVal = N->getOperand(1);
18884 SDValue FalseVal = N->getOperand(2);
18885 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
18886 return V;
18887 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
18888}
18889
18890/// If we have a build_vector where each lane is binop X, C, where C
18891/// is a constant (but not necessarily the same constant on all lanes),
18892/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
18893/// We assume that materializing a constant build vector will be no more
18894/// expensive than performing O(n) binops.
18896 const RISCVSubtarget &Subtarget,
18897 const RISCVTargetLowering &TLI) {
18898 SDLoc DL(N);
18899 EVT VT = N->getValueType(0);
18900
18901 assert(!VT.isScalableVector() && "unexpected build vector");
18902
18903 if (VT.getVectorNumElements() == 1)
18904 return SDValue();
18905
18906 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
18907 if (!TLI.isBinOp(Opcode))
18908 return SDValue();
18909
18910 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
18911 return SDValue();
18912
18913 // This BUILD_VECTOR involves an implicit truncation, and sinking
18914 // truncates through binops is non-trivial.
18915 if (N->op_begin()->getValueType() != VT.getVectorElementType())
18916 return SDValue();
18917
18918 SmallVector<SDValue> LHSOps;
18919 SmallVector<SDValue> RHSOps;
18920 for (SDValue Op : N->ops()) {
18921 if (Op.isUndef()) {
18922 // We can't form a divide or remainder from undef.
18923 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
18924 return SDValue();
18925
18926 LHSOps.push_back(Op);
18927 RHSOps.push_back(Op);
18928 continue;
18929 }
18930
18931 // TODO: We can handle operations which have a neutral rhs value
18932 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
18933 // of profit in a more explicit manner.
18934 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
18935 return SDValue();
18936
18937 LHSOps.push_back(Op.getOperand(0));
18938 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
18939 !isa<ConstantFPSDNode>(Op.getOperand(1)))
18940 return SDValue();
18941 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
18942 // have different LHS and RHS types.
18943 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
18944 return SDValue();
18945
18946 RHSOps.push_back(Op.getOperand(1));
18947 }
18948
18949 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
18950 DAG.getBuildVector(VT, DL, RHSOps));
18951}
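// Illustrative example: (build_vector (add x0, 1), (add x1, 2), (add x2, 3),
// (add x3, 4)) becomes
// (add (build_vector x0, x1, x2, x3), (build_vector 1, 2, 3, 4)), trading
// four scalar adds for one vector add plus a materialized constant vector.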
18952
18954 ElementCount OpEC = OpVT.getVectorElementCount();
18955 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
18956 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
18957}
18958
18959/// Given fixed length vectors A and B with equal element types, but possibly
18960/// different number of elements, return A + B where either A or B is zero
18961/// padded to the larger number of elements.
18963 SelectionDAG &DAG) {
18964 // NOTE: Manually doing the extract/add/insert scheme produces
18965 // significantly better codegen than the naive pad with zeros
18966 // and add scheme.
18967 EVT AVT = A.getValueType();
18968 EVT BVT = B.getValueType();
18971 std::swap(A, B);
18972 std::swap(AVT, BVT);
18973 }
18974
18975 SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
18976 SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
18977 return DAG.getInsertSubvector(DL, B, Res, 0);
18978}
18979
18981 SelectionDAG &DAG,
18982 const RISCVSubtarget &Subtarget,
18983 const RISCVTargetLowering &TLI) {
18984 // Note: We intentionally do not check the legality of the reduction type.
18985 // We want to handle the m4/m8 *src* types, and thus need to let illegal
18986 // intermediate types flow through here.
18987 if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
18989 return SDValue();
18990
18991 // Recurse through adds (since generic dag canonicalizes to that
18992 // form). TODO: Handle disjoint or here.
18993 if (InVec->getOpcode() == ISD::ADD) {
18994 SDValue A = InVec.getOperand(0);
18995 SDValue B = InVec.getOperand(1);
18996 SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
18997 SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
18998 if (AOpt || BOpt) {
18999 if (AOpt)
19000 A = AOpt;
19001 if (BOpt)
19002 B = BOpt;
19003 // From here, we're doing A + B with mixed types, implicitly zero
19004 // padded to the wider type. Note that we *don't* need the result
19005 // type to be the original VT, and in fact prefer narrower ones
19006 // if possible.
19007 return getZeroPaddedAdd(DL, A, B, DAG);
19008 }
19009 }
19010
19011 // zext a <--> partial_reduce_umla 0, a, 1
19012 // sext a <--> partial_reduce_smla 0, a, 1
19013 if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
19014 InVec.getOpcode() == ISD::SIGN_EXTEND) {
19015 SDValue A = InVec.getOperand(0);
19016 EVT OpVT = A.getValueType();
19017 if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
19018 return SDValue();
19019
19020 MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
19021 SDValue B = DAG.getConstant(0x1, DL, OpVT);
19022 bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
19023 unsigned Opc =
19025 return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
19026 }
19027
19028 // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
19029 // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
19030 // mul (sext a, zext b) -> partial_reduce_sumla 0, a, b
19031 // mul (zext a, sext b) -> partial_reduce_sumla 0, b, a (swapped)
19032 if (InVec.getOpcode() != ISD::MUL)
19033 return SDValue();
19034
19035 SDValue A = InVec.getOperand(0);
19036 SDValue B = InVec.getOperand(1);
19037
19038 if (!ISD::isExtOpcode(A.getOpcode()))
19039 return SDValue();
19040
19041 EVT OpVT = A.getOperand(0).getValueType();
19042 if (OpVT.getVectorElementType() != MVT::i8 ||
19043 OpVT != B.getOperand(0).getValueType() ||
19044 !TLI.isTypeLegal(A.getValueType()))
19045 return SDValue();
19046
19047 unsigned Opc;
19048 if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
19050 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19051 B.getOpcode() == ISD::ZERO_EXTEND)
19053 else if (A.getOpcode() == ISD::SIGN_EXTEND &&
19054 B.getOpcode() == ISD::ZERO_EXTEND)
19056 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19057 B.getOpcode() == ISD::SIGN_EXTEND) {
19059 std::swap(A, B);
19060 } else
19061 return SDValue();
19062
19063 MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
19064 return DAG.getNode(
19065 Opc, DL, ResVT,
19066 {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
19067}
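// Sketch of one case the matcher above handles: for
// vecreduce.add (mul (sext v16i8 a to v16i32), (sext v16i8 b to v16i32)),
// the multiply is replaced by a signed partial-reduction over a v4i32
// accumulator of zeroes, which can then be selected as a vqdot-based
// sequence when Zvqdotq is available.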
19068
19070 const RISCVSubtarget &Subtarget,
19071 const RISCVTargetLowering &TLI) {
19072 if (!Subtarget.hasStdExtZvqdotq())
19073 return SDValue();
19074
19075 SDLoc DL(N);
19076 EVT VT = N->getValueType(0);
19077 SDValue InVec = N->getOperand(0);
19078 if (SDValue V = foldReduceOperandViaVQDOT(InVec, DL, DAG, Subtarget, TLI))
19079 return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
19080 return SDValue();
19081}
19082
19084 const RISCVSubtarget &Subtarget,
19085 const RISCVTargetLowering &TLI) {
19086 SDValue InVec = N->getOperand(0);
19087 SDValue InVal = N->getOperand(1);
19088 SDValue EltNo = N->getOperand(2);
19089 SDLoc DL(N);
19090
19091 EVT VT = InVec.getValueType();
19092 if (VT.isScalableVector())
19093 return SDValue();
19094
19095 if (!InVec.hasOneUse())
19096 return SDValue();
19097
19098 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
19099 // move the insert_vector_elts into the arms of the binop. Note that
19100 // the new RHS must be a constant.
19101 const unsigned InVecOpcode = InVec->getOpcode();
19102 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
19103 InVal.hasOneUse()) {
19104 SDValue InVecLHS = InVec->getOperand(0);
19105 SDValue InVecRHS = InVec->getOperand(1);
19106 SDValue InValLHS = InVal->getOperand(0);
19107 SDValue InValRHS = InVal->getOperand(1);
19108
19110 return SDValue();
19111 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
19112 return SDValue();
19113 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19114 // have different LHS and RHS types.
19115 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
19116 return SDValue();
19118 InVecLHS, InValLHS, EltNo);
19120 InVecRHS, InValRHS, EltNo);
19121 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
19122 }
19123
19124 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
19125 // move the insert_vector_elt to the source operand of the concat_vector.
19126 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
19127 return SDValue();
19128
19129 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19130 if (!IndexC)
19131 return SDValue();
19132 unsigned Elt = IndexC->getZExtValue();
19133
19134 EVT ConcatVT = InVec.getOperand(0).getValueType();
19135 if (ConcatVT.getVectorElementType() != InVal.getValueType())
19136 return SDValue();
19137 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19138 unsigned NewIdx = Elt % ConcatNumElts;
19139
19140 unsigned ConcatOpIdx = Elt / ConcatNumElts;
19141 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
19142 ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);
19143
19144 SmallVector<SDValue> ConcatOps(InVec->ops());
19145 ConcatOps[ConcatOpIdx] = ConcatOp;
19146 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19147}
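// Example of the concat_vectors case above: inserting a scalar into lane 5
// of (concat_vectors v4i32:A, v4i32:B) becomes an insert into lane 1 of B
// followed by re-concatenation, i.e.
// (concat_vectors A, (insert_vector_elt B, InVal, 1)).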
19148
19149// If we're concatenating a series of vector loads like
19150// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
19151// Then we can turn this into a strided load by widening the vector elements
19152// vlse32 p, stride=n
19154 const RISCVSubtarget &Subtarget,
19155 const RISCVTargetLowering &TLI) {
19156 SDLoc DL(N);
19157 EVT VT = N->getValueType(0);
19158
19159 // Only perform this combine on legal MVTs.
19160 if (!TLI.isTypeLegal(VT))
19161 return SDValue();
19162
19163 // TODO: Potentially extend this to scalable vectors
19164 if (VT.isScalableVector())
19165 return SDValue();
19166
19167 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
19168 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
19169 !SDValue(BaseLd, 0).hasOneUse())
19170 return SDValue();
19171
19172 EVT BaseLdVT = BaseLd->getValueType(0);
19173
19174 // Go through the loads and check that they're strided
19176 Lds.push_back(BaseLd);
19177 Align Align = BaseLd->getAlign();
19178 for (SDValue Op : N->ops().drop_front()) {
19179 auto *Ld = dyn_cast<LoadSDNode>(Op);
19180 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
19181 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
19182 Ld->getValueType(0) != BaseLdVT)
19183 return SDValue();
19184
19185 Lds.push_back(Ld);
19186
19187 // The common alignment is the most restrictive (smallest) of all the loads
19188 Align = std::min(Align, Ld->getAlign());
19189 }
19190
19191 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
19192 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
19193 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
19194 // If the load ptrs can be decomposed into a common (Base + Index) with a
19195 // common constant stride, then return the constant stride.
19196 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
19197 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
19198 if (BIO1.equalBaseIndex(BIO2, DAG))
19199 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
19200
19201 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
19202 SDValue P1 = Ld1->getBasePtr();
19203 SDValue P2 = Ld2->getBasePtr();
19204 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
19205 return {{P2.getOperand(1), false}};
19206 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
19207 return {{P1.getOperand(1), true}};
19208
19209 return std::nullopt;
19210 };
19211
19212 // Get the distance between the first and second loads
19213 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
19214 if (!BaseDiff)
19215 return SDValue();
19216
19217 // Check all the loads are the same distance apart
19218 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
19219 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
19220 return SDValue();
19221
19222 // TODO: At this point, we've successfully matched a generalized gather
19223 // load. Maybe we should emit that, and then move the specialized
19224 // matchers above and below into a DAG combine?
19225
19226 // Get the widened scalar type, e.g. v4i8 -> i32
19227 unsigned WideScalarBitWidth =
19228 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
19229 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
19230
19231 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
19232 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
19233 if (!TLI.isTypeLegal(WideVecVT))
19234 return SDValue();
19235
19236 // Check that the operation is legal
19237 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
19238 return SDValue();
19239
19240 auto [StrideVariant, MustNegateStride] = *BaseDiff;
19241 SDValue Stride =
19242 std::holds_alternative<SDValue>(StrideVariant)
19243 ? std::get<SDValue>(StrideVariant)
19244 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
19245 Lds[0]->getOffset().getValueType());
19246 if (MustNegateStride)
19247 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
19248
19249 SDValue AllOneMask =
19250 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
19251 DAG.getConstant(1, DL, MVT::i1));
19252
19253 uint64_t MemSize;
19254 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
19255 ConstStride && ConstStride->getSExtValue() >= 0)
19256 // total size = (elsize * n) + (stride - elsize) * (n-1)
19257 // = elsize + stride * (n-1)
19258 MemSize = WideScalarVT.getSizeInBits() +
19259 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
19260 else
19261 // If Stride isn't constant, then we can't know how much it will load
19263
19265 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
19266 Align);
19267
19268 SDValue StridedLoad = DAG.getStridedLoadVP(
19269 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
19270 AllOneMask,
19271 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
19272
19273 for (SDValue Ld : N->ops())
19274 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
19275
19276 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
19277}
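// Worked example of the combine above: for
// concat_vectors (load v4i8, p), (load v4i8, p+16), (load v4i8, p+32),
// (load v4i8, p+48), the element type is widened to i32 and the whole
// sequence becomes a stride-16 VP strided load of v4i32, then bitcast back
// to the original v16i8 result type.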
19278
19280 const RISCVSubtarget &Subtarget,
19281 const RISCVTargetLowering &TLI) {
19282 SDLoc DL(N);
19283 EVT VT = N->getValueType(0);
19284 const unsigned ElementSize = VT.getScalarSizeInBits();
19285 const unsigned NumElts = VT.getVectorNumElements();
19286 SDValue V1 = N->getOperand(0);
19287 SDValue V2 = N->getOperand(1);
19288 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
19289 MVT XLenVT = Subtarget.getXLenVT();
19290
19291 // Recognize a disguised select of add/sub.
19292 bool SwapCC;
19293 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
19294 matchSelectAddSub(V1, V2, SwapCC)) {
19295 SDValue Sub = SwapCC ? V1 : V2;
19296 SDValue A = Sub.getOperand(0);
19297 SDValue B = Sub.getOperand(1);
19298
19299 SmallVector<SDValue> MaskVals;
19300 for (int MaskIndex : Mask) {
19301 bool SelectMaskVal = (MaskIndex < (int)NumElts);
19302 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
19303 }
19304 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
19305 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
19306 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
19307
19308 // Arrange the select such that we can match a masked
19309 // vrsub.vi to perform the conditional negate
19310 SDValue NegB = DAG.getNegative(B, DL, VT);
19311 if (!SwapCC)
19312 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19313 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19314 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19315 }
19316
19317 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
19318 // during the combine phase before type legalization, and relies on
19319 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
19320 // for the source mask.
19321 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
19322 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
19323 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
19324 return SDValue();
19325
19326 SmallVector<int, 8> NewMask;
19327 narrowShuffleMaskElts(2, Mask, NewMask);
19328
19329 LLVMContext &C = *DAG.getContext();
19330 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
19331 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
19332 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
19333 DAG.getBitcast(NewVT, V2), NewMask);
19334 return DAG.getBitcast(VT, Res);
19335}
19336
19338 const RISCVSubtarget &Subtarget) {
19339 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19340
19341 if (N->getValueType(0).isFixedLengthVector())
19342 return SDValue();
19343
19344 SDValue Addend = N->getOperand(0);
19345 SDValue MulOp = N->getOperand(1);
19346
19347 if (N->getOpcode() == RISCVISD::ADD_VL) {
19348 SDValue AddPassthruOp = N->getOperand(2);
19349 if (!AddPassthruOp.isUndef())
19350 return SDValue();
19351 }
19352
19353 auto IsVWMulOpc = [](unsigned Opc) {
19354 switch (Opc) {
19355 case RISCVISD::VWMUL_VL:
19356 case RISCVISD::VWMULU_VL:
19357 case RISCVISD::VWMULSU_VL:
19358 return true;
19359 default:
19360 return false;
19361 }
19362 };
19363
19364 if (!IsVWMulOpc(MulOp.getOpcode()))
19365 std::swap(Addend, MulOp);
19366
19367 if (!IsVWMulOpc(MulOp.getOpcode()))
19368 return SDValue();
19369
19370 SDValue MulPassthruOp = MulOp.getOperand(2);
19371
19372 if (!MulPassthruOp.isUndef())
19373 return SDValue();
19374
19375 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19376 const RISCVSubtarget &Subtarget) {
19377 if (N->getOpcode() == ISD::ADD) {
19378 SDLoc DL(N);
19379 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19380 Subtarget);
19381 }
19382 return std::make_pair(N->getOperand(3), N->getOperand(4));
19383 }(N, DAG, Subtarget);
19384
19385 SDValue MulMask = MulOp.getOperand(3);
19386 SDValue MulVL = MulOp.getOperand(4);
19387
19388 if (AddMask != MulMask || AddVL != MulVL)
19389 return SDValue();
19390
19391 const auto &TSInfo =
19392 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
19393 unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());
19394
19395 SDLoc DL(N);
19396 EVT VT = N->getValueType(0);
19397 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
19398 AddVL};
19399 return DAG.getNode(Opc, DL, VT, Ops);
19400}
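// Example of the accumulate fold above: (add_vl acc, (vwmul_vl a, b, ...),
// undef, mask, vl) with matching mask/VL on both nodes is rewritten to the
// corresponding widening multiply-accumulate node (the VWMACC form picked by
// getMAccOpcode), so the widened multiply result no longer needs its own
// register.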
19401
19403 const RISCVSubtarget &Subtarget) {
19404
19405 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19406
19407 if (!N->getValueType(0).isVector())
19408 return SDValue();
19409
19410 SDValue Addend = N->getOperand(0);
19411 SDValue DotOp = N->getOperand(1);
19412
19413 if (N->getOpcode() == RISCVISD::ADD_VL) {
19414 SDValue AddPassthruOp = N->getOperand(2);
19415 if (!AddPassthruOp.isUndef())
19416 return SDValue();
19417 }
19418
19419 auto IsVqdotqOpc = [](unsigned Opc) {
19420 switch (Opc) {
19421 case RISCVISD::VQDOT_VL:
19422 case RISCVISD::VQDOTU_VL:
19423 case RISCVISD::VQDOTSU_VL:
19424 return true;
19425 default:
19426 return false;
19427 }
19428 };
19429
19430 if (!IsVqdotqOpc(DotOp.getOpcode()))
19431 std::swap(Addend, DotOp);
19432
19433 if (!IsVqdotqOpc(DotOp.getOpcode()))
19434 return SDValue();
19435
19436 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19437 const RISCVSubtarget &Subtarget) {
19438 if (N->getOpcode() == ISD::ADD) {
19439 SDLoc DL(N);
19440 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19441 Subtarget);
19442 }
19443 return std::make_pair(N->getOperand(3), N->getOperand(4));
19444 }(N, DAG, Subtarget);
19445
19446 SDValue MulVL = DotOp.getOperand(4);
19447 if (AddVL != MulVL)
19448 return SDValue();
19449
19450 if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
19451 AddMask.getOperand(0) != MulVL)
19452 return SDValue();
19453
19454 SDValue AccumOp = DotOp.getOperand(2);
19455 SDLoc DL(N);
19456 EVT VT = N->getValueType(0);
19457 Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
19458 DAG.getUNDEF(VT), AddMask, AddVL);
19459
19460 SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
19461 DotOp.getOperand(3), DotOp->getOperand(4)};
19462 return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
19463}
19464
19465static bool
19467 ISD::MemIndexType &IndexType,
19469 if (!DCI.isBeforeLegalize())
19470 return false;
19471
19472 SelectionDAG &DAG = DCI.DAG;
19473 const MVT XLenVT =
19474 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
19475
19476 const EVT IndexVT = Index.getValueType();
19477
19478 // RISC-V indexed loads only support the "unsigned unscaled" addressing
19479 // mode, so anything else must be manually legalized.
19480 if (!isIndexTypeSigned(IndexType))
19481 return false;
19482
19483 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
19484 // Any index legalization should first promote to XLenVT, so we don't lose
19485 // bits when scaling. This may create an illegal index type so we let
19486 // LLVM's legalization take care of the splitting.
19487 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
19488 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
19489 IndexVT.changeVectorElementType(XLenVT), Index);
19490 }
19491 IndexType = ISD::UNSIGNED_SCALED;
19492 return true;
19493}
19494
19495/// Match the index vector of a scatter or gather node as the shuffle mask
19496/// which performs the rearrangement if possible. Will only match if
19497/// all lanes are touched, and thus replacing the scatter or gather with
19498/// a unit strided access and shuffle is legal.
19499static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
19500 SmallVector<int> &ShuffleMask) {
19501 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19502 return false;
19503 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19504 return false;
19505
19506 const unsigned ElementSize = VT.getScalarStoreSize();
19507 const unsigned NumElems = VT.getVectorNumElements();
19508
19509 // Create the shuffle mask and check all bits active
19510 assert(ShuffleMask.empty());
19511 BitVector ActiveLanes(NumElems);
19512 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19513 // TODO: We've found an active bit of UB, and could be
19514 // more aggressive here if desired.
19515 if (Index->getOperand(i)->isUndef())
19516 return false;
19517 uint64_t C = Index->getConstantOperandVal(i);
19518 if (C % ElementSize != 0)
19519 return false;
19520 C = C / ElementSize;
19521 if (C >= NumElems)
19522 return false;
19523 ShuffleMask.push_back(C);
19524 ActiveLanes.set(C);
19525 }
19526 return ActiveLanes.all();
19527}
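// Example: a gather of v4i32 with an all-ones mask and constant byte
// indices <4, 0, 12, 8> touches every lane exactly once, so it matches the
// shuffle mask <1, 0, 3, 2> and the caller can use a unit-strided access
// plus a shuffle instead of an indexed one.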
19528
19529/// Match the index of a gather or scatter operation as an operation
19530/// with twice the element width and half the number of elements. This is
19531/// generally profitable (if legal) because these operations are linear
19532/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
19533/// come out ahead.
19534static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
19535 Align BaseAlign, const RISCVSubtarget &ST) {
19536 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19537 return false;
19538 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19539 return false;
19540
19541 // Attempt a doubling. If we can use an element type 4x or 8x in
19542 // size, this will happen via multiple iterations of the transform.
19543 const unsigned NumElems = VT.getVectorNumElements();
19544 if (NumElems % 2 != 0)
19545 return false;
19546
19547 const unsigned ElementSize = VT.getScalarStoreSize();
19548 const unsigned WiderElementSize = ElementSize * 2;
19549 if (WiderElementSize > ST.getELen()/8)
19550 return false;
19551
19552 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
19553 return false;
19554
19555 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19556 // TODO: We've found an active bit of UB, and could be
19557 // more aggressive here if desired.
19558 if (Index->getOperand(i)->isUndef())
19559 return false;
19560 // TODO: This offset check is too strict if we support fully
19561 // misaligned memory operations.
19562 uint64_t C = Index->getConstantOperandVal(i);
19563 if (i % 2 == 0) {
19564 if (C % WiderElementSize != 0)
19565 return false;
19566 continue;
19567 }
19568 uint64_t Last = Index->getConstantOperandVal(i-1);
19569 if (C != Last + ElementSize)
19570 return false;
19571 }
19572 return true;
19573}
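// Example: for a v4i8 access with byte indices <0, 1, 4, 5>, every even
// index is a multiple of the doubled element size and every odd index is
// the previous one plus one byte, so the same memory can be touched as a
// v2i16 access with indices <0, 4> (alignment permitting).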
19574
19575// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
19576// This benefits cases where X and Y are low-precision vectors of the same
19577// value type. Since the truncate would be lowered into n levels of
19578// TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such
19579// a pattern would have been expanded into a series of "vsetvli" and "vnsrl"
19580// instructions by the time it reaches this point.
19582 SDValue Mask = N->getOperand(1);
19583 SDValue VL = N->getOperand(2);
19584
19585 bool IsVLMAX = isAllOnesConstant(VL) ||
19586 (isa<RegisterSDNode>(VL) &&
19587 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
19588 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
19589 Mask.getOperand(0) != VL)
19590 return SDValue();
19591
19592 auto IsTruncNode = [&](SDValue V) {
19593 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19594 V.getOperand(1) == Mask && V.getOperand(2) == VL;
19595 };
19596
19597 SDValue Op = N->getOperand(0);
19598
19599 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
19600 // to distinguish such pattern.
19601 while (IsTruncNode(Op)) {
19602 if (!Op.hasOneUse())
19603 return SDValue();
19604 Op = Op.getOperand(0);
19605 }
19606
19607 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
19608 return SDValue();
19609
19610 SDValue N0 = Op.getOperand(0);
19611 SDValue N1 = Op.getOperand(1);
19612 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
19613 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
19614 return SDValue();
19615
19616 SDValue N00 = N0.getOperand(0);
19617 SDValue N10 = N1.getOperand(0);
19618 if (!N00.getValueType().isVector() ||
19619 N00.getValueType() != N10.getValueType() ||
19620 N->getValueType(0) != N10.getValueType())
19621 return SDValue();
19622
19623 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
19624 SDValue SMin =
19625 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
19626 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
19627 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
19628}
19629
19630// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
19631// maximum value for the truncated type.
19632// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
19633// is the signed maximum value for the truncated type and C2 is the signed
19634// minimum value.
19636 const RISCVSubtarget &Subtarget) {
19637 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
19638
19639 MVT VT = N->getSimpleValueType(0);
19640
19641 SDValue Mask = N->getOperand(1);
19642 SDValue VL = N->getOperand(2);
19643
19644 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
19645 APInt &SplatVal) {
19646 if (V.getOpcode() != Opc &&
19647 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
19648 V.getOperand(3) == Mask && V.getOperand(4) == VL))
19649 return SDValue();
19650
19651 SDValue Op = V.getOperand(1);
19652
19653 // Peek through conversion between fixed and scalable vectors.
19654 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
19655 isNullConstant(Op.getOperand(2)) &&
19656 Op.getOperand(1).getValueType().isFixedLengthVector() &&
19657 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19658 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
19659 isNullConstant(Op.getOperand(1).getOperand(1)))
19660 Op = Op.getOperand(1).getOperand(0);
19661
19662 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
19663 return V.getOperand(0);
19664
19665 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
19666 Op.getOperand(2) == VL) {
19667 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
19668 SplatVal =
19669 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
19670 return V.getOperand(0);
19671 }
19672 }
19673
19674 return SDValue();
19675 };
19676
19677 SDLoc DL(N);
19678
19679 auto DetectUSatPattern = [&](SDValue V) {
19680 APInt LoC, HiC;
19681
19682 // Simple case, V is a UMIN.
19683 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
19684 if (HiC.isMask(VT.getScalarSizeInBits()))
19685 return UMinOp;
19686
19687 // If we have an SMAX that removes negative numbers first, then we can match
19688 // SMIN instead of UMIN.
19689 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19690 if (SDValue SMaxOp =
19691 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19692 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
19693 return SMinOp;
19694
19695 // If we have an SMIN before an SMAX and the SMAX constant is less than or
19696 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
19697 // first.
19698 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19699 if (SDValue SMinOp =
19700 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19701 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
19702 HiC.uge(LoC))
19703 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
19704 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
19705 Mask, VL);
19706
19707 return SDValue();
19708 };
19709
19710 auto DetectSSatPattern = [&](SDValue V) {
19711 unsigned NumDstBits = VT.getScalarSizeInBits();
19712 unsigned NumSrcBits = V.getScalarValueSizeInBits();
19713 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
19714 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
19715
19716 APInt HiC, LoC;
19717 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19718 if (SDValue SMaxOp =
19719 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19720 if (HiC == SignedMax && LoC == SignedMin)
19721 return SMaxOp;
19722
19723 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19724 if (SDValue SMinOp =
19725 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19726 if (HiC == SignedMax && LoC == SignedMin)
19727 return SMinOp;
19728
19729 return SDValue();
19730 };
19731
19732 SDValue Src = N->getOperand(0);
19733
19734 // Look through multiple layers of truncates.
19735 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19736 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
19737 Src.hasOneUse())
19738 Src = Src.getOperand(0);
19739
19740 SDValue Val;
19741 unsigned ClipOpc;
19742 if ((Val = DetectUSatPattern(Src)))
19743 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
19744 else if ((Val = DetectSSatPattern(Src)))
19745 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
19746 else
19747 return SDValue();
19748
19749 MVT ValVT = Val.getSimpleValueType();
19750
19751 do {
19752 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
19753 ValVT = ValVT.changeVectorElementType(ValEltVT);
19754 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
19755 } while (ValVT != VT);
19756
19757 return Val;
19758}
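// Example of the saturating-truncate match above when truncating to i8
// elements: (umin X, 255) is recognized as the USAT pattern and becomes
// TRUNCATE_VECTOR_VL_USAT (vnclipu), while (smin (smax X, -128), 127) is
// recognized as the SSAT pattern and becomes TRUNCATE_VECTOR_VL_SSAT
// (vnclip).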
19759
19760// Convert
19761// (iX ctpop (bitcast (vXi1 A)))
19762// ->
19763// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19764// and
19765// (iN reduce.add (zext (vXi1 A) to vXiN))
19766// ->
19767// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19768// FIXME: It's complicated to match all the variations of this after type
19769// legalization so we only handle the pre-type legalization pattern, but that
19770// requires the fixed vector type to be legal.
19772 const RISCVSubtarget &Subtarget) {
19773 unsigned Opc = N->getOpcode();
19775 "Unexpected opcode");
19776 EVT VT = N->getValueType(0);
19777 if (!VT.isScalarInteger())
19778 return SDValue();
19779
19780 SDValue Src = N->getOperand(0);
19781
19782 if (Opc == ISD::CTPOP) {
19783 // Peek through zero_extend. It doesn't change the count.
19784 if (Src.getOpcode() == ISD::ZERO_EXTEND)
19785 Src = Src.getOperand(0);
19786
19787 if (Src.getOpcode() != ISD::BITCAST)
19788 return SDValue();
19789 Src = Src.getOperand(0);
19790 } else if (Opc == ISD::VECREDUCE_ADD) {
19791 if (Src.getOpcode() != ISD::ZERO_EXTEND)
19792 return SDValue();
19793 Src = Src.getOperand(0);
19794 }
19795
19796 EVT SrcEVT = Src.getValueType();
19797 if (!SrcEVT.isSimple())
19798 return SDValue();
19799
19800 MVT SrcMVT = SrcEVT.getSimpleVT();
19801 // Make sure the input is an i1 vector.
19802 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
19803 return SDValue();
19804
19805 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19806 if (!TLI.isTypeLegal(SrcMVT))
19807 return SDValue();
19808
19809 // Check that destination type is large enough to hold result without
19810 // overflow.
19811 if (Opc == ISD::VECREDUCE_ADD) {
19812 unsigned EltSize = SrcMVT.getScalarSizeInBits();
19813 unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
19814 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
19815 unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
19816 ? SrcMVT.getVectorNumElements()
19818 VectorBitsMax, EltSize, MinSize);
19819 if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
19820 return SDValue();
19821 }
19822
19823 MVT ContainerVT = SrcMVT;
19824 if (SrcMVT.isFixedLengthVector()) {
19825 ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
19826 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
19827 }
19828
19829 SDLoc DL(N);
19830 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
19831
19832 MVT XLenVT = Subtarget.getXLenVT();
19833 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
19834 return DAG.getZExtOrTrunc(Pop, DL, VT);
19835}
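// Example of the patterns handled above: (i16 ctpop (bitcast v16i1 M)) and
// (i32 vecreduce.add (zext v16i1 M to v16i32)) both count the set bits of
// the mask M, so each is lowered to a vcpop.m on the (possibly
// container-converted) mask followed by a zext or truncate to the scalar
// result type.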
19836
19839 const RISCVSubtarget &Subtarget) {
19840 // (shl (zext x), y) -> (vwsll x, y)
19841 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
19842 return V;
19843
19844 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
19845 // (shl (zext x), C) -> (vwmulu x, 1u << C)
19846
19847 if (!DCI.isAfterLegalizeDAG())
19848 return SDValue();
19849
19850 SDValue LHS = N->getOperand(0);
19851 if (!LHS.hasOneUse())
19852 return SDValue();
19853 unsigned Opcode;
19854 switch (LHS.getOpcode()) {
19855 case ISD::SIGN_EXTEND:
19856 case RISCVISD::VSEXT_VL:
19857 Opcode = RISCVISD::VWMULSU_VL;
19858 break;
19859 case ISD::ZERO_EXTEND:
19860 case RISCVISD::VZEXT_VL:
19861 Opcode = RISCVISD::VWMULU_VL;
19862 break;
19863 default:
19864 return SDValue();
19865 }
19866
19867 SDValue RHS = N->getOperand(1);
19868 APInt ShAmt;
19869 uint64_t ShAmtInt;
19870 if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
19871 ShAmtInt = ShAmt.getZExtValue();
19872 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
19873 RHS.getOperand(1).getOpcode() == ISD::Constant)
19874 ShAmtInt = RHS.getConstantOperandVal(1);
19875 else
19876 return SDValue();
19877
19878 // Better foldings:
19879 // (shl (sext x), 1) -> (vwadd x, x)
19880 // (shl (zext x), 1) -> (vwaddu x, x)
19881 if (ShAmtInt <= 1)
19882 return SDValue();
19883
19884 SDValue NarrowOp = LHS.getOperand(0);
19885 MVT NarrowVT = NarrowOp.getSimpleValueType();
19886 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
19887 if (ShAmtInt >= NarrowBits)
19888 return SDValue();
19889 MVT VT = N->getSimpleValueType(0);
19890 if (NarrowBits * 2 != VT.getScalarSizeInBits())
19891 return SDValue();
19892
19893 SelectionDAG &DAG = DCI.DAG;
19894 SDLoc DL(N);
19895 SDValue Passthru, Mask, VL;
19896 switch (N->getOpcode()) {
19897 case ISD::SHL:
19898 Passthru = DAG.getUNDEF(VT);
19899 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
19900 break;
19901 case RISCVISD::SHL_VL:
19902 Passthru = N->getOperand(2);
19903 Mask = N->getOperand(3);
19904 VL = N->getOperand(4);
19905 break;
19906 default:
19907 llvm_unreachable("Expected SHL");
19908 }
19909 return DAG.getNode(Opcode, DL, VT, NarrowOp,
19910 DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
19911 Passthru, Mask, VL);
19912}
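// Example of the multiply-based fold above: (shl (zext v8i8 X to v8i16),
// splat 3) becomes (vwmulu_vl X, splat 8), i.e. a single widening multiply
// by 1 << 3; shift amounts of 0 or 1 are deliberately left alone so the
// vwadd/vwaddu folds noted above can handle them.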
19913
19915 DAGCombinerInfo &DCI) const {
19916 SelectionDAG &DAG = DCI.DAG;
19917 const MVT XLenVT = Subtarget.getXLenVT();
19918 SDLoc DL(N);
19919
19920 // Helper to call SimplifyDemandedBits on an operand of N where only some low
19921 // bits are demanded. N will be added to the Worklist if it was not deleted.
19922 // Caller should return SDValue(N, 0) if this returns true.
19923 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
19924 SDValue Op = N->getOperand(OpNo);
19925 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
19926 if (!SimplifyDemandedBits(Op, Mask, DCI))
19927 return false;
19928
19929 if (N->getOpcode() != ISD::DELETED_NODE)
19930 DCI.AddToWorklist(N);
19931 return true;
19932 };
19933
19934 switch (N->getOpcode()) {
19935 default:
19936 break;
19937 case RISCVISD::SplitF64: {
19938 SDValue Op0 = N->getOperand(0);
19939 // If the input to SplitF64 is just BuildPairF64 then the operation is
19940 // redundant. Instead, use BuildPairF64's operands directly.
19941 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
19942 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
19943
19944 if (Op0->isUndef()) {
19945 SDValue Lo = DAG.getUNDEF(MVT::i32);
19946 SDValue Hi = DAG.getUNDEF(MVT::i32);
19947 return DCI.CombineTo(N, Lo, Hi);
19948 }
19949
19950 // It's cheaper to materialise two 32-bit integers than to load a double
19951 // from the constant pool and transfer it to integer registers through the
19952 // stack.
19953 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
19954 APInt V = C->getValueAPF().bitcastToAPInt();
19955 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
19956 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
19957 return DCI.CombineTo(N, Lo, Hi);
19958 }
19959
19960 // This is a target-specific version of a DAGCombine performed in
19961 // DAGCombiner::visitBITCAST. It performs the equivalent of:
19962 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
19963 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
19964 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
19965 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
19966 break;
19967 SDValue NewSplitF64 =
19968 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
19969 Op0.getOperand(0));
19970 SDValue Lo = NewSplitF64.getValue(0);
19971 SDValue Hi = NewSplitF64.getValue(1);
19972 APInt SignBit = APInt::getSignMask(32);
19973 if (Op0.getOpcode() == ISD::FNEG) {
19974 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
19975 DAG.getConstant(SignBit, DL, MVT::i32));
19976 return DCI.CombineTo(N, Lo, NewHi);
19977 }
19978 assert(Op0.getOpcode() == ISD::FABS);
19979 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
19980 DAG.getConstant(~SignBit, DL, MVT::i32));
19981 return DCI.CombineTo(N, Lo, NewHi);
19982 }
19983 case RISCVISD::SLLW:
19984 case RISCVISD::SRAW:
19985 case RISCVISD::SRLW:
19986 case RISCVISD::RORW:
19987 case RISCVISD::ROLW: {
19988 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
19989 if (SimplifyDemandedLowBitsHelper(0, 32) ||
19990 SimplifyDemandedLowBitsHelper(1, 5))
19991 return SDValue(N, 0);
19992
19993 break;
19994 }
19995 case RISCVISD::CLZW:
19996 case RISCVISD::CTZW: {
19997 // Only the lower 32 bits of the first operand are read
19998 if (SimplifyDemandedLowBitsHelper(0, 32))
19999 return SDValue(N, 0);
20000 break;
20001 }
20002 case RISCVISD::FMV_W_X_RV64: {
20004 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
20004 // conversion is unnecessary and can be replaced with the
20005 // FMV_X_ANYEXTW_RV64 operand.
20006 SDValue Op0 = N->getOperand(0);
20007 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
20008 return Op0.getOperand(0);
20009 break;
20010 }
20011 case RISCVISD::FMV_X_ANYEXTH:
20012 case RISCVISD::FMV_X_ANYEXTW_RV64: {
20013 SDLoc DL(N);
20014 SDValue Op0 = N->getOperand(0);
20015 MVT VT = N->getSimpleValueType(0);
20016
20017 // Constant fold.
20018 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
20019 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
20020 return DAG.getConstant(Val, DL, VT);
20021 }
20022
20023 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
20024 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
20025 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
20026 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
20027 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
20028 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
20029 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
20030 assert(Op0.getOperand(0).getValueType() == VT &&
20031 "Unexpected value type!");
20032 return Op0.getOperand(0);
20033 }
20034
20035 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
20036 cast<LoadSDNode>(Op0)->isSimple()) {
20037 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
20038 auto *LN0 = cast<LoadSDNode>(Op0);
20039 SDValue Load =
20040 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
20041 LN0->getBasePtr(), IVT, LN0->getMemOperand());
20042 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
20043 return Load;
20044 }
20045
20046 // This is a target-specific version of a DAGCombine performed in
20047 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20048 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20049 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20050 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20051 !Op0.getNode()->hasOneUse())
20052 break;
20053 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
20054 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
20055 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
20056 if (Op0.getOpcode() == ISD::FNEG)
20057 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
20058 DAG.getConstant(SignBit, DL, VT));
20059
20060 assert(Op0.getOpcode() == ISD::FABS);
20061 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
20062 DAG.getConstant(~SignBit, DL, VT));
20063 }
20064 case ISD::ABS: {
20065 EVT VT = N->getValueType(0);
20066 SDValue N0 = N->getOperand(0);
20067 // abs (sext) -> zext (abs)
20068 // abs (zext) -> zext (handled elsewhere)
20069 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
20070 SDValue Src = N0.getOperand(0);
20071 SDLoc DL(N);
20072 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
20073 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
20074 }
20075 break;
20076 }
20077 case ISD::ADD: {
20078 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20079 return V;
20080 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
20081 return V;
20082 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20083 return V;
20084 return performADDCombine(N, DCI, Subtarget);
20085 }
20086 case ISD::SUB: {
20087 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20088 return V;
20089 return performSUBCombine(N, DAG, Subtarget);
20090 }
20091 case ISD::AND:
20092 return performANDCombine(N, DCI, Subtarget);
20093 case ISD::OR: {
20094 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20095 return V;
20096 return performORCombine(N, DCI, Subtarget);
20097 }
20098 case ISD::XOR:
20099 return performXORCombine(N, DAG, Subtarget);
20100 case ISD::MUL:
20101 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20102 return V;
20103 return performMULCombine(N, DAG, DCI, Subtarget);
20104 case ISD::SDIV:
20105 case ISD::UDIV:
20106 case ISD::SREM:
20107 case ISD::UREM:
20108 if (SDValue V = combineBinOpOfZExt(N, DAG))
20109 return V;
20110 break;
20111 case ISD::FMUL: {
20112 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
20113 SDValue N0 = N->getOperand(0);
20114 SDValue N1 = N->getOperand(1);
20115 if (N0->getOpcode() != ISD::FCOPYSIGN)
20116 std::swap(N0, N1);
20117 if (N0->getOpcode() != ISD::FCOPYSIGN)
20118 return SDValue();
20119 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
20120 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
20121 return SDValue();
20122 EVT VT = N->getValueType(0);
20123 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
20124 return SDValue();
20125 SDValue Sign = N0->getOperand(1);
20126 if (Sign.getValueType() != VT)
20127 return SDValue();
20128 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
20129 }
20130 case ISD::FADD:
20131 case ISD::UMAX:
20132 case ISD::UMIN:
20133 case ISD::SMAX:
20134 case ISD::SMIN:
20135 case ISD::FMAXNUM:
20136 case ISD::FMINNUM: {
20137 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
20138 return V;
20139 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
20140 return V;
20141 return SDValue();
20142 }
20143 case ISD::SETCC:
20144 return performSETCCCombine(N, DCI, Subtarget);
20145 case ISD::SIGN_EXTEND_INREG:
20146 return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
20147 case ISD::ZERO_EXTEND:
20148 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
20149 // type legalization. This is safe because fp_to_uint produces poison if
20150 // it overflows.
20151 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
20152 SDValue Src = N->getOperand(0);
20153 if (Src.getOpcode() == ISD::FP_TO_UINT &&
20154 isTypeLegal(Src.getOperand(0).getValueType()))
20155 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
20156 Src.getOperand(0));
20157 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
20158 isTypeLegal(Src.getOperand(1).getValueType())) {
20159 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
20160 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
20161 Src.getOperand(0), Src.getOperand(1));
20162 DCI.CombineTo(N, Res);
20163 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
20164 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
20165 return SDValue(N, 0); // Return N so it doesn't get rechecked.
20166 }
20167 }
20168 return SDValue();
20169 case RISCVISD::TRUNCATE_VECTOR_VL:
20170 if (SDValue V = combineTruncOfSraSext(N, DAG))
20171 return V;
20172 return combineTruncToVnclip(N, DAG, Subtarget);
20173 case ISD::VP_TRUNCATE:
20174 return performVP_TRUNCATECombine(N, DAG, Subtarget);
20175 case ISD::TRUNCATE:
20176 return performTRUNCATECombine(N, DAG, Subtarget);
20177 case ISD::SELECT:
20178 return performSELECTCombine(N, DAG, Subtarget);
20179 case ISD::VSELECT:
20180 return performVSELECTCombine(N, DAG);
20181 case RISCVISD::CZERO_EQZ:
20182 case RISCVISD::CZERO_NEZ: {
20183 SDValue Val = N->getOperand(0);
20184 SDValue Cond = N->getOperand(1);
20185
20186 unsigned Opc = N->getOpcode();
20187
20188 // czero_eqz x, x -> x
20189 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
20190 return Val;
20191
20192 unsigned InvOpc =
20193 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
20194
20195 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
20196 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
20197 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
20198 SDValue NewCond = Cond.getOperand(0);
20199 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
20200 if (DAG.MaskedValueIsZero(NewCond, Mask))
20201 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
20202 }
20203 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
20204 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
20205 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
20206 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
20207 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
20208 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
20209 if (ISD::isIntEqualitySetCC(CCVal))
20210 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
20211 N->getValueType(0), Val, Cond.getOperand(0));
20212 }
20213 return SDValue();
20214 }
20215 case RISCVISD::SELECT_CC: {
20216 // Transform
20217 SDValue LHS = N->getOperand(0);
20218 SDValue RHS = N->getOperand(1);
20219 SDValue CC = N->getOperand(2);
20220 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
20221 SDValue TrueV = N->getOperand(3);
20222 SDValue FalseV = N->getOperand(4);
20223 SDLoc DL(N);
20224 EVT VT = N->getValueType(0);
20225
20226 // If the True and False values are the same, we don't need a select_cc.
20227 if (TrueV == FalseV)
20228 return TrueV;
20229
20230 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
20231 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
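    // For example, when x < 0 the arithmetic shift yields all ones, so the
    // result is (y - z) + z == y; when x >= 0 it yields zero, so the result
    // is 0 + z == z.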
20232 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
20233 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
20234 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
20235 if (CCVal == ISD::CondCode::SETGE)
20236 std::swap(TrueV, FalseV);
20237
20238 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
20239 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
20240 // Only handle simm12; if a constant is not in this range, it can be
20241 // treated as a register operand.
20242 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
20243 isInt<12>(TrueSImm - FalseSImm)) {
20244 SDValue SRA =
20245 DAG.getNode(ISD::SRA, DL, VT, LHS,
20246 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
20247 SDValue AND =
20248 DAG.getNode(ISD::AND, DL, VT, SRA,
20249 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
20250 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
20251 }
20252
20253 if (CCVal == ISD::CondCode::SETGE)
20254 std::swap(TrueV, FalseV);
20255 }
20256
20257 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20258 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
20259 {LHS, RHS, CC, TrueV, FalseV});
20260
20261 if (!Subtarget.hasConditionalMoveFusion()) {
20262 // (select c, -1, y) -> -c | y
20263 if (isAllOnesConstant(TrueV)) {
20264 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20265 SDValue Neg = DAG.getNegative(C, DL, VT);
20266 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
20267 }
20268 // (select c, y, -1) -> -!c | y
20269 if (isAllOnesConstant(FalseV)) {
20270 SDValue C =
20271 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20272 SDValue Neg = DAG.getNegative(C, DL, VT);
20273 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
20274 }
20275
20276 // (select c, 0, y) -> -!c & y
20277 if (isNullConstant(TrueV)) {
20278 SDValue C =
20279 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20280 SDValue Neg = DAG.getNegative(C, DL, VT);
20281 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
20282 }
20283 // (select c, y, 0) -> -c & y
20284 if (isNullConstant(FalseV)) {
20285 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20286 SDValue Neg = DAG.getNegative(C, DL, VT);
20287 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
20288 }
20289 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
20290 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
20291 if (((isOneConstant(FalseV) && LHS == TrueV &&
20292 CCVal == ISD::CondCode::SETNE) ||
20293 (isOneConstant(TrueV) && LHS == FalseV &&
20294 CCVal == ISD::CondCode::SETEQ)) &&
20295 isNullConstant(RHS)) {
20296 // freeze it to be safe.
20297 LHS = DAG.getFreeze(LHS);
20298 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
20299 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
20300 }
20301 }
20302
20303 // If both true/false are an xor with 1, pull through the select.
20304 // This can occur after op legalization if both operands are setccs that
20305 // require an xor to invert.
20306 // FIXME: Generalize to other binary ops with identical operand?
20307 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
20308 TrueV.getOperand(1) == FalseV.getOperand(1) &&
20309 isOneConstant(TrueV.getOperand(1)) &&
20310 TrueV.hasOneUse() && FalseV.hasOneUse()) {
20311 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
20312 TrueV.getOperand(0), FalseV.getOperand(0));
20313 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
20314 }
20315
20316 return SDValue();
20317 }
20318 case RISCVISD::BR_CC: {
20319 SDValue LHS = N->getOperand(1);
20320 SDValue RHS = N->getOperand(2);
20321 SDValue CC = N->getOperand(3);
20322 SDLoc DL(N);
20323
20324 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20325 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
20326 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
20327
20328 return SDValue();
20329 }
20330 case ISD::BITREVERSE:
20331 return performBITREVERSECombine(N, DAG, Subtarget);
20332 case ISD::FP_TO_SINT:
20333 case ISD::FP_TO_UINT:
20334 return performFP_TO_INTCombine(N, DCI, Subtarget);
20335 case ISD::FP_TO_SINT_SAT:
20336 case ISD::FP_TO_UINT_SAT:
20337 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
20338 case ISD::FCOPYSIGN: {
20339 EVT VT = N->getValueType(0);
20340 if (!VT.isVector())
20341 break;
20342 // There is a form of VFSGNJ which injects the negated sign of its second
20343 // operand. Try and bubble any FNEG up after the extend/round to produce
20344 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
20345 // TRUNC=1.
20346 SDValue In2 = N->getOperand(1);
20347 // Avoid cases where the extend/round has multiple uses, as duplicating
20348 // those is typically more expensive than removing a fneg.
20349 if (!In2.hasOneUse())
20350 break;
20351 if (In2.getOpcode() != ISD::FP_EXTEND &&
20352 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
20353 break;
20354 In2 = In2.getOperand(0);
20355 if (In2.getOpcode() != ISD::FNEG)
20356 break;
20357 SDLoc DL(N);
20358 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
20359 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
20360 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
20361 }
20362 case ISD::MGATHER: {
20363 const auto *MGN = cast<MaskedGatherSDNode>(N);
20364 const EVT VT = N->getValueType(0);
20365 SDValue Index = MGN->getIndex();
20366 SDValue ScaleOp = MGN->getScale();
20367 ISD::MemIndexType IndexType = MGN->getIndexType();
20368 assert(!MGN->isIndexScaled() &&
20369 "Scaled gather/scatter should not be formed");
20370
20371 SDLoc DL(N);
20372 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20373 return DAG.getMaskedGather(
20374 N->getVTList(), MGN->getMemoryVT(), DL,
20375 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20376 MGN->getBasePtr(), Index, ScaleOp},
20377 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20378
20379 if (narrowIndex(Index, IndexType, DAG))
20380 return DAG.getMaskedGather(
20381 N->getVTList(), MGN->getMemoryVT(), DL,
20382 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20383 MGN->getBasePtr(), Index, ScaleOp},
20384 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20385
20386 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
20387 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
20388 // The sequence will be XLenVT, not the type of Index. Tell
20389 // isSimpleVIDSequence this so we avoid overflow.
20390 if (std::optional<VIDSequence> SimpleVID =
20391 isSimpleVIDSequence(Index, Subtarget.getXLen());
20392 SimpleVID && SimpleVID->StepDenominator == 1) {
20393 const int64_t StepNumerator = SimpleVID->StepNumerator;
20394 const int64_t Addend = SimpleVID->Addend;
20395
20396 // Note: We don't need to check alignment here since (by assumption
20397 // from the existence of the gather), our offsets must be sufficiently
20398 // aligned.
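        // For example, a gather from base p with byte-offset index
        // <0, 4, 8, 12> has StepNumerator == 4 and Addend == 0, and becomes a
        // VP strided load from p with a stride of 4 bytes.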
20399
20400 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
20401 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
20402 assert(IndexType == ISD::UNSIGNED_SCALED);
20403 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
20404 DAG.getSignedConstant(Addend, DL, PtrVT));
20405
20406 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
20407 VT.getVectorElementCount());
20408 SDValue StridedLoad = DAG.getStridedLoadVP(
20409 VT, DL, MGN->getChain(), BasePtr,
20410 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
20411 EVL, MGN->getMemOperand());
20412 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
20413 StridedLoad, MGN->getPassThru(), EVL);
20414 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
20415 DL);
20416 }
20417 }
20418
20419 SmallVector<int> ShuffleMask;
20420 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20421 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
20422 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
20423 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
20424 MGN->getMask(), DAG.getUNDEF(VT),
20425 MGN->getMemoryVT(), MGN->getMemOperand(),
20426 ISD::UNINDEXED, ISD::NON_EXTLOAD);
20427 SDValue Shuffle =
20428 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
20429 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
20430 }
20431
20432 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20433 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
20434 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
20435 SmallVector<SDValue> NewIndices;
20436 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
20437 NewIndices.push_back(Index.getOperand(i));
20438 EVT IndexVT = Index.getValueType()
20439 .getHalfNumVectorElementsVT(*DAG.getContext());
20440 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
20441
20442 unsigned ElementSize = VT.getScalarStoreSize();
20443 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
20444 auto EltCnt = VT.getVectorElementCount();
20445 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
20446 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
20447 EltCnt.divideCoefficientBy(2));
20448 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
20449 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
20450 EltCnt.divideCoefficientBy(2));
20451 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
20452
20453 SDValue Gather =
20454 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
20455 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
20456 Index, ScaleOp},
20457 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
20458 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
20459 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
20460 }
20461 break;
20462 }
20463 case ISD::MSCATTER: {
20464 const auto *MSN = cast<MaskedScatterSDNode>(N);
20465 SDValue Index = MSN->getIndex();
20466 SDValue ScaleOp = MSN->getScale();
20467 ISD::MemIndexType IndexType = MSN->getIndexType();
20468 assert(!MSN->isIndexScaled() &&
20469 "Scaled gather/scatter should not be formed");
20470
20471 SDLoc DL(N);
20472 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20473 return DAG.getMaskedScatter(
20474 N->getVTList(), MSN->getMemoryVT(), DL,
20475 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20476 Index, ScaleOp},
20477 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20478
20479 if (narrowIndex(Index, IndexType, DAG))
20480 return DAG.getMaskedScatter(
20481 N->getVTList(), MSN->getMemoryVT(), DL,
20482 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20483 Index, ScaleOp},
20484 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20485
20486 EVT VT = MSN->getValue()->getValueType(0);
20487 SmallVector<int> ShuffleMask;
20488 if (!MSN->isTruncatingStore() &&
20489 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
20490 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
20491 DAG.getUNDEF(VT), ShuffleMask);
20492 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
20493 DAG.getUNDEF(XLenVT), MSN->getMask(),
20494 MSN->getMemoryVT(), MSN->getMemOperand(),
20495 ISD::UNINDEXED, false);
20496 }
20497 break;
20498 }
20499 case ISD::VP_GATHER: {
20500 const auto *VPGN = cast<VPGatherSDNode>(N);
20501 SDValue Index = VPGN->getIndex();
20502 SDValue ScaleOp = VPGN->getScale();
20503 ISD::MemIndexType IndexType = VPGN->getIndexType();
20504 assert(!VPGN->isIndexScaled() &&
20505 "Scaled gather/scatter should not be formed");
20506
20507 SDLoc DL(N);
20508 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20509 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20510 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20511 ScaleOp, VPGN->getMask(),
20512 VPGN->getVectorLength()},
20513 VPGN->getMemOperand(), IndexType);
20514
20515 if (narrowIndex(Index, IndexType, DAG))
20516 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20517 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20518 ScaleOp, VPGN->getMask(),
20519 VPGN->getVectorLength()},
20520 VPGN->getMemOperand(), IndexType);
20521
20522 break;
20523 }
20524 case ISD::VP_SCATTER: {
20525 const auto *VPSN = cast<VPScatterSDNode>(N);
20526 SDValue Index = VPSN->getIndex();
20527 SDValue ScaleOp = VPSN->getScale();
20528 ISD::MemIndexType IndexType = VPSN->getIndexType();
20529 assert(!VPSN->isIndexScaled() &&
20530 "Scaled gather/scatter should not be formed");
20531
20532 SDLoc DL(N);
20533 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20534 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20535 {VPSN->getChain(), VPSN->getValue(),
20536 VPSN->getBasePtr(), Index, ScaleOp,
20537 VPSN->getMask(), VPSN->getVectorLength()},
20538 VPSN->getMemOperand(), IndexType);
20539
20540 if (narrowIndex(Index, IndexType, DAG))
20541 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20542 {VPSN->getChain(), VPSN->getValue(),
20543 VPSN->getBasePtr(), Index, ScaleOp,
20544 VPSN->getMask(), VPSN->getVectorLength()},
20545 VPSN->getMemOperand(), IndexType);
20546 break;
20547 }
20548 case RISCVISD::SHL_VL:
20549 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20550 return V;
20551 [[fallthrough]];
20552 case RISCVISD::SRA_VL:
20553 case RISCVISD::SRL_VL: {
20554 SDValue ShAmt = N->getOperand(1);
20555 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20556 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20557 SDLoc DL(N);
20558 SDValue VL = N->getOperand(4);
20559 EVT VT = N->getValueType(0);
20560 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20561 ShAmt.getOperand(1), VL);
20562 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
20563 N->getOperand(2), N->getOperand(3), N->getOperand(4));
20564 }
20565 break;
20566 }
20567 case ISD::SRA:
20568 if (SDValue V = performSRACombine(N, DAG, Subtarget))
20569 return V;
20570 [[fallthrough]];
20571 case ISD::SRL:
20572 case ISD::SHL: {
20573 if (N->getOpcode() == ISD::SHL) {
20574 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20575 return V;
20576 }
20577 SDValue ShAmt = N->getOperand(1);
20578 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20579 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20580 SDLoc DL(N);
20581 EVT VT = N->getValueType(0);
20582 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20583 ShAmt.getOperand(1),
20584 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
20585 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
20586 }
20587 break;
20588 }
20589 case RISCVISD::ADD_VL:
20590 if (SDValue V = simplifyOp_VL(N))
20591 return V;
20592 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20593 return V;
20594 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20595 return V;
20596 return combineToVWMACC(N, DAG, Subtarget);
20597 case RISCVISD::VWADD_W_VL:
20598 case RISCVISD::VWADDU_W_VL:
20599 case RISCVISD::VWSUB_W_VL:
20600 case RISCVISD::VWSUBU_W_VL:
20601 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
20602 case RISCVISD::OR_VL:
20603 case RISCVISD::SUB_VL:
20604 case RISCVISD::MUL_VL:
20605 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20606 case RISCVISD::VFMADD_VL:
20607 case RISCVISD::VFNMADD_VL:
20608 case RISCVISD::VFMSUB_VL:
20609 case RISCVISD::VFNMSUB_VL:
20610 case RISCVISD::STRICT_VFMADD_VL:
20611 case RISCVISD::STRICT_VFNMADD_VL:
20612 case RISCVISD::STRICT_VFMSUB_VL:
20613 case RISCVISD::STRICT_VFNMSUB_VL:
20614 return performVFMADD_VLCombine(N, DCI, Subtarget);
20615 case RISCVISD::FADD_VL:
20616 case RISCVISD::FSUB_VL:
20617 case RISCVISD::FMUL_VL:
20618 case RISCVISD::VFWADD_W_VL:
20619 case RISCVISD::VFWSUB_W_VL:
20620 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20621 case ISD::LOAD:
20622 case ISD::STORE: {
20623 if (DCI.isAfterLegalizeDAG())
20624 if (SDValue V = performMemPairCombine(N, DCI))
20625 return V;
20626
20627 if (N->getOpcode() != ISD::STORE)
20628 break;
20629
20630 auto *Store = cast<StoreSDNode>(N);
20631 SDValue Chain = Store->getChain();
20632 EVT MemVT = Store->getMemoryVT();
20633 SDValue Val = Store->getValue();
20634 SDLoc DL(N);
20635
20636 bool IsScalarizable =
20637 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
20638 Store->isSimple() &&
20639 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
20640 isPowerOf2_64(MemVT.getSizeInBits()) &&
20641 MemVT.getSizeInBits() <= Subtarget.getXLen();
20642
20643 // If sufficiently aligned we can scalarize stores of constant vectors of
20644 // any power-of-two size up to XLen bits, provided that they aren't too
20645 // expensive to materialize.
20646 // vsetivli zero, 2, e8, m1, ta, ma
20647 // vmv.v.i v8, 4
20648 // vse8.v v8, (a0)
20649 // ->
20650 // li a1, 1028
20651 // sh a1, 0(a0)
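    // Here the two e8 elements of value 4 pack into the 16-bit integer
    // 0x0404 == 1028, which is what the li/sh pair stores.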
20652 if (DCI.isBeforeLegalize() && IsScalarizable &&
20653 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
20654 // Get the constant vector bits
20655 APInt NewC(Val.getValueSizeInBits(), 0);
20656 uint64_t EltSize = Val.getScalarValueSizeInBits();
20657 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
20658 if (Val.getOperand(i).isUndef())
20659 continue;
20660 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
20661 i * EltSize);
20662 }
20663 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20664
20665 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
20666 true) <= 2 &&
20667 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20668 NewVT, *Store->getMemOperand())) {
20669 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
20670 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
20671 Store->getPointerInfo(), Store->getBaseAlign(),
20672 Store->getMemOperand()->getFlags());
20673 }
20674 }
20675
20676 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
20677 // vsetivli zero, 2, e16, m1, ta, ma
20678 // vle16.v v8, (a0)
20679 // vse16.v v8, (a1)
20680 if (auto *L = dyn_cast<LoadSDNode>(Val);
20681 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
20682 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
20683 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
20684 L->getMemoryVT() == MemVT) {
20685 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20686 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20687 NewVT, *Store->getMemOperand()) &&
20688 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20689 NewVT, *L->getMemOperand())) {
20690 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
20691 L->getPointerInfo(), L->getBaseAlign(),
20692 L->getMemOperand()->getFlags());
20693 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
20694 Store->getPointerInfo(), Store->getBaseAlign(),
20695 Store->getMemOperand()->getFlags());
20696 }
20697 }
20698
20699 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
20700 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
20701 // any illegal types.
20702 if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
20703 (DCI.isAfterLegalizeDAG() &&
20704 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20705 isNullConstant(Val.getOperand(1)))) &&
20706 Val.hasOneUse()) {
20707 SDValue Src = Val.getOperand(0);
20708 MVT VecVT = Src.getSimpleValueType();
20709 // VecVT should be scalable and memory VT should match the element type.
20710 if (!Store->isIndexed() && VecVT.isScalableVector() &&
20711 MemVT == VecVT.getVectorElementType()) {
20712 SDLoc DL(N);
20713 MVT MaskVT = getMaskTypeFor(VecVT);
20714 return DAG.getStoreVP(
20715 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
20716 DAG.getConstant(1, DL, MaskVT),
20717 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
20718 Store->getMemOperand(), Store->getAddressingMode(),
20719 Store->isTruncatingStore(), /*IsCompress*/ false);
20720 }
20721 }
20722
20723 break;
20724 }
20725 case ISD::SPLAT_VECTOR: {
20726 EVT VT = N->getValueType(0);
20727 // Only perform this combine on legal MVT types.
20728 if (!isTypeLegal(VT))
20729 break;
20730 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
20731 DAG, Subtarget))
20732 return Gather;
20733 break;
20734 }
20735 case ISD::BUILD_VECTOR:
20736 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
20737 return V;
20738 break;
20739 case ISD::CONCAT_VECTORS:
20740 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
20741 return V;
20742 break;
20743 case ISD::VECTOR_SHUFFLE:
20744 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
20745 return V;
20746 break;
20747 case ISD::INSERT_VECTOR_ELT:
20748 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
20749 return V;
20750 break;
20751 case RISCVISD::VFMV_V_F_VL: {
20752 const MVT VT = N->getSimpleValueType(0);
20753 SDValue Passthru = N->getOperand(0);
20754 SDValue Scalar = N->getOperand(1);
20755 SDValue VL = N->getOperand(2);
20756
20757 // If VL is 1, we can use vfmv.s.f.
20758 if (isOneConstant(VL))
20759 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
20760 break;
20761 }
20762 case RISCVISD::VMV_V_X_VL: {
20763 const MVT VT = N->getSimpleValueType(0);
20764 SDValue Passthru = N->getOperand(0);
20765 SDValue Scalar = N->getOperand(1);
20766 SDValue VL = N->getOperand(2);
20767
20768 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
20769 // scalar input.
20770 unsigned ScalarSize = Scalar.getValueSizeInBits();
20771 unsigned EltWidth = VT.getScalarSizeInBits();
20772 if (ScalarSize > EltWidth && Passthru.isUndef())
20773 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
20774 return SDValue(N, 0);
20775
20776 // If VL is 1 and the scalar value won't benefit from immediate, we can
20777 // use vmv.s.x.
20778 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
20779 if (isOneConstant(VL) &&
20780 (!Const || Const->isZero() ||
20781 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
20782 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
20783
20784 break;
20785 }
20786 case RISCVISD::VFMV_S_F_VL: {
20787 SDValue Src = N->getOperand(1);
20788 // Try to remove vector->scalar->vector if the scalar->vector is inserting
20789 // into an undef vector.
20790 // TODO: Could use a vslide or vmv.v.v for non-undef.
20791 if (N->getOperand(0).isUndef() &&
20792 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20793 isNullConstant(Src.getOperand(1)) &&
20794 Src.getOperand(0).getValueType().isScalableVector()) {
20795 EVT VT = N->getValueType(0);
20796 SDValue EVSrc = Src.getOperand(0);
20797 EVT EVSrcVT = EVSrc.getValueType();
20799 // Widths match, just return the original vector.
20800 if (EVSrcVT == VT)
20801 return EVSrc;
20802 SDLoc DL(N);
20803 // Width is narrower, using insert_subvector.
20804 if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
20805 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
20806 EVSrc,
20807 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
20808 }
20809 // Width is wider, using extract_subvector.
20810 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
20811 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
20812 }
20813 [[fallthrough]];
20814 }
20815 case RISCVISD::VMV_S_X_VL: {
20816 const MVT VT = N->getSimpleValueType(0);
20817 SDValue Passthru = N->getOperand(0);
20818 SDValue Scalar = N->getOperand(1);
20819 SDValue VL = N->getOperand(2);
20820
20821 // The vmv.s.x instruction copies the scalar integer register to element 0
20822 // of the destination vector register. If SEW < XLEN, the least-significant
20823 // bits are copied and the upper XLEN-SEW bits are ignored.
20824 unsigned ScalarSize = Scalar.getValueSizeInBits();
20825 unsigned EltWidth = VT.getScalarSizeInBits();
20826 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
20827 return SDValue(N, 0);
20828
20829 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
20830 Scalar.getOperand(0).getValueType() == N->getValueType(0))
20831 return Scalar.getOperand(0);
20832
20833 // Use M1 or smaller to avoid over constraining register allocation
20834 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
20835 if (M1VT.bitsLT(VT)) {
20836 SDValue M1Passthru = DAG.getExtractSubvector(DL, M1VT, Passthru, 0);
20837 SDValue Result =
20838 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
20839 Result = DAG.getInsertSubvector(DL, Passthru, Result, 0);
20840 return Result;
20841 }
20842
20843 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
20844 // higher would involve overly constraining the register allocator for
20845 // no purpose.
20846 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
20847 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
20848 VT.bitsLE(RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
20849 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
20850
20851 break;
20852 }
20853 case RISCVISD::VMV_X_S: {
20854 SDValue Vec = N->getOperand(0);
20855 MVT VecVT = N->getOperand(0).getSimpleValueType();
20856 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
20857 if (M1VT.bitsLT(VecVT)) {
20858 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, 0);
20859 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
20860 }
20861 break;
20862 }
20863 case ISD::INTRINSIC_VOID:
20864 case ISD::INTRINSIC_W_CHAIN:
20865 case ISD::INTRINSIC_WO_CHAIN: {
20866 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
20867 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
20868 switch (IntNo) {
20869 // By default we do not combine any intrinsic.
20870 default:
20871 return SDValue();
20872 case Intrinsic::riscv_vcpop:
20873 case Intrinsic::riscv_vcpop_mask:
20874 case Intrinsic::riscv_vfirst:
20875 case Intrinsic::riscv_vfirst_mask: {
20876 SDValue VL = N->getOperand(2);
20877 if (IntNo == Intrinsic::riscv_vcpop_mask ||
20878 IntNo == Intrinsic::riscv_vfirst_mask)
20879 VL = N->getOperand(3);
20880 if (!isNullConstant(VL))
20881 return SDValue();
20882 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
20883 SDLoc DL(N);
20884 EVT VT = N->getValueType(0);
20885 if (IntNo == Intrinsic::riscv_vfirst ||
20886 IntNo == Intrinsic::riscv_vfirst_mask)
20887 return DAG.getAllOnesConstant(DL, VT);
20888 return DAG.getConstant(0, DL, VT);
20889 }
20890 case Intrinsic::riscv_vsseg2_mask:
20891 case Intrinsic::riscv_vsseg3_mask:
20892 case Intrinsic::riscv_vsseg4_mask:
20893 case Intrinsic::riscv_vsseg5_mask:
20894 case Intrinsic::riscv_vsseg6_mask:
20895 case Intrinsic::riscv_vsseg7_mask:
20896 case Intrinsic::riscv_vsseg8_mask: {
20897 SDValue Tuple = N->getOperand(2);
20898 unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
20899
20900 if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
20901 Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
20902 !Tuple.getOperand(0).isUndef())
20903 return SDValue();
20904
20905 SDValue Val = Tuple.getOperand(1);
20906 unsigned Idx = Tuple.getConstantOperandVal(2);
20907
20908 unsigned SEW = Val.getValueType().getScalarSizeInBits();
20909 assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
20910 "Type mismatch without bitcast?");
20911 unsigned Stride = SEW / 8 * NF;
20912 unsigned Offset = SEW / 8 * Idx;
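      // Each segment is NF fields of SEW/8 bytes, so e.g. with NF = 4 and
      // SEW = 32 the stride is 16 bytes and field index 2 starts at byte
      // offset 8.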
20913
20914 SDValue Ops[] = {
20915 /*Chain=*/N->getOperand(0),
20916 /*IntID=*/
20917 DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
20918 /*StoredVal=*/Val,
20919 /*Ptr=*/
20920 DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
20921 DAG.getConstant(Offset, DL, XLenVT)),
20922 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
20923 /*Mask=*/N->getOperand(4),
20924 /*VL=*/N->getOperand(5)};
20925
20926 auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
20927 // Match getTgtMemIntrinsic for non-unit stride case
20928 EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
20929 MachineFunction &MF = DAG.getMachineFunction();
20930 MachineMemOperand *MMO = MF.getMachineMemOperand(
20931 OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
20932
20933 SDVTList VTs = DAG.getVTList(MVT::Other);
20934 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
20935 MMO);
20936 }
20937 }
20938 }
20939 case ISD::EXPERIMENTAL_VP_REVERSE:
20940 return performVP_REVERSECombine(N, DAG, Subtarget);
20941 case ISD::VP_STORE:
20942 return performVP_STORECombine(N, DAG, Subtarget);
20943 case ISD::BITCAST: {
20945 SDValue N0 = N->getOperand(0);
20946 EVT VT = N->getValueType(0);
20947 EVT SrcVT = N0.getValueType();
20948 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
20949 unsigned NF = VT.getRISCVVectorTupleNumFields();
20950 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
20951 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
20952 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
20953
20954 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
20955
20956 SDValue Result = DAG.getUNDEF(VT);
20957 for (unsigned i = 0; i < NF; ++i)
20958 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
20959 DAG.getTargetConstant(i, DL, MVT::i32));
20960 return Result;
20961 }
20962 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
20963 // type, widen both sides to avoid a trip through memory.
20964 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
20965 VT.isScalarInteger()) {
20966 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
20967 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
20968 Ops[0] = N0;
20969 SDLoc DL(N);
20970 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
20971 N0 = DAG.getBitcast(MVT::i8, N0);
20972 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
20973 }
20974
20975 return SDValue();
20976 }
20977 case ISD::VECREDUCE_ADD:
20978 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, *this))
20979 return V;
20980 [[fallthrough]];
20981 case ISD::CTPOP:
20982 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
20983 return V;
20984 break;
20985 case RISCVISD::VRGATHER_VX_VL: {
20986 // Note this assumes that out of bounds indices produce poison
20987 // and can thus be replaced without having to prove them in bounds.
20988 EVT VT = N->getValueType(0);
20989 SDValue Src = N->getOperand(0);
20990 SDValue Idx = N->getOperand(1);
20991 SDValue Passthru = N->getOperand(2);
20992 SDValue VL = N->getOperand(4);
20993
20994 // Warning: Unlike most cases we strip an insert_subvector, this one
20995 // does not require the first operand to be undef.
20996 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
20997 isNullConstant(Src.getOperand(2)))
20998 Src = Src.getOperand(1);
20999
21000 switch (Src.getOpcode()) {
21001 default:
21002 break;
21003 case RISCVISD::VMV_V_X_VL:
21004 case RISCVISD::VFMV_V_F_VL:
21005 // Drop a redundant vrgather_vx.
21006 // TODO: Remove the type restriction if we find a motivating
21007 // test case?
21008 if (Passthru.isUndef() && VL == Src.getOperand(2) &&
21009 Src.getValueType() == VT)
21010 return Src;
21011 break;
21012 case RISCVISD::VMV_S_X_VL:
21013 case RISCVISD::VFMV_S_F_VL:
21014 // If this use only demands lane zero from the source vmv.s.x, and
21015 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
21016 // a vmv.v.x. Note that there can be other uses of the original
21017 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
21018 if (isNullConstant(Idx) && Passthru.isUndef() &&
21019 VL == Src.getOperand(2)) {
21020 unsigned Opc =
21021 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
21022 return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
21023 VL);
21024 }
21025 break;
21026 }
21027 break;
21028 }
21029 case RISCVISD::TUPLE_EXTRACT: {
21030 EVT VT = N->getValueType(0);
21031 SDValue Tuple = N->getOperand(0);
21032 unsigned Idx = N->getConstantOperandVal(1);
21033 if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
21034 break;
21035
21036 unsigned NF = 0;
21037 switch (Tuple.getConstantOperandVal(1)) {
21038 default:
21039 break;
21040 case Intrinsic::riscv_vlseg2_mask:
21041 case Intrinsic::riscv_vlseg3_mask:
21042 case Intrinsic::riscv_vlseg4_mask:
21043 case Intrinsic::riscv_vlseg5_mask:
21044 case Intrinsic::riscv_vlseg6_mask:
21045 case Intrinsic::riscv_vlseg7_mask:
21046 case Intrinsic::riscv_vlseg8_mask:
21047 NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21048 break;
21049 }
21050
21051 if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
21052 break;
21053
21054 unsigned SEW = VT.getScalarSizeInBits();
21055 assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
21056 "Type mismatch without bitcast?");
21057 unsigned Stride = SEW / 8 * NF;
21058 unsigned Offset = SEW / 8 * Idx;
21059
21060 SDValue Ops[] = {
21061 /*Chain=*/Tuple.getOperand(0),
21062 /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
21063 /*Passthru=*/Tuple.getOperand(2),
21064 /*Ptr=*/
21065 DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
21066 DAG.getConstant(Offset, DL, XLenVT)),
21067 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21068 /*Mask=*/Tuple.getOperand(4),
21069 /*VL=*/Tuple.getOperand(5),
21070 /*Policy=*/Tuple.getOperand(6)};
21071
21072 auto *TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
21073 // Match getTgtMemIntrinsic for non-unit stride case
21074 EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
21075 MachineFunction &MF = DAG.getMachineFunction();
21076 MachineMemOperand *MMO = MF.getMachineMemOperand(
21077 TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21078
21079 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
21080 SDValue Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
21081 Ops, MemVT, MMO);
21082 DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
21083 return Result.getValue(0);
21084 }
21085 case RISCVISD::TUPLE_INSERT: {
21086 // tuple_insert tuple, undef, idx -> tuple
21087 if (N->getOperand(1).isUndef())
21088 return N->getOperand(0);
21089 break;
21090 }
21091 }
21092
21093 return SDValue();
21094}
21095
21096bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
21097 EVT XVT, unsigned KeptBits) const {
21098 // For vectors, we don't have a preference.
21099 if (XVT.isVector())
21100 return false;
21101
21102 if (XVT != MVT::i32 && XVT != MVT::i64)
21103 return false;
21104
21105 // We can use sext.w for RV64 or an srai 31 on RV32.
21106 if (KeptBits == 32 || KeptBits == 64)
21107 return true;
21108
21109 // With Zbb we can use sext.h/sext.b.
21110 return Subtarget.hasStdExtZbb() &&
21111 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
21112 KeptBits == 16);
21113}
21114
21115bool RISCVTargetLowering::isDesirableToCommuteWithShift(
21116 const SDNode *N, CombineLevel Level) const {
21117 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
21118 N->getOpcode() == ISD::SRL) &&
21119 "Expected shift op");
21120
21121 // The following folds are only desirable if `(OP _, c1 << c2)` can be
21122 // materialised in fewer instructions than `(OP _, c1)`:
21123 //
21124 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
21125 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
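  // For example, folding (shl (add x, 1), 3) to (add (shl x, 3), 8) is free
  // because both 1 and 8 fit in an ADDI immediate.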
21126 SDValue N0 = N->getOperand(0);
21127 EVT Ty = N0.getValueType();
21128
21129 // LD/ST will optimize constant Offset extraction, so when AddNode is used by
21130 // LD/ST, it can still complete the folding optimization operation performed
21131 // above.
21132 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
21133 for (SDNode *Use : X->users()) {
21134 // This use is the one we're on right now. Skip it
21135 if (Use == User || Use->getOpcode() == ISD::SELECT)
21136 continue;
21137 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
21138 return false;
21139 }
21140 return true;
21141 };
21142
21143 if (Ty.isScalarInteger() &&
21144 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
21145 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
21146 return isUsedByLdSt(N0.getNode(), N);
21147
21148 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
21149 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
21150
21151 bool IsShXAdd =
21152 (Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
21153 C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3;
21154 bool IsQCShlAdd = Subtarget.hasVendorXqciac() && C2 &&
21155 C2->getZExtValue() >= 4 && C2->getZExtValue() <= 31;
21156
21157 // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
21158 if ((IsShXAdd || IsQCShlAdd) && N->hasOneUse() &&
21159 N->user_begin()->getOpcode() == ISD::ADD &&
21160 !isUsedByLdSt(*N->user_begin(), nullptr) &&
21161 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
21162 return false;
21163
21164 if (C1 && C2) {
21165 const APInt &C1Int = C1->getAPIntValue();
21166 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
21167
21168 // We can materialise `c1 << c2` into an add immediate, so it's "free",
21169 // and the combine should happen, to potentially allow further combines
21170 // later.
21171 if (ShiftedC1Int.getSignificantBits() <= 64 &&
21172 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
21173 return true;
21174
21175 // We can materialise `c1` in an add immediate, so it's "free", and the
21176 // combine should be prevented.
21177 if (C1Int.getSignificantBits() <= 64 &&
21178 isLegalAddImmediate(C1Int.getSExtValue()))
21179 return false;
21180
21181 // Neither constant will fit into an immediate, so find materialisation
21182 // costs.
21183 int C1Cost =
21184 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
21185 /*CompressionCost*/ true);
21186 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
21187 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
21188 /*CompressionCost*/ true);
21189
21190 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
21191 // combine should be prevented.
21192 if (C1Cost < ShiftedC1Cost)
21193 return false;
21194 }
21195 }
21196
21197 if (!N0->hasOneUse())
21198 return false;
21199
21200 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
21201 N0->getOperand(0)->getOpcode() == ISD::ADD &&
21202 !N0->getOperand(0)->hasOneUse())
21203 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
21204
21205 return true;
21206}
21207
21208bool RISCVTargetLowering::targetShrinkDemandedConstant(
21209 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
21210 TargetLoweringOpt &TLO) const {
21211 // Delay this optimization as late as possible.
21212 if (!TLO.LegalOps)
21213 return false;
21214
21215 EVT VT = Op.getValueType();
21216 if (VT.isVector())
21217 return false;
21218
21219 unsigned Opcode = Op.getOpcode();
21220 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
21221 return false;
21222
21223 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
21224 if (!C)
21225 return false;
21226
21227 const APInt &Mask = C->getAPIntValue();
21228
21229 // Clear all non-demanded bits initially.
21230 APInt ShrunkMask = Mask & DemandedBits;
21231
21232 // Try to make a smaller immediate by setting undemanded bits.
21233
21234 APInt ExpandedMask = Mask | ~DemandedBits;
21235
21236 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
21237 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
21238 };
21239 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
21240 if (NewMask == Mask)
21241 return true;
21242 SDLoc DL(Op);
21243 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
21244 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
21245 Op.getOperand(0), NewC);
21246 return TLO.CombineTo(Op, NewOp);
21247 };
21248
21249 // If the shrunk mask fits in sign extended 12 bits, let the target
21250 // independent code apply it.
21251 if (ShrunkMask.isSignedIntN(12))
21252 return false;
21253
21254 // And has a few special cases for zext.
21255 if (Opcode == ISD::AND) {
21256 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
21257 // otherwise use SLLI + SRLI.
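    // For example, (and X, 0xff00) with only bits 15:8 demanded can widen its
    // mask to 0xffff and become a single zext.h.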
21258 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
21259 if (IsLegalMask(NewMask))
21260 return UseMask(NewMask);
21261
21262 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
21263 if (VT == MVT::i64) {
21264 APInt NewMask = APInt(64, 0xffffffff);
21265 if (IsLegalMask(NewMask))
21266 return UseMask(NewMask);
21267 }
21268 }
21269
21270 // For the remaining optimizations, we need to be able to make a negative
21271 // number through a combination of mask and undemanded bits.
21272 if (!ExpandedMask.isNegative())
21273 return false;
21274
21275 // What is the fewest number of bits we need to represent the negative number.
21276 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
21277
21278 // Try to make a 12 bit negative immediate. If that fails try to make a 32
21279 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
21280 // If we can't create a simm12, we shouldn't change opaque constants.
21281 APInt NewMask = ShrunkMask;
21282 if (MinSignedBits <= 12)
21283 NewMask.setBitsFrom(11);
21284 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
21285 NewMask.setBitsFrom(31);
21286 else
21287 return false;
21288
21289 // Check that our new mask is a subset of the demanded mask.
21290 assert(IsLegalMask(NewMask));
21291 return UseMask(NewMask);
21292}
21293
21294static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
21295 static const uint64_t GREVMasks[] = {
21296 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
21297 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
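  // With ShAmt == 7 the first three stages swap adjacent bits, bit pairs and
  // nibbles, reversing the bits within each byte: e.g. 0x01 becomes 0x80 for
  // GREV (brev8), or 0xff for GORC (orc.b).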
21298
21299 for (unsigned Stage = 0; Stage != 6; ++Stage) {
21300 unsigned Shift = 1 << Stage;
21301 if (ShAmt & Shift) {
21302 uint64_t Mask = GREVMasks[Stage];
21303 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
21304 if (IsGORC)
21305 Res |= x;
21306 x = Res;
21307 }
21308 }
21309
21310 return x;
21311}
21312
21313void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
21314 KnownBits &Known,
21315 const APInt &DemandedElts,
21316 const SelectionDAG &DAG,
21317 unsigned Depth) const {
21318 unsigned BitWidth = Known.getBitWidth();
21319 unsigned Opc = Op.getOpcode();
21324 "Should use MaskedValueIsZero if you don't know whether Op"
21325 " is a target node!");
21326
21327 Known.resetAll();
21328 switch (Opc) {
21329 default: break;
21330 case RISCVISD::SELECT_CC: {
21331 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
21332 // If we don't know any bits, early out.
21333 if (Known.isUnknown())
21334 break;
21335 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
21336
21337 // Only known if known in both the LHS and RHS.
21338 Known = Known.intersectWith(Known2);
21339 break;
21340 }
21341 case RISCVISD::VCPOP_VL: {
21342 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
21343 Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
21344 break;
21345 }
21346 case RISCVISD::CZERO_EQZ:
21347 case RISCVISD::CZERO_NEZ:
21348 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21349 // Result is either all zero or operand 0. We can propagate zeros, but not
21350 // ones.
21351 Known.One.clearAllBits();
21352 break;
21353 case RISCVISD::REMUW: {
21354 KnownBits Known2;
21355 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21356 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21357 // We only care about the lower 32 bits.
21358 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
21359 // Restore the original width by sign extending.
21360 Known = Known.sext(BitWidth);
21361 break;
21362 }
21363 case RISCVISD::DIVUW: {
21364 KnownBits Known2;
21365 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21366 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21367 // We only care about the lower 32 bits.
21368 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
21369 // Restore the original width by sign extending.
21370 Known = Known.sext(BitWidth);
21371 break;
21372 }
21373 case RISCVISD::SLLW: {
21374 KnownBits Known2;
21375 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21376 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21377 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
21378 // Restore the original width by sign extending.
21379 Known = Known.sext(BitWidth);
21380 break;
21381 }
21382 case RISCVISD::CTZW: {
21383 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21384 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
21385 unsigned LowBits = llvm::bit_width(PossibleTZ);
21386 Known.Zero.setBitsFrom(LowBits);
21387 break;
21388 }
21389 case RISCVISD::CLZW: {
21390 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21391 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
21392 unsigned LowBits = llvm::bit_width(PossibleLZ);
21393 Known.Zero.setBitsFrom(LowBits);
21394 break;
21395 }
21396 case RISCVISD::BREV8:
21397 case RISCVISD::ORC_B: {
21398 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
21399 // control value of 7 is equivalent to brev8 and orc.b.
21400 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21401 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21402 // To compute zeros for ORC_B, we need to invert the value and invert it
21403 // back after. This inverting is harmless for BREV8.
21404 Known.Zero =
21405 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
21406 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
21407 break;
21408 }
21409 case RISCVISD::READ_VLENB: {
21410 // We can use the minimum and maximum VLEN values to bound VLENB. We
21411 // know VLEN must be a power of two.
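    // For example, with VLEN known to be in [128, 512], VLENB is in [16, 64],
    // so the low 4 bits and all bits above bit 6 are known zero.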
21412 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
21413 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
21414 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
21415 Known.Zero.setLowBits(Log2_32(MinVLenB));
21416 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
21417 if (MaxVLenB == MinVLenB)
21418 Known.One.setBit(Log2_32(MinVLenB));
21419 break;
21420 }
21421 case RISCVISD::FCLASS: {
21422 // fclass will only set one of the low 10 bits.
21423 Known.Zero.setBitsFrom(10);
21424 break;
21425 }
21426 case ISD::INTRINSIC_WO_CHAIN:
21427 case ISD::INTRINSIC_W_CHAIN: {
21428 unsigned IntNo =
21429 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
21430 switch (IntNo) {
21431 default:
21432 // We can't do anything for most intrinsics.
21433 break;
21434 case Intrinsic::riscv_vsetvli:
21435 case Intrinsic::riscv_vsetvlimax: {
21436 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
21437 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
21438 RISCVVType::VLMUL VLMUL =
21439 static_cast<RISCVVType::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
21440 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
21441 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
21442 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
21443 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
21444
21445 // Result of vsetvli must be not larger than AVL.
21446 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
21447 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
21448
21449 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
21450 if (BitWidth > KnownZeroFirstBit)
21451 Known.Zero.setBitsFrom(KnownZeroFirstBit);
21452 break;
21453 }
21454 }
21455 break;
21456 }
21457 }
21458}
21459
21460unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
21461 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21462 unsigned Depth) const {
21463 switch (Op.getOpcode()) {
21464 default:
21465 break;
21466 case RISCVISD::SELECT_CC: {
21467 unsigned Tmp =
21468 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
21469 if (Tmp == 1) return 1; // Early out.
21470 unsigned Tmp2 =
21471 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
21472 return std::min(Tmp, Tmp2);
21473 }
21474 case RISCVISD::CZERO_EQZ:
21475 case RISCVISD::CZERO_NEZ:
21476 // Output is either all zero or operand 0. We can propagate sign bit count
21477 // from operand 0.
21478 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21479 case RISCVISD::ABSW: {
21480 // We expand this at isel to negw+max. The result will have 33 sign bits
21481 // if the input has at least 33 sign bits.
21482 unsigned Tmp =
21483 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21484 if (Tmp < 33) return 1;
21485 return 33;
21486 }
21487 case RISCVISD::SRAW: {
21488 unsigned Tmp =
21489 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21490 // sraw produces at least 33 sign bits. If the input already has more than
21491 // 33 sign bits, sraw will preserve them.
21492 // TODO: A more precise answer could be calculated depending on known bits
21493 // in the shift amount.
21494 return std::max(Tmp, 33U);
21495 }
21496 case RISCVISD::SLLW:
21497 case RISCVISD::SRLW:
21498 case RISCVISD::DIVW:
21499 case RISCVISD::DIVUW:
21500 case RISCVISD::REMUW:
21501 case RISCVISD::ROLW:
21502 case RISCVISD::RORW:
21503 case RISCVISD::FCVT_W_RV64:
21504 case RISCVISD::FCVT_WU_RV64:
21505 case RISCVISD::STRICT_FCVT_W_RV64:
21506 case RISCVISD::STRICT_FCVT_WU_RV64:
21507 // TODO: As the result is sign-extended, this is conservatively correct.
21508 return 33;
21509 case RISCVISD::VMV_X_S: {
21510 // The number of sign bits of the scalar result is computed by obtaining the
21511 // element type of the input vector operand, subtracting its width from the
21512 // XLEN, and then adding one (sign bit within the element type). If the
21513 // element type is wider than XLen, the least-significant XLEN bits are
21514 // taken.
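// Worked example (editorial note): extracting an i16 element on RV64 gives
// XLen - EltBits + 1 = 64 - 16 + 1 = 49 sign bits, since the element is
// sign-extended to fill the scalar result.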
21515 unsigned XLen = Subtarget.getXLen();
21516 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
21517 if (EltBits <= XLen)
21518 return XLen - EltBits + 1;
21519 break;
21520 }
21521 case ISD::INTRINSIC_W_CHAIN: {
21522 unsigned IntNo = Op.getConstantOperandVal(1);
21523 switch (IntNo) {
21524 default:
21525 break;
21526 case Intrinsic::riscv_masked_atomicrmw_xchg:
21527 case Intrinsic::riscv_masked_atomicrmw_add:
21528 case Intrinsic::riscv_masked_atomicrmw_sub:
21529 case Intrinsic::riscv_masked_atomicrmw_nand:
21530 case Intrinsic::riscv_masked_atomicrmw_max:
21531 case Intrinsic::riscv_masked_atomicrmw_min:
21532 case Intrinsic::riscv_masked_atomicrmw_umax:
21533 case Intrinsic::riscv_masked_atomicrmw_umin:
21534 case Intrinsic::riscv_masked_cmpxchg:
21535 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
21536 // narrow atomic operation. These are implemented using atomic
21537 // operations at the minimum supported atomicrmw/cmpxchg width whose
21538 // result is then sign extended to XLEN. With +A, the minimum width is
21539 // 32 on both RV64 and RV32.
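// Worked example (editorial note): on RV64 the intrinsic result is sign
// extended from bit 31, giving getValueSizeInBits() - 31 = 64 - 31 = 33 sign
// bits; on RV32 the same formula yields 32 - 31 = 1, i.e. only the sign bit
// itself.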
21541 assert(Subtarget.hasStdExtA());
21542 return Op.getValueSizeInBits() - 31;
21543 }
21544 break;
21545 }
21546 }
21547
21548 return 1;
21549}
21550
21551 bool RISCVTargetLowering::SimplifyDemandedBitsForTargetNode(
21552 SDValue Op, const APInt &OriginalDemandedBits,
21553 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
21554 unsigned Depth) const {
21555 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
21556
21557 switch (Op.getOpcode()) {
21558 case RISCVISD::BREV8:
21559 case RISCVISD::ORC_B: {
21560 KnownBits Known2;
21561 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21562 // For BREV8, we need to do BREV8 on the demanded bits.
21563 // For ORC_B, any bit in the output demands all bits from the same byte.
21564 // So we need to do ORC_B on the demanded bits.
21565 APInt DemandedBits =
21566 APInt(BitWidth, computeGREVOrGORC(OriginalDemandedBits.getZExtValue(),
21567 7, IsGORC));
21568 if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits,
21569 OriginalDemandedElts, Known2, TLO, Depth + 1))
21570 return true;
21571
21572 // To compute zeros for ORC_B, we need to invert the value and invert it
21573 // back after. This inverting is harmless for BREV8.
21574 Known.Zero = ~computeGREVOrGORC(~Known2.Zero.getZExtValue(), 7, IsGORC);
21575 Known.One = computeGREVOrGORC(Known2.One.getZExtValue(), 7, IsGORC);
21576 return false;
21577 }
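// Worked example (editorial note): if only bit 9 of an orc.b result is
// demanded, any bit of the source byte containing bit 9 can influence it, so
// the mask passed to the operand above widens 0x200 to 0xff00. For brev8 the
// demanded bit is simply mirrored within its byte, 0x200 becoming 0x4000.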
21578 }
21579
21580 return TargetLowering::SimplifyDemandedBitsForTargetNode(
21581 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
21582}
21583
21584 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
21585 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21586 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
21587
21588 // TODO: Add more target nodes.
21589 switch (Op.getOpcode()) {
21590 case RISCVISD::SLLW:
21591 case RISCVISD::SRAW:
21592 case RISCVISD::SRLW:
21593 case RISCVISD::RORW:
21594 case RISCVISD::ROLW:
21595 // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
21596 // amount is in bounds.
21597 return false;
21598 case RISCVISD::SELECT_CC:
21599 // Integer comparisons cannot create poison.
21600 assert(Op.getOperand(0).getValueType().isInteger() &&
21601 "RISCVISD::SELECT_CC only compares integers");
21602 return false;
21603 }
21604 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
21605 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
21606}
21607
21608const Constant *
21609 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
21610 assert(Ld && "Unexpected null LoadSDNode");
21611 if (!ISD::isNormalLoad(Ld))
21612 return nullptr;
21613
21614 SDValue Ptr = Ld->getBasePtr();
21615
21616 // Only constant pools with no offset are supported.
21617 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
21618 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
21619 if (!CNode || CNode->isMachineConstantPoolEntry() ||
21620 CNode->getOffset() != 0)
21621 return nullptr;
21622
21623 return CNode;
21624 };
21625
21626 // Simple case, LLA.
21627 if (Ptr.getOpcode() == RISCVISD::LLA) {
21628 auto *CNode = GetSupportedConstantPool(Ptr.getOperand(0));
21629 if (!CNode || CNode->getTargetFlags() != 0)
21630 return nullptr;
21631
21632 return CNode->getConstVal();
21633 }
21634
21635 // Look for a HI and ADD_LO pair.
21636 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
21637 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
21638 return nullptr;
21639
21640 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
21641 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
21642
21643 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
21644 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
21645 return nullptr;
21646
21647 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
21648 return nullptr;
21649
21650 return CNodeLo->getConstVal();
21651}
21652
21653 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
21654 MachineBasicBlock *BB) {
21655 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
21656
21657 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
21658 // Should the count have wrapped while it was being read, we need to try
21659 // again.
21660 // For example:
21661 // ```
21662 // read:
21663 // csrrs x3, counterh # load high word of counter
21664 // csrrs x2, counter # load low word of counter
21665 // csrrs x4, counterh # load high word of counter
21666 // bne x3, x4, read # check if high word reads match, otherwise try again
21667 // ```
21668
21669 MachineFunction &MF = *BB->getParent();
21670 const BasicBlock *LLVMBB = BB->getBasicBlock();
21672
21673 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
21674 MF.insert(It, LoopMBB);
21675
21676 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
21677 MF.insert(It, DoneMBB);
21678
21679 // Transfer the remainder of BB and its successor edges to DoneMBB.
21680 DoneMBB->splice(DoneMBB->begin(), BB,
21681 std::next(MachineBasicBlock::iterator(MI)), BB->end());
21683
21684 BB->addSuccessor(LoopMBB);
21685
21687 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
21688 Register LoReg = MI.getOperand(0).getReg();
21689 Register HiReg = MI.getOperand(1).getReg();
21690 int64_t LoCounter = MI.getOperand(2).getImm();
21691 int64_t HiCounter = MI.getOperand(3).getImm();
21692 DebugLoc DL = MI.getDebugLoc();
21693
21695 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
21696 .addImm(HiCounter)
21697 .addReg(RISCV::X0);
21698 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
21699 .addImm(LoCounter)
21700 .addReg(RISCV::X0);
21701 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
21702 .addImm(HiCounter)
21703 .addReg(RISCV::X0);
21704
21705 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
21706 .addReg(HiReg)
21707 .addReg(ReadAgainReg)
21708 .addMBB(LoopMBB);
21709
21710 LoopMBB->addSuccessor(LoopMBB);
21711 LoopMBB->addSuccessor(DoneMBB);
21712
21713 MI.eraseFromParent();
21714
21715 return DoneMBB;
21716}
21717
21720 const RISCVSubtarget &Subtarget) {
21721 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
21722
21723 MachineFunction &MF = *BB->getParent();
21724 DebugLoc DL = MI.getDebugLoc();
21727 Register LoReg = MI.getOperand(0).getReg();
21728 Register HiReg = MI.getOperand(1).getReg();
21729 Register SrcReg = MI.getOperand(2).getReg();
21730
21731 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
21732 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
21733
21734 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
21735 RI, Register());
21737 MachineMemOperand *MMOLo =
21741 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
21742 .addFrameIndex(FI)
21743 .addImm(0)
21744 .addMemOperand(MMOLo);
21745 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
21746 .addFrameIndex(FI)
21747 .addImm(4)
21748 .addMemOperand(MMOHi);
21749 MI.eraseFromParent(); // The pseudo instruction is gone now.
21750 return BB;
21751}
21752
21755 const RISCVSubtarget &Subtarget) {
21756 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
21757 "Unexpected instruction");
21758
21759 MachineFunction &MF = *BB->getParent();
21760 DebugLoc DL = MI.getDebugLoc();
21763 Register DstReg = MI.getOperand(0).getReg();
21764 Register LoReg = MI.getOperand(1).getReg();
21765 Register HiReg = MI.getOperand(2).getReg();
21766
21767 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
21768 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
21769
21771 MachineMemOperand *MMOLo =
21775 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
21776 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
21777 .addFrameIndex(FI)
21778 .addImm(0)
21779 .addMemOperand(MMOLo);
21780 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
21781 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
21782 .addFrameIndex(FI)
21783 .addImm(4)
21784 .addMemOperand(MMOHi);
21785 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
21786 MI.eraseFromParent(); // The pseudo instruction is gone now.
21787 return BB;
21788}
21789
21791 unsigned RelOpcode, unsigned EqOpcode,
21792 const RISCVSubtarget &Subtarget) {
21793 DebugLoc DL = MI.getDebugLoc();
21794 Register DstReg = MI.getOperand(0).getReg();
21795 Register Src1Reg = MI.getOperand(1).getReg();
21796 Register Src2Reg = MI.getOperand(2).getReg();
21798 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
21800
21801 // Save the current FFLAGS.
21802 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
21803
21804 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
21805 .addReg(Src1Reg)
21806 .addReg(Src2Reg);
21809
21810 // Restore the FFLAGS.
21811 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
21812 .addReg(SavedFFlags, RegState::Kill);
21813
21814 // Issue a dummy FEQ opcode to raise an exception for signaling NaNs.
21815 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
21816 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
21817 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
21820
21821 // Erase the pseudoinstruction.
21822 MI.eraseFromParent();
21823 return BB;
21824}
21825
21826static MachineBasicBlock *
21828 MachineBasicBlock *ThisMBB,
21829 const RISCVSubtarget &Subtarget) {
21830 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
21831 // Without this, custom-inserter would have generated:
21832 //
21833 // A
21834 // | \
21835 // | B
21836 // | /
21837 // C
21838 // | \
21839 // | D
21840 // | /
21841 // E
21842 //
21843 // A: X = ...; Y = ...
21844 // B: empty
21845 // C: Z = PHI [X, A], [Y, B]
21846 // D: empty
21847 // E: PHI [X, C], [Z, D]
21848 //
21849 // If we lower both Select_FPRX_ in a single step, we can instead generate:
21850 //
21851 // A
21852 // | \
21853 // | C
21854 // | /|
21855 // |/ |
21856 // | |
21857 // | D
21858 // | /
21859 // E
21860 //
21861 // A: X = ...; Y = ...
21862 // D: empty
21863 // E: PHI [X, A], [X, C], [Y, D]
21864
21865 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
21866 const DebugLoc &DL = First.getDebugLoc();
21867 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
21868 MachineFunction *F = ThisMBB->getParent();
21869 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
21870 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
21871 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
21872 MachineFunction::iterator It = ++ThisMBB->getIterator();
21873 F->insert(It, FirstMBB);
21874 F->insert(It, SecondMBB);
21875 F->insert(It, SinkMBB);
21876
21877 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
21878 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
21880 ThisMBB->end());
21881 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
21882
21883 // Fallthrough block for ThisMBB.
21884 ThisMBB->addSuccessor(FirstMBB);
21885 // Fallthrough block for FirstMBB.
21886 FirstMBB->addSuccessor(SecondMBB);
21887 ThisMBB->addSuccessor(SinkMBB);
21888 FirstMBB->addSuccessor(SinkMBB);
21889 // This is fallthrough.
21890 SecondMBB->addSuccessor(SinkMBB);
21891
21892 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
21893 Register FLHS = First.getOperand(1).getReg();
21894 Register FRHS = First.getOperand(2).getReg();
21895 // Insert appropriate branch.
21896 BuildMI(FirstMBB, DL, TII.get(RISCVCC::getBrCond(FirstCC, First.getOpcode())))
21897 .addReg(FLHS)
21898 .addReg(FRHS)
21899 .addMBB(SinkMBB);
21900
21901 Register SLHS = Second.getOperand(1).getReg();
21902 Register SRHS = Second.getOperand(2).getReg();
21903 Register Op1Reg4 = First.getOperand(4).getReg();
21904 Register Op1Reg5 = First.getOperand(5).getReg();
21905
21906 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
21907 // Insert appropriate branch.
21908 BuildMI(ThisMBB, DL,
21909 TII.get(RISCVCC::getBrCond(SecondCC, Second.getOpcode())))
21910 .addReg(SLHS)
21911 .addReg(SRHS)
21912 .addMBB(SinkMBB);
21913
21914 Register DestReg = Second.getOperand(0).getReg();
21915 Register Op2Reg4 = Second.getOperand(4).getReg();
21916 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
21917 .addReg(Op2Reg4)
21918 .addMBB(ThisMBB)
21919 .addReg(Op1Reg4)
21920 .addMBB(FirstMBB)
21921 .addReg(Op1Reg5)
21922 .addMBB(SecondMBB);
21923
21924 // Now remove the Select_FPRX_s.
21925 First.eraseFromParent();
21926 Second.eraseFromParent();
21927 return SinkMBB;
21928}
21929
21932 const RISCVSubtarget &Subtarget) {
21933 // To "insert" Select_* instructions, we actually have to insert the triangle
21934 // control-flow pattern. The incoming instructions know the destination vreg
21935 // to set, the condition code register to branch on, the true/false values to
21936 // select between, and the condcode to use to select the appropriate branch.
21937 //
21938 // We produce the following control flow:
21939 // HeadMBB
21940 // | \
21941 // | IfFalseMBB
21942 // | /
21943 // TailMBB
21944 //
21945 // When we find a sequence of selects we attempt to optimize their emission
21946 // by sharing the control flow. Currently we only handle cases where we have
21947 // multiple selects with the exact same condition (same LHS, RHS and CC).
21948 // The selects may be interleaved with other instructions if the other
21949 // instructions meet some requirements we deem safe:
21950 // - They are not pseudo instructions.
21951 // - They are debug instructions. Otherwise,
21952 // - They do not have side-effects, do not access memory and their inputs do
21953 // not depend on the results of the select pseudo-instructions.
21954 // The TrueV/FalseV operands of the selects cannot depend on the result of
21955 // previous selects in the sequence.
21956 // These conditions could be further relaxed. See the X86 target for a
21957 // related approach and more information.
21958 //
21959 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
21960 // is checked here and handled by a separate function -
21961 // EmitLoweredCascadedSelect.
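// Illustrative example (editorial note): two selects sharing (LHS, RHS, CC),
//   %a = Select_GPR_Using_CC_GPR %x, %y, cc, %t1, %f1
//   %b = Select_GPR_Using_CC_GPR %x, %y, cc, %t2, %f2
// are lowered with a single conditional branch and become two PHIs in
// TailMBB.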
21962
21963 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
21964 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
21965 MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
21966 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
21967 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
21968 Next->getOperand(5).isKill())
21969 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
21970
21971 Register LHS = MI.getOperand(1).getReg();
21972 Register RHS;
21973 if (MI.getOperand(2).isReg())
21974 RHS = MI.getOperand(2).getReg();
21975 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
21976
21977 SmallVector<MachineInstr *, 4> SelectDebugValues;
21978 SmallSet<Register, 4> SelectDests;
21979 SelectDests.insert(MI.getOperand(0).getReg());
21980
21981 MachineInstr *LastSelectPseudo = &MI;
21982 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
21983 SequenceMBBI != E; ++SequenceMBBI) {
21984 if (SequenceMBBI->isDebugInstr())
21985 continue;
21986 if (RISCVInstrInfo::isSelectPseudo(*SequenceMBBI)) {
21987 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
21988 !SequenceMBBI->getOperand(2).isReg() ||
21989 SequenceMBBI->getOperand(2).getReg() != RHS ||
21990 SequenceMBBI->getOperand(3).getImm() != CC ||
21991 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
21992 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
21993 break;
21994 LastSelectPseudo = &*SequenceMBBI;
21995 SequenceMBBI->collectDebugValues(SelectDebugValues);
21996 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
21997 continue;
21998 }
21999 if (SequenceMBBI->hasUnmodeledSideEffects() ||
22000 SequenceMBBI->mayLoadOrStore() ||
22001 SequenceMBBI->usesCustomInsertionHook())
22002 break;
22003 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
22004 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
22005 }))
22006 break;
22007 }
22008
22009 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22010 const BasicBlock *LLVM_BB = BB->getBasicBlock();
22011 DebugLoc DL = MI.getDebugLoc();
22013
22014 MachineBasicBlock *HeadMBB = BB;
22015 MachineFunction *F = BB->getParent();
22016 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
22017 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
22018
22019 F->insert(I, IfFalseMBB);
22020 F->insert(I, TailMBB);
22021
22022 // Set the call frame size on entry to the new basic blocks.
22023 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
22024 IfFalseMBB->setCallFrameSize(CallFrameSize);
22025 TailMBB->setCallFrameSize(CallFrameSize);
22026
22027 // Transfer debug instructions associated with the selects to TailMBB.
22028 for (MachineInstr *DebugInstr : SelectDebugValues) {
22029 TailMBB->push_back(DebugInstr->removeFromParent());
22030 }
22031
22032 // Move all instructions after the sequence to TailMBB.
22033 TailMBB->splice(TailMBB->end(), HeadMBB,
22034 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
22035 // Update machine-CFG edges by transferring all successors of the current
22036 // block to the new block which will contain the Phi nodes for the selects.
22037 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
22038 // Set the successors for HeadMBB.
22039 HeadMBB->addSuccessor(IfFalseMBB);
22040 HeadMBB->addSuccessor(TailMBB);
22041
22042 // Insert appropriate branch.
22043 if (MI.getOperand(2).isImm())
22044 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22045 .addReg(LHS)
22046 .addImm(MI.getOperand(2).getImm())
22047 .addMBB(TailMBB);
22048 else
22049 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22050 .addReg(LHS)
22051 .addReg(RHS)
22052 .addMBB(TailMBB);
22053
22054 // IfFalseMBB just falls through to TailMBB.
22055 IfFalseMBB->addSuccessor(TailMBB);
22056
22057 // Create PHIs for all of the select pseudo-instructions.
22058 auto SelectMBBI = MI.getIterator();
22059 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
22060 auto InsertionPoint = TailMBB->begin();
22061 while (SelectMBBI != SelectEnd) {
22062 auto Next = std::next(SelectMBBI);
22063 if (RISCVInstrInfo::isSelectPseudo(*SelectMBBI)) {
22064 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
22065 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
22066 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
22067 .addReg(SelectMBBI->getOperand(4).getReg())
22068 .addMBB(HeadMBB)
22069 .addReg(SelectMBBI->getOperand(5).getReg())
22070 .addMBB(IfFalseMBB);
22071 SelectMBBI->eraseFromParent();
22072 }
22073 SelectMBBI = Next;
22074 }
22075
22076 F->getProperties().resetNoPHIs();
22077 return TailMBB;
22078}
22079
22080// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
22081static const RISCV::RISCVMaskedPseudoInfo *
22082lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW) {
22084 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
22085 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
22087 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
22088 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
22089 return Masked;
22090}
22091
22094 unsigned CVTXOpc) {
22095 DebugLoc DL = MI.getDebugLoc();
22096
22098
22100 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22101
22102 // Save the old value of FFLAGS.
22103 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
22104
22105 assert(MI.getNumOperands() == 7);
22106
22107 // Emit a VFCVT_X_F
22108 const TargetRegisterInfo *TRI =
22110 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
22111 Register Tmp = MRI.createVirtualRegister(RC);
22112 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
22113 .add(MI.getOperand(1))
22114 .add(MI.getOperand(2))
22115 .add(MI.getOperand(3))
22116 .add(MachineOperand::CreateImm(7)) // frm = DYN
22117 .add(MI.getOperand(4))
22118 .add(MI.getOperand(5))
22119 .add(MI.getOperand(6))
22120 .add(MachineOperand::CreateReg(RISCV::FRM,
22121 /*IsDef*/ false,
22122 /*IsImp*/ true));
22123
22124 // Emit a VFCVT_F_X
22125 RISCVVType::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
22126 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
22127 // There is no E8 variant for VFCVT_F_X.
22128 assert(Log2SEW >= 4);
22129 unsigned CVTFOpc =
22130 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
22131 ->MaskedPseudo;
22132
22133 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
22134 .add(MI.getOperand(0))
22135 .add(MI.getOperand(1))
22136 .addReg(Tmp)
22137 .add(MI.getOperand(3))
22138 .add(MachineOperand::CreateImm(7)) // frm = DYN
22139 .add(MI.getOperand(4))
22140 .add(MI.getOperand(5))
22141 .add(MI.getOperand(6))
22142 .add(MachineOperand::CreateReg(RISCV::FRM,
22143 /*IsDef*/ false,
22144 /*IsImp*/ true));
22145
22146 // Restore FFLAGS.
22147 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22148 .addReg(SavedFFLAGS, RegState::Kill);
22149
22150 // Erase the pseudoinstruction.
22151 MI.eraseFromParent();
22152 return BB;
22153}
22154
22156 const RISCVSubtarget &Subtarget) {
22157 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
22158 const TargetRegisterClass *RC;
22159 switch (MI.getOpcode()) {
22160 default:
22161 llvm_unreachable("Unexpected opcode");
22162 case RISCV::PseudoFROUND_H:
22163 CmpOpc = RISCV::FLT_H;
22164 F2IOpc = RISCV::FCVT_W_H;
22165 I2FOpc = RISCV::FCVT_H_W;
22166 FSGNJOpc = RISCV::FSGNJ_H;
22167 FSGNJXOpc = RISCV::FSGNJX_H;
22168 RC = &RISCV::FPR16RegClass;
22169 break;
22170 case RISCV::PseudoFROUND_H_INX:
22171 CmpOpc = RISCV::FLT_H_INX;
22172 F2IOpc = RISCV::FCVT_W_H_INX;
22173 I2FOpc = RISCV::FCVT_H_W_INX;
22174 FSGNJOpc = RISCV::FSGNJ_H_INX;
22175 FSGNJXOpc = RISCV::FSGNJX_H_INX;
22176 RC = &RISCV::GPRF16RegClass;
22177 break;
22178 case RISCV::PseudoFROUND_S:
22179 CmpOpc = RISCV::FLT_S;
22180 F2IOpc = RISCV::FCVT_W_S;
22181 I2FOpc = RISCV::FCVT_S_W;
22182 FSGNJOpc = RISCV::FSGNJ_S;
22183 FSGNJXOpc = RISCV::FSGNJX_S;
22184 RC = &RISCV::FPR32RegClass;
22185 break;
22186 case RISCV::PseudoFROUND_S_INX:
22187 CmpOpc = RISCV::FLT_S_INX;
22188 F2IOpc = RISCV::FCVT_W_S_INX;
22189 I2FOpc = RISCV::FCVT_S_W_INX;
22190 FSGNJOpc = RISCV::FSGNJ_S_INX;
22191 FSGNJXOpc = RISCV::FSGNJX_S_INX;
22192 RC = &RISCV::GPRF32RegClass;
22193 break;
22194 case RISCV::PseudoFROUND_D:
22195 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22196 CmpOpc = RISCV::FLT_D;
22197 F2IOpc = RISCV::FCVT_L_D;
22198 I2FOpc = RISCV::FCVT_D_L;
22199 FSGNJOpc = RISCV::FSGNJ_D;
22200 FSGNJXOpc = RISCV::FSGNJX_D;
22201 RC = &RISCV::FPR64RegClass;
22202 break;
22203 case RISCV::PseudoFROUND_D_INX:
22204 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22205 CmpOpc = RISCV::FLT_D_INX;
22206 F2IOpc = RISCV::FCVT_L_D_INX;
22207 I2FOpc = RISCV::FCVT_D_L_INX;
22208 FSGNJOpc = RISCV::FSGNJ_D_INX;
22209 FSGNJXOpc = RISCV::FSGNJX_D_INX;
22210 RC = &RISCV::GPRRegClass;
22211 break;
22212 }
22213
22214 const BasicBlock *BB = MBB->getBasicBlock();
22215 DebugLoc DL = MI.getDebugLoc();
22217
22219 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
22220 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
22221
22222 F->insert(I, CvtMBB);
22223 F->insert(I, DoneMBB);
22224 // Move MI and all following instructions to DoneMBB.
22225 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
22226 MBB->end());
22227 // Update machine-CFG edges by transferring all successors of the current
22228 // block to the new block which will contain the PHI node merging the results.
22230 // Set the successors for MBB.
22231 MBB->addSuccessor(CvtMBB);
22232 MBB->addSuccessor(DoneMBB);
22233
22234 Register DstReg = MI.getOperand(0).getReg();
22235 Register SrcReg = MI.getOperand(1).getReg();
22236 Register MaxReg = MI.getOperand(2).getReg();
22237 int64_t FRM = MI.getOperand(3).getImm();
22238
22239 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22241
22242 Register FabsReg = MRI.createVirtualRegister(RC);
22243 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
22244
22245 // Compare the FP value to the max value.
22246 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22247 auto MIB =
22248 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
22251
22252 // Insert branch.
22253 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
22254 .addReg(CmpReg)
22255 .addReg(RISCV::X0)
22256 .addMBB(DoneMBB);
22257
22258 CvtMBB->addSuccessor(DoneMBB);
22259
22260 // Convert to integer.
22261 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22262 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
22265
22266 // Convert back to FP.
22267 Register I2FReg = MRI.createVirtualRegister(RC);
22268 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
22271
22272 // Restore the sign bit.
22273 Register CvtReg = MRI.createVirtualRegister(RC);
22274 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
22275
22276 // Merge the results.
22277 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
22278 .addReg(SrcReg)
22279 .addMBB(MBB)
22280 .addReg(CvtReg)
22281 .addMBB(CvtMBB);
22282
22283 MI.eraseFromParent();
22284 return DoneMBB;
22285}
22286
22289 MachineBasicBlock *BB) const {
22290 switch (MI.getOpcode()) {
22291 default:
22292 llvm_unreachable("Unexpected instr type to insert");
22293 case RISCV::ReadCounterWide:
22294 assert(!Subtarget.is64Bit() &&
22295 "ReadCounterWide is only to be used on riscv32");
22296 return emitReadCounterWidePseudo(MI, BB);
22297 case RISCV::Select_GPR_Using_CC_GPR:
22298 case RISCV::Select_GPR_Using_CC_SImm5_CV:
22299 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
22300 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
22301 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
22302 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
22303 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
22304 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
22305 case RISCV::Select_FPR16_Using_CC_GPR:
22306 case RISCV::Select_FPR16INX_Using_CC_GPR:
22307 case RISCV::Select_FPR32_Using_CC_GPR:
22308 case RISCV::Select_FPR32INX_Using_CC_GPR:
22309 case RISCV::Select_FPR64_Using_CC_GPR:
22310 case RISCV::Select_FPR64INX_Using_CC_GPR:
22311 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
22312 return emitSelectPseudo(MI, BB, Subtarget);
22313 case RISCV::BuildPairF64Pseudo:
22314 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
22315 case RISCV::SplitF64Pseudo:
22316 return emitSplitF64Pseudo(MI, BB, Subtarget);
22317 case RISCV::PseudoQuietFLE_H:
22318 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
22319 case RISCV::PseudoQuietFLE_H_INX:
22320 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
22321 case RISCV::PseudoQuietFLT_H:
22322 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
22323 case RISCV::PseudoQuietFLT_H_INX:
22324 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
22325 case RISCV::PseudoQuietFLE_S:
22326 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
22327 case RISCV::PseudoQuietFLE_S_INX:
22328 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
22329 case RISCV::PseudoQuietFLT_S:
22330 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
22331 case RISCV::PseudoQuietFLT_S_INX:
22332 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
22333 case RISCV::PseudoQuietFLE_D:
22334 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
22335 case RISCV::PseudoQuietFLE_D_INX:
22336 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
22337 case RISCV::PseudoQuietFLE_D_IN32X:
22338 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
22339 Subtarget);
22340 case RISCV::PseudoQuietFLT_D:
22341 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
22342 case RISCV::PseudoQuietFLT_D_INX:
22343 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
22344 case RISCV::PseudoQuietFLT_D_IN32X:
22345 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
22346 Subtarget);
22347
22348 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
22349 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
22350 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
22351 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
22352 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
22353 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
22354 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
22355 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
22356 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
22357 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
22358 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
22359 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
22360 case RISCV::PseudoFROUND_H:
22361 case RISCV::PseudoFROUND_H_INX:
22362 case RISCV::PseudoFROUND_S:
22363 case RISCV::PseudoFROUND_S_INX:
22364 case RISCV::PseudoFROUND_D:
22365 case RISCV::PseudoFROUND_D_INX:
22366 case RISCV::PseudoFROUND_D_IN32X:
22367 return emitFROUND(MI, BB, Subtarget);
22368 case RISCV::PROBED_STACKALLOC_DYN:
22369 return emitDynamicProbedAlloc(MI, BB);
22370 case TargetOpcode::STATEPOINT:
22371 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
22372 // while the jal call instruction (to which the statepoint is lowered at the
22373 // end) has an implicit def. This def is early-clobber as it will be set at
22374 // the moment of the call and earlier than any use is read.
22375 // Add this implicit dead def here as a workaround.
22376 MI.addOperand(*MI.getMF(),
22378 RISCV::X1, /*isDef*/ true,
22379 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
22380 /*isUndef*/ false, /*isEarlyClobber*/ true));
22381 [[fallthrough]];
22382 case TargetOpcode::STACKMAP:
22383 case TargetOpcode::PATCHPOINT:
22384 if (!Subtarget.is64Bit())
22385 reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only "
22386 "supported on 64-bit targets");
22387 return emitPatchPoint(MI, BB);
22388 }
22389}
22390
22392 SDNode *Node) const {
22393 // If instruction defines FRM operand, conservatively set it as non-dead to
22394 // express data dependency with FRM users and prevent incorrect instruction
22395 // reordering.
22396 if (auto *FRMDef = MI.findRegisterDefOperand(RISCV::FRM, /*TRI=*/nullptr)) {
22397 FRMDef->setIsDead(false);
22398 return;
22399 }
22400 // Add FRM dependency to any instructions with dynamic rounding mode.
22401 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
22402 if (Idx < 0) {
22403 // Vector pseudos have FRM index indicated by TSFlags.
22404 Idx = RISCVII::getFRMOpNum(MI.getDesc());
22405 if (Idx < 0)
22406 return;
22407 }
22408 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
22409 return;
22410 // If the instruction already reads FRM, don't add another read.
22411 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
22412 return;
22413 MI.addOperand(
22414 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
22415}
22416
22417void RISCVTargetLowering::analyzeInputArgs(
22418 MachineFunction &MF, CCState &CCInfo,
22419 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
22420 RISCVCCAssignFn Fn) const {
22421 for (const auto &[Idx, In] : enumerate(Ins)) {
22422 MVT ArgVT = In.VT;
22423 ISD::ArgFlagsTy ArgFlags = In.Flags;
22424
22425 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22426 In.OrigTy)) {
22427 LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
22428 << ArgVT << '\n');
22429 llvm_unreachable(nullptr);
22430 }
22431 }
22432}
22433
22434void RISCVTargetLowering::analyzeOutputArgs(
22435 MachineFunction &MF, CCState &CCInfo,
22436 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
22437 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
22438 for (const auto &[Idx, Out] : enumerate(Outs)) {
22439 MVT ArgVT = Out.VT;
22440 ISD::ArgFlagsTy ArgFlags = Out.Flags;
22441
22442 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22443 Out.OrigTy)) {
22444 LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
22445 << ArgVT << "\n");
22446 llvm_unreachable(nullptr);
22447 }
22448 }
22449}
22450
22451// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
22452// values.
22454 const CCValAssign &VA, const SDLoc &DL,
22455 const RISCVSubtarget &Subtarget) {
22456 if (VA.needsCustom()) {
22457 if (VA.getLocVT().isInteger() &&
22458 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22459 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
22460 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
22461 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
22463 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
22464 llvm_unreachable("Unexpected Custom handling.");
22465 }
22466
22467 switch (VA.getLocInfo()) {
22468 default:
22469 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22470 case CCValAssign::Full:
22471 break;
22472 case CCValAssign::BCvt:
22473 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
22474 break;
22475 }
22476 return Val;
22477}
22478
22479// The caller is responsible for loading the full value if the argument is
22480// passed with CCValAssign::Indirect.
22482 const CCValAssign &VA, const SDLoc &DL,
22483 const ISD::InputArg &In,
22484 const RISCVTargetLowering &TLI) {
22487 EVT LocVT = VA.getLocVT();
22488 SDValue Val;
22489 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
22490 Register VReg = RegInfo.createVirtualRegister(RC);
22491 RegInfo.addLiveIn(VA.getLocReg(), VReg);
22492 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
22493
22494 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
22495 if (In.isOrigArg()) {
22496 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
22497 if (OrigArg->getType()->isIntegerTy()) {
22498 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
22499 // An input zero extended from i31 can also be considered sign extended.
22500 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
22501 (BitWidth < 32 && In.Flags.isZExt())) {
22503 RVFI->addSExt32Register(VReg);
22504 }
22505 }
22506 }
22507
22509 return Val;
22510
22511 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
22512}
22513
22515 const CCValAssign &VA, const SDLoc &DL,
22516 const RISCVSubtarget &Subtarget) {
22517 EVT LocVT = VA.getLocVT();
22518
22519 if (VA.needsCustom()) {
22520 if (LocVT.isInteger() &&
22521 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22522 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
22523 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
22524 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
22525 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
22526 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
22527 llvm_unreachable("Unexpected Custom handling.");
22528 }
22529
22530 switch (VA.getLocInfo()) {
22531 default:
22532 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22533 case CCValAssign::Full:
22534 break;
22535 case CCValAssign::BCvt:
22536 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
22537 break;
22538 }
22539 return Val;
22540}
22541
22542// The caller is responsible for loading the full value if the argument is
22543// passed with CCValAssign::Indirect.
22545 const CCValAssign &VA, const SDLoc &DL) {
22547 MachineFrameInfo &MFI = MF.getFrameInfo();
22548 EVT LocVT = VA.getLocVT();
22549 EVT ValVT = VA.getValVT();
22551 if (VA.getLocInfo() == CCValAssign::Indirect) {
22552 // When the value is a scalable vector, we save the pointer which points to
22553 // the scalable vector value on the stack. The ValVT will be the pointer
22554 // type, instead of the scalable vector type.
22555 ValVT = LocVT;
22556 }
22557 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
22558 /*IsImmutable=*/true);
22559 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22560 SDValue Val;
22561
22563 switch (VA.getLocInfo()) {
22564 default:
22565 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22566 case CCValAssign::Full:
22568 case CCValAssign::BCvt:
22569 break;
22570 }
22571 Val = DAG.getExtLoad(
22572 ExtType, DL, LocVT, Chain, FIN,
22574 return Val;
22575}
22576
22578 const CCValAssign &VA,
22579 const CCValAssign &HiVA,
22580 const SDLoc &DL) {
22581 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
22582 "Unexpected VA");
22584 MachineFrameInfo &MFI = MF.getFrameInfo();
22586
22587 assert(VA.isRegLoc() && "Expected register VA assignment");
22588
22589 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22590 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
22591 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
22592 SDValue Hi;
22593 if (HiVA.isMemLoc()) {
22594 // Second half of f64 is passed on the stack.
22595 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
22596 /*IsImmutable=*/true);
22597 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
22598 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
22600 } else {
22601 // Second half of f64 is passed in another GPR.
22602 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22603 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
22604 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
22605 }
22606 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
22607}
22608
22609// Transform physical registers into virtual registers.
22611 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
22612 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
22613 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
22614
22616
22617 switch (CallConv) {
22618 default:
22619 reportFatalUsageError("Unsupported calling convention");
22620 case CallingConv::C:
22621 case CallingConv::Fast:
22624 case CallingConv::GRAAL:
22626#define CC_VLS_CASE(ABI_VLEN) case CallingConv::RISCV_VLSCall_##ABI_VLEN:
22627 CC_VLS_CASE(32)
22628 CC_VLS_CASE(64)
22629 CC_VLS_CASE(128)
22630 CC_VLS_CASE(256)
22631 CC_VLS_CASE(512)
22632 CC_VLS_CASE(1024)
22633 CC_VLS_CASE(2048)
22634 CC_VLS_CASE(4096)
22635 CC_VLS_CASE(8192)
22636 CC_VLS_CASE(16384)
22637 CC_VLS_CASE(32768)
22638 CC_VLS_CASE(65536)
22639#undef CC_VLS_CASE
22640 break;
22641 case CallingConv::GHC:
22642 if (Subtarget.hasStdExtE())
22643 reportFatalUsageError("GHC calling convention is not supported on RVE!");
22644 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
22645 reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and "
22646 "(Zdinx/D) instruction set extensions");
22647 }
22648
22649 const Function &Func = MF.getFunction();
22650 if (Func.hasFnAttribute("interrupt")) {
22651 if (!Func.arg_empty())
22653 "Functions with the interrupt attribute cannot have arguments!");
22654
22655 StringRef Kind =
22656 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
22657
22658 constexpr StringLiteral SupportedInterruptKinds[] = {
22659 "machine",
22660 "supervisor",
22661 "rnmi",
22662 "qci-nest",
22663 "qci-nonest",
22664 "SiFive-CLIC-preemptible",
22665 "SiFive-CLIC-stack-swap",
22666 "SiFive-CLIC-preemptible-stack-swap",
22667 };
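// For illustration (editorial note): a handler written in C as
//   __attribute__((interrupt("machine"))) void isr(void);
// reaches this point carrying the "interrupt"="machine" function attribute
// checked below.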
22668 if (!llvm::is_contained(SupportedInterruptKinds, Kind))
22670 "Function interrupt attribute argument not supported!");
22671
22672 if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint())
22674 "'qci-*' interrupt kinds require Xqciint extension");
22675
22676 if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
22678 "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");
22679
22680 if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
22681 reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension");
22682 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
22683 if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
22684 reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
22685 "have a frame pointer");
22686 }
22687
22688 EVT PtrVT = getPointerTy(DAG.getDataLayout());
22689 MVT XLenVT = Subtarget.getXLenVT();
22690 unsigned XLenInBytes = Subtarget.getXLen() / 8;
22691 // Used with varargs to accumulate store chains.
22692 std::vector<SDValue> OutChains;
22693
22694 // Assign locations to all of the incoming arguments.
22696 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
22697
22698 if (CallConv == CallingConv::GHC)
22700 else
22701 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
22703 : CC_RISCV);
22704
22705 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
22706 CCValAssign &VA = ArgLocs[i];
22707 SDValue ArgValue;
22708 // Passing f64 on RV32D with a soft float ABI must be handled as a special
22709 // case.
22710 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
22711 assert(VA.needsCustom());
22712 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
22713 } else if (VA.isRegLoc())
22714 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
22715 else
22716 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
22717
22718 if (VA.getLocInfo() == CCValAssign::Indirect) {
22719 // If the original argument was split and passed by reference (e.g. i128
22720 // on RV32), we need to load all parts of it here (using the same
22721 // address). Vectors may be partly split to registers and partly to the
22722 // stack, in which case the base address is partly offset and subsequent
22723 // stores are relative to that.
22724 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
22726 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
22727 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
22728 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
22729 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
22730 CCValAssign &PartVA = ArgLocs[i + 1];
22731 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
22732 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
22733 if (PartVA.getValVT().isScalableVector())
22734 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
22735 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
22736 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
22738 ++i;
22739 ++InsIdx;
22740 }
22741 continue;
22742 }
22743 InVals.push_back(ArgValue);
22744 }
22745
22746 if (any_of(ArgLocs,
22747 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
22748 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
22749
22750 if (IsVarArg) {
22751 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
22752 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
22753 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
22754 MachineFrameInfo &MFI = MF.getFrameInfo();
22755 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22757
22758 // Size of the vararg save area. For now, the varargs save area is either
22759 // zero or large enough to hold a0-a7.
22760 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
22761 int FI;
22762
22763 // If all registers are allocated, then all varargs must be passed on the
22764 // stack and we don't need to save any argregs.
22765 if (VarArgsSaveSize == 0) {
22766 int VaArgOffset = CCInfo.getStackSize();
22767 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
22768 } else {
22769 int VaArgOffset = -VarArgsSaveSize;
22770 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
22771
22772 // If saving an odd number of registers, create an extra stack slot to
22773 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
22774 // offsets to even-numbered registers remain 2*XLEN-aligned.
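// Worked example (editorial note): on ILP32 with three named arguments,
// a0-a2 are consumed, so a3-a7 (five registers, 20 bytes) are saved here;
// the extra 4-byte slot keeps the save area a multiple of 2*XLEN = 8 bytes.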
22775 if (Idx % 2) {
22777 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
22778 VarArgsSaveSize += XLenInBytes;
22779 }
22780
22781 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22782
22783 // Copy the integer registers that may have been used for passing varargs
22784 // to the vararg save area.
22785 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
22786 const Register Reg = RegInfo.createVirtualRegister(RC);
22787 RegInfo.addLiveIn(ArgRegs[I], Reg);
22788 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
22789 SDValue Store = DAG.getStore(
22790 Chain, DL, ArgValue, FIN,
22791 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
22792 OutChains.push_back(Store);
22793 FIN =
22794 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
22795 }
22796 }
22797
22798 // Record the frame index of the first variable argument
22799 // which is a value needed by VASTART.
22800 RVFI->setVarArgsFrameIndex(FI);
22801 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
22802 }
22803
22804 // All stores are grouped in one node to allow the matching between
22805 // the size of Ins and InVals. This only happens for vararg functions.
22806 if (!OutChains.empty()) {
22807 OutChains.push_back(Chain);
22808 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
22809 }
22810
22811 return Chain;
22812}
22813
22814/// isEligibleForTailCallOptimization - Check whether the call is eligible
22815/// for tail call optimization.
22816/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
22817bool RISCVTargetLowering::isEligibleForTailCallOptimization(
22818 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
22819 const SmallVector<CCValAssign, 16> &ArgLocs) const {
22820
22821 auto CalleeCC = CLI.CallConv;
22822 auto &Outs = CLI.Outs;
22823 auto &Caller = MF.getFunction();
22824 auto CallerCC = Caller.getCallingConv();
22825
22826 // Exception-handling functions need a special set of instructions to
22827 // indicate a return to the hardware. Tail-calling another function would
22828 // probably break this.
22829 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
22830 // should be expanded as new function attributes are introduced.
22831 if (Caller.hasFnAttribute("interrupt"))
22832 return false;
22833
22834 // Do not tail call opt if the stack is used to pass parameters.
22835 if (CCInfo.getStackSize() != 0)
22836 return false;
22837
22838 // Do not tail call opt if any parameters need to be passed indirectly.
22839 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
22840 // passed indirectly. So the address of the value will be passed in a
22841 // register, or if not available, then the address is put on the stack. In
22842 // order to pass indirectly, space on the stack often needs to be allocated
22843 // in order to store the value. In this case the CCInfo.getNextStackOffset()
22844 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
22845 // are passed CCValAssign::Indirect.
22846 for (auto &VA : ArgLocs)
22847 if (VA.getLocInfo() == CCValAssign::Indirect)
22848 return false;
22849
22850 // Do not tail call opt if either caller or callee uses struct return
22851 // semantics.
22852 auto IsCallerStructRet = Caller.hasStructRetAttr();
22853 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
22854 if (IsCallerStructRet || IsCalleeStructRet)
22855 return false;
22856
22857 // The callee has to preserve all registers the caller needs to preserve.
22858 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
22859 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
22860 if (CalleeCC != CallerCC) {
22861 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
22862 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
22863 return false;
22864 }
22865
22866 // Byval parameters hand the function a pointer directly into the stack area
22867 // we want to reuse during a tail call. Working around this *is* possible
22868 // but less efficient and uglier in LowerCall.
22869 for (auto &Arg : Outs)
22870 if (Arg.Flags.isByVal())
22871 return false;
22872
22873 return true;
22874}
22875
22877 return DAG.getDataLayout().getPrefTypeAlign(
22878 VT.getTypeForEVT(*DAG.getContext()));
22879}
22880
22881// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
22882// and output parameter nodes.
22884 SmallVectorImpl<SDValue> &InVals) const {
22885 SelectionDAG &DAG = CLI.DAG;
22886 SDLoc &DL = CLI.DL;
22888 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
22890 SDValue Chain = CLI.Chain;
22891 SDValue Callee = CLI.Callee;
22892 bool &IsTailCall = CLI.IsTailCall;
22893 CallingConv::ID CallConv = CLI.CallConv;
22894 bool IsVarArg = CLI.IsVarArg;
22895 EVT PtrVT = getPointerTy(DAG.getDataLayout());
22896 MVT XLenVT = Subtarget.getXLenVT();
22897 const CallBase *CB = CLI.CB;
22898
22901
22902 // Set type id for call site info.
22903 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
22904 CSInfo = MachineFunction::CallSiteInfo(*CB);
22905
22906 // Analyze the operands of the call, assigning locations to each operand.
22908 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
22909
22910 if (CallConv == CallingConv::GHC) {
22911 if (Subtarget.hasStdExtE())
22912 reportFatalUsageError("GHC calling convention is not supported on RVE!");
22913 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
22914 } else
22915 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
22917 : CC_RISCV);
22918
22919 // Check if it's really possible to do a tail call.
22920 if (IsTailCall)
22921 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
22922
22923 if (IsTailCall)
22924 ++NumTailCalls;
22925 else if (CLI.CB && CLI.CB->isMustTailCall())
22926 reportFatalInternalError("failed to perform tail call elimination on a "
22927 "call site marked musttail");
22928
22929 // Get a count of how many bytes are to be pushed on the stack.
22930 unsigned NumBytes = ArgCCInfo.getStackSize();
22931
22932 // Create local copies for byval args
22933 SmallVector<SDValue, 8> ByValArgs;
22934 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
22935 ISD::ArgFlagsTy Flags = Outs[i].Flags;
22936 if (!Flags.isByVal())
22937 continue;
22938
22939 SDValue Arg = OutVals[i];
22940 unsigned Size = Flags.getByValSize();
22941 Align Alignment = Flags.getNonZeroByValAlign();
22942
22943 int FI =
22944 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
22945 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
22946 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
22947
22948 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
22949 /*IsVolatile=*/false,
22950 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
22952 ByValArgs.push_back(FIPtr);
22953 }
22954
22955 if (!IsTailCall)
22956 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
22957
22958 // Copy argument values to their designated locations.
22960 SmallVector<SDValue, 8> MemOpChains;
22961 SDValue StackPtr;
22962 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
22963 ++i, ++OutIdx) {
22964 CCValAssign &VA = ArgLocs[i];
22965 SDValue ArgValue = OutVals[OutIdx];
22966 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
22967
22968 // Handle passing f64 on RV32D with a soft float ABI as a special case.
22969 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
22970 assert(VA.isRegLoc() && "Expected register VA assignment");
22971 assert(VA.needsCustom());
22972 SDValue SplitF64 = DAG.getNode(
22973 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
22974 SDValue Lo = SplitF64.getValue(0);
22975 SDValue Hi = SplitF64.getValue(1);
22976
22977 Register RegLo = VA.getLocReg();
22978 RegsToPass.push_back(std::make_pair(RegLo, Lo));
22979
22980 // Get the CCValAssign for the Hi part.
22981 CCValAssign &HiVA = ArgLocs[++i];
22982
22983 if (HiVA.isMemLoc()) {
22984 // Second half of f64 is passed on the stack.
22985 if (!StackPtr.getNode())
22986 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
22988 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
22989 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
22990 // Emit the store.
22991 MemOpChains.push_back(DAG.getStore(
22992 Chain, DL, Hi, Address,
22994 } else {
22995 // Second half of f64 is passed in another GPR.
22996 Register RegHigh = HiVA.getLocReg();
22997 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
22998 }
22999 continue;
23000 }
23001
23002 // Promote the value if needed.
23003 // For now, only handle fully promoted and indirect arguments.
23004 if (VA.getLocInfo() == CCValAssign::Indirect) {
23005 // Store the argument in a stack slot and pass its address.
23006 Align StackAlign =
23007 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
23008 getPrefTypeAlign(ArgValue.getValueType(), DAG));
23009 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
23010 // If the original argument was split (e.g. i128), we need
23011 // to store the required parts of it here (and pass just one address).
23012 // Vectors may be partly split to registers and partly to the stack, in
23013 // which case the base address is partly offset and subsequent stores are
23014 // relative to that.
23015 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
23016 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
23017 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23018 // Calculate the total size to store. We don't have access to what we're
23019 // actually storing other than performing the loop and collecting the
23020 // info.
23021 SmallVector<std::pair<SDValue, SDValue>> Parts;
23022 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
23023 SDValue PartValue = OutVals[OutIdx + 1];
23024 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
23025 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23026 EVT PartVT = PartValue.getValueType();
23027 if (PartVT.isScalableVector())
23028 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23029 StoredSize += PartVT.getStoreSize();
23030 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
23031 Parts.push_back(std::make_pair(PartValue, Offset));
23032 ++i;
23033 ++OutIdx;
23034 }
23035 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
23036 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
23037 MemOpChains.push_back(
23038 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
23039 MachinePointerInfo::getFixedStack(MF, FI)));
23040 for (const auto &Part : Parts) {
23041 SDValue PartValue = Part.first;
23042 SDValue PartOffset = Part.second;
23043 SDValue Address =
23044 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
23045 MemOpChains.push_back(
23046 DAG.getStore(Chain, DL, PartValue, Address,
23047 MachinePointerInfo::getFixedStack(MF, FI)));
23048 }
23049 ArgValue = SpillSlot;
23050 } else {
23051 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
23052 }
23053
23054 // Use local copy if it is a byval arg.
23055 if (Flags.isByVal())
23056 ArgValue = ByValArgs[j++];
23057
23058 if (VA.isRegLoc()) {
23059 // Queue up the argument copies and emit them at the end.
23060 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
23061 } else {
23062 assert(VA.isMemLoc() && "Argument not register or memory");
23063 assert(!IsTailCall && "Tail call not allowed if stack is used "
23064 "for passing parameters");
23065
23066 // Work out the address of the stack slot.
23067 if (!StackPtr.getNode())
23068 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23069 SDValue Address =
23070 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23071 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
23072
23073 // Emit the store.
23074 MemOpChains.push_back(
23075 DAG.getStore(Chain, DL, ArgValue, Address,
23076 MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
23077 }
23078 }
23079
23080 // Join the stores, which are independent of one another.
23081 if (!MemOpChains.empty())
23082 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
23083
23084 SDValue Glue;
23085
23086 // Build a sequence of copy-to-reg nodes, chained and glued together.
23087 for (auto &Reg : RegsToPass) {
23088 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
23089 Glue = Chain.getValue(1);
23090 }
23091
23092 // Validate that none of the argument registers have been marked as
23093 // reserved, if so report an error. Do the same for the return address if this
23094 // is not a tailcall.
23095 validateCCReservedRegs(RegsToPass, MF);
23096 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
23097 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
23098 MF.getFunction(),
23099 "Return address register required, but has been reserved."});
23100
23101 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
23102 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
23103 // split it and then direct call can be matched by PseudoCALL.
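// For example, a direct call to a global becomes a TargetGlobalAddress with
// MO_CALL, which PseudoCALL later expands to an auipc+jalr pair (or a single
// jal after linker relaxation); the large code model instead materializes the
// full callee address via getLargeGlobalAddress/getLargeExternalSymbol.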
23104 bool CalleeIsLargeExternalSymbol = false;
23105 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
23106 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
23107 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
23108 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23109 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
23110 CalleeIsLargeExternalSymbol = true;
23111 }
23112 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
23113 const GlobalValue *GV = S->getGlobal();
23114 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
23115 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23116 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
23117 }
23118
23119 // The first call operand is the chain and the second is the target address.
23120 SmallVector<SDValue, 8> Ops;
23121 Ops.push_back(Chain);
23122 Ops.push_back(Callee);
23123
23124 // Add argument registers to the end of the list so that they are
23125 // known live into the call.
23126 for (auto &Reg : RegsToPass)
23127 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
23128
23129 // Add a register mask operand representing the call-preserved registers.
23130 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
23131 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
23132 assert(Mask && "Missing call preserved mask for calling convention");
23133 Ops.push_back(DAG.getRegisterMask(Mask));
23134
23135 // Glue the call to the argument copies, if any.
23136 if (Glue.getNode())
23137 Ops.push_back(Glue);
23138
23139 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
23140 "Unexpected CFI type for a direct call");
23141
23142 // Emit the call.
23143 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
23144
23145 // Use a software-guarded branch for large code model non-indirect calls.
23146 // A tail call to an external symbol has a null CLI.CB, so we need another
23147 // way to determine the call-site type.
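// For example, a call lowered with the large code model loads the callee
// address from the constant pool and is emitted as an indirect jump even when
// the source-level call is direct, so with Zicfilp it must use the
// software-guarded call/tail pseudos selected below.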
23148 bool NeedSWGuarded = false;
23149 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
23150 Subtarget.hasStdExtZicfilp() &&
23151 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
23152 NeedSWGuarded = true;
23153
23154 if (IsTailCall) {
23155 MF.getFrameInfo().setHasTailCall();
23156 unsigned CallOpc =
23157 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
23158 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23159 if (CLI.CFIType)
23160 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23161 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
23162 if (MF.getTarget().Options.EmitCallGraphSection && CB &&
23163 CB->isIndirectCall())
23164 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
23165 return Ret;
23166 }
23167
23168 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
23169 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23170 if (CLI.CFIType)
23171 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23172
23173 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
23174 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
23175
23176 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
23177 Glue = Chain.getValue(1);
23178
23179 // Mark the end of the call, which is glued to the call itself.
23180 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
23181 Glue = Chain.getValue(1);
23182
23183 // Assign locations to each value returned by this call.
23184 SmallVector<CCValAssign, 16> RVLocs;
23185 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
23186 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
23187
23188 // Copy all of the result registers out of their specified physreg.
23189 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
23190 auto &VA = RVLocs[i];
23191 // Copy the value out
23192 SDValue RetValue =
23193 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
23194 // Glue the RetValue to the end of the call sequence
23195 Chain = RetValue.getValue(1);
23196 Glue = RetValue.getValue(2);
23197
23198 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23199 assert(VA.needsCustom());
23200 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
23201 MVT::i32, Glue);
23202 Chain = RetValue2.getValue(1);
23203 Glue = RetValue2.getValue(2);
23204 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
23205 RetValue2);
23206 } else
23207 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
23208
23209 InVals.push_back(RetValue);
23210 }
23211
23212 return Chain;
23213}
23214
23215bool RISCVTargetLowering::CanLowerReturn(
23216 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
23217 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
23218 const Type *RetTy) const {
23219 SmallVector<CCValAssign, 16> RVLocs;
23220 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
23221
23222 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23223 MVT VT = Outs[i].VT;
23224 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
23225 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
23226 /*IsRet=*/true, Outs[i].OrigTy))
23227 return false;
23228 }
23229 return true;
23230}
23231
23232SDValue
23233RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
23234 bool IsVarArg,
23235 const SmallVectorImpl<ISD::OutputArg> &Outs,
23236 const SmallVectorImpl<SDValue> &OutVals,
23237 const SDLoc &DL, SelectionDAG &DAG) const {
23238 MachineFunction &MF = DAG.getMachineFunction();
23239 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
23240
23241 // Stores the assignment of the return value to a location.
23242 SmallVector<CCValAssign, 16> RVLocs;
23243
23244 // Info about the registers and stack slot.
23245 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
23246 *DAG.getContext());
23247
23248 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
23249 nullptr, CC_RISCV);
23250
23251 if (CallConv == CallingConv::GHC && !RVLocs.empty())
23252 reportFatalUsageError("GHC functions return void only");
23253
23254 SDValue Glue;
23255 SmallVector<SDValue, 4> RetOps(1, Chain);
23256
23257 // Copy the result values into the output registers.
23258 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
23259 SDValue Val = OutVals[OutIdx];
23260 CCValAssign &VA = RVLocs[i];
23261 assert(VA.isRegLoc() && "Can only return in registers!");
23262
23263 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23264 // Handle returning f64 on RV32D with a soft float ABI.
23265 assert(VA.isRegLoc() && "Expected return via registers");
23266 assert(VA.needsCustom());
23267 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
23268 DAG.getVTList(MVT::i32, MVT::i32), Val);
23269 SDValue Lo = SplitF64.getValue(0);
23270 SDValue Hi = SplitF64.getValue(1);
23271 Register RegLo = VA.getLocReg();
23272 Register RegHi = RVLocs[++i].getLocReg();
23273
23274 if (STI.isRegisterReservedByUser(RegLo) ||
23275 STI.isRegisterReservedByUser(RegHi))
23276 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
23277 MF.getFunction(),
23278 "Return value register required, but has been reserved."});
23279
23280 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
23281 Glue = Chain.getValue(1);
23282 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
23283 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
23284 Glue = Chain.getValue(1);
23285 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
23286 } else {
23287 // Handle a 'normal' return.
23288 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
23289 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
23290
23291 if (STI.isRegisterReservedByUser(VA.getLocReg()))
23292 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
23293 MF.getFunction(),
23294 "Return value register required, but has been reserved."});
23295
23296 // Guarantee that all emitted copies are stuck together.
23297 Glue = Chain.getValue(1);
23298 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
23299 }
23300 }
23301
23302 RetOps[0] = Chain; // Update chain.
23303
23304 // Add the glue node if we have it.
23305 if (Glue.getNode()) {
23306 RetOps.push_back(Glue);
23307 }
23308
23309 if (any_of(RVLocs,
23310 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
23311 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
23312
23313 unsigned RetOpc = RISCVISD::RET_GLUE;
23314 // Interrupt service routines use different return instructions.
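// For example, a handler declared with __attribute__((interrupt("supervisor")))
// returns with SRET, while the default "machine" kind returns with MRET; the
// remaining kinds below select the NMI and Qualcomm Xqciint return variants.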
23315 const Function &Func = DAG.getMachineFunction().getFunction();
23316 if (Func.hasFnAttribute("interrupt")) {
23317 if (!Func.getReturnType()->isVoidTy())
23319 "Functions with the interrupt attribute must have void return type!");
23320
23322 StringRef Kind =
23323 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
23324
23325 if (Kind == "supervisor")
23326 RetOpc = RISCVISD::SRET_GLUE;
23327 else if (Kind == "rnmi") {
23328 assert(STI.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
23329 "Need Smrnmi extension for rnmi");
23330 RetOpc = RISCVISD::MNRET_GLUE;
23331 } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
23332 assert(STI.hasFeature(RISCV::FeatureVendorXqciint) &&
23333 "Need Xqciint for qci-(no)nest");
23334 RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
23335 } else
23336 RetOpc = RISCVISD::MRET_GLUE;
23337 }
23338
23339 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
23340}
23341
23342void RISCVTargetLowering::validateCCReservedRegs(
23343 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
23344 MachineFunction &MF) const {
23345 const Function &F = MF.getFunction();
23346 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
23347
23348 if (llvm::any_of(Regs, [&STI](auto Reg) {
23349 return STI.isRegisterReservedByUser(Reg.first);
23350 }))
23351 F.getContext().diagnose(DiagnosticInfoUnsupported{
23352 F, "Argument register required, but has been reserved."});
23353}
23354
23355// Check if the result of the node is only used as a return value, as
23356// otherwise we can't perform a tail-call.
23357bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
23358 if (N->getNumValues() != 1)
23359 return false;
23360 if (!N->hasNUsesOfValue(1, 0))
23361 return false;
23362
23363 SDNode *Copy = *N->user_begin();
23364
23365 if (Copy->getOpcode() == ISD::BITCAST) {
23366 return isUsedByReturnOnly(Copy, Chain);
23367 }
23368
23369 // TODO: Handle additional opcodes in order to support tail-calling libcalls
23370 // with soft float ABIs.
23371 if (Copy->getOpcode() != ISD::CopyToReg) {
23372 return false;
23373 }
23374
23375 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
23376 // isn't safe to perform a tail call.
23377 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
23378 return false;
23379
23380 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
23381 bool HasRet = false;
23382 for (SDNode *Node : Copy->users()) {
23383 if (Node->getOpcode() != RISCVISD::RET_GLUE)
23384 return false;
23385 HasRet = true;
23386 }
23387 if (!HasRet)
23388 return false;
23389
23390 Chain = Copy->getOperand(0);
23391 return true;
23392}
23393
23394bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
23395 return CI->isTailCall();
23396}
23397
23398/// getConstraintType - Given a constraint letter, return the type of
23399/// constraint it is for this target.
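/// For example, in C inline assembly:
///   asm volatile("addi %0, %1, %2" : "=r"(res) : "r"(src), "I"(42));
/// 'r' is a register-class constraint and 'I' an immediate constraint whose
/// value is validated in LowerAsmOperandForConstraint below.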
23400RISCVTargetLowering::ConstraintType
23401RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
23402 if (Constraint.size() == 1) {
23403 switch (Constraint[0]) {
23404 default:
23405 break;
23406 case 'f':
23407 case 'R':
23408 return C_RegisterClass;
23409 case 'I':
23410 case 'J':
23411 case 'K':
23412 return C_Immediate;
23413 case 'A':
23414 return C_Memory;
23415 case 's':
23416 case 'S': // A symbolic address
23417 return C_Other;
23418 }
23419 } else {
23420 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
23421 return C_RegisterClass;
23422 if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
23423 return C_RegisterClass;
23424 }
23425 return TargetLowering::getConstraintType(Constraint);
23426}
23427
23428std::pair<unsigned, const TargetRegisterClass *>
23429RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
23430 StringRef Constraint,
23431 MVT VT) const {
23432 // First, see if this is a constraint that directly corresponds to a RISC-V
23433 // register class.
23434 if (Constraint.size() == 1) {
23435 switch (Constraint[0]) {
23436 case 'r':
23437 // TODO: Support fixed vectors up to XLen for P extension?
23438 if (VT.isVector())
23439 break;
23440 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23441 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23442 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23443 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23444 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23445 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23446 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23447 case 'f':
23448 if (VT == MVT::f16) {
23449 if (Subtarget.hasStdExtZfhmin())
23450 return std::make_pair(0U, &RISCV::FPR16RegClass);
23451 if (Subtarget.hasStdExtZhinxmin())
23452 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23453 } else if (VT == MVT::f32) {
23454 if (Subtarget.hasStdExtF())
23455 return std::make_pair(0U, &RISCV::FPR32RegClass);
23456 if (Subtarget.hasStdExtZfinx())
23457 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23458 } else if (VT == MVT::f64) {
23459 if (Subtarget.hasStdExtD())
23460 return std::make_pair(0U, &RISCV::FPR64RegClass);
23461 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23462 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23463 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23464 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23465 }
23466 break;
23467 case 'R':
23468 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23469 (VT == MVT::i128 && Subtarget.is64Bit()))
23470 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23471 break;
23472 default:
23473 break;
23474 }
23475 } else if (Constraint == "vr") {
23476 for (const auto *RC :
23477 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
23478 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
23479 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
23480 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
23481 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
23482 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
23483 &RISCV::VRN2M4RegClass}) {
23484 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23485 return std::make_pair(0U, RC);
23486
23487 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23488 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23489 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23490 return std::make_pair(0U, RC);
23491 }
23492 }
23493 } else if (Constraint == "vd") {
23494 for (const auto *RC :
23495 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
23496 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
23497 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
23498 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
23499 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
23500 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
23501 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
23502 &RISCV::VRN2M4NoV0RegClass}) {
23503 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23504 return std::make_pair(0U, RC);
23505
23506 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23507 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23508 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23509 return std::make_pair(0U, RC);
23510 }
23511 }
23512 } else if (Constraint == "vm") {
23513 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
23514 return std::make_pair(0U, &RISCV::VMV0RegClass);
23515
23516 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23517 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23518 // VT here might be coerced to a vector with i8 elements, so we need to
23519 // check whether this is an M1 register instead of checking VMV0RegClass.
23520 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT))
23521 return std::make_pair(0U, &RISCV::VMV0RegClass);
23522 }
23523 } else if (Constraint == "cr") {
23524 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23525 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23526 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23527 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23528 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23529 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23530 if (!VT.isVector())
23531 return std::make_pair(0U, &RISCV::GPRCRegClass);
23532 } else if (Constraint == "cR") {
23533 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23534 (VT == MVT::i128 && Subtarget.is64Bit()))
23535 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23536 } else if (Constraint == "cf") {
23537 if (VT == MVT::f16) {
23538 if (Subtarget.hasStdExtZfhmin())
23539 return std::make_pair(0U, &RISCV::FPR16CRegClass);
23540 if (Subtarget.hasStdExtZhinxmin())
23541 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23542 } else if (VT == MVT::f32) {
23543 if (Subtarget.hasStdExtF())
23544 return std::make_pair(0U, &RISCV::FPR32CRegClass);
23545 if (Subtarget.hasStdExtZfinx())
23546 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23547 } else if (VT == MVT::f64) {
23548 if (Subtarget.hasStdExtD())
23549 return std::make_pair(0U, &RISCV::FPR64CRegClass);
23550 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23551 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23552 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23553 return std::make_pair(0U, &RISCV::GPRCRegClass);
23554 }
23555 }
23556
23557 // Clang will correctly decode the usage of register name aliases into their
23558 // official names. However, other frontends like `rustc` do not. This allows
23559 // users of these frontends to use the ABI names for registers in LLVM-style
23560 // register constraints.
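// For example, the constraint "{a0}" resolves to X10 below just as the
// TableGen name "{x10}" would via the generic lookup at the end of this
// function.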
23561 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
23562 .Case("{zero}", RISCV::X0)
23563 .Case("{ra}", RISCV::X1)
23564 .Case("{sp}", RISCV::X2)
23565 .Case("{gp}", RISCV::X3)
23566 .Case("{tp}", RISCV::X4)
23567 .Case("{t0}", RISCV::X5)
23568 .Case("{t1}", RISCV::X6)
23569 .Case("{t2}", RISCV::X7)
23570 .Cases("{s0}", "{fp}", RISCV::X8)
23571 .Case("{s1}", RISCV::X9)
23572 .Case("{a0}", RISCV::X10)
23573 .Case("{a1}", RISCV::X11)
23574 .Case("{a2}", RISCV::X12)
23575 .Case("{a3}", RISCV::X13)
23576 .Case("{a4}", RISCV::X14)
23577 .Case("{a5}", RISCV::X15)
23578 .Case("{a6}", RISCV::X16)
23579 .Case("{a7}", RISCV::X17)
23580 .Case("{s2}", RISCV::X18)
23581 .Case("{s3}", RISCV::X19)
23582 .Case("{s4}", RISCV::X20)
23583 .Case("{s5}", RISCV::X21)
23584 .Case("{s6}", RISCV::X22)
23585 .Case("{s7}", RISCV::X23)
23586 .Case("{s8}", RISCV::X24)
23587 .Case("{s9}", RISCV::X25)
23588 .Case("{s10}", RISCV::X26)
23589 .Case("{s11}", RISCV::X27)
23590 .Case("{t3}", RISCV::X28)
23591 .Case("{t4}", RISCV::X29)
23592 .Case("{t5}", RISCV::X30)
23593 .Case("{t6}", RISCV::X31)
23594 .Default(RISCV::NoRegister);
23595 if (XRegFromAlias != RISCV::NoRegister)
23596 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
23597
23598 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
23599 // TableGen record rather than the AsmName to choose registers for InlineAsm
23600 // constraints, plus we want to match those names to the widest floating point
23601 // register type available, manually select floating point registers here.
23602 //
23603 // The second case is the ABI name of the register, so that frontends can also
23604 // use the ABI names in register constraint lists.
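// For example, "{fa0}" and "{f10}" both select register 10 of the FP file and
// are returned as F10_D, F10_F, or F10_H depending on the requested value
// type and the available FP extensions.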
23605 if (Subtarget.hasStdExtF()) {
23606 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
23607 .Cases("{f0}", "{ft0}", RISCV::F0_F)
23608 .Cases("{f1}", "{ft1}", RISCV::F1_F)
23609 .Cases("{f2}", "{ft2}", RISCV::F2_F)
23610 .Cases("{f3}", "{ft3}", RISCV::F3_F)
23611 .Cases("{f4}", "{ft4}", RISCV::F4_F)
23612 .Cases("{f5}", "{ft5}", RISCV::F5_F)
23613 .Cases("{f6}", "{ft6}", RISCV::F6_F)
23614 .Cases("{f7}", "{ft7}", RISCV::F7_F)
23615 .Cases("{f8}", "{fs0}", RISCV::F8_F)
23616 .Cases("{f9}", "{fs1}", RISCV::F9_F)
23617 .Cases("{f10}", "{fa0}", RISCV::F10_F)
23618 .Cases("{f11}", "{fa1}", RISCV::F11_F)
23619 .Cases("{f12}", "{fa2}", RISCV::F12_F)
23620 .Cases("{f13}", "{fa3}", RISCV::F13_F)
23621 .Cases("{f14}", "{fa4}", RISCV::F14_F)
23622 .Cases("{f15}", "{fa5}", RISCV::F15_F)
23623 .Cases("{f16}", "{fa6}", RISCV::F16_F)
23624 .Cases("{f17}", "{fa7}", RISCV::F17_F)
23625 .Cases("{f18}", "{fs2}", RISCV::F18_F)
23626 .Cases("{f19}", "{fs3}", RISCV::F19_F)
23627 .Cases("{f20}", "{fs4}", RISCV::F20_F)
23628 .Cases("{f21}", "{fs5}", RISCV::F21_F)
23629 .Cases("{f22}", "{fs6}", RISCV::F22_F)
23630 .Cases("{f23}", "{fs7}", RISCV::F23_F)
23631 .Cases("{f24}", "{fs8}", RISCV::F24_F)
23632 .Cases("{f25}", "{fs9}", RISCV::F25_F)
23633 .Cases("{f26}", "{fs10}", RISCV::F26_F)
23634 .Cases("{f27}", "{fs11}", RISCV::F27_F)
23635 .Cases("{f28}", "{ft8}", RISCV::F28_F)
23636 .Cases("{f29}", "{ft9}", RISCV::F29_F)
23637 .Cases("{f30}", "{ft10}", RISCV::F30_F)
23638 .Cases("{f31}", "{ft11}", RISCV::F31_F)
23639 .Default(RISCV::NoRegister);
23640 if (FReg != RISCV::NoRegister) {
23641 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
23642 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
23643 unsigned RegNo = FReg - RISCV::F0_F;
23644 unsigned DReg = RISCV::F0_D + RegNo;
23645 return std::make_pair(DReg, &RISCV::FPR64RegClass);
23646 }
23647 if (VT == MVT::f32 || VT == MVT::Other)
23648 return std::make_pair(FReg, &RISCV::FPR32RegClass);
23649 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
23650 unsigned RegNo = FReg - RISCV::F0_F;
23651 unsigned HReg = RISCV::F0_H + RegNo;
23652 return std::make_pair(HReg, &RISCV::FPR16RegClass);
23653 }
23654 }
23655 }
23656
23657 if (Subtarget.hasVInstructions()) {
23658 Register VReg = StringSwitch<Register>(Constraint.lower())
23659 .Case("{v0}", RISCV::V0)
23660 .Case("{v1}", RISCV::V1)
23661 .Case("{v2}", RISCV::V2)
23662 .Case("{v3}", RISCV::V3)
23663 .Case("{v4}", RISCV::V4)
23664 .Case("{v5}", RISCV::V5)
23665 .Case("{v6}", RISCV::V6)
23666 .Case("{v7}", RISCV::V7)
23667 .Case("{v8}", RISCV::V8)
23668 .Case("{v9}", RISCV::V9)
23669 .Case("{v10}", RISCV::V10)
23670 .Case("{v11}", RISCV::V11)
23671 .Case("{v12}", RISCV::V12)
23672 .Case("{v13}", RISCV::V13)
23673 .Case("{v14}", RISCV::V14)
23674 .Case("{v15}", RISCV::V15)
23675 .Case("{v16}", RISCV::V16)
23676 .Case("{v17}", RISCV::V17)
23677 .Case("{v18}", RISCV::V18)
23678 .Case("{v19}", RISCV::V19)
23679 .Case("{v20}", RISCV::V20)
23680 .Case("{v21}", RISCV::V21)
23681 .Case("{v22}", RISCV::V22)
23682 .Case("{v23}", RISCV::V23)
23683 .Case("{v24}", RISCV::V24)
23684 .Case("{v25}", RISCV::V25)
23685 .Case("{v26}", RISCV::V26)
23686 .Case("{v27}", RISCV::V27)
23687 .Case("{v28}", RISCV::V28)
23688 .Case("{v29}", RISCV::V29)
23689 .Case("{v30}", RISCV::V30)
23690 .Case("{v31}", RISCV::V31)
23691 .Default(RISCV::NoRegister);
23692 if (VReg != RISCV::NoRegister) {
23693 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
23694 return std::make_pair(VReg, &RISCV::VMRegClass);
23695 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
23696 return std::make_pair(VReg, &RISCV::VRRegClass);
23697 for (const auto *RC :
23698 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
23699 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
23700 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
23701 return std::make_pair(VReg, RC);
23702 }
23703 }
23704 }
23705 }
23706
23707 std::pair<Register, const TargetRegisterClass *> Res =
23708 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
23709
23710 // If we picked one of the Zfinx register classes, remap it to the GPR class.
23711 // FIXME: When Zfinx is supported in CodeGen this will need to take the
23712 // Subtarget into account.
23713 if (Res.second == &RISCV::GPRF16RegClass ||
23714 Res.second == &RISCV::GPRF32RegClass ||
23715 Res.second == &RISCV::GPRPairRegClass)
23716 return std::make_pair(Res.first, &RISCV::GPRRegClass);
23717
23718 return Res;
23719}
23720
23721InlineAsm::ConstraintCode
23722RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
23723 // Currently only support length 1 constraints.
23724 if (ConstraintCode.size() == 1) {
23725 switch (ConstraintCode[0]) {
23726 case 'A':
23727 return InlineAsm::ConstraintCode::A;
23728 default:
23729 break;
23730 }
23731 }
23732
23733 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
23734}
23735
23736void RISCVTargetLowering::LowerAsmOperandForConstraint(
23737 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
23738 SelectionDAG &DAG) const {
23739 // Currently only support length 1 constraints.
23740 if (Constraint.size() == 1) {
23741 switch (Constraint[0]) {
23742 case 'I':
23743 // Validate & create a 12-bit signed immediate operand.
23744 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
23745 uint64_t CVal = C->getSExtValue();
23746 if (isInt<12>(CVal))
23747 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
23748 Subtarget.getXLenVT()));
23749 }
23750 return;
23751 case 'J':
23752 // Validate & create an integer zero operand.
23753 if (isNullConstant(Op))
23754 Ops.push_back(
23755 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
23756 return;
23757 case 'K':
23758 // Validate & create a 5-bit unsigned immediate operand.
23759 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
23760 uint64_t CVal = C->getZExtValue();
23761 if (isUInt<5>(CVal))
23762 Ops.push_back(
23763 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
23764 }
23765 return;
23766 case 'S':
23767 TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
23768 return;
23769 default:
23770 break;
23771 }
23772 }
23773 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
23774}
23775
23776Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
23777 Instruction *Inst,
23778 AtomicOrdering Ord) const {
23779 if (Subtarget.hasStdExtZtso()) {
23780 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
23781 return Builder.CreateFence(Ord);
23782 return nullptr;
23783 }
23784
23785 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
23786 return Builder.CreateFence(Ord);
23787 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
23788 return Builder.CreateFence(AtomicOrdering::Release);
23789 return nullptr;
23790}
23791
23792Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
23793 Instruction *Inst,
23794 AtomicOrdering Ord) const {
23795 if (Subtarget.hasStdExtZtso()) {
23796 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
23797 return Builder.CreateFence(Ord);
23798 return nullptr;
23799 }
23800
23801 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
23802 return Builder.CreateFence(AtomicOrdering::Acquire);
23803 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
23804 Ord == AtomicOrdering::SequentiallyConsistent)
23805 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
23806 return nullptr;
23807}
23808
23809TargetLowering::AtomicExpansionKind
23810RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
23811 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
23812 // point operations can't be used in an lr/sc sequence without breaking the
23813 // forward-progress guarantee.
23814 if (AI->isFloatingPointOperation() ||
23815 AI->getOperation() == AtomicRMWInst::UIncWrap ||
23816 AI->getOperation() == AtomicRMWInst::UDecWrap ||
23817 AI->getOperation() == AtomicRMWInst::USubCond ||
23818 AI->getOperation() == AtomicRMWInst::USubSat)
23819 return AtomicExpansionKind::CmpXChg;
23820
23821 // Don't expand forced atomics, we want to have __sync libcalls instead.
23822 if (Subtarget.hasForcedAtomics())
23823 return AtomicExpansionKind::None;
23824
23825 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
23826 if (AI->getOperation() == AtomicRMWInst::Nand) {
23827 if (Subtarget.hasStdExtZacas() &&
23828 (Size >= 32 || Subtarget.hasStdExtZabha()))
23829 return AtomicExpansionKind::CmpXChg;
23830 if (Size < 32)
23831 return AtomicExpansionKind::MaskedIntrinsic;
23832 }
23833
23834 if (Size < 32 && !Subtarget.hasStdExtZabha())
23835 return AtomicExpansionKind::MaskedIntrinsic;
23836
23837 return AtomicExpansionKind::None;
23838}
23839
23840static Intrinsic::ID
23841getIntrinsicForMaskedAtomicRMWBinOp(AtomicRMWInst::BinOp BinOp) {
23842 switch (BinOp) {
23843 default:
23844 llvm_unreachable("Unexpected AtomicRMW BinOp");
23845 case AtomicRMWInst::Xchg:
23846 return Intrinsic::riscv_masked_atomicrmw_xchg;
23847 case AtomicRMWInst::Add:
23848 return Intrinsic::riscv_masked_atomicrmw_add;
23849 case AtomicRMWInst::Sub:
23850 return Intrinsic::riscv_masked_atomicrmw_sub;
23851 case AtomicRMWInst::Nand:
23852 return Intrinsic::riscv_masked_atomicrmw_nand;
23853 case AtomicRMWInst::Max:
23854 return Intrinsic::riscv_masked_atomicrmw_max;
23855 case AtomicRMWInst::Min:
23856 return Intrinsic::riscv_masked_atomicrmw_min;
23857 case AtomicRMWInst::UMax:
23858 return Intrinsic::riscv_masked_atomicrmw_umax;
23859 case AtomicRMWInst::UMin:
23860 return Intrinsic::riscv_masked_atomicrmw_umin;
23861 }
23862}
23863
23864Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
23865 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
23866 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
23867 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
23868 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
23869 // mask, as this produces better code than the LR/SC loop emitted by
23870 // int_riscv_masked_atomicrmw_xchg.
23871 if (AI->getOperation() == AtomicRMWInst::Xchg &&
23872 isa<ConstantInt>(AI->getValOperand())) {
23873 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
23874 if (CVal->isZero())
23875 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
23876 Builder.CreateNot(Mask, "Inv_Mask"),
23877 AI->getAlign(), Ord);
23878 if (CVal->isMinusOne())
23879 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
23880 AI->getAlign(), Ord);
23881 }
23882
23883 unsigned XLen = Subtarget.getXLen();
23884 Value *Ordering =
23885 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
23886 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
23887 Function *LrwOpScwLoop = Intrinsic::getOrInsertDeclaration(
23888 AI->getModule(),
23889 getIntrinsicForMaskedAtomicRMWBinOp(AI->getOperation()), Tys);
23890
23891 if (XLen == 64) {
23892 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
23893 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
23894 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
23895 }
23896
23897 Value *Result;
23898
23899 // Must pass the shift amount needed to sign extend the loaded value prior
23900 // to performing a signed comparison for min/max. ShiftAmt is the number of
23901 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
23902 // is the number of bits to left+right shift the value in order to
23903 // sign-extend.
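// Worked example: an i8 min/max on RV32 whose byte sits at bit offset 8 has
// ShiftAmt = 8 and ValWidth = 8, so SextShamt = 32 - 8 - 8 = 16, i.e. the
// LR/SC loop shifts the loaded value left by 16 and arithmetic-shifts it
// right by 16 to sign-extend the byte before the signed comparison.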
23904 if (AI->getOperation() == AtomicRMWInst::Min ||
23905 AI->getOperation() == AtomicRMWInst::Max) {
23906 const DataLayout &DL = AI->getDataLayout();
23907 unsigned ValWidth =
23908 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
23909 Value *SextShamt =
23910 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
23911 Result = Builder.CreateCall(LrwOpScwLoop,
23912 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
23913 } else {
23914 Result =
23915 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
23916 }
23917
23918 if (XLen == 64)
23919 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
23920 return Result;
23921}
23922
23923TargetLowering::AtomicExpansionKind
23924RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
23925 AtomicCmpXchgInst *CI) const {
23926 // Don't expand forced atomics, we want to have __sync libcalls instead.
23927 if (Subtarget.hasForcedAtomics())
23928 return AtomicExpansionKind::None;
23929
23930 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
23931 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
23932 (Size == 8 || Size == 16))
23933 return AtomicExpansionKind::MaskedIntrinsic;
23934 return AtomicExpansionKind::None;
23935}
23936
23937Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
23938 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
23939 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
23940 unsigned XLen = Subtarget.getXLen();
23941 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
23942 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg;
23943 if (XLen == 64) {
23944 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
23945 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
23946 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
23947 }
23948 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
23949 Value *Result = Builder.CreateIntrinsic(
23950 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
23951 if (XLen == 64)
23952 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
23953 return Result;
23954}
23955
23956bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
23957 EVT DataVT) const {
23958 // We have indexed loads for all supported EEW types. Indices are always
23959 // zero extended.
23960 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
23961 isTypeLegal(Extend.getValueType()) &&
23962 isTypeLegal(Extend.getOperand(0).getValueType()) &&
23963 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
23964}
23965
23966bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
23967 EVT VT) const {
23968 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
23969 return false;
23970
23971 switch (FPVT.getSimpleVT().SimpleTy) {
23972 case MVT::f16:
23973 return Subtarget.hasStdExtZfhmin();
23974 case MVT::f32:
23975 return Subtarget.hasStdExtF();
23976 case MVT::f64:
23977 return Subtarget.hasStdExtD();
23978 default:
23979 return false;
23980 }
23981}
23982
23983unsigned RISCVTargetLowering::getJumpTableEncoding() const {
23984 // If we are using the small code model, we can reduce size of jump table
23985 // entry to 4 bytes.
23986 if (Subtarget.is64Bit() && !isPositionIndependent() &&
23987 getTargetMachine().getCodeModel() == CodeModel::Small) {
23988 return MachineJumpTableInfo::EK_Custom32;
23989 }
23990 return MachineJumpTableInfo::EK_BlockAddress;
23991}
23992
23993const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
23994 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
23995 unsigned uid, MCContext &Ctx) const {
23996 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
23998 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
23999}
24000
24001bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
24002 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
24003 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
24004 // a power of two as well.
24005 // FIXME: This doesn't work for zve32, but that's already broken
24006 // elsewhere for the same reason.
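// For example, VLEN=128 gives vscale = 128/64 = 2 and VLEN=256 gives 4, so
// vscale is always a power of two under the assertions below.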
24007 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
24008 static_assert(RISCV::RVVBitsPerBlock == 64,
24009 "RVVBitsPerBlock changed, audit needed");
24010 return true;
24011}
24012
24013bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
24014 SDValue &Offset,
24015 ISD::MemIndexedMode &AM,
24016 SelectionDAG &DAG) const {
24017 // Target does not support indexed loads.
24018 if (!Subtarget.hasVendorXTHeadMemIdx())
24019 return false;
24020
24021 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
24022 return false;
24023
24024 Base = Op->getOperand(0);
24025 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
24026 int64_t RHSC = RHS->getSExtValue();
24027 if (Op->getOpcode() == ISD::SUB)
24028 RHSC = -(uint64_t)RHSC;
24029
24030 // The constants that can be encoded in the THeadMemIdx instructions
24031 // are of the form (sign_extend(imm5) << imm2).
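// For example, offsets such as 15, 30 (15<<1), 60 (15<<2), 120 (15<<3) and
// -128 (-16<<3) are encodable, while 17 is not (it is odd and does not fit in
// a signed 5-bit immediate), which is what the loop below checks.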
24032 bool isLegalIndexedOffset = false;
24033 for (unsigned i = 0; i < 4; i++)
24034 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
24035 isLegalIndexedOffset = true;
24036 break;
24037 }
24038
24039 if (!isLegalIndexedOffset)
24040 return false;
24041
24042 Offset = Op->getOperand(1);
24043 return true;
24044 }
24045
24046 return false;
24047}
24048
24049bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
24050 SDValue &Offset,
24051 ISD::MemIndexedMode &AM,
24052 SelectionDAG &DAG) const {
24053 EVT VT;
24054 SDValue Ptr;
24055 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24056 VT = LD->getMemoryVT();
24057 Ptr = LD->getBasePtr();
24058 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24059 VT = ST->getMemoryVT();
24060 Ptr = ST->getBasePtr();
24061 } else
24062 return false;
24063
24064 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
24065 return false;
24066
24067 AM = ISD::PRE_INC;
24068 return true;
24069}
24070
24071bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
24072 SDValue &Base,
24073 SDValue &Offset,
24074 ISD::MemIndexedMode &AM,
24075 SelectionDAG &DAG) const {
24076 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
24077 if (Op->getOpcode() != ISD::ADD)
24078 return false;
24079
24080 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
24081 Base = LS->getBasePtr();
24082 else
24083 return false;
24084
24085 if (Base == Op->getOperand(0))
24086 Offset = Op->getOperand(1);
24087 else if (Base == Op->getOperand(1))
24088 Offset = Op->getOperand(0);
24089 else
24090 return false;
24091
24092 AM = ISD::POST_INC;
24093 return true;
24094 }
24095
24096 EVT VT;
24097 SDValue Ptr;
24098 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24099 VT = LD->getMemoryVT();
24100 Ptr = LD->getBasePtr();
24101 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24102 VT = ST->getMemoryVT();
24103 Ptr = ST->getBasePtr();
24104 } else
24105 return false;
24106
24107 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
24108 return false;
24109 // Post-indexing updates the base, so it's not a valid transform
24110 // if that's not the same as the load's pointer.
24111 if (Ptr != Base)
24112 return false;
24113
24114 AM = ISD::POST_INC;
24115 return true;
24116}
24117
24118bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
24119 EVT VT) const {
24120 EVT SVT = VT.getScalarType();
24121
24122 if (!SVT.isSimple())
24123 return false;
24124
24125 switch (SVT.getSimpleVT().SimpleTy) {
24126 case MVT::f16:
24127 return VT.isVector() ? Subtarget.hasVInstructionsF16()
24128 : Subtarget.hasStdExtZfhOrZhinx();
24129 case MVT::f32:
24130 return Subtarget.hasStdExtFOrZfinx();
24131 case MVT::f64:
24132 return Subtarget.hasStdExtDOrZdinx();
24133 default:
24134 break;
24135 }
24136
24137 return false;
24138}
24139
24140ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
24141 // Zacas will use amocas.w which does not require extension.
24142 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
24143}
24144
24145Register RISCVTargetLowering::getExceptionPointerRegister(
24146 const Constant *PersonalityFn) const {
24147 return RISCV::X10;
24148}
24149
24150Register RISCVTargetLowering::getExceptionSelectorRegister(
24151 const Constant *PersonalityFn) const {
24152 return RISCV::X11;
24153}
24154
24155bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
24156 // Return false to suppress the unnecessary extensions if the LibCall
24157 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
24158 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
24159 Type.getSizeInBits() < Subtarget.getXLen()))
24160 return false;
24161
24162 return true;
24163}
24164
24165bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty,
24166 bool IsSigned) const {
24167 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
24168 return true;
24169
24170 return IsSigned;
24171}
24172
24173bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
24174 SDValue C) const {
24175 // Check integral scalar types.
24176 if (!VT.isScalarInteger())
24177 return false;
24178
24179 // Omit the optimization if the subtarget has the Zmmul extension and the
24180 // data size exceeds XLen.
24181 const bool HasZmmul = Subtarget.hasStdExtZmmul();
24182 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
24183 return false;
24184
24185 auto *ConstNode = cast<ConstantSDNode>(C);
24186 const APInt &Imm = ConstNode->getAPIntValue();
24187
24188 // Don't do this if the Xqciac extension is enabled and the Imm is a simm12.
24189 if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
24190 return false;
24191
24192 // Break the MUL to a SLLI and an ADD/SUB.
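// For example, x*9 becomes (x<<3)+x and x*7 becomes (x<<3)-x.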
24193 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
24194 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
24195 return true;
24196
24197 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
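// For example, x*4098 (4098-2 is a power of two) becomes sh1add(x, x<<12).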
24198 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
24199 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
24200 (Imm - 8).isPowerOf2()))
24201 return true;
24202
24203 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
24204 // a pair of LUI/ADDI.
24205 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
24206 ConstNode->hasOneUse()) {
24207 APInt ImmS = Imm.ashr(Imm.countr_zero());
24208 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
24209 (1 - ImmS).isPowerOf2())
24210 return true;
24211 }
24212
24213 return false;
24214}
24215
24216bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
24217 SDValue ConstNode) const {
24218 // Let the DAGCombiner decide for vectors.
24219 EVT VT = AddNode.getValueType();
24220 if (VT.isVector())
24221 return true;
24222
24223 // Let the DAGCombiner decide for larger types.
24224 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
24225 return true;
24226
24227 // It is worse if c1 is simm12 while c1*c2 is not.
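// For example, in (x + 3) * 2048 the constant 3 fits in a simm12 addi but
// 3*2048 = 6144 does not, so folding the add into the multiply would force an
// extra constant materialization.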
24228 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
24229 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
24230 const APInt &C1 = C1Node->getAPIntValue();
24231 const APInt &C2 = C2Node->getAPIntValue();
24232 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
24233 return false;
24234
24235 // Default to true and let the DAGCombiner decide.
24236 return true;
24237}
24238
24239bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
24240 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
24241 unsigned *Fast) const {
24242 if (!VT.isVector()) {
24243 if (Fast)
24244 *Fast = Subtarget.enableUnalignedScalarMem();
24245 return Subtarget.enableUnalignedScalarMem();
24246 }
24247
24248 // All vector implementations must support element alignment
24249 EVT ElemVT = VT.getVectorElementType();
24250 if (Alignment >= ElemVT.getStoreSize()) {
24251 if (Fast)
24252 *Fast = 1;
24253 return true;
24254 }
24255
24256 // Note: We lower an unmasked unaligned vector access to an equally sized
24257 // e8 element type access. Given this, we effectively support all unmasked
24258 // misaligned accesses. TODO: Work through the codegen implications of
24259 // allowing such accesses to be formed, and considered fast.
24260 if (Fast)
24261 *Fast = Subtarget.enableUnalignedVectorMem();
24262 return Subtarget.enableUnalignedVectorMem();
24263}
24264
24265EVT RISCVTargetLowering::getOptimalMemOpType(
24266 LLVMContext &Context, const MemOp &Op,
24267 const AttributeList &FuncAttributes) const {
24268 if (!Subtarget.hasVInstructions())
24269 return MVT::Other;
24270
24271 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
24272 return MVT::Other;
24273
24274 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
24275 // has an expansion threshold, and we want the number of hardware memory
24276 // operations to correspond roughly to that threshold. LMUL>1 operations
24277 // are typically expanded linearly internally, and thus correspond to more
24278 // than one actual memory operation. Note that store merging and load
24279 // combining will typically form larger LMUL operations from the LMUL1
24280 // operations emitted here, and that's okay because combining isn't
24281 // introducing new memory operations; it's just merging existing ones.
24282 // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
24283 const unsigned MinVLenInBytes =
24284 std::min(Subtarget.getRealMinVLen() / 8, 1024U);
24285
24286 if (Op.size() < MinVLenInBytes)
24287 // TODO: Figure out short memops. For the moment, do the default thing
24288 // which ends up using scalar sequences.
24289 return MVT::Other;
24290
24291 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
24292 // fixed vectors.
24293 if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
24294 return MVT::Other;
24295
24296 // Prefer i8 for non-zero memset as it allows us to avoid materializing
24297 // a large scalar constant and instead use vmv.v.x/i to do the
24298 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
24299 // maximize the chance we can encode the size in the vsetvli.
24300 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
24301 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
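// For example, with VLEN=128 and ELEN=64 the type built below is v2i64 for a
// memcpy (one LMUL1 register per operation) and v16i8 for a non-zero memset,
// so the splat value can be a simple vmv.v.x of the byte.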
24302
24303 // Do we have sufficient alignment for our preferred VT? If not, revert
24304 // to largest size allowed by our alignment criteria.
24305 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
24306 Align RequiredAlign(PreferredVT.getStoreSize());
24307 if (Op.isFixedDstAlign())
24308 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
24309 if (Op.isMemcpy())
24310 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
24311 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
24312 }
24313 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
24314}
24315
24316bool RISCVTargetLowering::splitValueIntoRegisterParts(
24317 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
24318 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
24319 bool IsABIRegCopy = CC.has_value();
24320 EVT ValueVT = Val.getValueType();
24321
24322 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24323 if ((ValueVT == PairVT ||
24324 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24325 ValueVT == MVT::f64)) &&
24326 NumParts == 1 && PartVT == MVT::Untyped) {
24327 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24328 MVT XLenVT = Subtarget.getXLenVT();
24329 if (ValueVT == MVT::f64)
24330 Val = DAG.getBitcast(MVT::i64, Val);
24331 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
24332 // Always creating an MVT::Untyped part, so always use
24333 // RISCVISD::BuildGPRPair.
24334 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
24335 return true;
24336 }
24337
24338 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24339 PartVT == MVT::f32) {
24340 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
24341 // nan, and cast to f32.
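// This yields the NaN-boxed single-precision encoding the FP calling
// convention expects for f16/bf16 values passed in f32 registers: the upper
// 16 bits are all ones.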
24342 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
24343 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
24344 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
24345 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
24346 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24347 Parts[0] = Val;
24348 return true;
24349 }
24350
24351 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
24352#ifndef NDEBUG
24353 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
24354 [[maybe_unused]] unsigned ValLMUL =
24355 divideCeil(ValueVT.getSizeInBits().getKnownMinValue(),
24356 ValNF * RISCV::RVVBitsPerBlock);
24357 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
24358 [[maybe_unused]] unsigned PartLMUL =
24359 divideCeil(PartVT.getSizeInBits().getKnownMinValue(),
24360 PartNF * RISCV::RVVBitsPerBlock);
24361 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
24362 "RISC-V vector tuple type only accepts same register class type "
24363 "TUPLE_INSERT");
24364#endif
24365
24366 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
24367 Val, DAG.getTargetConstant(0, DL, MVT::i32));
24368 Parts[0] = Val;
24369 return true;
24370 }
24371
24372 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24373 PartVT.isScalableVector()) {
24374 if (ValueVT.isFixedLengthVector()) {
24375 ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
24376 Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
24377 }
24378 LLVMContext &Context = *DAG.getContext();
24379 EVT ValueEltVT = ValueVT.getVectorElementType();
24380 EVT PartEltVT = PartVT.getVectorElementType();
24381 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24382 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24383 if (PartVTBitSize % ValueVTBitSize == 0) {
24384 assert(PartVTBitSize >= ValueVTBitSize);
24385 // If the element types are different, bitcast to the same element type of
24386 // PartVT first.
24387 // For example, to copy a <vscale x 1 x i8> value into <vscale x 4 x i16>
24388 // we first need to widen it to <vscale x 8 x i8> with an insert_subvector
24389 // (so the bit sizes line up), and then bitcast the result to
24390 // <vscale x 4 x i16>.
24391 if (ValueEltVT != PartEltVT) {
24392 if (PartVTBitSize > ValueVTBitSize) {
24393 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24394 assert(Count != 0 && "The number of element should not be zero.");
24395 EVT SameEltTypeVT =
24396 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24397 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(SameEltTypeVT), Val, 0);
24398 }
24399 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24400 } else {
24401 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(PartVT), Val, 0);
24402 }
24403 Parts[0] = Val;
24404 return true;
24405 }
24406 }
24407
24408 return false;
24409}
24410
24411SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
24412 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
24413 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
24414 bool IsABIRegCopy = CC.has_value();
24415
24416 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24417 if ((ValueVT == PairVT ||
24418 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24419 ValueVT == MVT::f64)) &&
24420 NumParts == 1 && PartVT == MVT::Untyped) {
24421 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24422 MVT XLenVT = Subtarget.getXLenVT();
24423
24424 SDValue Val = Parts[0];
24425 // Always starting with an MVT::Untyped part, so always use
24426 // RISCVISD::SplitGPRPair
24427 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
24428 Val);
24429 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
24430 Val.getValue(1));
24431 if (ValueVT == MVT::f64)
24432 Val = DAG.getBitcast(ValueVT, Val);
24433 return Val;
24434 }
24435
24436 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24437 PartVT == MVT::f32) {
24438 SDValue Val = Parts[0];
24439
24440 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
24441 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
24442 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
24443 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
24444 return Val;
24445 }
24446
24447 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24448 PartVT.isScalableVector()) {
24449 LLVMContext &Context = *DAG.getContext();
24450 SDValue Val = Parts[0];
24451 EVT ValueEltVT = ValueVT.getVectorElementType();
24452 EVT PartEltVT = PartVT.getVectorElementType();
24453 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24454 if (ValueVT.isFixedLengthVector())
24455 ValueVTBitSize = getContainerForFixedLengthVector(ValueVT.getSimpleVT())
24456 .getSizeInBits()
24457 .getKnownMinValue();
24458 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24459 if (PartVTBitSize % ValueVTBitSize == 0) {
24460 assert(PartVTBitSize >= ValueVTBitSize);
24461 EVT SameEltTypeVT = ValueVT;
24462 // If the element types are different, convert it to the same element type
24463 // of PartVT.
24464 // For example, to copy a <vscale x 1 x i8> value out of a
24465 // <vscale x 4 x i16> part, we first bitcast the part to
24466 // <vscale x 8 x i8> so the element types match, and then
24467 // extract the <vscale x 1 x i8> subvector.
24468 if (ValueEltVT != PartEltVT) {
24469 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24470 assert(Count != 0 && "The number of element should not be zero.");
24471 SameEltTypeVT =
24472 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24473 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
24474 }
24475 if (ValueVT.isFixedLengthVector())
24476 Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget);
24477 else
24478 Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
24479 return Val;
24480 }
24481 }
24482 return SDValue();
24483}
24484
24485bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
24486 // When aggressively optimizing for code size, we prefer to use a div
24487 // instruction, as it is usually smaller than the alternative sequence.
24488 // TODO: Add vector division?
24489 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
24490 return OptSize && !VT.isVector();
24491}
24492
24493bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
24494 // Scalarizing a zero_ext or sign_ext might prevent it from being matched to
24495 // a widening instruction in some situations.
24496 unsigned Opc = N->getOpcode();
24497 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
24498 return false;
24499 return true;
24500}
24501
24502static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
24503 Module *M = IRB.GetInsertBlock()->getModule();
24504 Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration(
24505 M, Intrinsic::thread_pointer, IRB.getPtrTy());
24506 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
24507 IRB.CreateCall(ThreadPointerFunc), Offset);
24508}
24509
24510Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
24511 // Fuchsia provides a fixed TLS slot for the stack cookie.
24512 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
24513 if (Subtarget.isTargetFuchsia())
24514 return useTpOffset(IRB, -0x10);
24515
24516 // Android provides a fixed TLS slot for the stack cookie. See the definition
24517 // of TLS_SLOT_STACK_GUARD in
24518 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
24519 if (Subtarget.isTargetAndroid())
24520 return useTpOffset(IRB, -0x18);
24521
24522 Module *M = IRB.GetInsertBlock()->getModule();
24523
24524 if (M->getStackProtectorGuard() == "tls") {
24525 // Users must specify the offset explicitly
24526 int Offset = M->getStackProtectorGuardOffset();
24527 return useTpOffset(IRB, Offset);
24528 }
24529
24530 return TargetLowering::getIRStackGuard(IRB);
24531}
24532
24533bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
24534 Align Alignment) const {
24535 if (!Subtarget.hasVInstructions())
24536 return false;
24537
24538 // Only support fixed vectors if we know the minimum vector size.
24539 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
24540 return false;
24541
24542 EVT ScalarType = DataType.getScalarType();
24543 if (!isLegalElementTypeForRVV(ScalarType))
24544 return false;
24545
24546 if (!Subtarget.enableUnalignedVectorMem() &&
24547 Alignment < ScalarType.getStoreSize())
24548 return false;
24549
24550 return true;
24551}
24552
24553MachineInstr *
24554RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
24555 MachineBasicBlock::instr_iterator &MBBI,
24556 const TargetInstrInfo *TII) const {
24557 assert(MBBI->isCall() && MBBI->getCFIType() &&
24558 "Invalid call instruction for a KCFI check");
24559 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
24560 MBBI->getOpcode()));
24561
24562 MachineOperand &Target = MBBI->getOperand(0);
24563 Target.setIsRenamable(false);
24564
24565 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
24566 .addReg(Target.getReg())
24567 .addImm(MBBI->getCFIType())
24568 .getInstr();
24569}
24570
24571#define GET_REGISTER_MATCHER
24572#include "RISCVGenAsmMatcher.inc"
24573
24574Register
24575RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
24576 const MachineFunction &MF) const {
24577 Register Reg = MatchRegisterAltName(RegName);
24578 if (!Reg)
24579 Reg = MatchRegisterName(RegName);
24580 if (!Reg)
24581 return Reg;
24582
24583 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
24584 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
24585 reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" +
24586 StringRef(RegName) + "\"."));
24587 return Reg;
24588}
24589
24590MachineMemOperand::Flags
24591RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
24592 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
24593
24594 if (NontemporalInfo == nullptr)
24595 return MachineMemOperand::MONone;
24596
24597 // 1 -> the default value, which works as __RISCV_NTLH_ALL
24598 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
24599 // 3 -> __RISCV_NTLH_ALL_PRIVATE
24600 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
24601 // 5 -> __RISCV_NTLH_ALL
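// After the "level - 2" adjustment below, e.g. __RISCV_NTLH_INNERMOST_PRIVATE
// (2) maps to 0b00 and __RISCV_NTLH_ALL (5) maps to 0b11 in the two
// MONontemporal MMO bits.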
24602 int NontemporalLevel = 5;
24603 const MDNode *RISCVNontemporalInfo =
24604 I.getMetadata("riscv-nontemporal-domain");
24605 if (RISCVNontemporalInfo != nullptr)
24606 NontemporalLevel =
24607 cast<ConstantInt>(
24608 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
24609 ->getValue())
24610 ->getZExtValue();
24611
24612 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
24613 "RISC-V target doesn't support this non-temporal domain.");
24614
24615 NontemporalLevel -= 2;
24616 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
24617 if (NontemporalLevel & 0b1)
24618 Flags |= MONontemporalBit0;
24619 if (NontemporalLevel & 0b10)
24620 Flags |= MONontemporalBit1;
24621
24622 return Flags;
24623}
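// Illustrative sketch (not part of this file): given IR such as
//   store i32 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
//   !0 = !{i32 1}
//   !1 = !{i32 3}    ; __RISCV_NTLH_ALL_PRIVATE
// level 3 becomes 1 after the -2 adjustment above, so only MONontemporalBit0
// is set; level 5 (__RISCV_NTLH_ALL) sets both bits.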
24624
24625MachineMemOperand::Flags
24626RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
24627
24628 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
24629 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
24630 TargetFlags |= (NodeFlags & MONontemporalBit0);
24631 TargetFlags |= (NodeFlags & MONontemporalBit1);
24632 return TargetFlags;
24633}
24634
24635bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
24636 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
24637 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
24638}
24639
24640bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
24641 if (VT.isScalableVector())
24642 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
24643 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
24644 return true;
24645 return Subtarget.hasStdExtZbb() &&
24646 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
24647}
24648
24649unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
24650 ISD::CondCode Cond) const {
24651 return isCtpopFast(VT) ? 0 : 1;
24652}
24653
24654bool RISCVTargetLowering::shouldInsertFencesForAtomic(
24655 const Instruction *I) const {
24656 if (Subtarget.hasStdExtZalasr()) {
24657 if (Subtarget.hasStdExtZtso()) {
24658 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
24659 // should be lowered to plain load/store. The easiest way to do this is
24660 // to say we should insert fences for them, and the fence insertion code
24661 // will just not insert any fences
24662 auto *LI = dyn_cast<LoadInst>(I);
24663 auto *SI = dyn_cast<StoreInst>(I);
24664 if ((LI &&
24665 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
24666 (SI &&
24667 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
24668 // Here, this is a load or store which is seq_cst and needs a .aq or
24669 // .rl, so we shouldn't try to insert fences.
24670 return false;
24671 }
24672 // Here, we are a TSO inst that isn't a seq_cst load/store
24673 return isa<LoadInst>(I) || isa<StoreInst>(I);
24674 }
24675 return false;
24676 }
24677 // Note that one specific case requires fence insertion for an
24678 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
24679 // than this hook due to limitations in the interface here.
24680 return isa<LoadInst>(I) || isa<StoreInst>(I);
24681}
24682
24683bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
24684
24685 // GISel support is in progress or complete for these opcodes.
24686 unsigned Op = Inst.getOpcode();
24687 if (Op == Instruction::Add || Op == Instruction::Sub ||
24688 Op == Instruction::And || Op == Instruction::Or ||
24689 Op == Instruction::Xor || Op == Instruction::InsertElement ||
24690 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
24691 Op == Instruction::Freeze || Op == Instruction::Store)
24692 return false;
24693
24694 if (Inst.getType()->isScalableTy())
24695 return true;
24696
24697 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
24698 if (Inst.getOperand(i)->getType()->isScalableTy() &&
24699 !isa<ReturnInst>(&Inst))
24700 return true;
24701
24702 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
24703 if (AI->getAllocatedType()->isScalableTy())
24704 return true;
24705 }
24706
24707 return false;
24708}
24709
24710SDValue
24711RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
24712 SelectionDAG &DAG,
24713 SmallVectorImpl<SDNode *> &Created) const {
24714 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
24715 if (isIntDivCheap(N->getValueType(0), Attr))
24716 return SDValue(N, 0); // Lower SDIV as SDIV
24717
24718 // Only perform this transform if short forward branch opt is supported.
24719 if (!Subtarget.hasShortForwardBranchOpt())
24720 return SDValue();
24721 EVT VT = N->getValueType(0);
24722 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
24723 return SDValue();
24724
24725 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
24726 if (Divisor.sgt(2048) || Divisor.slt(-2048))
24727 return SDValue();
24728 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
24729}
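// Illustrative sketch (not part of this file): buildSDIVPow2WithCMov expands,
// for example, x / 8 as
//   add = x + 7                  ; 2**3 - 1, a single addi since 7 < 2048
//   sel = (x < 0) ? add : x      ; conditional move via short forward branch
//   res = sel >> 3               ; arithmetic shift right
// which is why divisors for which 2**k - 1 would not fit a 12-bit immediate
// are rejected above.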
24730
24731bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
24732 EVT VT, const APInt &AndMask) const {
24733 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
24734 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
24735 return true;
24736}
24737
24738unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
24739 return Subtarget.getMinimumJumpTableEntries();
24740}
24741
24742SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
24743                                                    SDValue Value, SDValue Addr,
24744 int JTI,
24745 SelectionDAG &DAG) const {
24746 if (Subtarget.hasStdExtZicfilp()) {
24747 // When Zicfilp is enabled, we need to use a software-guarded branch for
24748 // the jump table branch.
24749 SDValue Chain = Value;
24750 // Jump table debug info is only needed if CodeView is enabled.
24751   if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF())
24752 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
24753 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
24754 }
24755 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
24756}
24757
24758// If an output pattern produces multiple instructions, tablegen may pick an
24759// arbitrary type from an instruction's destination register class to use for the
24760// VT of that MachineSDNode. This VT may be used to look up the representative
24761// register class. If the type isn't legal, the default implementation will
24762// not find a register class.
24763//
24764// Some integer types smaller than XLen are listed in the GPR register class to
24765// support isel patterns for GISel, but are not legal in SelectionDAG. The
24766// arbitrary type tablegen picks may be one of these smaller types.
24767//
24768// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
24769// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
24770std::pair<const TargetRegisterClass *, uint8_t>
24771RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
24772 MVT VT) const {
24773 switch (VT.SimpleTy) {
24774 default:
24775 break;
24776 case MVT::i8:
24777 case MVT::i16:
24778 case MVT::i32:
24779   return std::make_pair(&RISCV::GPRRegClass, 1);
24780 case MVT::bf16:
24781 case MVT::f16:
24782   return std::make_pair(&RISCV::FPR16RegClass, 1);
24783 }
24784
24785 return TargetLowering::findRepresentativeClass(TRI, VT);
24786}
24787
24788namespace llvm::RISCVVIntrinsicsTable {
24789
24790#define GET_RISCVVIntrinsicsTable_IMPL
24791#include "RISCVGenSearchableTables.inc"
24792
24793} // namespace llvm::RISCVVIntrinsicsTable
24794
24795bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
24796
24797 // If the function specifically requests inline stack probes, emit them.
24798 if (MF.getFunction().hasFnAttribute("probe-stack"))
24799 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
24800 "inline-asm";
24801
24802 return false;
24803}
24804
24805unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
24806 Align StackAlign) const {
24807 // The default stack probe size is 4096 if the function has no
24808 // stack-probe-size attribute.
24809 const Function &Fn = MF.getFunction();
24810 unsigned StackProbeSize =
24811 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
24812 // Round down to the stack alignment.
24813 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
24814 return StackProbeSize ? StackProbeSize : StackAlign.value();
24815}
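// Illustrative sketch (not part of this file): with "stack-probe-size"=5000
// and a 16-byte stack alignment, alignDown(5000, 16) yields 4992, so probes
// are emitted every 4992 bytes; a requested size smaller than the alignment
// rounds down to 0 and falls back to the alignment itself.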
24816
24817SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
24818 SelectionDAG &DAG) const {
24819 MachineFunction &MF = DAG.getMachineFunction();
24820 if (!hasInlineStackProbe(MF))
24821 return SDValue();
24822
24823 MVT XLenVT = Subtarget.getXLenVT();
24824 // Get the inputs.
24825 SDValue Chain = Op.getOperand(0);
24826 SDValue Size = Op.getOperand(1);
24827
24828 MaybeAlign Align =
24829 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
24830 SDLoc dl(Op);
24831 EVT VT = Op.getValueType();
24832
24833 // Construct the new SP value in a GPR.
24834 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
24835 Chain = SP.getValue(1);
24836 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
24837 if (Align)
24838 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
24839 DAG.getSignedConstant(-Align->value(), dl, VT));
24840
24841 // Set the real SP to the new value with a probing loop.
24842 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
24843 return DAG.getMergeValues({SP, Chain}, dl);
24844}
24845
24846MachineBasicBlock *
24847RISCVTargetLowering::emitDynamicProbedAlloc(MachineInstr &MI,
24848 MachineBasicBlock *MBB) const {
24849 MachineFunction &MF = *MBB->getParent();
24850 MachineBasicBlock::iterator MBBI = MI.getIterator();
24851 DebugLoc DL = MBB->findDebugLoc(MBBI);
24852 Register TargetReg = MI.getOperand(0).getReg();
24853
24854 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
24855 bool IsRV64 = Subtarget.is64Bit();
24856 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
24857 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
24858 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
24859
24860 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
24861 MachineBasicBlock *LoopTestMBB =
24862     MF.CreateMachineBasicBlock(MBB->getBasicBlock());
24863 MF.insert(MBBInsertPoint, LoopTestMBB);
24864 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
24865 MF.insert(MBBInsertPoint, ExitMBB);
24866 Register SPReg = RISCV::X2;
24867 Register ScratchReg =
24868 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
24869
24870 // ScratchReg = ProbeSize
24871 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
24872
24873 // LoopTest:
24874 // SUB SP, SP, ProbeSize
24875 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
24876 .addReg(SPReg)
24877 .addReg(ScratchReg);
24878
24879 // s[d|w] zero, 0(sp)
24880 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
24881 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
24882 .addReg(RISCV::X0)
24883 .addReg(SPReg)
24884 .addImm(0);
24885
24886 // BLT TargetReg, SP, LoopTest
24887 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
24888 .addReg(TargetReg)
24889 .addReg(SPReg)
24890 .addMBB(LoopTestMBB);
24891
24892 // Adjust with: MV SP, TargetReg.
24893 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
24894 .addReg(TargetReg)
24895 .addImm(0);
24896
24897 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
24898 ExitMBB->transferSuccessorsAndUpdatePHIs(MBB);
24899
24900 LoopTestMBB->addSuccessor(ExitMBB);
24901 LoopTestMBB->addSuccessor(LoopTestMBB);
24902 MBB->addSuccessor(LoopTestMBB);
24903
24904 MI.eraseFromParent();
24905 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
24906 return ExitMBB->begin()->getParent();
24907}
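// Illustrative sketch (not part of this file): for a 4096-byte probe size the
// loop built above expands to roughly (the virtual scratch register shown as t0)
//   li   t0, 4096
// .LoopTest:
//   sub  sp, sp, t0
//   sd   zero, 0(sp)              ; sw on RV32
//   blt  <TargetReg>, sp, .LoopTest
//   mv   sp, <TargetReg>          ; emitted as ADDI sp, <TargetReg>, 0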
24908
24909ArrayRef<MCPhysReg> RISCVTargetLowering::getRoundingControlRegisters() const {
24910 if (Subtarget.hasStdExtFOrZfinx()) {
24911 static const MCPhysReg RCRegs[] = {RISCV::FRM, RISCV::FFLAGS};
24912 return RCRegs;
24913 }
24914 return {};
24915}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Definition: CostModel.cpp:74
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define CC_VLS_CASE(ABIVlen)
#define RegName(no)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
mir Rename Register Operands
Register const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr MCPhysReg SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static bool isLowSourceShuffle(ArrayRef< int > Mask, int Span)
Is this mask only using elements from the first span of the input?
static bool isZipOdd(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipodd idiom.
static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2)
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static bool isLegalBitRotate(ArrayRef< int > Mask, EVT VT, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static SDValue simplifyOp_VL(SDNode *N)
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isAlternating(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned Factor, bool RequiredPolarity)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< bool > ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, cl::desc("Swap add and addi in cases where the add may " "be combined with a shift"), cl::init(true))
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue reduceANDOfAtomicLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool isLocalRepeatingShuffle(ArrayRef< int > Mask, int Span)
Is this mask local (i.e.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static bool isSpanSplatShuffle(ArrayRef< int > Mask, int Span)
Return true for a mask which performs an arbitrary shuffle within the first span, and then repeats th...
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
const uint64_t ModeMask64
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, SelectionDAG &DAG)
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static bool isValidVisniInsertExtractIndex(SDValue Idx)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B, SelectionDAG &DAG)
Given fixed length vectors A and B with equal element types, but possibly different number of element...
const uint32_t ModeMask32
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue lowerVectorXRINT_XROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static bool isElementRotate(const std::array< std::pair< int, int >, 2 > &SrcInfo, unsigned NumElts)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match a single source shuffle which is an identity except that some particular element is repeated.
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getQDOTXResultType(MVT OpVT)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue foldConcatVector(SDValue V1, SDValue V2)
If concat_vector(V1,V2) could be folded away to some existing vector source, return it.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isZipEven(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipeven idiom.
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
#define ROTR(x, n)
Definition: SHA256.cpp:32
static bool isCommutative(Instruction *I, Value *ValWithUses)
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1347
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1332
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1109
Class for arbitrary precision integers.
Definition: APInt.h:78
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1385
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1644
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1396
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1531
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:397
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1736
bool isMask(unsigned numBits) const
Definition: APInt.h:488
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:334
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:985
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1388
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:482
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1562
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:24
an instruction to allocate memory on the stack
Definition: Instructions.h:64
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:191
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:506
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:709
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:843
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:721
@ Add
*p = old + v
Definition: Instructions.h:725
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:777
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:739
@ Or
*p = old | v
Definition: Instructions.h:733
@ Sub
*p = old - v
Definition: Instructions.h:727
@ And
*p = old & v
Definition: Instructions.h:729
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:781
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:769
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:737
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:743
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:741
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:773
@ Nand
*p = ~(old & v)
Definition: Instructions.h:731
bool isFloatingPointOperation() const
Definition: Instructions.h:898
BinOp getOperation() const
Definition: Instructions.h:819
Value * getValOperand()
Definition: Instructions.h:890
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:863
This is an SDNode representing atomic operations.
const SDValue & getBasePtr() const
LLVM_ABI bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:400
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:248
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:214
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:163
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition: DataLayout.h:390
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:846
A debug info location.
Definition: DebugLoc.h:124
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:245
unsigned size() const
Definition: DenseMap.h:120
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:221
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:315
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:312
Tagged union holding either a T or a Error.
Definition: Error.h:485
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:359
Argument * getArg(unsigned i) const
Definition: Function.h:884
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:727
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:39
bool isDSOLocal() const
Definition: GlobalValue.h:307
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:531
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1936
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:575
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1891
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2094
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:562
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:201
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:567
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:834
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1805
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1420
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:533
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2508
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1911
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2068
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:605
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:552
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:78
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:312
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:86
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:188
MCContext & getContext() const
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition: MCExpr.h:214
Metadata node.
Definition: Metadata.h:1077
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1445
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:247
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:317
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
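A hedged sketch of how the MachineFrameInfo entries above are commonly combined to reserve a spill slot; the helper name and the 8-byte size/alignment are illustrative only:

  static int createSpillSlot(llvm::MachineFunction &MF) {
    llvm::MachineFrameInfo &MFI = MF.getFrameInfo();
    // Returns the frame index of a new statically sized stack object.
    return MFI.CreateStackObject(/*Size=*/8, llvm::Align(8),
                                 /*isSpillSlot=*/true);
  }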
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
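The MachineInstrBuilder methods above are chained onto BuildMI when custom-inserted code is emitted. A minimal sketch of that pattern, assuming the RISC-V backend's usual headers; the helper itself is hypothetical and the registers are placeholders:

  static void emitAddImmediate(llvm::MachineBasicBlock &MBB,
                               llvm::MachineBasicBlock::iterator InsertPt,
                               const llvm::DebugLoc &DL,
                               const llvm::TargetInstrInfo &TII,
                               llvm::Register Dst, llvm::Register Src,
                               int64_t Imm) {
    // Dst = Src + Imm, inserted before InsertPt.
    llvm::BuildMI(MBB, InsertPt, DL, TII.get(llvm::RISCV::ADDI), Dst)
        .addReg(Src)
        .addImm(Imm);
  }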
Representation of each machine instruction.
Definition: MachineInstr.h:72
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:587
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:416
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
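A small sketch combining the MachineMemOperand flag and alignment accessors above into a predicate; the helper and the alignment threshold are illustrative:

  static bool isPlainLoad(const llvm::MachineMemOperand &MMO,
                          uint64_t MinAlignBytes) {
    // A non-volatile load whose base address has at least the requested alignment.
    return (MMO.getFlags() & llvm::MachineMemOperand::MOLoad) &&
           !(MMO.getFlags() & llvm::MachineMemOperand::MOVolatile) &&
           MMO.getBaseAlign().value() >= MinAlignBytes;
  }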
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
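The register-info entries above typically appear together when an argument register is made live-in and copied into a virtual register. A hedged sketch, assuming the RISC-V backend's usual headers; X10 and the GPR register class are used purely as an example:

  static llvm::Register copyLiveInToVReg(llvm::MachineFunction &MF,
                                         llvm::MachineBasicBlock &MBB,
                                         const llvm::DebugLoc &DL,
                                         const llvm::TargetInstrInfo &TII) {
    llvm::MachineRegisterInfo &MRI = MF.getRegInfo();
    llvm::Register VReg = MRI.createVirtualRegister(&llvm::RISCV::GPRRegClass);
    MRI.addLiveIn(llvm::RISCV::X10, VReg); // record the physreg/vreg pairing
    MBB.addLiveIn(llvm::RISCV::X10);       // and mark it live into the block
    llvm::BuildMI(MBB, MBB.begin(), DL,
                  TII.get(llvm::TargetOpcode::COPY), VReg)
        .addReg(llvm::RISCV::X10);
    return VReg;
  }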
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
Definition: Module.cpp:352
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
bool hasStdExtDOrZdinx() const
unsigned getMaxLoadsPerMemcmp(bool OptSize) const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
unsigned getMaxStoresPerMemset(bool OptSize) const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
RISCVVRGatherCostModelEnum getVRGatherCostModel() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
unsigned getMaxStoresPerMemmove(bool OptSize) const
bool hasVInstructionsF16Minimal() const
unsigned getMaxGluedStoresPerMemcpy() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
const RISCVFrameLowering * getFrameLowering() const override
unsigned getFLen() const
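A hedged sketch of how the subtarget queries above compose into a feature check; the helper name and the 128-bit VLEN threshold are invented for illustration:

  static bool canUseF32Vectors(const llvm::RISCVSubtarget &ST) {
    return ST.hasVInstructionsF32() && !ST.isSoftFPABI() &&
           ST.getRealMinVLen() >= 128;
  }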
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y --> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether a given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
static unsigned getRegClassIDForLMUL(RISCVVType::VLMUL LMul)
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return true if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Match a mask which "spreads" the leading elements of a vector evenly across the result.
static RISCVVType::VLMUL getLMUL(MVT VT)
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
std::optional< APInt > bitcastToAPInt() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
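A minimal sketch of the SDValue/SDNode accessors above: recognizing a single-use ADD whose second operand is a constant. The helper is hypothetical and assumes the usual SelectionDAG headers:

  static bool isSingleUseAddWithConstant(llvm::SDValue Op) {
    if (Op.getOpcode() != llvm::ISD::ADD || !Op.hasOneUse())
      return false;
    return llvm::isa<llvm::ConstantSDNode>(Op.getOperand(1).getNode());
  }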
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:229
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
Definition: SelectionDAG.h:941
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:813
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
Definition: SelectionDAG.h:963
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
Definition: SelectionDAG.h:956
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:504
LLVM_ABI SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:398
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:868
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:839
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:498
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
Definition: SelectionDAG.h:506
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:719
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:902
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
Definition: SelectionDAG.h:949
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:499
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:707
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:808
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:493
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:885
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:511
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:777
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:581
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:918
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
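A short sketch of the SelectionDAG builders listed above: add one to a scalar and splat the result into a BUILD_VECTOR. The types and the helper name are illustrative, not taken from this file:

  static llvm::SDValue splatOfXPlusOne(llvm::SelectionDAG &DAG,
                                       const llvm::SDLoc &DL,
                                       llvm::SDValue X, llvm::EVT VecVT) {
    llvm::SDValue One = DAG.getConstant(1, DL, X.getValueType());
    llvm::SDValue Sum =
        DAG.getNode(llvm::ISD::ADD, DL, X.getValueType(), X, One);
    return DAG.getSplatBuildVector(VecVT, DL, Sum);
  }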
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static LLVM_ABI bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static LLVM_ABI bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
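A small sketch combining the static shuffle-mask classifiers above, assuming they are the ShuffleVectorInst helpers; the predicate itself is hypothetical:

  #include "llvm/IR/Instructions.h"

  static bool isSingleSourceReverse(llvm::ArrayRef<int> Mask, int NumSrcElts) {
    return llvm::ShuffleVectorInst::isSingleSourceMask(Mask, NumSrcElts) &&
           llvm::ShuffleVectorInst::isReverseMask(Mask, NumSrcElts);
  }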
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:541
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void reserve(size_type N)
Definition: SmallVector.h:664
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:684
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:806
void push_back(const T &Elt)
Definition: SmallVector.h:414
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:287
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
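A toy example of the SmallVector/SmallSet operations above: collect each value once, in first-seen order (the container sizes are arbitrary):

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SmallSet.h"
  #include "llvm/ADT/SmallVector.h"

  static llvm::SmallVector<int, 8> uniqueInOrder(llvm::ArrayRef<int> In) {
    llvm::SmallSet<int, 8> Seen;
    llvm::SmallVector<int, 8> Out;
    Out.reserve(In.size());
    for (int V : In)
      if (Seen.insert(V).second) // insert() reports whether V was new
        Out.push_back(V);
    return Out;
  }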
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:862
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:154
LLVM_ABI std::string lower() const
Definition: StringRef.cpp:112
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:68
R Default(T Value)
Definition: StringSwitch.h:177
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:87
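A minimal StringSwitch example mirroring the Case/Cases/Default entries above; the strings and return codes are placeholders, not constraints used by this file:

  #include "llvm/ADT/StringSwitch.h"

  static int classifyLetter(llvm::StringRef S) {
    return llvm::StringSwitch<int>(S)
        .Case("r", 0)
        .Cases("f", "d", 1)
        .Default(-1);
  }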
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
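A sketch, again for a hypothetical target constructor, combining this hook with setTruncStoreAction above: when there is no native f16 arithmetic, extending loads from f16 and truncating stores to f16 are marked for expansion.
  for (MVT VT : {MVT::f32, MVT::f64}) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
    setTruncStoreAction(VT, MVT::f16, Expand);
  }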
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
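Sketch of a typical call site inside a custom lowering routine; DAG, DL, Chain, LHS and RHS are assumed to be in scope, and the libcall chosen (RTLIB::ADD_F32) is only an example.
  TargetLowering::MakeLibCallOptions CallOptions;
  SDValue Ops[] = {LHS, RHS};
  auto [Result, OutChain] =
      makeLibCall(DAG, RTLIB::ADD_F32, MVT::f32, Ops, CallOptions, DL, Chain);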
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetOptions Options
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:771
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:346
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:349
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:261
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition: Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
LLVM_ABI Type * getStructElementType(unsigned N) const
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:352
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:35
Value * getOperand(unsigned i) const
Definition: User.h:232
unsigned getNumOperands() const
Definition: User.h:254
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:184
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:203
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:233
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:259
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169
constexpr bool isZero() const
Definition: TypeSize.h:157
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:255
self_iterator getIterator()
Definition: ilist_node.h:134
#define INT64_MIN
Definition: DataTypes.h:74
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:801
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1236
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1232
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:45
@ SET_FPENV
Sets the current floating-point environment.
Definition: ISDOpcodes.h:1108
@ PARTIAL_REDUCE_SMLA
Definition: ISDOpcodes.h:1510
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1458
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1401
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1491
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:270
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1379
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:765
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1265
@ ConstantFP
Definition: ISDOpcodes.h:87
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1381
@ STRICT_FCEIL
Definition: ISDOpcodes.h:454
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1382
@ RESET_FPENV
Set floating-point environment to default state.
Definition: ISDOpcodes.h:1112
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1141
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1131
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:215
@ GlobalAddress
Definition: ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:571
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1476
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:410
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1480
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:738
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1338
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1343
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
Definition: ISDOpcodes.h:1135
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:275
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1490
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:505
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:985
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1377
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:975
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:249
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1378
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1309
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:1018
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1568
@ GlobalTLSAddress
Definition: ISDOpcodes.h:89
@ FrameIndex
Definition: ISDOpcodes.h:90
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:957
@ PARTIAL_REDUCE_UMLA
Definition: ISDOpcodes.h:1511
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:706
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:656
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1298
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1473
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:773
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1331
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1477
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1090
@ STRICT_LROUND
Definition: ISDOpcodes.h:459
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:1002
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1187
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1380
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1166
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition: ISDOpcodes.h:622
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:682
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:778
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1347
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:228
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1492
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:663
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1261
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:343
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:458
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1485
@ GET_ROUNDING
Returns the current rounding mode: -1 = Undefined, 0 = Round to 0, 1 = Round to nearest (ties to even), 2 = Round to ...
Definition: ISDOpcodes.h:952
@ STRICT_FP_TO_FP16
Definition: ISDOpcodes.h:988
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:695
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1126
@ STRICT_FP16_TO_FP
Definition: ISDOpcodes.h:987
@ GET_FPENV
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1103
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:636
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1375
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:832
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1321
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:928
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:793
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1448
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1358
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1383
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following the IEEE-754 definition...
Definition: ISDOpcodes.h:1059
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:379
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1151
@ STRICT_LRINT
Definition: ISDOpcodes.h:461
@ ConstantPool
Definition: ISDOpcodes.h:92
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:718
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:627
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:960
@ STRICT_FROUND
Definition: ISDOpcodes.h:456
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:787
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:477
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1413
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1493
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:455
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:457
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:145
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:994
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:110
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1373
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:470
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1081
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1374
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:908
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1292
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:730
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1318
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:701
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:420
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:672
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:53
@ STRICT_LLRINT
Definition: ISDOpcodes.h:462
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:648
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1372
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1025
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:941
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively places vector elements based on the mask, e....
Definition: ISDOpcodes.h:690
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:434
@ STRICT_LLROUND
Definition: ISDOpcodes.h:460
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:451
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:927
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1481
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:838
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1256
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1180
@ BlockAddress
Definition: ISDOpcodes.h:94
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:521
@ PARTIAL_REDUCE_SUMLA
Definition: ISDOpcodes.h:1512
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:360
@ STRICT_FRINT
Definition: ISDOpcodes.h:450
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition: ISDOpcodes.h:611
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that behave the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1086
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:853
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:713
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1315
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:857
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:543
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1756
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
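Worked example of the two CondCode helpers above (the results in the comments follow directly from the definitions of SETLT/SETGT/SETGE):
  ISD::CondCode CC = ISD::SETLT;
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(CC);   // SETGT: (Y op X)
  ISD::CondCode Inverse = ISD::getSetCCInverse(CC, MVT::i64); // SETGE: !(X op Y)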
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1647
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1647
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1634
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1685
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1665
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1730
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:751
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
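Illustrative IR-level match built from the PatternMatch combinators listed above; I is assumed to be an llvm::Instruction under inspection and PatternMatch.h to be included.
  using namespace llvm::PatternMatch;
  Value *X, *Y;
  if (match(&I, m_Add(m_Shl(m_Value(X), m_One()), m_Value(Y)))) {
    // Matched (X << 1) + Y; X and Y are now bound to the operands.
  }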
unsigned getBrCond(CondCode CC, unsigned SelectOpc=0)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
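Sketch of how the materialization helpers above are commonly consulted; Imm and Subtarget are assumed to be in scope (RISCVSubtarget derives from MCSubtargetInfo, so it can be passed directly).
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
  bool CheapToMaterialize = Seq.size() <= 2; // e.g. a single ADDI or LUI+ADDI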
static VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
static constexpr unsigned RVVBytesPerBlock
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:58
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
uint32_t read32le(const void *P)
Definition: Endian.h:429
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:477
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491
bool isStrongerThanMonotonic(AtomicOrdering AO)
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:270
static const MachineMemOperand::Flags MONontemporalBit0
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:551
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1587
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition: Error.cpp:177
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:390
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:157
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1987
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:270
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:345
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:399
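Worked example of the MathExtras helpers referenced in this list (the values in the comments are the results of the calls, assuming the llvm namespace is in scope):
  uint64_t X = 48;                // 0b110000
  bool P2    = isPowerOf2_64(X);  // false
  unsigned L = Log2_64(X);        // 5
  uint64_t C = PowerOf2Ceil(X);   // 64
  int TZ     = countr_zero(X);    // 4
  uint64_t D = divideCeil(X, 5);  // 10
  uint64_t A = alignDown(X, 32);  // 32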
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
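Small worked example (values chosen for illustration): narrowing a two-element mask by a factor of 2 expands each index into two consecutive indices of the narrower element type.
  SmallVector<int> Scaled;
  narrowShuffleMaskElts(2, {0, 2}, Scaled); // Scaled == {0, 1, 4, 5}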
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
@ Xor
Bitwise or logical XOR of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1980
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1777
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
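Sketch of a DAG-combine guard built from the constant predicates above; N is assumed to be an SDNode* for a binary operation being combined.
  SDValue RHS = N->getOperand(1);
  if (isAllOnesConstant(RHS)) {
    // e.g. candidate for folding (and X, -1) -> X
  }
  if (ConstantSDNode *C = isConstOrConstSplat(RHS)) {
    uint64_t SplatImm = C->getZExtValue(); // scalar constant or splatted value
    (void)SplatImm;
  }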
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition: Error.cpp:180
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define N
#define NC
Definition: regutils.h:42
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:304
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:324
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:397
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:425
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition: ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:414
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:287
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:216
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
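Illustrative use of the EVT queries above on a scalable vector type; Ctx is assumed to be an LLVMContext reference in scope.
  EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 4, /*IsScalable=*/true); // <vscale x 4 x i32>
  bool Scalable    = VT.isScalableVector();        // true
  EVT EltVT        = VT.getVectorElementType();    // i32
  unsigned MinElts = VT.getVectorMinNumElements(); // 4
  TypeSize Bits    = VT.getSizeInBits();           // scalable, minimum 128 bits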
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static LLVM_ABI KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1056
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:66
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:267
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:154
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:165
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:74
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:289
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:304
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:173
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1016
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:273
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
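Worked example of the KnownBits queries above for a 32-bit value whose upper 24 bits are known to be zero:
  KnownBits Known(32);
  Known.Zero.setBitsFrom(8);                       // bits [8, 32) known zero
  unsigned MaxActive = Known.countMaxActiveBits(); // 8
  KnownBits Wide = Known.zext(64);                 // still at most 8 active bits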
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)