1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
24#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
33static cl::opt<bool> UsePseudoMovImm(
34 "riscv-use-rematerializable-movimm", cl::Hidden,
35 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
36 "constant materialization"),
37 cl::init(false));
38
39#define GET_DAGISEL_BODY RISCVDAGToDAGISel
40#include "RISCVGenDAGISel.inc"
41
42void RISCVDAGToDAGISel::PreprocessISelDAG() {
43 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
44
45 bool MadeChange = false;
46 while (Position != CurDAG->allnodes_begin()) {
47 SDNode *N = &*--Position;
48 if (N->use_empty())
49 continue;
50
51 SDValue Result;
52 switch (N->getOpcode()) {
53 case ISD::SPLAT_VECTOR: {
54 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
55 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
56 MVT VT = N->getSimpleValueType(0);
57 unsigned Opc =
58 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
59 SDLoc DL(N);
60 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
61 SDValue Src = N->getOperand(0);
62 if (VT.isInteger())
63 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
64 N->getOperand(0));
65 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
66 break;
67 }
68 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
69 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
70 // load. Done after lowering and combining so that we have a chance to
71 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
72 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
73 MVT VT = N->getSimpleValueType(0);
74 SDValue Passthru = N->getOperand(0);
75 SDValue Lo = N->getOperand(1);
76 SDValue Hi = N->getOperand(2);
77 SDValue VL = N->getOperand(3);
78 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
79 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
80 "Unexpected VTs!");
82 SDLoc DL(N);
83
84 // Create temporary stack for each expanding node.
85 SDValue StackSlot =
86 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
87 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
88 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), FI);
89
90 SDValue Chain = CurDAG->getEntryNode();
91 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
92
93 SDValue OffsetSlot =
94 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
95 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
96 Align(8));
97
98 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
99
100 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
101 SDValue IntID =
102 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
103 SDValue Ops[] = {Chain,
104 IntID,
105 Passthru,
106 StackSlot,
107 CurDAG->getRegister(RISCV::X0, MVT::i64),
108 VL};
109
110 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
111 MVT::i64, MPI, Align(8),
112 MachineMemOperand::MOLoad);
113 break;
114 }
115 case ISD::FP_EXTEND: {
116 // We only have vector patterns for riscv_fpextend_vl in isel.
117 SDLoc DL(N);
118 MVT VT = N->getSimpleValueType(0);
119 if (!VT.isVector())
120 break;
121 SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
122 SDValue TrueMask = CurDAG->getNode(
123 RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
124 Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
125 TrueMask, VLMAX);
126 break;
127 }
128 }
129
130 if (Result) {
131 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
132 LLVM_DEBUG(N->dump(CurDAG));
133 LLVM_DEBUG(dbgs() << "\nNew: ");
134 LLVM_DEBUG(Result->dump(CurDAG));
135 LLVM_DEBUG(dbgs() << "\n");
136
137 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
138 MadeChange = true;
139 }
140 }
141
142 if (MadeChange)
143 CurDAG->RemoveDeadNodes();
144}
145
146void RISCVDAGToDAGISel::PostprocessISelDAG() {
147 HandleSDNode Dummy(CurDAG->getRoot());
148 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
149
150 bool MadeChange = false;
151 while (Position != CurDAG->allnodes_begin()) {
152 SDNode *N = &*--Position;
153 // Skip dead nodes and any non-machine opcodes.
154 if (N->use_empty() || !N->isMachineOpcode())
155 continue;
156
157 MadeChange |= doPeepholeSExtW(N);
158
159 // FIXME: This is here only because the VMerge transform doesn't
160 // know how to handle masked true inputs. Once that has been moved
161 // to post-ISEL, this can be deleted as well.
162 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
163 }
164
165 CurDAG->setRoot(Dummy.getValue());
166
167 // After we're done with everything else, convert IMPLICIT_DEF
168 // passthru operands to NoRegister. This is required to workaround
169 // an optimization deficiency in MachineCSE. This really should
170 // be merged back into each of the patterns (i.e. there's no good
171 // reason not to go directly to NoReg), but is being done this way
172 // to allow easy backporting.
173 MadeChange |= doPeepholeNoRegPassThru();
174
175 if (MadeChange)
176 CurDAG->RemoveDeadNodes();
177}
178
179static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
180 RISCVMatInt::InstSeq &Seq) {
181 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
182 for (const RISCVMatInt::Inst &Inst : Seq) {
183 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
184 SDNode *Result = nullptr;
185 switch (Inst.getOpndKind()) {
186 case RISCVMatInt::Imm:
187 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
188 break;
189 case RISCVMatInt::RegX0:
190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
191 CurDAG->getRegister(RISCV::X0, VT));
192 break;
193 case RISCVMatInt::RegReg:
194 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
195 break;
196 case RISCVMatInt::RegImm:
197 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
198 break;
199 }
200
201 // Only the first instruction has X0 as its source.
202 SrcReg = SDValue(Result, 0);
203 }
204
205 return SrcReg;
206}
207
208static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
209 int64_t Imm, const RISCVSubtarget &Subtarget) {
210 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
211
212 // Use a rematerializable pseudo instruction for short sequences if enabled.
213 if (Seq.size() == 2 && UsePseudoMovImm)
214 return SDValue(
215 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
216 CurDAG->getSignedTargetConstant(Imm, DL, VT)),
217 0);
218
219 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
220 // worst an LUI+ADDIW. This will require an extra register, but avoids a
221 // constant pool.
222 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
223 // low and high 32 bits are the same and bit 31 and 63 are set.
224 if (Seq.size() > 3) {
225 unsigned ShiftAmt, AddOpc;
226 RISCVMatInt::InstSeq SeqLo =
227 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
228 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
229 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
230
231 SDValue SLLI = SDValue(
232 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
233 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
234 0);
235 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
236 }
237 }
238
239 // Otherwise, use the original sequence.
240 return selectImmSeq(CurDAG, DL, VT, Seq);
241}
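// For illustration (hypothetical input, not upstream text): on RV32 a
// constant such as 0x12345678 needs LUI+ADDI, so with
// -riscv-use-rematerializable-movimm enabled that two-instruction sequence is
// emitted as a single PseudoMovImm which later passes can rematerialize
// instead of spilling.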
242
244 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
245 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
246 bool IsLoad, MVT *IndexVT) {
247 SDValue Chain = Node->getOperand(0);
248
249 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
250
251 if (IsStridedOrIndexed) {
252 Operands.push_back(Node->getOperand(CurOp++)); // Index.
253 if (IndexVT)
254 *IndexVT = Operands.back()->getSimpleValueType(0);
255 }
256
257 if (IsMasked) {
258 SDValue Mask = Node->getOperand(CurOp++);
259 Operands.push_back(Mask);
260 }
261 SDValue VL;
262 selectVLOp(Node->getOperand(CurOp++), VL);
263 Operands.push_back(VL);
264
265 MVT XLenVT = Subtarget->getXLenVT();
266 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
267 Operands.push_back(SEWOp);
268
269 // At the IR layer, all the masked load intrinsics have policy operands,
270 // none of the others do. All have passthru operands. For our pseudos,
271 // all loads have policy operands.
272 if (IsLoad) {
273 uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
274 if (IsMasked)
275 Policy = Node->getConstantOperandVal(CurOp++);
276 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
277 Operands.push_back(PolicyOp);
278 }
279
280 Operands.push_back(Chain); // Chain.
281}
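// For illustration, a masked strided load pseudo ends up with its operands in
// this order: base pointer, stride, mask, VL, SEW (log2), policy, chain.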
282
283void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
284 bool IsStrided) {
285 SDLoc DL(Node);
286 MVT VT = Node->getSimpleValueType(0);
287 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
288 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
289
290 unsigned CurOp = 2;
291 SmallVector<SDValue, 8> Operands;
292
293 Operands.push_back(Node->getOperand(CurOp++));
294
295 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
296 Operands, /*IsLoad=*/true);
297
298 const RISCV::VLSEGPseudo *P =
299 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
300 static_cast<unsigned>(LMUL));
301 MachineSDNode *Load =
302 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
303
304 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
305
306 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
307 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
308 CurDAG->RemoveDeadNode(Node);
309}
310
311void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
312 bool IsMasked) {
313 SDLoc DL(Node);
314 MVT VT = Node->getSimpleValueType(0);
315 MVT XLenVT = Subtarget->getXLenVT();
316 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
317 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
318
319 unsigned CurOp = 2;
320 SmallVector<SDValue, 8> Operands;
321
322 Operands.push_back(Node->getOperand(CurOp++));
323
324 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
325 /*IsStridedOrIndexed*/ false, Operands,
326 /*IsLoad=*/true);
327
328 const RISCV::VLSEGPseudo *P =
329 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
330 Log2SEW, static_cast<unsigned>(LMUL));
331 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
332 XLenVT, MVT::Other, Operands);
333
334 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
335
336 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
337 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
338 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
339 CurDAG->RemoveDeadNode(Node);
340}
341
342void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
343 bool IsOrdered) {
344 SDLoc DL(Node);
345 MVT VT = Node->getSimpleValueType(0);
346 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
347 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
348
349 unsigned CurOp = 2;
350 SmallVector<SDValue, 8> Operands;
351
352 Operands.push_back(Node->getOperand(CurOp++));
353
354 MVT IndexVT;
355 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
356 /*IsStridedOrIndexed*/ true, Operands,
357 /*IsLoad=*/true, &IndexVT);
358
359#ifndef NDEBUG
361 // Number of elements = RVVBitsPerBlock * LMUL / SEW
361 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
362 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
363 if (DecodedLMUL.second)
364 ContainedTyNumElts /= DecodedLMUL.first;
365 else
366 ContainedTyNumElts *= DecodedLMUL.first;
367 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
368 "Element count mismatch");
369#endif
370
371 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
372 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
373 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
374 report_fatal_error("The V extension does not support EEW=64 for index "
375 "values when XLEN=32");
376 }
377 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
378 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
379 static_cast<unsigned>(IndexLMUL));
380 MachineSDNode *Load =
381 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
382
383 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
384
385 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
386 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
387 CurDAG->RemoveDeadNode(Node);
388}
389
390void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
391 bool IsStrided) {
392 SDLoc DL(Node);
393 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
394 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
395 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
396
397 unsigned CurOp = 2;
398 SmallVector<SDValue, 8> Operands;
399
400 Operands.push_back(Node->getOperand(CurOp++));
401
402 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
403 Operands);
404
405 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
406 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
407 MachineSDNode *Store =
408 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
409
410 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
411
412 ReplaceNode(Node, Store);
413}
414
415void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
416 bool IsOrdered) {
417 SDLoc DL(Node);
418 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
419 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
420 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
421
422 unsigned CurOp = 2;
423 SmallVector<SDValue, 8> Operands;
424
425 Operands.push_back(Node->getOperand(CurOp++));
426
427 MVT IndexVT;
428 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
429 /*IsStridedOrIndexed*/ true, Operands,
430 /*IsLoad=*/false, &IndexVT);
431
432#ifndef NDEBUG
434 // Number of elements = RVVBitsPerBlock * LMUL / SEW
434 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
435 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
436 if (DecodedLMUL.second)
437 ContainedTyNumElts /= DecodedLMUL.first;
438 else
439 ContainedTyNumElts *= DecodedLMUL.first;
440 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
441 "Element count mismatch");
442#endif
443
444 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
445 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
446 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
447 report_fatal_error("The V extension does not support EEW=64 for index "
448 "values when XLEN=32");
449 }
450 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
451 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
452 static_cast<unsigned>(IndexLMUL));
453 MachineSDNode *Store =
454 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
455
456 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
457
458 ReplaceNode(Node, Store);
459}
460
461void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
462 if (!Subtarget->hasVInstructions())
463 return;
464
465 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
466
467 SDLoc DL(Node);
468 MVT XLenVT = Subtarget->getXLenVT();
469
470 unsigned IntNo = Node->getConstantOperandVal(0);
471
472 assert((IntNo == Intrinsic::riscv_vsetvli ||
473 IntNo == Intrinsic::riscv_vsetvlimax) &&
474 "Unexpected vsetvli intrinsic");
475
476 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
477 unsigned Offset = (VLMax ? 1 : 2);
478
479 assert(Node->getNumOperands() == Offset + 2 &&
480 "Unexpected number of operands");
481
482 unsigned SEW =
483 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
484 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
485 Node->getConstantOperandVal(Offset + 1) & 0x7);
486
487 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
488 /*MaskAgnostic*/ true);
489 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
490
491 SDValue VLOperand;
492 unsigned Opcode = RISCV::PseudoVSETVLI;
493 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
494 if (auto VLEN = Subtarget->getRealVLen())
495 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
496 VLMax = true;
497 }
498 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
499 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
500 Opcode = RISCV::PseudoVSETVLIX0;
501 } else {
502 VLOperand = Node->getOperand(1);
503
504 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
505 uint64_t AVL = C->getZExtValue();
506 if (isUInt<5>(AVL)) {
507 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
508 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
509 XLenVT, VLImm, VTypeIOp));
510 return;
511 }
512 }
513 }
514
515 ReplaceNode(Node,
516 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
517}
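// For illustration: a constant AVL that fits in 5 bits (e.g. 5) is selected
// as PseudoVSETIVLI with an immediate AVL, while an all-ones AVL or one that
// is provably equal to VLMAX is selected as PseudoVSETVLIX0 with X0 as the
// AVL operand.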
518
519bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
520 MVT VT = Node->getSimpleValueType(0);
521 unsigned Opcode = Node->getOpcode();
522 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
523 "Unexpected opcode");
524 SDLoc DL(Node);
525
526 // For operations of the form (x << C1) op C2, check if we can use
527 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
528 SDValue N0 = Node->getOperand(0);
529 SDValue N1 = Node->getOperand(1);
530
531 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
532 if (!Cst)
533 return false;
534
535 int64_t Val = Cst->getSExtValue();
536
537 // Check if immediate can already use ANDI/ORI/XORI.
538 if (isInt<12>(Val))
539 return false;
540
541 SDValue Shift = N0;
542
543 // If Val is simm32 and we have a sext_inreg from i32, then the binop
544 // produces at least 33 sign bits. We can peek through the sext_inreg and use
545 // a SLLIW at the end.
546 bool SignExt = false;
547 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
548 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
549 SignExt = true;
550 Shift = N0.getOperand(0);
551 }
552
553 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
554 return false;
555
556 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
557 if (!ShlCst)
558 return false;
559
560 uint64_t ShAmt = ShlCst->getZExtValue();
561
562 // Make sure that we don't change the operation by removing bits.
563 // This only matters for OR and XOR, AND is unaffected.
564 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
565 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
566 return false;
567
568 int64_t ShiftedVal = Val >> ShAmt;
569 if (!isInt<12>(ShiftedVal))
570 return false;
571
572 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
573 if (SignExt && ShAmt >= 32)
574 return false;
575
576 // Ok, we can reorder to get a smaller immediate.
577 unsigned BinOpc;
578 switch (Opcode) {
579 default: llvm_unreachable("Unexpected opcode");
580 case ISD::AND: BinOpc = RISCV::ANDI; break;
581 case ISD::OR: BinOpc = RISCV::ORI; break;
582 case ISD::XOR: BinOpc = RISCV::XORI; break;
583 }
584
585 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
586
587 SDNode *BinOp = CurDAG->getMachineNode(
588 BinOpc, DL, VT, Shift.getOperand(0),
589 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
590 SDNode *SLLI =
591 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
592 CurDAG->getTargetConstant(ShAmt, DL, VT));
593 ReplaceNode(Node, SLLI);
594 return true;
595}
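// For illustration: in (or (shl X, 4), 0x7800) the constant 0x7800 does not
// fit ORI, but 0x7800 >> 4 == 0x780 does, so this is selected as
// (slli (ori X, 0x780), 4).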
596
597bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
598 unsigned Opc;
599
600 if (Subtarget->hasVendorXTHeadBb())
601 Opc = RISCV::TH_EXT;
602 else if (Subtarget->hasVendorXAndesPerf())
603 Opc = RISCV::NDS_BFOS;
604 else if (Subtarget->hasVendorXqcibm())
605 Opc = RISCV::QC_EXT;
606 else
607 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
608 return false;
609
610 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
611 if (!N1C)
612 return false;
613
614 SDValue N0 = Node->getOperand(0);
615 if (!N0.hasOneUse())
616 return false;
617
618 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
619 const SDLoc &DL, MVT VT) {
620 if (Opc == RISCV::QC_EXT) {
621 // QC.EXT X, width, shamt
622 // shamt is the same as Lsb
623 // width is the number of bits to extract from the Lsb
624 Msb = Msb - Lsb + 1;
625 }
626 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
627 CurDAG->getTargetConstant(Msb, DL, VT),
628 CurDAG->getTargetConstant(Lsb, DL, VT));
629 };
630
631 SDLoc DL(Node);
632 MVT VT = Node->getSimpleValueType(0);
633 const unsigned RightShAmt = N1C->getZExtValue();
634
635 // Transform (sra (shl X, C1) C2) with C1 < C2
636 // -> (SignedBitfieldExtract X, msb, lsb)
637 if (N0.getOpcode() == ISD::SHL) {
638 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
639 if (!N01C)
640 return false;
641
642 const unsigned LeftShAmt = N01C->getZExtValue();
643 // Make sure that this is a bitfield extraction (i.e., the shift-right
644 // amount can not be less than the left-shift).
645 if (LeftShAmt > RightShAmt)
646 return false;
647
648 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
649 const unsigned Msb = MsbPlusOne - 1;
650 const unsigned Lsb = RightShAmt - LeftShAmt;
651
652 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
653 ReplaceNode(Node, Sbe);
654 return true;
655 }
656
657 // Transform (sra (sext_inreg X, _), C) ->
658 // (SignedBitfieldExtract X, msb, lsb)
659 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
660 unsigned ExtSize =
661 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
662
663 // ExtSize of 32 should use sraiw via tablegen pattern.
664 if (ExtSize == 32)
665 return false;
666
667 const unsigned Msb = ExtSize - 1;
668 // If the shift-right amount is greater than Msb, the operation extracts
669 // the X[Msb] bit and sign-extends it.
670 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
671
672 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
673 ReplaceNode(Node, Sbe);
674 return true;
675 }
676
677 return false;
678}
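// For illustration, on a 32-bit target with XTHeadBb:
//   (sra (shl X, 24), 28)  ->  TH.EXT X, 7, 4
// i.e. bits [7:4] of X are extracted and sign-extended.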
679
681 // Supported only in Xqcibm for now.
682 if (!Subtarget->hasVendorXqcibm())
683 return false;
684
685 using namespace SDPatternMatch;
686
687 SDValue X;
688 APInt MaskImm;
689 if (!sd_match(Node, m_Or(m_OneUse(m_Value(X)), m_ConstInt(MaskImm))))
690 return false;
691
692 unsigned ShAmt, Width;
693 if (!MaskImm.isShiftedMask(ShAmt, Width) || MaskImm.isSignedIntN(12))
694 return false;
695
696 // If Zbs is enabled and only a single bit is set, we can use BSETI, which
697 // can be compressed to C_BSETI when Xqcibm is enabled.
698 if (Width == 1 && Subtarget->hasStdExtZbs())
699 return false;
700
701 // If C1 is a shifted mask (but can't be formed as an ORI),
702 // use a bitfield insert of -1.
703 // Transform (or x, C1)
704 // -> (qc.insbi x, -1, width, shift)
705 SDLoc DL(Node);
706 MVT VT = Node->getSimpleValueType(0);
707
708 SDValue Ops[] = {X, CurDAG->getSignedTargetConstant(-1, DL, VT),
709 CurDAG->getTargetConstant(Width, DL, VT),
710 CurDAG->getTargetConstant(ShAmt, DL, VT)};
711 SDNode *BitIns = CurDAG->getMachineNode(RISCV::QC_INSBI, DL, VT, Ops);
712 ReplaceNode(Node, BitIns);
713 return true;
714}
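// For illustration, with Xqcibm:
//   (or X, 0x00fff000)  ->  QC.INSBI X, -1, 12, 12
// since 0x00fff000 is a 12-bit mask shifted left by 12 and is too wide for
// ORI.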
715
716// Generate a QC_INSB/QC_INSBI from 'or (and X, MaskImm), OrImm' iff the value
717// being inserted only sets known zero bits.
719 // Supported only in Xqcibm for now.
720 if (!Subtarget->hasVendorXqcibm())
721 return false;
722
723 using namespace SDPatternMatch;
724
725 SDValue And;
726 APInt MaskImm, OrImm;
727 if (!sd_match(Node, m_Or(m_OneUse(m_And(m_Value(And), m_ConstInt(MaskImm))),
728 m_ConstInt(OrImm))))
729 return false;
730
731 // Compute the Known Zero for the AND as this allows us to catch more general
732 // cases than just looking for AND with imm.
733 KnownBits Known = CurDAG->computeKnownBits(Node->getOperand(0));
734
735 // The bits being inserted must only set those bits that are known to be zero.
736 if (!OrImm.isSubsetOf(Known.Zero)) {
737 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
738 // currently handle this case.
739 return false;
740 }
741
742 unsigned ShAmt, Width;
743 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
744 if (!Known.Zero.isShiftedMask(ShAmt, Width))
745 return false;
746
747 // QC_INSB(I) dst, src, #width, #shamt.
748 SDLoc DL(Node);
749 MVT VT = Node->getSimpleValueType(0);
750 SDValue ImmNode;
751 auto Opc = RISCV::QC_INSB;
752
753 int32_t LIImm = OrImm.getSExtValue() >> ShAmt;
754
755 if (isInt<5>(LIImm)) {
756 Opc = RISCV::QC_INSBI;
757 ImmNode = CurDAG->getSignedTargetConstant(LIImm, DL, MVT::i32);
758 } else {
759 ImmNode = selectImm(CurDAG, DL, MVT::i32, LIImm, *Subtarget);
760 }
761
762 SDValue Ops[] = {And, ImmNode, CurDAG->getTargetConstant(Width, DL, VT),
763 CurDAG->getTargetConstant(ShAmt, DL, VT)};
764 SDNode *BitIns = CurDAG->getMachineNode(Opc, DL, VT, Ops);
765 ReplaceNode(Node, BitIns);
766 return true;
767}
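// For illustration: in (or (and X, 0xffff00ff), 0x700) the AND makes bits
// [15:8] known zero, so the whole expression is selected as
// QC.INSBI X, 7, 8, 8 (insert the value 7 into the 8-bit field at bit 8).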
768
770 // Only supported with XAndesPerf at the moment.
771 if (!Subtarget->hasVendorXAndesPerf())
772 return false;
773
774 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
775 if (!N1C)
776 return false;
777
778 SDValue N0 = Node->getOperand(0);
779 if (!N0.hasOneUse())
780 return false;
781
782 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
783 const SDLoc &DL, MVT VT) {
784 unsigned Opc = RISCV::NDS_BFOS;
785 // If the Lsb is equal to the Msb, then the Lsb should be 0.
786 if (Lsb == Msb)
787 Lsb = 0;
788 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
789 CurDAG->getTargetConstant(Lsb, DL, VT),
790 CurDAG->getTargetConstant(Msb, DL, VT));
791 };
792
793 SDLoc DL(Node);
794 MVT VT = Node->getSimpleValueType(0);
795 const unsigned RightShAmt = N1C->getZExtValue();
796
797 // Transform (sra (shl X, C1) C2) with C1 > C2
798 // -> (NDS.BFOS X, lsb, msb)
799 if (N0.getOpcode() == ISD::SHL) {
800 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
801 if (!N01C)
802 return false;
803
804 const unsigned LeftShAmt = N01C->getZExtValue();
805 // Make sure that this is a bitfield insertion (i.e., the shift-right
806 // amount should be less than the left-shift).
807 if (LeftShAmt <= RightShAmt)
808 return false;
809
810 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
811 const unsigned Msb = MsbPlusOne - 1;
812 const unsigned Lsb = LeftShAmt - RightShAmt;
813
814 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
815 ReplaceNode(Node, Sbi);
816 return true;
817 }
818
819 return false;
820}
821
822// (xor X, (and (xor X, C1), C2))
823// -> (qc.insbi X, (C1 >> ShAmt), Width, ShAmt)
824// where C2 is a shifted mask with width=Width and shift=ShAmt
826
827 if (!Subtarget->hasVendorXqcibm())
828 return false;
829
830 using namespace SDPatternMatch;
831
832 SDValue X;
833 APInt CImm, CMask;
834 if (!sd_match(
835 Node,
836 m_Xor(m_Value(X),
837 m_OneUse(m_And(m_OneUse(m_Xor(m_Deferred(X), m_ConstInt(CImm))),
838 m_ConstInt(CMask))))))
839 return false;
840
841 unsigned Width, ShAmt;
842 if (!CMask.isShiftedMask(ShAmt, Width))
843 return false;
844
845 int64_t Imm = CImm.getSExtValue();
846 Imm >>= ShAmt;
847
848 SDLoc DL(Node);
849 SDValue ImmNode;
850 auto Opc = RISCV::QC_INSB;
851
852 if (isInt<5>(Imm)) {
853 Opc = RISCV::QC_INSBI;
854 ImmNode = CurDAG->getSignedTargetConstant(Imm, DL, MVT::i32);
855 } else {
856 ImmNode = selectImm(CurDAG, DL, MVT::i32, Imm, *Subtarget);
857 }
858 SDValue Ops[] = {X, ImmNode, CurDAG->getTargetConstant(Width, DL, MVT::i32),
859 CurDAG->getTargetConstant(ShAmt, DL, MVT::i32)};
860 ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, MVT::i32, Ops));
861
862 return true;
863}
864
865bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
866 const SDLoc &DL, MVT VT,
867 SDValue X, unsigned Msb,
868 unsigned Lsb) {
869 unsigned Opc;
870
871 if (Subtarget->hasVendorXTHeadBb()) {
872 Opc = RISCV::TH_EXTU;
873 } else if (Subtarget->hasVendorXAndesPerf()) {
874 Opc = RISCV::NDS_BFOZ;
875 } else if (Subtarget->hasVendorXqcibm()) {
876 Opc = RISCV::QC_EXTU;
877 // QC.EXTU X, width, shamt
878 // shamt is the same as Lsb
879 // width is the number of bits to extract from the Lsb
880 Msb = Msb - Lsb + 1;
881 } else {
882 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
883 return false;
884 }
885
886 SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
887 CurDAG->getTargetConstant(Msb, DL, VT),
888 CurDAG->getTargetConstant(Lsb, DL, VT));
889 ReplaceNode(Node, Ube);
890 return true;
891}
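// For illustration, extracting bits [15:8] zero-extended (Msb = 15, Lsb = 8)
// becomes TH.EXTU X, 15, 8 with XTHeadBb, NDS.BFOZ X, 15, 8 with XAndesPerf,
// or QC.EXTU X, 8, 8 with Xqcibm (QC.EXTU takes a width rather than an Msb).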
892
893bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
894 const SDLoc &DL, MVT VT,
895 SDValue X, unsigned Msb,
896 unsigned Lsb) {
897 // Only supported with XAndesPerf at the moment.
898 if (!Subtarget->hasVendorXAndesPerf())
899 return false;
900
901 unsigned Opc = RISCV::NDS_BFOZ;
902
903 // If the Lsb is equal to the Msb, then the Lsb should be 0.
904 if (Lsb == Msb)
905 Lsb = 0;
906 SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
907 CurDAG->getTargetConstant(Lsb, DL, VT),
908 CurDAG->getTargetConstant(Msb, DL, VT));
909 ReplaceNode(Node, Ubi);
910 return true;
911}
912
913bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
914 // Target does not support indexed loads.
915 if (!Subtarget->hasVendorXTHeadMemIdx())
916 return false;
917
918 LoadSDNode *Ld = cast<LoadSDNode>(Node);
919 ISD::MemIndexedMode AM = Ld->getAddressingMode();
920 if (AM == ISD::UNINDEXED)
921 return false;
922
923 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
924 if (!C)
925 return false;
926
927 EVT LoadVT = Ld->getMemoryVT();
928 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
929 "Unexpected addressing mode");
930 bool IsPre = AM == ISD::PRE_INC;
931 bool IsPost = AM == ISD::POST_INC;
932 int64_t Offset = C->getSExtValue();
933
934 // The constants that can be encoded in the THeadMemIdx instructions
935 // are of the form (sign_extend(imm5) << imm2).
936 unsigned Shift;
937 for (Shift = 0; Shift < 4; Shift++)
938 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
939 break;
940
941 // Constant cannot be encoded.
942 if (Shift == 4)
943 return false;
944
945 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
946 unsigned Opcode;
947 if (LoadVT == MVT::i8 && IsPre)
948 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
949 else if (LoadVT == MVT::i8 && IsPost)
950 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
951 else if (LoadVT == MVT::i16 && IsPre)
952 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
953 else if (LoadVT == MVT::i16 && IsPost)
954 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
955 else if (LoadVT == MVT::i32 && IsPre)
956 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
957 else if (LoadVT == MVT::i32 && IsPost)
958 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
959 else if (LoadVT == MVT::i64 && IsPre)
960 Opcode = RISCV::TH_LDIB;
961 else if (LoadVT == MVT::i64 && IsPost)
962 Opcode = RISCV::TH_LDIA;
963 else
964 return false;
965
966 EVT Ty = Ld->getOffset().getValueType();
967 SDValue Ops[] = {
968 Ld->getBasePtr(),
969 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
970 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
971 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
972 Ld->getValueType(1), MVT::Other, Ops);
973
974 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
975 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
976
977 ReplaceNode(Node, New);
978
979 return true;
980}
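// For illustration: a post-increment i32 load with offset 48 is encodable as
// imm5 = 12, imm2 = 2 (48 == 12 << 2), so it selects TH.LWIA (or TH.LWUIA for
// a zero-extending load) with those two immediates.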
981
983 if (!Subtarget->hasVInstructions())
984 return;
985
986 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
987
988 SDLoc DL(Node);
989 unsigned IntNo = Node->getConstantOperandVal(1);
990
991 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
992 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
993 "Unexpected sf_vc intrinsic");
994
995 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
996 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
997 SDValue SEWOp =
998 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
999 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
1000 Node->getOperand(4), Node->getOperand(5),
1001 Node->getOperand(8), SEWOp,
1002 Node->getOperand(0)};
1003
1004 unsigned Opcode;
1005 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
1006 switch (LMulSDNode->getSExtValue()) {
1007 case 5:
1008 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
1009 : RISCV::PseudoSF_VC_I_SE_MF8;
1010 break;
1011 case 6:
1012 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
1013 : RISCV::PseudoSF_VC_I_SE_MF4;
1014 break;
1015 case 7:
1016 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
1017 : RISCV::PseudoSF_VC_I_SE_MF2;
1018 break;
1019 case 0:
1020 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
1021 : RISCV::PseudoSF_VC_I_SE_M1;
1022 break;
1023 case 1:
1024 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
1025 : RISCV::PseudoSF_VC_I_SE_M2;
1026 break;
1027 case 2:
1028 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
1029 : RISCV::PseudoSF_VC_I_SE_M4;
1030 break;
1031 case 3:
1032 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
1033 : RISCV::PseudoSF_VC_I_SE_M8;
1034 break;
1035 }
1036
1037 ReplaceNode(Node, CurDAG->getMachineNode(
1038 Opcode, DL, Node->getSimpleValueType(0), Operands));
1039}
1040
1041static unsigned getSegInstNF(unsigned Intrinsic) {
1042#define INST_NF_CASE(NAME, NF) \
1043 case Intrinsic::riscv_##NAME##NF: \
1044 return NF;
1045#define INST_NF_CASE_MASK(NAME, NF) \
1046 case Intrinsic::riscv_##NAME##NF##_mask: \
1047 return NF;
1048#define INST_NF_CASE_FF(NAME, NF) \
1049 case Intrinsic::riscv_##NAME##NF##ff: \
1050 return NF;
1051#define INST_NF_CASE_FF_MASK(NAME, NF) \
1052 case Intrinsic::riscv_##NAME##NF##ff_mask: \
1053 return NF;
1054#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
1055 MACRO_NAME(NAME, 2) \
1056 MACRO_NAME(NAME, 3) \
1057 MACRO_NAME(NAME, 4) \
1058 MACRO_NAME(NAME, 5) \
1059 MACRO_NAME(NAME, 6) \
1060 MACRO_NAME(NAME, 7) \
1061 MACRO_NAME(NAME, 8)
1062#define INST_ALL_NF_CASE(NAME) \
1063 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
1064 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
1065#define INST_ALL_NF_CASE_WITH_FF(NAME) \
1066 INST_ALL_NF_CASE(NAME) \
1067 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
1068 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
1069 switch (Intrinsic) {
1070 default:
1071 llvm_unreachable("Unexpected segment load/store intrinsic");
1072 INST_ALL_NF_CASE_WITH_FF(vlseg)
1073 INST_ALL_NF_CASE(vlsseg)
1074 INST_ALL_NF_CASE(vloxseg)
1075 INST_ALL_NF_CASE(vluxseg)
1076 INST_ALL_NF_CASE(vsseg)
1077 INST_ALL_NF_CASE(vssseg)
1078 INST_ALL_NF_CASE(vsoxseg)
1079 INST_ALL_NF_CASE(vsuxseg)
1080 }
1081}
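// For illustration, getSegInstNF(Intrinsic::riscv_vlsseg4_mask) returns 4:
// the INST_NF_CASE_MASK expansion inside INST_ALL_NF_CASE(vlsseg) provides
// the "case Intrinsic::riscv_vlsseg4_mask: return 4;" entry.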
1082
1083void RISCVDAGToDAGISel::Select(SDNode *Node) {
1084 // If we have a custom node, we have already selected.
1085 if (Node->isMachineOpcode()) {
1086 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1087 Node->setNodeId(-1);
1088 return;
1089 }
1090
1091 // Instruction Selection not handled by the auto-generated tablegen selection
1092 // should be handled here.
1093 unsigned Opcode = Node->getOpcode();
1094 MVT XLenVT = Subtarget->getXLenVT();
1095 SDLoc DL(Node);
1096 MVT VT = Node->getSimpleValueType(0);
1097
1098 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
1099
1100 switch (Opcode) {
1101 case ISD::Constant: {
1102 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
1103 auto *ConstNode = cast<ConstantSDNode>(Node);
1104 if (ConstNode->isZero()) {
1105 SDValue New =
1106 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1107 ReplaceNode(Node, New.getNode());
1108 return;
1109 }
1110 int64_t Imm = ConstNode->getSExtValue();
1111 // If only the lower 8 bits are used, try to convert this to a simm6 by
1112 // sign-extending bit 7. This is neutral without the C extension, and
1113 // allows C.LI to be used if C is present.
1114 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
1115 Imm = SignExtend64<8>(Imm);
1116 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1117 // by sign extending bit 15.
1118 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
1119 hasAllHUsers(Node))
1120 Imm = SignExtend64<16>(Imm);
1121 // If the upper 32 bits are not used, try to convert this into a simm32 by
1122 // sign extending bit 31.
1123 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
1124 Imm = SignExtend64<32>(Imm);
1125
1126 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
1127 return;
1128 }
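// For illustration: if 0xF8 is only used by instructions that read the low 8
// bits, it is materialized as -8 (sign-extending bit 7), which fits the 6-bit
// immediate of c.li, whereas 248 itself would need a non-compressible addi.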
1129 case ISD::ConstantFP: {
1130 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
1131
1132 bool Is64Bit = Subtarget->is64Bit();
1133 bool HasZdinx = Subtarget->hasStdExtZdinx();
1134
1135 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1136 SDValue Imm;
1137 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1138 // create an integer immediate.
1139 if (APF.isPosZero() || NegZeroF64) {
1140 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1141 Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
1142 else
1143 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
1144 } else {
1145 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
1146 *Subtarget);
1147 }
1148
1149 unsigned Opc;
1150 switch (VT.SimpleTy) {
1151 default:
1152 llvm_unreachable("Unexpected size");
1153 case MVT::bf16:
1154 assert(Subtarget->hasStdExtZfbfmin());
1155 Opc = RISCV::FMV_H_X;
1156 break;
1157 case MVT::f16:
1158 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1159 break;
1160 case MVT::f32:
1161 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1162 break;
1163 case MVT::f64:
1164 // For RV32, we can't move from a GPR, we need to convert instead. This
1165 // should only happen for +0.0 and -0.0.
1166 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1167 if (HasZdinx)
1168 Opc = RISCV::COPY;
1169 else
1170 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1171 break;
1172 }
1173
1174 SDNode *Res;
1175 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1176 Res =
1177 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
1178 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1179 Res =
1180 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
1181 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1182 Res = CurDAG->getMachineNode(
1183 Opc, DL, VT, Imm,
1185 else
1186 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1187
1188 // For f64 -0.0, we need to insert a fneg.d idiom.
1189 if (NegZeroF64) {
1190 Opc = RISCV::FSGNJN_D;
1191 if (HasZdinx)
1192 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1193 Res =
1194 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1195 }
1196
1197 ReplaceNode(Node, Res);
1198 return;
1199 }
1200 case RISCVISD::BuildGPRPair:
1201 case RISCVISD::BuildPairF64: {
1202 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1203 break;
1204
1205 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1206 "BuildPairF64 only handled here on rv32i_zdinx");
1207
1208 SDValue Ops[] = {
1209 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1210 Node->getOperand(0),
1211 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1212 Node->getOperand(1),
1213 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1214
1215 SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
1216 ReplaceNode(Node, N);
1217 return;
1218 }
1219 case RISCVISD::SplitGPRPair:
1220 case RISCVISD::SplitF64: {
1221 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1222 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1223 "SplitF64 only handled here on rv32i_zdinx");
1224
1225 if (!SDValue(Node, 0).use_empty()) {
1226 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1227 Node->getValueType(0),
1228 Node->getOperand(0));
1229 ReplaceUses(SDValue(Node, 0), Lo);
1230 }
1231
1232 if (!SDValue(Node, 1).use_empty()) {
1234 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
1235 ReplaceUses(SDValue(Node, 1), Hi);
1236 }
1237
1238 CurDAG->RemoveDeadNode(Node);
1239 return;
1240 }
1241
1242 assert(Opcode != RISCVISD::SplitGPRPair &&
1243 "SplitGPRPair should already be handled");
1244
1245 if (!Subtarget->hasStdExtZfa())
1246 break;
1247 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1248 "Unexpected subtarget");
1249
1250 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1251 if (!SDValue(Node, 0).use_empty()) {
1252 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1253 Node->getOperand(0));
1254 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1255 }
1256 if (!SDValue(Node, 1).use_empty()) {
1257 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1258 Node->getOperand(0));
1259 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1260 }
1261
1262 CurDAG->RemoveDeadNode(Node);
1263 return;
1264 }
1265 case ISD::SHL: {
1266 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1267 if (!N1C)
1268 break;
1269 SDValue N0 = Node->getOperand(0);
1270 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1271 !isa<ConstantSDNode>(N0.getOperand(1)))
1272 break;
1273 unsigned ShAmt = N1C->getZExtValue();
1274 uint64_t Mask = N0.getConstantOperandVal(1);
1275
1276 if (isShiftedMask_64(Mask)) {
1277 unsigned XLen = Subtarget->getXLen();
1278 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1279 unsigned TrailingZeros = llvm::countr_zero(Mask);
1280 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1281 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1282 // where C2 has 32 leading zeros and C3 trailing zeros.
1283 SDNode *SRLIW = CurDAG->getMachineNode(
1284 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1285 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1286 SDNode *SLLI = CurDAG->getMachineNode(
1287 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1288 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1289 ReplaceNode(Node, SLLI);
1290 return;
1291 }
1292 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1293 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1294 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1295 // where C2 has C4 leading zeros and no trailing zeros.
1296 // This is profitable if the "and" was to be lowered to
1297 // (srli (slli X, C4), C4) and not (andi X, C2).
1298 // For "LeadingZeros == 32":
1299 // - with Zba it's just (slli.uw X, C)
1300 // - without Zba a tablegen pattern applies the very same
1301 // transform as we would have done here
1302 SDNode *SLLI = CurDAG->getMachineNode(
1303 RISCV::SLLI, DL, VT, N0.getOperand(0),
1304 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1305 SDNode *SRLI = CurDAG->getMachineNode(
1306 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1307 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1308 ReplaceNode(Node, SRLI);
1309 return;
1310 }
1311 }
1312 break;
1313 }
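// For illustration of the first transform in the SHL case above, on RV64:
//   (shl (and X, 0xffffff00), 8)  ->  (slli (srliw X, 8), 16)
// the mask has 32 leading and 8 trailing zeros, so SRLIW clears the high bits
// for free instead of materializing 0xffffff00.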
1314 case ISD::SRL: {
1315 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1316 if (!N1C)
1317 break;
1318 SDValue N0 = Node->getOperand(0);
1319 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1320 break;
1321 unsigned ShAmt = N1C->getZExtValue();
1322 uint64_t Mask = N0.getConstantOperandVal(1);
1323
1324 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1325 // 32 leading zeros and C3 trailing zeros.
1326 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1327 unsigned XLen = Subtarget->getXLen();
1328 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1329 unsigned TrailingZeros = llvm::countr_zero(Mask);
1330 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1331 SDNode *SRLIW = CurDAG->getMachineNode(
1332 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1333 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1334 SDNode *SLLI = CurDAG->getMachineNode(
1335 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1336 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1337 ReplaceNode(Node, SLLI);
1338 return;
1339 }
1340 }
1341
1342 // Optimize (srl (and X, C2), C) ->
1343 // (srli (slli X, (XLen-C3)), (XLen-C3) + C)
1344 // Where C2 is a mask with C3 trailing ones.
1345 // Taking into account that the C2 may have had lower bits unset by
1346 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1347 // This pattern occurs when type legalizing right shifts for types with
1348 // less than XLen bits.
1349 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1350 if (!isMask_64(Mask))
1351 break;
1352 unsigned TrailingOnes = llvm::countr_one(Mask);
1353 if (ShAmt >= TrailingOnes)
1354 break;
1355 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1356 if (TrailingOnes == 32) {
1357 SDNode *SRLI = CurDAG->getMachineNode(
1358 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1359 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1360 ReplaceNode(Node, SRLI);
1361 return;
1362 }
1363
1364 // Only do the remaining transforms if the AND has one use.
1365 if (!N0.hasOneUse())
1366 break;
1367
1368 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1369 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1370 SDNode *BEXTI = CurDAG->getMachineNode(
1371 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1372 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1373 ReplaceNode(Node, BEXTI);
1374 return;
1375 }
1376
1377 const unsigned Msb = TrailingOnes - 1;
1378 const unsigned Lsb = ShAmt;
1379 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0.getOperand(0), Msb, Lsb))
1380 return;
1381
1382 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1383 SDNode *SLLI =
1384 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1385 CurDAG->getTargetConstant(LShAmt, DL, VT));
1386 SDNode *SRLI = CurDAG->getMachineNode(
1387 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1388 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1389 ReplaceNode(Node, SRLI);
1390 return;
1391 }
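// For illustration of the SRL-of-AND lowering above, on RV64 without a
// bitfield-extract extension:
//   (srl (and X, 0xffff), 8)  ->  (srli (slli X, 48), 56)
// the shift pair isolates bits [15:8] without materializing the mask.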
1392 case ISD::SRA: {
1393 if (trySignedBitfieldExtract(Node))
1394 return;
1395
1397 return;
1398
1399 // Optimize (sra (sext_inreg X, i16), C) ->
1400 // (srai (slli X, (XLen-16)), (XLen-16) + C)
1401 // And (sra (sext_inreg X, i8), C) ->
1402 // (srai (slli X, (XLen-8)), (XLen-8) + C)
1403 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1404 // This transform matches the code we get without Zbb. The shifts are more
1405 // compressible, and this can help expose CSE opportunities in the sdiv by
1406 // constant optimization.
1407 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1408 if (!N1C)
1409 break;
1410 SDValue N0 = Node->getOperand(0);
1411 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1412 break;
1413 unsigned ShAmt = N1C->getZExtValue();
1414 unsigned ExtSize =
1415 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1416 // ExtSize of 32 should use sraiw via tablegen pattern.
1417 if (ExtSize >= 32 || ShAmt >= ExtSize)
1418 break;
1419 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1420 SDNode *SLLI =
1421 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1422 CurDAG->getTargetConstant(LShAmt, DL, VT));
1423 SDNode *SRAI = CurDAG->getMachineNode(
1424 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1425 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1426 ReplaceNode(Node, SRAI);
1427 return;
1428 }
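// For illustration of the SRA transform above, on RV64 with Zbb (which makes
// sext_inreg i8 legal):
//   (sra (sext_inreg X, i8), 3)  ->  (srai (slli X, 56), 59)
// matching the shift pair that would be produced without Zbb.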
1429 case ISD::OR: {
1431 return;
1432
1434 return;
1435
1436 if (tryShrinkShlLogicImm(Node))
1437 return;
1438
1439 break;
1440 }
1441 case ISD::XOR:
1442 if (tryShrinkShlLogicImm(Node))
1443 return;
1444
1446 return;
1447
1448 break;
1449 case ISD::AND: {
1450 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1451 if (!N1C)
1452 break;
1453
1454 SDValue N0 = Node->getOperand(0);
1455
1456 bool LeftShift = N0.getOpcode() == ISD::SHL;
1457 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1458 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1459 if (!C)
1460 break;
1461 unsigned C2 = C->getZExtValue();
1462 unsigned XLen = Subtarget->getXLen();
1463 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1464
1465 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1466 // shift pair might offer more compression opportunities.
1467 // TODO: We could check for C extension here, but we don't have many lit
1468 // tests with the C extension enabled so not checking gets better
1469 // coverage.
1470 // TODO: What if ANDI faster than shift?
1471 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1472
1473 uint64_t C1 = N1C->getZExtValue();
1474
1475 // Clear irrelevant bits in the mask.
1476 if (LeftShift)
1477 C1 &= maskTrailingZeros<uint64_t>(C2);
1478 else
1479 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1480
1481 // Some transforms should only be done if the shift has a single use or
1482 // the AND would become (srli (slli X, 32), 32)
1483 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1484
1485 SDValue X = N0.getOperand(0);
1486
1487 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1488 // with c3 leading zeros.
1489 if (!LeftShift && isMask_64(C1)) {
1490 unsigned Leading = XLen - llvm::bit_width(C1);
1491 if (C2 < Leading) {
1492 // If the number of leading zeros is C2+32 this can be SRLIW.
1493 if (C2 + 32 == Leading) {
1494 SDNode *SRLIW = CurDAG->getMachineNode(
1495 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1496 ReplaceNode(Node, SRLIW);
1497 return;
1498 }
1499
1500 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1501 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1502 //
1503 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1504 // legalized and goes through DAG combine.
1505 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1506 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1507 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1508 SDNode *SRAIW =
1509 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1510 CurDAG->getTargetConstant(31, DL, VT));
1511 SDNode *SRLIW = CurDAG->getMachineNode(
1512 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1513 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1514 ReplaceNode(Node, SRLIW);
1515 return;
1516 }
1517
1518 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1519 // available.
1520 // Transform (and (srl x, C2), C1)
1521 // -> (<bfextract> x, msb, lsb)
1522 //
1523 // Make sure to keep this below the SRLIW cases, as we always want to
1524 // prefer the more common instruction.
1525 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1526 const unsigned Lsb = C2;
1527 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1528 return;
1529
1530 // (srli (slli x, c3-c2), c3).
1531 // Skip if we could use (zext.w (sraiw X, C2)).
1532 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1533 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1534 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1535 // Also Skip if we can use bexti or th.tst.
1536 Skip |= HasBitTest && Leading == XLen - 1;
1537 if (OneUseOrZExtW && !Skip) {
1538 SDNode *SLLI = CurDAG->getMachineNode(
1539 RISCV::SLLI, DL, VT, X,
1540 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1541 SDNode *SRLI = CurDAG->getMachineNode(
1542 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1543 CurDAG->getTargetConstant(Leading, DL, VT));
1544 ReplaceNode(Node, SRLI);
1545 return;
1546 }
1547 }
1548 }
1549
1550 // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1551 // shifted by c2 bits with c3 leading zeros.
1552 if (LeftShift && isShiftedMask_64(C1)) {
1553 unsigned Leading = XLen - llvm::bit_width(C1);
1554
1555 if (C2 + Leading < XLen &&
1556 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1557 // Use slli.uw when possible.
1558 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1559 SDNode *SLLI_UW =
1560 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1561 CurDAG->getTargetConstant(C2, DL, VT));
1562 ReplaceNode(Node, SLLI_UW);
1563 return;
1564 }
1565
1566 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1567 // available.
1568 // Transform (and (shl x, c2), c1)
1569 // -> (<bfinsert> x, msb, lsb)
1570 // e.g.
1571 // (and (shl x, 12), 0x00fff000)
1572 // If XLen = 32 and C2 = 12, then
1573 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1574 const unsigned Msb = XLen - Leading - 1;
1575 const unsigned Lsb = C2;
1576 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1577 return;
1578
1579 // (srli (slli x, c2+c3), c3)
1580 if (OneUseOrZExtW && !IsCANDI) {
1581 SDNode *SLLI = CurDAG->getMachineNode(
1582 RISCV::SLLI, DL, VT, X,
1583 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1584 SDNode *SRLI = CurDAG->getMachineNode(
1585 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1586 CurDAG->getTargetConstant(Leading, DL, VT));
1587 ReplaceNode(Node, SRLI);
1588 return;
1589 }
1590 }
1591 }
1592
1593 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1594 // shifted mask with c2 leading zeros and c3 trailing zeros.
1595 if (!LeftShift && isShiftedMask_64(C1)) {
1596 unsigned Leading = XLen - llvm::bit_width(C1);
1597 unsigned Trailing = llvm::countr_zero(C1);
1598 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1599 !IsCANDI) {
1600 unsigned SrliOpc = RISCV::SRLI;
1601 // If the input is zexti32 we should use SRLIW.
1602 if (X.getOpcode() == ISD::AND &&
1603 isa<ConstantSDNode>(X.getOperand(1)) &&
1604 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1605 SrliOpc = RISCV::SRLIW;
1606 X = X.getOperand(0);
1607 }
1608 SDNode *SRLI = CurDAG->getMachineNode(
1609 SrliOpc, DL, VT, X,
1610 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1611 SDNode *SLLI = CurDAG->getMachineNode(
1612 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1613 CurDAG->getTargetConstant(Trailing, DL, VT));
1614 ReplaceNode(Node, SLLI);
1615 return;
1616 }
1617 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1618 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1619 OneUseOrZExtW && !IsCANDI) {
1620 SDNode *SRLIW = CurDAG->getMachineNode(
1621 RISCV::SRLIW, DL, VT, X,
1622 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1623 SDNode *SLLI = CurDAG->getMachineNode(
1624 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1625 CurDAG->getTargetConstant(Trailing, DL, VT));
1626 ReplaceNode(Node, SLLI);
1627 return;
1628 }
1629 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1630 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1631 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1632 SDNode *SRLI = CurDAG->getMachineNode(
1633 RISCV::SRLI, DL, VT, X,
1634 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1635 SDNode *SLLI_UW = CurDAG->getMachineNode(
1636 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1637 CurDAG->getTargetConstant(Trailing, DL, VT));
1638 ReplaceNode(Node, SLLI_UW);
1639 return;
1640 }
1641 }
1642
1643 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1644 // shifted mask with no leading zeros and c3 trailing zeros.
1645 if (LeftShift && isShiftedMask_64(C1)) {
1646 unsigned Leading = XLen - llvm::bit_width(C1);
1647 unsigned Trailing = llvm::countr_zero(C1);
1648 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1649 SDNode *SRLI = CurDAG->getMachineNode(
1650 RISCV::SRLI, DL, VT, X,
1651 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1652 SDNode *SLLI = CurDAG->getMachineNode(
1653 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1654 CurDAG->getTargetConstant(Trailing, DL, VT));
1655 ReplaceNode(Node, SLLI);
1656 return;
1657 }
1658 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1659 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1660 SDNode *SRLIW = CurDAG->getMachineNode(
1661 RISCV::SRLIW, DL, VT, X,
1662 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1663 SDNode *SLLI = CurDAG->getMachineNode(
1664 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1665 CurDAG->getTargetConstant(Trailing, DL, VT));
1666 ReplaceNode(Node, SLLI);
1667 return;
1668 }
1669
1670 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1671 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1672 Subtarget->hasStdExtZba()) {
1673 SDNode *SRLI = CurDAG->getMachineNode(
1674 RISCV::SRLI, DL, VT, X,
1675 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1676 SDNode *SLLI_UW = CurDAG->getMachineNode(
1677 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1678 CurDAG->getTargetConstant(Trailing, DL, VT));
1679 ReplaceNode(Node, SLLI_UW);
1680 return;
1681 }
1682 }
1683 }
1684
1685 const uint64_t C1 = N1C->getZExtValue();
1686
1687 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1688 N0.hasOneUse()) {
1689 unsigned C2 = N0.getConstantOperandVal(1);
1690 unsigned XLen = Subtarget->getXLen();
1691 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1692
1693 SDValue X = N0.getOperand(0);
1694
1695 // Prefer SRAIW + ANDI when possible.
1696 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1697 X.getOpcode() == ISD::SHL &&
1698 isa<ConstantSDNode>(X.getOperand(1)) &&
1699 X.getConstantOperandVal(1) == 32;
1700 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1701 // mask with c3 leading zeros and c2 is larger than c3.
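// Editor's note (illustrative, not in the original source): with XLen=64,
// c2=40 and c1=0x00FFFFFFFFFFFFFF (c3=8 leading zeros),
//   (and (sra x, 40), c1) == (srli (srai x, 40-8), 8)
// the srai keeps the sign-extension behaviour and the srli clears the top
// c3 bits.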
1702 if (isMask_64(C1) && !Skip) {
1703 unsigned Leading = XLen - llvm::bit_width(C1);
1704 if (C2 > Leading) {
1705 SDNode *SRAI = CurDAG->getMachineNode(
1706 RISCV::SRAI, DL, VT, X,
1707 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1708 SDNode *SRLI = CurDAG->getMachineNode(
1709 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1710 CurDAG->getTargetConstant(Leading, DL, VT));
1711 ReplaceNode(Node, SRLI);
1712 return;
1713 }
1714 }
1715
1716 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1717 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1718 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
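// Editor's note (illustrative, not in the original source): with XLen=64,
// c2=40 and c1=0x00FFFFFFFFFFFF00 (c3=8 leading, c4=8 trailing zeros),
//   (and (sra y, 40), c1) == (slli (srli (srai y, 40-8), 8+8), 8)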
1719 if (isShiftedMask_64(C1) && !Skip) {
1720 unsigned Leading = XLen - llvm::bit_width(C1);
1721 unsigned Trailing = llvm::countr_zero(C1);
1722 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1723 SDNode *SRAI = CurDAG->getMachineNode(
1724 RISCV::SRAI, DL, VT, N0.getOperand(0),
1725 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1726 SDNode *SRLI = CurDAG->getMachineNode(
1727 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1728 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1729 SDNode *SLLI = CurDAG->getMachineNode(
1730 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1731 CurDAG->getTargetConstant(Trailing, DL, VT));
1732 ReplaceNode(Node, SLLI);
1733 return;
1734 }
1735 }
1736 }
1737
1738 // If C1 masks off the upper bits only (but can't be formed as an
1739 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1740 // available.
1741 // Transform (and x, C1)
1742 // -> (<bfextract> x, msb, lsb)
1743 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
1744 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1745 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1746 const unsigned Msb = llvm::bit_width(C1) - 1;
1747 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1748 return;
1749 }
1750
1751 if (tryShrinkShlLogicImm(Node))
1752 return;
1753
1754 break;
1755 }
1756 case ISD::MUL: {
1757 // Special case for calculating (mul (and X, C2), C1) where the full product
1758 // fits in XLen bits. We can shift X left by the number of leading zeros in
1759 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1760 // product has XLen trailing zeros, putting it in the output of MULHU. This
1761 // can avoid materializing a constant in a register for C2.
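// Editor's note (illustrative, not in the original source): on RV64 with
// C2=0xFFFF (48 leading zeros) and C1=100, SLLI of X by 48 keeps exactly
// the low 16 bits, so the AND becomes implicit, and
//   ((X & 0xFFFF) << 48) * (100 << 16) == ((X & 0xFFFF) * 100) * 2^64,
// meaning MULHU of the two shifted operands returns (X & 0xFFFF) * 100
// without materializing 0xFFFF in a register.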
1762
1763 // RHS should be a constant.
1764 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1765 if (!N1C || !N1C->hasOneUse())
1766 break;
1767
1768 // LHS should be an AND with constant.
1769 SDValue N0 = Node->getOperand(0);
1770 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1771 break;
1772
1773 uint64_t C2 = N0.getConstantOperandVal(1);
1774
1775 // Constant should be a mask.
1776 if (!isMask_64(C2))
1777 break;
1778
1779 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1780 // multiple users or the constant is a simm12. This prevents inserting a
1781 // shift while still having uses of the AND/ZEXT. Shifting a simm12 will likely
1782 // make it more costly to materialize. Otherwise, using a SLLI might allow
1783 // it to be compressed.
1784 bool IsANDIOrZExt =
1785 isInt<12>(C2) ||
1786 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1787 // With XTHeadBb, we can use TH.EXTU.
1788 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1789 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1790 break;
1791 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1792 // the constant is a simm32.
1793 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1794 // With XTHeadBb, we can use TH.EXTU.
1795 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1796 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1797 break;
1798
1799 // We need to shift left the AND input and C1 by a total of XLen bits.
1800
1801 // How far left do we need to shift the AND input?
1802 unsigned XLen = Subtarget->getXLen();
1803 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1804
1805 // The constant gets shifted by the remaining amount unless that would
1806 // shift bits out.
1807 uint64_t C1 = N1C->getZExtValue();
1808 unsigned ConstantShift = XLen - LeadingZeros;
1809 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1810 break;
1811
1812 uint64_t ShiftedC1 = C1 << ConstantShift;
1813 // If this is RV32, we need to sign extend the constant.
1814 if (XLen == 32)
1815 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1816
1817 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1818 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1819 SDNode *SLLI =
1820 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1821 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1822 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1823 SDValue(SLLI, 0), SDValue(Imm, 0));
1824 ReplaceNode(Node, MULHU);
1825 return;
1826 }
1827 case ISD::LOAD: {
1828 if (tryIndexedLoad(Node))
1829 return;
1830
1831 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1832 // We match post-incrementing loads here.
1833 LoadSDNode *Load = cast<LoadSDNode>(Node);
1834 if (Load->getAddressingMode() != ISD::POST_INC)
1835 break;
1836
1837 SDValue Chain = Node->getOperand(0);
1838 SDValue Base = Node->getOperand(1);
1839 SDValue Offset = Node->getOperand(2);
1840
1841 bool Simm12 = false;
1842 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1843
1844 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1845 int ConstantVal = ConstantOffset->getSExtValue();
1846 Simm12 = isInt<12>(ConstantVal);
1847 if (Simm12)
1848 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1849 Offset.getValueType());
1850 }
1851
1852 unsigned Opcode = 0;
1853 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1854 case MVT::i8:
1855 if (Simm12 && SignExtend)
1856 Opcode = RISCV::CV_LB_ri_inc;
1857 else if (Simm12 && !SignExtend)
1858 Opcode = RISCV::CV_LBU_ri_inc;
1859 else if (!Simm12 && SignExtend)
1860 Opcode = RISCV::CV_LB_rr_inc;
1861 else
1862 Opcode = RISCV::CV_LBU_rr_inc;
1863 break;
1864 case MVT::i16:
1865 if (Simm12 && SignExtend)
1866 Opcode = RISCV::CV_LH_ri_inc;
1867 else if (Simm12 && !SignExtend)
1868 Opcode = RISCV::CV_LHU_ri_inc;
1869 else if (!Simm12 && SignExtend)
1870 Opcode = RISCV::CV_LH_rr_inc;
1871 else
1872 Opcode = RISCV::CV_LHU_rr_inc;
1873 break;
1874 case MVT::i32:
1875 if (Simm12)
1876 Opcode = RISCV::CV_LW_ri_inc;
1877 else
1878 Opcode = RISCV::CV_LW_rr_inc;
1879 break;
1880 default:
1881 break;
1882 }
1883 if (!Opcode)
1884 break;
1885
1886 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1887 Chain.getSimpleValueType(), Base,
1888 Offset, Chain));
1889 return;
1890 }
1891 break;
1892 }
1893 case RISCVISD::LD_RV32: {
1894 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
1895
1896 SDValue Base, Offset;
1897 SDValue Chain = Node->getOperand(0);
1898 SDValue Addr = Node->getOperand(1);
1899 SelectAddrRegImm(Addr, Base, Offset);
1900
1901 SDValue Ops[] = {Base, Offset, Chain};
1902 MachineSDNode *New = CurDAG->getMachineNode(
1903 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
1904 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1905 MVT::i32, SDValue(New, 0));
1906 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
1907 MVT::i32, SDValue(New, 0));
1908 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1909 ReplaceUses(SDValue(Node, 0), Lo);
1910 ReplaceUses(SDValue(Node, 1), Hi);
1911 ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
1912 CurDAG->RemoveDeadNode(Node);
1913 return;
1914 }
1915 case RISCVISD::SD_RV32: {
1916 SDValue Base, Offset;
1917 SDValue Chain = Node->getOperand(0);
1918 SDValue Addr = Node->getOperand(3);
1919 SelectAddrRegImm(Addr, Base, Offset);
1920
1921 SDValue Lo = Node->getOperand(1);
1922 SDValue Hi = Node->getOperand(2);
1923
1924 SDValue RegPair;
1925 // Peephole to use X0_Pair for storing zero.
1926 if (isNullConstant(Lo) && isNullConstant(Hi)) {
1927 RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
1928 } else {
1929 SDValue Ops[] = {
1930 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
1931 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
1932 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1933
1934 RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1935 MVT::Untyped, Ops),
1936 0);
1937 }
1938
1939 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
1940 {RegPair, Base, Offset, Chain});
1941 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1942 ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
1943 CurDAG->RemoveDeadNode(Node);
1944 return;
1945 }
1946 case ISD::INTRINSIC_WO_CHAIN: {
1947 unsigned IntNo = Node->getConstantOperandVal(0);
1948 switch (IntNo) {
1949 // By default we do not custom select any intrinsic.
1950 default:
1951 break;
1952 case Intrinsic::riscv_vmsgeu:
1953 case Intrinsic::riscv_vmsge: {
1954 SDValue Src1 = Node->getOperand(1);
1955 SDValue Src2 = Node->getOperand(2);
1956 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1957 bool IsCmpConstant = false;
1958 bool IsCmpMinimum = false;
1959 // Only custom select scalar second operand.
1960 if (Src2.getValueType() != XLenVT)
1961 break;
1962 // Small constants are handled with patterns.
1963 int64_t CVal = 0;
1964 MVT Src1VT = Src1.getSimpleValueType();
1965 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1966 IsCmpConstant = true;
1967 CVal = C->getSExtValue();
1968 if (CVal >= -15 && CVal <= 16) {
1969 if (!IsUnsigned || CVal != 0)
1970 break;
1971 IsCmpMinimum = true;
1972 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1973 Src1VT.getScalarSizeInBits())
1974 .getSExtValue()) {
1975 IsCmpMinimum = true;
1976 }
1977 }
1978 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1979 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1980 default:
1981 llvm_unreachable("Unexpected LMUL!");
1982#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1983 case RISCVVType::lmulenum: \
1984 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1985 : RISCV::PseudoVMSLT_VX_##suffix; \
1986 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
1987 : RISCV::PseudoVMSGT_VX_##suffix; \
1988 break;
1989 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1990 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1991 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1992 CASE_VMSLT_OPCODES(LMUL_1, M1)
1993 CASE_VMSLT_OPCODES(LMUL_2, M2)
1994 CASE_VMSLT_OPCODES(LMUL_4, M4)
1995 CASE_VMSLT_OPCODES(LMUL_8, M8)
1996#undef CASE_VMSLT_OPCODES
1997 }
1998 // Mask operations use the LMUL from the mask type.
1999 switch (RISCVTargetLowering::getLMUL(VT)) {
2000 default:
2001 llvm_unreachable("Unexpected LMUL!");
2002#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
2003 case RISCVVType::lmulenum: \
2004 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
2005 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
2006 break;
2007 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
2008 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
2009 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
2010 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
2011 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
2012 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
2013 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
2014#undef CASE_VMNAND_VMSET_OPCODES
2015 }
2016 SDValue SEW = CurDAG->getTargetConstant(
2017 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2018 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2019 SDValue VL;
2020 selectVLOp(Node->getOperand(3), VL);
2021
2022 // If vmsge(u) with minimum value, expand it to vmset.
2023 if (IsCmpMinimum) {
2024 ReplaceNode(Node,
2025 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
2026 return;
2027 }
2028
2029 if (IsCmpConstant) {
2030 SDValue Imm =
2031 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2032
2033 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
2034 {Src1, Imm, VL, SEW}));
2035 return;
2036 }
2037
2038 // Expand to
2039 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
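// Editor's note (not in the original source): RVV has no vmsge{u}.vx
// instruction, but vmsge(a, x) == !(a < x), and vmnand.mm of a mask with
// itself is a bitwise NOT, so the two-instruction sequence above computes
// the comparison.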
2040 SDValue Cmp = SDValue(
2041 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2042 0);
2043 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
2044 {Cmp, Cmp, VL, MaskSEW}));
2045 return;
2046 }
2047 case Intrinsic::riscv_vmsgeu_mask:
2048 case Intrinsic::riscv_vmsge_mask: {
2049 SDValue Src1 = Node->getOperand(2);
2050 SDValue Src2 = Node->getOperand(3);
2051 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
2052 bool IsCmpConstant = false;
2053 bool IsCmpMinimum = false;
2054 // Only custom select scalar second operand.
2055 if (Src2.getValueType() != XLenVT)
2056 break;
2057 // Small constants are handled with patterns.
2058 MVT Src1VT = Src1.getSimpleValueType();
2059 int64_t CVal = 0;
2060 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2061 IsCmpConstant = true;
2062 CVal = C->getSExtValue();
2063 if (CVal >= -15 && CVal <= 16) {
2064 if (!IsUnsigned || CVal != 0)
2065 break;
2066 IsCmpMinimum = true;
2067 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2068 Src1VT.getScalarSizeInBits())
2069 .getSExtValue()) {
2070 IsCmpMinimum = true;
2071 }
2072 }
2073 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
2074 VMOROpcode, VMSGTMaskOpcode;
2075 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2076 default:
2077 llvm_unreachable("Unexpected LMUL!");
2078#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2079 case RISCVVType::lmulenum: \
2080 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2081 : RISCV::PseudoVMSLT_VX_##suffix; \
2082 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2083 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2084 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2085 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2086 break;
2087 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2088 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2089 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2090 CASE_VMSLT_OPCODES(LMUL_1, M1)
2091 CASE_VMSLT_OPCODES(LMUL_2, M2)
2092 CASE_VMSLT_OPCODES(LMUL_4, M4)
2093 CASE_VMSLT_OPCODES(LMUL_8, M8)
2094#undef CASE_VMSLT_OPCODES
2095 }
2096 // Mask operations use the LMUL from the mask type.
2097 switch (RISCVTargetLowering::getLMUL(VT)) {
2098 default:
2099 llvm_unreachable("Unexpected LMUL!");
2100#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2101 case RISCVVType::lmulenum: \
2102 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2103 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2104 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2105 break;
2106 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2107 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2108 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2109 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
2110 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
2111 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
2112 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
2113#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2114 }
2115 SDValue SEW = CurDAG->getTargetConstant(
2116 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2117 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2118 SDValue VL;
2119 selectVLOp(Node->getOperand(5), VL);
2120 SDValue MaskedOff = Node->getOperand(1);
2121 SDValue Mask = Node->getOperand(4);
2122
2123 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2124 if (IsCmpMinimum) {
2125 // We don't need vmor if the MaskedOff and the Mask are the same
2126 // value.
2127 if (Mask == MaskedOff) {
2128 ReplaceUses(Node, Mask.getNode());
2129 return;
2130 }
2131 ReplaceNode(Node,
2132 CurDAG->getMachineNode(VMOROpcode, DL, VT,
2133 {Mask, MaskedOff, VL, MaskSEW}));
2134 return;
2135 }
2136
2137 // If the MaskedOff value and the Mask are the same value use
2138 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2139 // This avoids needing to copy v0 to vd before starting the next sequence.
2140 if (Mask == MaskedOff) {
2141 SDValue Cmp = SDValue(
2142 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2143 0);
2144 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2145 {Mask, Cmp, VL, MaskSEW}));
2146 return;
2147 }
2148
2149 SDValue PolicyOp =
2150 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2151
2152 if (IsCmpConstant) {
2153 SDValue Imm =
2154 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2155
2156 ReplaceNode(Node, CurDAG->getMachineNode(
2157 VMSGTMaskOpcode, DL, VT,
2158 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2159 return;
2160 }
2161
2162 // Otherwise use
2163 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2164 // The result is mask undisturbed.
2165 // We use the same instructions to emulate mask agnostic behavior, because
2166 // the agnostic result can be either undisturbed or all 1.
2167 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2168 {MaskedOff, Src1, Src2, Mask,
2169 VL, SEW, PolicyOp}),
2170 0);
2171 // vmxor.mm vd, vd, v0 is used to update active value.
2172 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2173 {Cmp, Mask, VL, MaskSEW}));
2174 return;
2175 }
2176 case Intrinsic::riscv_vsetvli:
2177 case Intrinsic::riscv_vsetvlimax:
2178 return selectVSETVLI(Node);
2179 }
2180 break;
2181 }
2182 case ISD::INTRINSIC_W_CHAIN: {
2183 unsigned IntNo = Node->getConstantOperandVal(1);
2184 switch (IntNo) {
2185 // By default we do not custom select any intrinsic.
2186 default:
2187 break;
2188 case Intrinsic::riscv_vlseg2:
2189 case Intrinsic::riscv_vlseg3:
2190 case Intrinsic::riscv_vlseg4:
2191 case Intrinsic::riscv_vlseg5:
2192 case Intrinsic::riscv_vlseg6:
2193 case Intrinsic::riscv_vlseg7:
2194 case Intrinsic::riscv_vlseg8: {
2195 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2196 /*IsStrided*/ false);
2197 return;
2198 }
2199 case Intrinsic::riscv_vlseg2_mask:
2200 case Intrinsic::riscv_vlseg3_mask:
2201 case Intrinsic::riscv_vlseg4_mask:
2202 case Intrinsic::riscv_vlseg5_mask:
2203 case Intrinsic::riscv_vlseg6_mask:
2204 case Intrinsic::riscv_vlseg7_mask:
2205 case Intrinsic::riscv_vlseg8_mask: {
2206 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2207 /*IsStrided*/ false);
2208 return;
2209 }
2210 case Intrinsic::riscv_vlsseg2:
2211 case Intrinsic::riscv_vlsseg3:
2212 case Intrinsic::riscv_vlsseg4:
2213 case Intrinsic::riscv_vlsseg5:
2214 case Intrinsic::riscv_vlsseg6:
2215 case Intrinsic::riscv_vlsseg7:
2216 case Intrinsic::riscv_vlsseg8: {
2217 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2218 /*IsStrided*/ true);
2219 return;
2220 }
2221 case Intrinsic::riscv_vlsseg2_mask:
2222 case Intrinsic::riscv_vlsseg3_mask:
2223 case Intrinsic::riscv_vlsseg4_mask:
2224 case Intrinsic::riscv_vlsseg5_mask:
2225 case Intrinsic::riscv_vlsseg6_mask:
2226 case Intrinsic::riscv_vlsseg7_mask:
2227 case Intrinsic::riscv_vlsseg8_mask: {
2228 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2229 /*IsStrided*/ true);
2230 return;
2231 }
2232 case Intrinsic::riscv_vloxseg2:
2233 case Intrinsic::riscv_vloxseg3:
2234 case Intrinsic::riscv_vloxseg4:
2235 case Intrinsic::riscv_vloxseg5:
2236 case Intrinsic::riscv_vloxseg6:
2237 case Intrinsic::riscv_vloxseg7:
2238 case Intrinsic::riscv_vloxseg8:
2239 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2240 /*IsOrdered*/ true);
2241 return;
2242 case Intrinsic::riscv_vluxseg2:
2243 case Intrinsic::riscv_vluxseg3:
2244 case Intrinsic::riscv_vluxseg4:
2245 case Intrinsic::riscv_vluxseg5:
2246 case Intrinsic::riscv_vluxseg6:
2247 case Intrinsic::riscv_vluxseg7:
2248 case Intrinsic::riscv_vluxseg8:
2249 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2250 /*IsOrdered*/ false);
2251 return;
2252 case Intrinsic::riscv_vloxseg2_mask:
2253 case Intrinsic::riscv_vloxseg3_mask:
2254 case Intrinsic::riscv_vloxseg4_mask:
2255 case Intrinsic::riscv_vloxseg5_mask:
2256 case Intrinsic::riscv_vloxseg6_mask:
2257 case Intrinsic::riscv_vloxseg7_mask:
2258 case Intrinsic::riscv_vloxseg8_mask:
2259 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2260 /*IsOrdered*/ true);
2261 return;
2262 case Intrinsic::riscv_vluxseg2_mask:
2263 case Intrinsic::riscv_vluxseg3_mask:
2264 case Intrinsic::riscv_vluxseg4_mask:
2265 case Intrinsic::riscv_vluxseg5_mask:
2266 case Intrinsic::riscv_vluxseg6_mask:
2267 case Intrinsic::riscv_vluxseg7_mask:
2268 case Intrinsic::riscv_vluxseg8_mask:
2269 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2270 /*IsOrdered*/ false);
2271 return;
2272 case Intrinsic::riscv_vlseg8ff:
2273 case Intrinsic::riscv_vlseg7ff:
2274 case Intrinsic::riscv_vlseg6ff:
2275 case Intrinsic::riscv_vlseg5ff:
2276 case Intrinsic::riscv_vlseg4ff:
2277 case Intrinsic::riscv_vlseg3ff:
2278 case Intrinsic::riscv_vlseg2ff: {
2279 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2280 return;
2281 }
2282 case Intrinsic::riscv_vlseg8ff_mask:
2283 case Intrinsic::riscv_vlseg7ff_mask:
2284 case Intrinsic::riscv_vlseg6ff_mask:
2285 case Intrinsic::riscv_vlseg5ff_mask:
2286 case Intrinsic::riscv_vlseg4ff_mask:
2287 case Intrinsic::riscv_vlseg3ff_mask:
2288 case Intrinsic::riscv_vlseg2ff_mask: {
2289 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2290 return;
2291 }
2292 case Intrinsic::riscv_vloxei:
2293 case Intrinsic::riscv_vloxei_mask:
2294 case Intrinsic::riscv_vluxei:
2295 case Intrinsic::riscv_vluxei_mask: {
2296 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2297 IntNo == Intrinsic::riscv_vluxei_mask;
2298 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2299 IntNo == Intrinsic::riscv_vloxei_mask;
2300
2301 MVT VT = Node->getSimpleValueType(0);
2302 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2303
2304 unsigned CurOp = 2;
2305 SmallVector<SDValue, 8> Operands;
2306 Operands.push_back(Node->getOperand(CurOp++));
2307
2308 MVT IndexVT;
2309 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2310 /*IsStridedOrIndexed*/ true, Operands,
2311 /*IsLoad=*/true, &IndexVT);
2312
2314 "Element count mismatch");
2315
2316 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2317 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2318 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2319 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2320 report_fatal_error("The V extension does not support EEW=64 for index "
2321 "values when XLEN=32");
2322 }
2323 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2324 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2325 static_cast<unsigned>(IndexLMUL));
2326 MachineSDNode *Load =
2327 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2328
2329 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2330
2331 ReplaceNode(Node, Load);
2332 return;
2333 }
2334 case Intrinsic::riscv_vlm:
2335 case Intrinsic::riscv_vle:
2336 case Intrinsic::riscv_vle_mask:
2337 case Intrinsic::riscv_vlse:
2338 case Intrinsic::riscv_vlse_mask: {
2339 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2340 IntNo == Intrinsic::riscv_vlse_mask;
2341 bool IsStrided =
2342 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2343
2344 MVT VT = Node->getSimpleValueType(0);
2345 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2346
2347 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2348 // operand at the IR level. In pseudos, it has both a policy and a
2349 // passthru operand. The passthru operand is needed to track the
2350 // "tail undefined" state, and the policy is there just for
2351 // consistency - it will always be "don't care" for the
2352 // unmasked form.
2353 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2354 unsigned CurOp = 2;
2355 SmallVector<SDValue, 8> Operands;
2356 if (HasPassthruOperand)
2357 Operands.push_back(Node->getOperand(CurOp++));
2358 else {
2359 // We eagerly lower to implicit_def (instead of undef), as we
2360 // otherwise fail to select nodes such as: nxv1i1 = undef
2361 SDNode *Passthru =
2362 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2363 Operands.push_back(SDValue(Passthru, 0));
2364 }
2365 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2366 Operands, /*IsLoad=*/true);
2367
2368 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2369 const RISCV::VLEPseudo *P =
2370 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2371 static_cast<unsigned>(LMUL));
2372 MachineSDNode *Load =
2373 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2374
2375 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2376
2377 ReplaceNode(Node, Load);
2378 return;
2379 }
2380 case Intrinsic::riscv_vleff:
2381 case Intrinsic::riscv_vleff_mask: {
2382 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2383
2384 MVT VT = Node->getSimpleValueType(0);
2385 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2386
2387 unsigned CurOp = 2;
2388 SmallVector<SDValue, 8> Operands;
2389 Operands.push_back(Node->getOperand(CurOp++));
2390 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2391 /*IsStridedOrIndexed*/ false, Operands,
2392 /*IsLoad=*/true);
2393
2394 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2395 const RISCV::VLEPseudo *P =
2396 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2397 Log2SEW, static_cast<unsigned>(LMUL));
2398 MachineSDNode *Load = CurDAG->getMachineNode(
2399 P->Pseudo, DL, Node->getVTList(), Operands);
2400 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2401
2402 ReplaceNode(Node, Load);
2403 return;
2404 }
2405 case Intrinsic::riscv_nds_vln:
2406 case Intrinsic::riscv_nds_vln_mask:
2407 case Intrinsic::riscv_nds_vlnu:
2408 case Intrinsic::riscv_nds_vlnu_mask: {
2409 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2410 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2411 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2412 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2413
2414 MVT VT = Node->getSimpleValueType(0);
2415 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2416 unsigned CurOp = 2;
2417 SmallVector<SDValue, 8> Operands;
2418
2419 Operands.push_back(Node->getOperand(CurOp++));
2420 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2421 /*IsStridedOrIndexed=*/false, Operands,
2422 /*IsLoad=*/true);
2423
2424 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2425 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2426 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2427 MachineSDNode *Load =
2428 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2429
2430 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2431 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2432
2433 ReplaceNode(Node, Load);
2434 return;
2435 }
2436 }
2437 break;
2438 }
2439 case ISD::INTRINSIC_VOID: {
2440 unsigned IntNo = Node->getConstantOperandVal(1);
2441 switch (IntNo) {
2442 case Intrinsic::riscv_vsseg2:
2443 case Intrinsic::riscv_vsseg3:
2444 case Intrinsic::riscv_vsseg4:
2445 case Intrinsic::riscv_vsseg5:
2446 case Intrinsic::riscv_vsseg6:
2447 case Intrinsic::riscv_vsseg7:
2448 case Intrinsic::riscv_vsseg8: {
2449 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2450 /*IsStrided*/ false);
2451 return;
2452 }
2453 case Intrinsic::riscv_vsseg2_mask:
2454 case Intrinsic::riscv_vsseg3_mask:
2455 case Intrinsic::riscv_vsseg4_mask:
2456 case Intrinsic::riscv_vsseg5_mask:
2457 case Intrinsic::riscv_vsseg6_mask:
2458 case Intrinsic::riscv_vsseg7_mask:
2459 case Intrinsic::riscv_vsseg8_mask: {
2460 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2461 /*IsStrided*/ false);
2462 return;
2463 }
2464 case Intrinsic::riscv_vssseg2:
2465 case Intrinsic::riscv_vssseg3:
2466 case Intrinsic::riscv_vssseg4:
2467 case Intrinsic::riscv_vssseg5:
2468 case Intrinsic::riscv_vssseg6:
2469 case Intrinsic::riscv_vssseg7:
2470 case Intrinsic::riscv_vssseg8: {
2471 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2472 /*IsStrided*/ true);
2473 return;
2474 }
2475 case Intrinsic::riscv_vssseg2_mask:
2476 case Intrinsic::riscv_vssseg3_mask:
2477 case Intrinsic::riscv_vssseg4_mask:
2478 case Intrinsic::riscv_vssseg5_mask:
2479 case Intrinsic::riscv_vssseg6_mask:
2480 case Intrinsic::riscv_vssseg7_mask:
2481 case Intrinsic::riscv_vssseg8_mask: {
2482 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2483 /*IsStrided*/ true);
2484 return;
2485 }
2486 case Intrinsic::riscv_vsoxseg2:
2487 case Intrinsic::riscv_vsoxseg3:
2488 case Intrinsic::riscv_vsoxseg4:
2489 case Intrinsic::riscv_vsoxseg5:
2490 case Intrinsic::riscv_vsoxseg6:
2491 case Intrinsic::riscv_vsoxseg7:
2492 case Intrinsic::riscv_vsoxseg8:
2493 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2494 /*IsOrdered*/ true);
2495 return;
2496 case Intrinsic::riscv_vsuxseg2:
2497 case Intrinsic::riscv_vsuxseg3:
2498 case Intrinsic::riscv_vsuxseg4:
2499 case Intrinsic::riscv_vsuxseg5:
2500 case Intrinsic::riscv_vsuxseg6:
2501 case Intrinsic::riscv_vsuxseg7:
2502 case Intrinsic::riscv_vsuxseg8:
2503 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2504 /*IsOrdered*/ false);
2505 return;
2506 case Intrinsic::riscv_vsoxseg2_mask:
2507 case Intrinsic::riscv_vsoxseg3_mask:
2508 case Intrinsic::riscv_vsoxseg4_mask:
2509 case Intrinsic::riscv_vsoxseg5_mask:
2510 case Intrinsic::riscv_vsoxseg6_mask:
2511 case Intrinsic::riscv_vsoxseg7_mask:
2512 case Intrinsic::riscv_vsoxseg8_mask:
2513 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2514 /*IsOrdered*/ true);
2515 return;
2516 case Intrinsic::riscv_vsuxseg2_mask:
2517 case Intrinsic::riscv_vsuxseg3_mask:
2518 case Intrinsic::riscv_vsuxseg4_mask:
2519 case Intrinsic::riscv_vsuxseg5_mask:
2520 case Intrinsic::riscv_vsuxseg6_mask:
2521 case Intrinsic::riscv_vsuxseg7_mask:
2522 case Intrinsic::riscv_vsuxseg8_mask:
2523 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2524 /*IsOrdered*/ false);
2525 return;
2526 case Intrinsic::riscv_vsoxei:
2527 case Intrinsic::riscv_vsoxei_mask:
2528 case Intrinsic::riscv_vsuxei:
2529 case Intrinsic::riscv_vsuxei_mask: {
2530 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2531 IntNo == Intrinsic::riscv_vsuxei_mask;
2532 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2533 IntNo == Intrinsic::riscv_vsoxei_mask;
2534
2535 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2536 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2537
2538 unsigned CurOp = 2;
2539 SmallVector<SDValue, 8> Operands;
2540 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2541
2542 MVT IndexVT;
2543 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2544 /*IsStridedOrIndexed*/ true, Operands,
2545 /*IsLoad=*/false, &IndexVT);
2546
2548 "Element count mismatch");
2549
2550 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2551 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2552 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2553 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2554 report_fatal_error("The V extension does not support EEW=64 for index "
2555 "values when XLEN=32");
2556 }
2557 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2558 IsMasked, IsOrdered, IndexLog2EEW,
2559 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2560 MachineSDNode *Store =
2561 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2562
2563 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2564
2565 ReplaceNode(Node, Store);
2566 return;
2567 }
2568 case Intrinsic::riscv_vsm:
2569 case Intrinsic::riscv_vse:
2570 case Intrinsic::riscv_vse_mask:
2571 case Intrinsic::riscv_vsse:
2572 case Intrinsic::riscv_vsse_mask: {
2573 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2574 IntNo == Intrinsic::riscv_vsse_mask;
2575 bool IsStrided =
2576 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2577
2578 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2579 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2580
2581 unsigned CurOp = 2;
2582 SmallVector<SDValue, 8> Operands;
2583 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2584
2585 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2586 Operands);
2587
2588 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2589 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2590 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2591 MachineSDNode *Store =
2592 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2593 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2594
2595 ReplaceNode(Node, Store);
2596 return;
2597 }
2598 case Intrinsic::riscv_sf_vc_x_se:
2599 case Intrinsic::riscv_sf_vc_i_se:
2600 selectSF_VC_X_SE(Node);
2601 return;
2602 }
2603 break;
2604 }
2605 case ISD::BITCAST: {
2606 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2607 // Just drop bitcasts between vectors if both are fixed or both are
2608 // scalable.
2609 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2610 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2611 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2612 CurDAG->RemoveDeadNode(Node);
2613 return;
2614 }
2615 break;
2616 }
2617 case ISD::INSERT_SUBVECTOR:
2618 case RISCVISD::TUPLE_INSERT: {
2619 SDValue V = Node->getOperand(0);
2620 SDValue SubV = Node->getOperand(1);
2621 SDLoc DL(SubV);
2622 auto Idx = Node->getConstantOperandVal(2);
2623 MVT SubVecVT = SubV.getSimpleValueType();
2624
2625 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2626 MVT SubVecContainerVT = SubVecVT;
2627 // Establish the correct scalable-vector types for any fixed-length type.
2628 if (SubVecVT.isFixedLengthVector()) {
2629 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2630 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2631 [[maybe_unused]] bool ExactlyVecRegSized =
2632 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2633 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2634 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2635 .getKnownMinValue()));
2636 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2637 }
2638 MVT ContainerVT = VT;
2639 if (VT.isFixedLengthVector())
2640 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2641
2642 const auto *TRI = Subtarget->getRegisterInfo();
2643 unsigned SubRegIdx;
2644 std::tie(SubRegIdx, Idx) =
2645 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2646 ContainerVT, SubVecContainerVT, Idx, TRI);
2647
2648 // If the Idx hasn't been completely eliminated then this is a subvector
2649 // insert which doesn't naturally align to a vector register. These must
2650 // be handled using instructions to manipulate the vector registers.
2651 if (Idx != 0)
2652 break;
2653
2654 RISCVVType::VLMUL SubVecLMUL =
2655 RISCVTargetLowering::getLMUL(SubVecContainerVT);
2656 [[maybe_unused]] bool IsSubVecPartReg =
2657 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
2658 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
2659 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
2660 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2661 V.isUndef()) &&
2662 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2663 "the subvector is smaller than a full-sized register");
2664
2665 // If we haven't set a SubRegIdx, then we must be going between
2666 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2667 if (SubRegIdx == RISCV::NoSubRegister) {
2668 unsigned InRegClassID =
2669 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2670 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2671 InRegClassID &&
2672 "Unexpected subvector extraction");
2673 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2674 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2675 DL, VT, SubV, RC);
2676 ReplaceNode(Node, NewNode);
2677 return;
2678 }
2679
2680 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2681 ReplaceNode(Node, Insert.getNode());
2682 return;
2683 }
2684 case ISD::EXTRACT_SUBVECTOR:
2685 case RISCVISD::TUPLE_EXTRACT: {
2686 SDValue V = Node->getOperand(0);
2687 auto Idx = Node->getConstantOperandVal(1);
2688 MVT InVT = V.getSimpleValueType();
2689 SDLoc DL(V);
2690
2691 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2692 MVT SubVecContainerVT = VT;
2693 // Establish the correct scalable-vector types for any fixed-length type.
2694 if (VT.isFixedLengthVector()) {
2695 assert(Idx == 0);
2696 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2697 }
2698 if (InVT.isFixedLengthVector())
2699 InVT = TLI.getContainerForFixedLengthVector(InVT);
2700
2701 const auto *TRI = Subtarget->getRegisterInfo();
2702 unsigned SubRegIdx;
2703 std::tie(SubRegIdx, Idx) =
2704 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2705 InVT, SubVecContainerVT, Idx, TRI);
2706
2707 // If the Idx hasn't been completely eliminated then this is a subvector
2708 // extract which doesn't naturally align to a vector register. These must
2709 // be handled using instructions to manipulate the vector registers.
2710 if (Idx != 0)
2711 break;
2712
2713 // If we haven't set a SubRegIdx, then we must be going between
2714 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2715 if (SubRegIdx == RISCV::NoSubRegister) {
2716 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2717 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2718 InRegClassID &&
2719 "Unexpected subvector extraction");
2720 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2721 SDNode *NewNode =
2722 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2723 ReplaceNode(Node, NewNode);
2724 return;
2725 }
2726
2727 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2728 ReplaceNode(Node, Extract.getNode());
2729 return;
2730 }
2731 case RISCVISD::VMV_S_X_VL:
2732 case RISCVISD::VFMV_S_F_VL:
2733 case RISCVISD::VMV_V_X_VL:
2734 case RISCVISD::VFMV_V_F_VL: {
2735 // Try to match splat of a scalar load to a strided load with stride of x0.
2736 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2737 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2738 if (!Node->getOperand(0).isUndef())
2739 break;
2740 SDValue Src = Node->getOperand(1);
2741 auto *Ld = dyn_cast<LoadSDNode>(Src);
2742 // Can't fold an indexed load (load update node): its second output (the
2743 // updated address) is used, so the load node can't be removed.
2744 if (!Ld || Ld->isIndexed())
2745 break;
2746 EVT MemVT = Ld->getMemoryVT();
2747 // The memory VT should be the same size as the element type.
2748 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2749 break;
2750 if (!IsProfitableToFold(Src, Node, Node) ||
2751 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2752 break;
2753
2754 SDValue VL;
2755 if (IsScalarMove) {
2756 // We could deal with more VL if we update the VSETVLI insert pass to
2757 // avoid introducing more VSETVLI.
2758 if (!isOneConstant(Node->getOperand(2)))
2759 break;
2760 selectVLOp(Node->getOperand(2), VL);
2761 } else
2762 selectVLOp(Node->getOperand(2), VL);
2763
2764 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2765 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2766
2767 // If VL=1, then we don't need to do a strided load and can just do a
2768 // regular load.
2769 bool IsStrided = !isOneConstant(VL);
2770
2771 // Only do a strided load if we have optimized zero-stride vector load.
2772 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2773 break;
2774
2775 SmallVector<SDValue> Operands = {
2776 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2777 Ld->getBasePtr()};
2778 if (IsStrided)
2779 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2780 uint64_t Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
2781 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2782 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2783
2784 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2785 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2786 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2787 Log2SEW, static_cast<unsigned>(LMUL));
2788 MachineSDNode *Load =
2789 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2790 // Update the chain.
2791 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2792 // Record the mem-refs
2793 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2794 // Replace the splat with the vlse.
2795 ReplaceNode(Node, Load);
2796 return;
2797 }
2798 case ISD::PREFETCH:
2799 unsigned Locality = Node->getConstantOperandVal(3);
2800 if (Locality > 2)
2801 break;
2802
2803 auto *LoadStoreMem = cast<MemSDNode>(Node);
2804 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2805 MMO->setFlags(MachineMemOperand::MONonTemporal);
2806
2807 int NontemporalLevel = 0;
2808 switch (Locality) {
2809 case 0:
2810 NontemporalLevel = 3; // NTL.ALL
2811 break;
2812 case 1:
2813 NontemporalLevel = 1; // NTL.PALL
2814 break;
2815 case 2:
2816 NontemporalLevel = 0; // NTL.P1
2817 break;
2818 default:
2819 llvm_unreachable("unexpected locality value.");
2820 }
2821
2822 if (NontemporalLevel & 0b1)
2823 MMO->setFlags(MONontemporalBit0);
2824 if (NontemporalLevel & 0b10)
2825 MMO->setFlags(MONontemporalBit1);
2826 break;
2827 }
2828
2829 // Select the default instruction.
2830 SelectCode(Node);
2831}
2832
2833 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2834 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2835 std::vector<SDValue> &OutOps) {
2836 // Always produce a register and immediate operand, as expected by
2837 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2838 switch (ConstraintID) {
2839 case InlineAsm::ConstraintCode::o:
2840 case InlineAsm::ConstraintCode::m: {
2841 SDValue Op0, Op1;
2842 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2843 assert(Found && "SelectAddrRegImm should always succeed");
2844 OutOps.push_back(Op0);
2845 OutOps.push_back(Op1);
2846 return false;
2847 }
2848 case InlineAsm::ConstraintCode::A:
2849 OutOps.push_back(Op);
2850 OutOps.push_back(
2851 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2852 return false;
2853 default:
2854 report_fatal_error("Unexpected asm memory constraint " +
2855 InlineAsm::getMemConstraintName(ConstraintID));
2856 }
2857
2858 return true;
2859}
2860
2861 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2862 SDValue &Offset) {
2863 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2864 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2865 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2866 return true;
2867 }
2868
2869 return false;
2870}
2871
2872// Fold constant addresses.
2873static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2874 const MVT VT, const RISCVSubtarget *Subtarget,
2875 SDValue Addr, SDValue &Base, SDValue &Offset,
2876 bool IsPrefetch = false) {
2877 if (!isa<ConstantSDNode>(Addr))
2878 return false;
2879
2880 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2881
2882 // If the constant is a simm12, we can fold the whole constant and use X0 as
2883 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2884 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
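// Editor's note (illustrative, not in the original source): for
// CVal=0x12345FFF, Lo12 = SignExtend64<12>(0xFFF) = -1 and Hi = 0x12346000,
// so the address is materialized as (LUI 0x12346) with a folded offset of -1.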
2885 int64_t Lo12 = SignExtend64<12>(CVal);
2886 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2887 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2888 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2889 return false;
2890 if (Hi) {
2891 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2892 Base = SDValue(
2893 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2894 CurDAG->getTargetConstant(Hi20, DL, VT)),
2895 0);
2896 } else {
2897 Base = CurDAG->getRegister(RISCV::X0, VT);
2898 }
2899 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2900 return true;
2901 }
2902
2903 // Ask how constant materialization would handle this constant.
2904 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2905
2906 // If the last instruction would be an ADDI, we can fold its immediate and
2907 // emit the rest of the sequence as the base.
2908 if (Seq.back().getOpcode() != RISCV::ADDI)
2909 return false;
2910 Lo12 = Seq.back().getImm();
2911 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2912 return false;
2913
2914 // Drop the last instruction.
2915 Seq.pop_back();
2916 assert(!Seq.empty() && "Expected more instructions in sequence");
2917
2918 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2919 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2920 return true;
2921}
2922
2923// Is this ADD instruction only used as the base pointer of scalar loads and
2924// stores?
2925 static bool isWorthFoldingAdd(SDValue Add) {
2926 for (auto *User : Add->users()) {
2927 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
2928 User->getOpcode() != RISCVISD::LD_RV32 &&
2929 User->getOpcode() != RISCVISD::SD_RV32 &&
2930 User->getOpcode() != ISD::ATOMIC_LOAD &&
2931 User->getOpcode() != ISD::ATOMIC_STORE)
2932 return false;
2933 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
2934 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2935 VT != MVT::f64)
2936 return false;
2937 // Don't allow stores of the value. It must be used as the address.
2938 if (User->getOpcode() == ISD::STORE &&
2939 cast<StoreSDNode>(User)->getValue() == Add)
2940 return false;
2941 if (User->getOpcode() == ISD::ATOMIC_STORE &&
2942 cast<AtomicSDNode>(User)->getVal() == Add)
2943 return false;
2944 if (User->getOpcode() == RISCVISD::SD_RV32 &&
2945 (User->getOperand(0) == Add || User->getOperand(1) == Add))
2946 return false;
2947 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
2948 return false;
2949 }
2950
2951 return true;
2952}
2953
2954 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2955 SDValue &Offset) {
2956 if (SelectAddrFrameIndex(Addr, Base, Offset))
2957 return true;
2958
2959 SDLoc DL(Addr);
2960 MVT VT = Addr.getSimpleValueType();
2961
2962 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2963 Base = Addr.getOperand(0);
2964 Offset = Addr.getOperand(1);
2965 return true;
2966 }
2967
2968 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2969 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2970 if (isInt<12>(CVal)) {
2971 Base = Addr.getOperand(0);
2972 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2973 SDValue LoOperand = Base.getOperand(1);
2974 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2975 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2976 // (its low part, really), then we can rely on the alignment of that
2977 // variable to provide a margin of safety before the low part can overflow
2978 // the 12 bits of the load/store offset. Check if CVal falls within
2979 // that margin; if so (low part + CVal) can't overflow.
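// Editor's note (illustrative, not in the original source): if the global is
// 16-byte aligned (and any existing offset preserves that), its %lo value is
// a multiple of 16 in [-2048, 2032], so adding any CVal in [0, 15] still
// fits the signed 12-bit load/store offset.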
2980 const DataLayout &DL = CurDAG->getDataLayout();
2981 Align Alignment = commonAlignment(
2982 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2983 if ((CVal == 0 || Alignment > CVal)) {
2984 int64_t CombinedOffset = CVal + GA->getOffset();
2985 Base = Base.getOperand(0);
2986 Offset = CurDAG->getTargetGlobalAddress(
2987 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2988 CombinedOffset, GA->getTargetFlags());
2989 return true;
2990 }
2991 }
2992 }
2993
2994 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2995 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2996 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
2997 return true;
2998 }
2999 }
3000
3001 // Handle ADD with large immediates.
3002 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3003 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3004 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3005
3006 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3007 // an ADDI for part of the offset and fold the rest into the load/store.
3008 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
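// Editor's note (illustrative, not in the original source): for CVal=3000,
// the base becomes (ADDI addr, 2047) and the remaining 953 is folded into
// the load/store offset, avoiding a separate constant materialization.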
3009 if (CVal >= -4096 && CVal <= 4094) {
3010 int64_t Adj = CVal < 0 ? -2048 : 2047;
3011 Base = SDValue(
3012 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
3013 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
3014 0);
3015 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
3016 return true;
3017 }
3018
3019 // For larger immediates, we might be able to save one instruction from
3020 // constant materialization by folding the Lo12 bits of the immediate into
3021 // the address. We should only do this if the ADD is only used by loads and
3022 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3023 // separately with the full materialized immediate creating extra
3024 // instructions.
3025 if (isWorthFoldingAdd(Addr) &&
3026 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3027 Offset, /*IsPrefetch=*/false)) {
3028 // Insert an ADD instruction with the materialized Hi52 bits.
3029 Base = SDValue(
3030 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3031 0);
3032 return true;
3033 }
3034 }
3035
3036 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3037 /*IsPrefetch=*/false))
3038 return true;
3039
3040 Base = Addr;
3041 Offset = CurDAG->getTargetConstant(0, DL, VT);
3042 return true;
3043}
3044
3045/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3046 bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base,
3047 SDValue &Offset) {
3048 if (SelectAddrFrameIndex(Addr, Base, Offset))
3049 return true;
3050
3051 SDLoc DL(Addr);
3052 MVT VT = Addr.getSimpleValueType();
3053
3054 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3055 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3056 if (isUInt<9>(CVal)) {
3057 Base = Addr.getOperand(0);
3058
3059 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3060 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3061 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
3062 return true;
3063 }
3064 }
3065
3066 Base = Addr;
3067 Offset = CurDAG->getTargetConstant(0, DL, VT);
3068 return true;
3069}
3070
3071/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3072/// Offset should be all zeros.
3073 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
3074 SDValue &Offset) {
3075 if (SelectAddrFrameIndex(Addr, Base, Offset))
3076 return true;
3077
3078 SDLoc DL(Addr);
3079 MVT VT = Addr.getSimpleValueType();
3080
3081 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3082 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3083 if (isInt<12>(CVal)) {
3084 Base = Addr.getOperand(0);
3085
3086 // Early-out if not a valid offset.
3087 if ((CVal & 0b11111) != 0) {
3088 Base = Addr;
3089 Offset = CurDAG->getTargetConstant(0, DL, VT);
3090 return true;
3091 }
3092
3093 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3094 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3095 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3096 return true;
3097 }
3098 }
3099
3100 // Handle ADD with large immediates.
3101 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3102 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3103 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3104
3105 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3106 // one instruction by folding adjustment (-2048 or 2016) into the address.
3107 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3108 int64_t Adj = CVal < 0 ? -2048 : 2016;
3109 int64_t AdjustedOffset = CVal - Adj;
3110 Base =
3111 SDValue(CurDAG->getMachineNode(
3112 RISCV::ADDI, DL, VT, Addr.getOperand(0),
3113 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
3114 0);
3115 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
3116 return true;
3117 }
3118
3119 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3120 Offset, /*IsPrefetch=*/true)) {
3121 // Insert an ADD instruction with the materialized Hi52 bits.
3122 Base = SDValue(
3123 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3124 0);
3125 return true;
3126 }
3127 }
3128
3129 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3130 /*IsPrefetch=*/true))
3131 return true;
3132
3133 Base = Addr;
3134 Offset = CurDAG->getTargetConstant(0, DL, VT);
3135 return true;
3136}
3137
3138/// Return true if this is a load/store that we have a RegRegScale instruction for.
3139 static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
3140 const RISCVSubtarget &Subtarget) {
3141 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3142 return false;
3143 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3144 if (!(VT.isScalarInteger() &&
3145 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3146 !((VT == MVT::f32 || VT == MVT::f64) &&
3147 Subtarget.hasVendorXTHeadFMemIdx()))
3148 return false;
3149 // Don't allow stores of the value. It must be used as the address.
3150 if (User->getOpcode() == ISD::STORE &&
3151 cast<StoreSDNode>(User)->getValue() == Add)
3152 return false;
3153
3154 return true;
3155}
3156
3157/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3158/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3159/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3160/// single addi and we don't have a SHXADD instruction we could use.
3161/// FIXME: May still need to check how many and what kind of users the SHL has.
3162 static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
3163 SDValue Add,
3164 SDValue Shift = SDValue()) {
3165 bool FoundADDI = false;
3166 for (auto *User : Add->users()) {
3167 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3168 continue;
3169
3170 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3171 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3172 !isa<ConstantSDNode>(User->getOperand(1)) ||
3173 !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3174 return false;
3175
3176 FoundADDI = true;
3177
3178 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3179 assert(Shift.getOpcode() == ISD::SHL);
3180 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3181 if ((ShiftAmt <= 3 &&
3182 (Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa())) ||
3183 (ShiftAmt >= 4 && ShiftAmt <= 7 && Subtarget.hasVendorXqciac()))
3184 return false;
3185
3186 // All users of the ADDI should be load/store.
3187 for (auto *ADDIUser : User->users())
3188 if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3189 return false;
3190 }
3191
3192 return true;
3193}
3194
3195 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
3196 unsigned MaxShiftAmount,
3197 SDValue &Base, SDValue &Index,
3198 SDValue &Scale) {
3199 if (Addr.getOpcode() != ISD::ADD)
3200 return false;
3201 SDValue LHS = Addr.getOperand(0);
3202 SDValue RHS = Addr.getOperand(1);
3203
3204 EVT VT = Addr.getSimpleValueType();
3205 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3206 SDValue &Shift) {
3207 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(N.getOperand(1)))
3208 return false;
3209
3210 // Only match shifts by a value in range [0, MaxShiftAmount].
3211 unsigned ShiftAmt = N.getConstantOperandVal(1);
3212 if (ShiftAmt > MaxShiftAmount)
3213 return false;
3214
3215 Index = N.getOperand(0);
3216 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
3217 return true;
3218 };
3219
3220 if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
3221 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3222 if (LHS.getOpcode() == ISD::ADD &&
3223 !isa<ConstantSDNode>(LHS.getOperand(1)) &&
3224 isInt<12>(C1->getSExtValue())) {
3225 if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3226 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
3227 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3228 SDLoc(Addr), VT);
3229 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3230 LHS.getOperand(0), C1Val),
3231 0);
3232 return true;
3233 }
3234
3235 // Add is commutative so we need to check both operands.
3236 if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3237 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
3238 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3239 SDLoc(Addr), VT);
3240 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3241 LHS.getOperand(1), C1Val),
3242 0);
3243 return true;
3244 }
3245 }
3246
3247 // Don't match add with constants.
3248 // FIXME: Is this profitable for large constants that have 0s in the lower
3249 // 12 bits that we can materialize with LUI?
3250 return false;
3251 }
3252
3253 // Try to match a shift on the RHS.
3254 if (SelectShl(RHS, Index, Scale)) {
3255 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3256 return false;
3257 Base = LHS;
3258 return true;
3259 }
3260
3261 // Try to match a shift on the LHS.
3262 if (SelectShl(LHS, Index, Scale)) {
3263 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3264 return false;
3265 Base = RHS;
3266 return true;
3267 }
3268
3269 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3270 return false;
3271
3272 Base = LHS;
3273 Index = RHS;
3274 Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
3275 return true;
3276}
3277
3278 bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr,
3279 unsigned MaxShiftAmount,
3280 unsigned Bits, SDValue &Base,
3281 SDValue &Index,
3282 SDValue &Scale) {
3283 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3284 return false;
3285
3286 if (Index.getOpcode() == ISD::AND) {
3287 auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
3288 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3289 Index = Index.getOperand(0);
3290 return true;
3291 }
3292 }
3293
3294 return false;
3295}
3296
3297 bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3298 SDValue &Offset) {
3299 if (Addr.getOpcode() != ISD::ADD)
3300 return false;
3301
3302 if (isa<ConstantSDNode>(Addr.getOperand(1)))
3303 return false;
3304
3305 Base = Addr.getOperand(0);
3306 Offset = Addr.getOperand(1);
3307 return true;
3308}
3309
3310 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
3311 SDValue &ShAmt) {
3312 ShAmt = N;
3313
3314 // Peek through zext.
3315 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3316 ShAmt = ShAmt.getOperand(0);
3317
3318 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3319 // amount. If there is an AND on the shift amount, we can bypass it if it
3320 // doesn't affect any of those bits.
3321 if (ShAmt.getOpcode() == ISD::AND &&
3322 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3323 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
3324
3325 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3326 // mask that covers the bits needed to represent all shift amounts.
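// Editor's note (illustrative, not in the original source): for a 64-bit
// shift ShMask is 0x3F, so an (and shamt, 0x7F) covers all needed bits and
// the AND can be bypassed.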
3327 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3328 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3329
3330 if (ShMask.isSubsetOf(AndMask)) {
3331 ShAmt = ShAmt.getOperand(0);
3332 } else {
3333 // SimplifyDemandedBits may have optimized the mask so try restoring any
3334 // bits that are known zero.
3335 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
3336 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
3337 return true;
3338 ShAmt = ShAmt.getOperand(0);
3339 }
3340 }
3341
3342 if (ShAmt.getOpcode() == ISD::ADD &&
3343 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3344 uint64_t Imm = ShAmt.getConstantOperandVal(1);
3345 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3346 // to avoid the ADD.
3347 if (Imm != 0 && Imm % ShiftWidth == 0) {
3348 ShAmt = ShAmt.getOperand(0);
3349 return true;
3350 }
3351 } else if (ShAmt.getOpcode() == ISD::SUB &&
3352 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
3353 uint64_t Imm = ShAmt.getConstantOperandVal(0);
3354 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3355 // generate a NEG instead of a SUB of a constant.
3356 if (Imm != 0 && Imm % ShiftWidth == 0) {
3357 SDLoc DL(ShAmt);
3358 EVT VT = ShAmt.getValueType();
3359 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
3360 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3361 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
3362 ShAmt.getOperand(1));
3363 ShAmt = SDValue(Neg, 0);
3364 return true;
3365 }
3366 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3367 // to generate a NOT instead of a SUB of a constant.
3368 if (Imm % ShiftWidth == ShiftWidth - 1) {
3369 SDLoc DL(ShAmt);
3370 EVT VT = ShAmt.getValueType();
3371 MachineSDNode *Not = CurDAG->getMachineNode(
3372 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
3373 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
3374 ShAmt = SDValue(Not, 0);
3375 return true;
3376 }
3377 }
3378
3379 return true;
3380}
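// Illustrative sketches (not from the upstream file), assuming RV64 and
// ShiftWidth == 64:
//   (srl X, (and Y, 63)) - the AND is bypassed and the shift reads Y directly.
//   (srl X, (sub 64, Y)) - the SUB becomes a NEG of Y feeding the shift.
//   (sll X, (sub 63, Y)) - the SUB becomes a NOT (XORI -1) of Y feeding the shift.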
3381
3382/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3383/// check for equality with 0. This function emits instructions that convert the
3384/// seteq/setne into something that can be compared with 0.
3385/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3386/// ISD::SETNE).
3387bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
3388 SDValue &Val) {
3389 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3390 "Unexpected condition code!");
3391
3392 // We're looking for a setcc.
3393 if (N->getOpcode() != ISD::SETCC)
3394 return false;
3395
3396 // Must be an equality comparison.
3397 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
3398 if (CCVal != ExpectedCCVal)
3399 return false;
3400
3401 SDValue LHS = N->getOperand(0);
3402 SDValue RHS = N->getOperand(1);
3403
3404 if (!LHS.getValueType().isScalarInteger())
3405 return false;
3406
3407 // If the RHS is 0, we don't need any extra instructions; return the LHS.
3408 if (isNullConstant(RHS)) {
3409 Val = LHS;
3410 return true;
3411 }
3412
3413 SDLoc DL(N);
3414
3415 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
3416 int64_t CVal = C->getSExtValue();
3417 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3418 // non-zero otherwise.
3419 if (CVal == -2048) {
3420 Val = SDValue(
3421 CurDAG->getMachineNode(
3422 RISCV::XORI, DL, N->getValueType(0), LHS,
3423 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
3424 0);
3425 return true;
3426 }
3427 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
3428 // LHS is equal to the RHS and non-zero otherwise.
3429 if (isInt<12>(CVal) || CVal == 2048) {
3430 Val = SDValue(
3431 CurDAG->getMachineNode(
3432 RISCV::ADDI, DL, N->getValueType(0), LHS,
3433 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
3434 0);
3435 return true;
3436 }
3437 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
3438 Val = SDValue(
3439 CurDAG->getMachineNode(
3440 RISCV::BINVI, DL, N->getValueType(0), LHS,
3441 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3442 0);
3443 return true;
3444 }
3445 // Same as the addi case above but for larger immediates (signed 26-bit) use
3446 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
3447 // anything which can be done with a single lui as it might be compressible.
3448 if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
3449 (CVal & 0xFFF) != 0) {
3450 Val = SDValue(
3451 CurDAG->getMachineNode(
3452 RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
3453 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
3454 0);
3455 return true;
3456 }
3457 }
3458
3459 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3460 // equal and a non-zero value if they aren't.
3461 Val = SDValue(
3462 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3463 return true;
3464}
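// Illustrative sketches (not from the upstream file) of the value returned for
// an equality setcc; the caller then compares it against zero:
//   (seteq X, 0)     -> X itself.
//   (seteq X, 5)     -> (ADDI X, -5).
//   (seteq X, -2048) -> (XORI X, -2048).
//   (seteq X, 4096)  -> (BINVI X, 12) when Zbs is available.
//   anything else    -> (XOR X, <materialized RHS>).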
3465
3466bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3467 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3468 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3469 Val = N.getOperand(0);
3470 return true;
3471 }
3472
3473 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3474 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3475 return N;
3476
3477 SDValue N0 = N.getOperand(0);
3478 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3479 N.getConstantOperandVal(1) == ShiftAmt &&
3480 N0.getConstantOperandVal(1) == ShiftAmt)
3481 return N0.getOperand(0);
3482
3483 return N;
3484 };
3485
3486 MVT VT = N.getSimpleValueType();
3487 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3488 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3489 return true;
3490 }
3491
3492 return false;
3493}
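// Illustrative sketch (not from the upstream file): with Bits == 32 this
// matches a sign_extend_inreg from i32, any value the DAG already proves has
// more than 32 sign bits (e.g. the result of an ADDW), and it unwraps the
// canonical (sra (shl X, 32), 32) form back to X.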
3494
3495bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3496 if (N.getOpcode() == ISD::AND) {
3497 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3498 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3499 Val = N.getOperand(0);
3500 return true;
3501 }
3502 }
3503 MVT VT = N.getSimpleValueType();
3504 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3505 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3506 Val = N;
3507 return true;
3508 }
3509
3510 return false;
3511}
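// Illustrative sketch (not from the upstream file): with Bits == 16 this
// matches (and X, 0xFFFF), returning X, or any value whose upper XLen-16 bits
// are already known to be zero, returned unchanged.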
3512
3513/// Look for various patterns that can be done with a SHL that can be folded
3514/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3515/// SHXADD we are trying to match.
3516bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
3517 SDValue &Val) {
3518 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3519 SDValue N0 = N.getOperand(0);
3520
3521 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3522 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3523 isa<ConstantSDNode>(N0.getOperand(1))) {
3524 uint64_t Mask = N.getConstantOperandVal(1);
3525 unsigned C2 = N0.getConstantOperandVal(1);
3526
3527 unsigned XLen = Subtarget->getXLen();
3528 if (LeftShift)
3529 Mask &= maskTrailingZeros<uint64_t>(C2);
3530 else
3531 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3532
3533 if (isShiftedMask_64(Mask)) {
3534 unsigned Leading = XLen - llvm::bit_width(Mask);
3535 unsigned Trailing = llvm::countr_zero(Mask);
3536 if (Trailing != ShAmt)
3537 return false;
3538
3539 unsigned Opcode;
3540 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3541 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3542 // followed by a SHXADD with c3 for the X amount.
3543 if (LeftShift && Leading == 0 && C2 < Trailing)
3544 Opcode = RISCV::SRLI;
3545 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
3546 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
3547 // followed by a SHXADD with c3 for the X amount.
3548 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
3549 Opcode = RISCV::SRLIW;
3550 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3551 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3552 // followed by a SHXADD using c3 for the X amount.
3553 else if (!LeftShift && Leading == C2)
3554 Opcode = RISCV::SRLI;
3555 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
3556 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
3557 // followed by a SHXADD using c3 for the X amount.
3558 else if (!LeftShift && Leading == 32 + C2)
3559 Opcode = RISCV::SRLIW;
3560 else
3561 return false;
3562
3563 SDLoc DL(N);
3564 EVT VT = N.getValueType();
3565 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
3566 Val = SDValue(
3567 CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
3568 CurDAG->getTargetConstant(ShAmt, DL, VT)),
3569 0);
3570 return true;
3571 }
3572 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3573 isa<ConstantSDNode>(N0.getOperand(1))) {
3574 uint64_t Mask = N.getConstantOperandVal(1);
3575 unsigned C2 = N0.getConstantOperandVal(1);
3576
3577 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3578 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3579 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3580 // the X amount.
3581 if (isShiftedMask_64(Mask)) {
3582 unsigned XLen = Subtarget->getXLen();
3583 unsigned Leading = XLen - llvm::bit_width(Mask);
3584 unsigned Trailing = llvm::countr_zero(Mask);
3585 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3586 SDLoc DL(N);
3587 EVT VT = N.getValueType();
3588 Val = SDValue(CurDAG->getMachineNode(
3589 RISCV::SRAI, DL, VT, N0.getOperand(0),
3590 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
3591 0);
3592 Val = SDValue(CurDAG->getMachineNode(
3593 RISCV::SRLI, DL, VT, Val,
3594 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
3595 0);
3596 return true;
3597 }
3598 }
3599 }
3600 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3601 (LeftShift || N.getOpcode() == ISD::SRL) &&
3602 isa<ConstantSDNode>(N.getOperand(1))) {
3603 SDValue N0 = N.getOperand(0);
3604 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3605 isa<ConstantSDNode>(N0.getOperand(1))) {
3606 uint64_t Mask = N0.getConstantOperandVal(1);
3607 if (isShiftedMask_64(Mask)) {
3608 unsigned C1 = N.getConstantOperandVal(1);
3609 unsigned XLen = Subtarget->getXLen();
3610 unsigned Leading = XLen - llvm::bit_width(Mask);
3611 unsigned Trailing = llvm::countr_zero(Mask);
3612 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3613 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3614 if (LeftShift && Leading == 32 && Trailing > 0 &&
3615 (Trailing + C1) == ShAmt) {
3616 SDLoc DL(N);
3617 EVT VT = N.getValueType();
3618 Val = SDValue(CurDAG->getMachineNode(
3619 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3620 CurDAG->getTargetConstant(Trailing, DL, VT)),
3621 0);
3622 return true;
3623 }
3624 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3625 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3626 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3627 (Trailing - C1) == ShAmt) {
3628 SDLoc DL(N);
3629 EVT VT = N.getValueType();
3630 Val = SDValue(CurDAG->getMachineNode(
3631 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3632 CurDAG->getTargetConstant(Trailing, DL, VT)),
3633 0);
3634 return true;
3635 }
3636 }
3637 }
3638 }
3639
3640 return false;
3641}
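// Illustrative sketch (not from the upstream file) of the (and (srl y, c2), c1)
// case above, for SH2ADD (ShAmt == 2) on RV64:
//   (and (srl Y, 4), 0x0FFFFFFFFFFFFFFC)   ; 4 leading zeros, 2 trailing zeros
// is rewritten here as (SRLI Y, 6), which then feeds the SH2ADD scaled operand.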
3642
3643/// Look for various patterns that can be done with a SHL that can be folded
3644/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3645/// SHXADD_UW we are trying to match.
3646bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3647 SDValue &Val) {
3648 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3649 N.hasOneUse()) {
3650 SDValue N0 = N.getOperand(0);
3651 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3652 N0.hasOneUse()) {
3653 uint64_t Mask = N.getConstantOperandVal(1);
3654 unsigned C2 = N0.getConstantOperandVal(1);
3655
3656 Mask &= maskTrailingZeros<uint64_t>(C2);
3657
3658 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3659 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3660 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3661 if (isShiftedMask_64(Mask)) {
3662 unsigned Leading = llvm::countl_zero(Mask);
3663 unsigned Trailing = llvm::countr_zero(Mask);
3664 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3665 SDLoc DL(N);
3666 EVT VT = N.getValueType();
3667 Val = SDValue(CurDAG->getMachineNode(
3668 RISCV::SLLI, DL, VT, N0.getOperand(0),
3669 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3670 0);
3671 return true;
3672 }
3673 }
3674 }
3675 }
3676
3677 return false;
3678}
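// Illustrative sketch (not from the upstream file), for SH2ADD_UW (ShAmt == 2)
// on RV64:
//   (and (shl Y, 4), 0x3FFFFFFF0)   ; 30 leading zeros, 4 trailing zeros
// is rewritten here as (SLLI Y, 2), which then feeds the zero-extended, scaled
// operand of SH2ADD_UW.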
3679
3680bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const {
3681 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
3682 if (N->getFlags().hasDisjoint())
3683 return true;
3684 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
3685}
3686
3687bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
3688 SDValue N, SDValue &Val) {
3689 int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
3690 /*CompressionCost=*/true);
3691 int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
3692 /*CompressionCost=*/true);
3693 if (OrigCost <= Cost)
3694 return false;
3695
3696 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
3697 return true;
3698}
3699
3700bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
3701 if (!isa<ConstantSDNode>(N))
3702 return false;
3703 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3704 if ((Imm >> 31) != 1)
3705 return false;
3706
3707 for (const SDNode *U : N->users()) {
3708 switch (U->getOpcode()) {
3709 case ISD::ADD:
3710 break;
3711 case ISD::OR:
3712 if (orDisjoint(U))
3713 break;
3714 return false;
3715 default:
3716 return false;
3717 }
3718 }
3719
3720 return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
3721}
3722
3723bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
3724 if (!isa<ConstantSDNode>(N))
3725 return false;
3726 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3727 if (isInt<32>(Imm))
3728 return false;
3729
3730 for (const SDNode *U : N->users()) {
3731 switch (U->getOpcode()) {
3732 case ISD::ADD:
3733 break;
3734 case RISCVISD::VMV_V_X_VL:
3735 if (!all_of(U->users(), [](const SDNode *V) {
3736 return V->getOpcode() == ISD::ADD ||
3737 V->getOpcode() == RISCVISD::ADD_VL;
3738 }))
3739 return false;
3740 break;
3741 default:
3742 return false;
3743 }
3744 }
3745
3746 return selectImm64IfCheaper(-Imm, Imm, N, Val);
3747}
3748
3749bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
3750 if (!isa<ConstantSDNode>(N))
3751 return false;
3752 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3753
3754 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
3755 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
3756 return false;
3757
3758 // Abandon this transform if the constant is needed elsewhere.
3759 for (const SDNode *U : N->users()) {
3760 switch (U->getOpcode()) {
3761 case ISD::AND:
3762 case ISD::OR:
3763 case ISD::XOR:
3764 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
3765 return false;
3766 break;
3767 case RISCVISD::VMV_V_X_VL:
3768 if (!Subtarget->hasStdExtZvkb())
3769 return false;
3770 if (!all_of(U->users(), [](const SDNode *V) {
3771 return V->getOpcode() == ISD::AND ||
3772 V->getOpcode() == RISCVISD::AND_VL;
3773 }))
3774 return false;
3775 break;
3776 default:
3777 return false;
3778 }
3779 }
3780
3781 if (isInt<32>(Imm)) {
3782 Val =
3783 selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
3784 return true;
3785 }
3786
3787 // For 64-bit constants, the instruction sequences get complex,
3788 // so we select inverted only if it's cheaper.
3789 return selectImm64IfCheaper(~Imm, Imm, N, Val);
3790}
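// Illustrative sketch (not from the upstream file): with Zbb available,
// (and X, 0xFFFFFFFFFFFF0FFF) would need LUI+ADDI to materialize the mask;
// selecting the inverted constant 0xF000 (a single LUI) lets the AND be
// matched as ANDN instead, saving one instruction.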
3791
3792static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3793 unsigned Bits,
3794 const TargetInstrInfo *TII) {
3795 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3796
3797 if (!MCOpcode)
3798 return false;
3799
3800 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3801 const uint64_t TSFlags = MCID.TSFlags;
3802 if (!RISCVII::hasSEWOp(TSFlags))
3803 return false;
3804 assert(RISCVII::hasVLOp(TSFlags));
3805
3806 unsigned ChainOpIdx = User->getNumOperands() - 1;
3807 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3808 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3809 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
3810 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3811
3812 if (UserOpNo == VLIdx)
3813 return false;
3814
3815 auto NumDemandedBits =
3816 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3817 return NumDemandedBits && Bits >= *NumDemandedBits;
3818}
3819
3820// Return true if all users of this SDNode* only consume the lower \p Bits.
3821// This can be used to form W instructions for add/sub/mul/shl even when the
3822// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3823// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3824// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3825// the add/sub/mul/shl to become non-W instructions. By checking the users we
3826// may be able to use a W instruction and CSE with the other instruction if
3827// this has happened. We could try to detect that the CSE opportunity exists
3828// before doing this, but that would be more complicated.
3829bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3830 const unsigned Depth) const {
3831 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3832 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3833 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3834 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3835 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3836 isa<ConstantSDNode>(Node) || Depth != 0) &&
3837 "Unexpected opcode");
3838
3839 if (Depth >= SelectionDAG::MaxRecursionDepth)
3840 return false;
3841
3842 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3843 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3844 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3845 return false;
3846
3847 for (SDUse &Use : Node->uses()) {
3848 SDNode *User = Use.getUser();
3849 // Users of this node should have already been instruction selected
3850 if (!User->isMachineOpcode())
3851 return false;
3852
3853 // TODO: Add more opcodes?
3854 switch (User->getMachineOpcode()) {
3855 default:
3856 if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
3857 break;
3858 return false;
3859 case RISCV::ADDW:
3860 case RISCV::ADDIW:
3861 case RISCV::SUBW:
3862 case RISCV::MULW:
3863 case RISCV::SLLW:
3864 case RISCV::SLLIW:
3865 case RISCV::SRAW:
3866 case RISCV::SRAIW:
3867 case RISCV::SRLW:
3868 case RISCV::SRLIW:
3869 case RISCV::DIVW:
3870 case RISCV::DIVUW:
3871 case RISCV::REMW:
3872 case RISCV::REMUW:
3873 case RISCV::ROLW:
3874 case RISCV::RORW:
3875 case RISCV::RORIW:
3876 case RISCV::CLZW:
3877 case RISCV::CTZW:
3878 case RISCV::CPOPW:
3879 case RISCV::SLLI_UW:
3880 case RISCV::FMV_W_X:
3881 case RISCV::FCVT_H_W:
3882 case RISCV::FCVT_H_W_INX:
3883 case RISCV::FCVT_H_WU:
3884 case RISCV::FCVT_H_WU_INX:
3885 case RISCV::FCVT_S_W:
3886 case RISCV::FCVT_S_W_INX:
3887 case RISCV::FCVT_S_WU:
3888 case RISCV::FCVT_S_WU_INX:
3889 case RISCV::FCVT_D_W:
3890 case RISCV::FCVT_D_W_INX:
3891 case RISCV::FCVT_D_WU:
3892 case RISCV::FCVT_D_WU_INX:
3893 case RISCV::TH_REVW:
3894 case RISCV::TH_SRRIW:
3895 if (Bits >= 32)
3896 break;
3897 return false;
3898 case RISCV::SLL:
3899 case RISCV::SRA:
3900 case RISCV::SRL:
3901 case RISCV::ROL:
3902 case RISCV::ROR:
3903 case RISCV::BSET:
3904 case RISCV::BCLR:
3905 case RISCV::BINV:
3906 // Shift amount operands only use log2(Xlen) bits.
3907 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
3908 break;
3909 return false;
3910 case RISCV::SLLI:
3911 // SLLI only uses the lower (XLen - ShAmt) bits.
3912 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
3913 break;
3914 return false;
3915 case RISCV::ANDI:
3916 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3917 break;
3918 goto RecCheck;
3919 case RISCV::ORI: {
3920 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3921 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3922 break;
3923 [[fallthrough]];
3924 }
3925 case RISCV::AND:
3926 case RISCV::OR:
3927 case RISCV::XOR:
3928 case RISCV::XORI:
3929 case RISCV::ANDN:
3930 case RISCV::ORN:
3931 case RISCV::XNOR:
3932 case RISCV::SH1ADD:
3933 case RISCV::SH2ADD:
3934 case RISCV::SH3ADD:
3935 RecCheck:
3936 if (hasAllNBitUsers(User, Bits, Depth + 1))
3937 break;
3938 return false;
3939 case RISCV::SRLI: {
3940 unsigned ShAmt = User->getConstantOperandVal(1);
3941 // If we are shifting right by less than Bits, and users don't demand any
3942 // bits that were shifted into [Bits-1:0], then we can consider this as an
3943 // N-Bit user.
3944 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3945 break;
3946 return false;
3947 }
3948 case RISCV::SEXT_B:
3949 case RISCV::PACKH:
3950 if (Bits >= 8)
3951 break;
3952 return false;
3953 case RISCV::SEXT_H:
3954 case RISCV::FMV_H_X:
3955 case RISCV::ZEXT_H_RV32:
3956 case RISCV::ZEXT_H_RV64:
3957 case RISCV::PACKW:
3958 if (Bits >= 16)
3959 break;
3960 return false;
3961 case RISCV::PACK:
3962 if (Bits >= (Subtarget->getXLen() / 2))
3963 break;
3964 return false;
3965 case RISCV::ADD_UW:
3966 case RISCV::SH1ADD_UW:
3967 case RISCV::SH2ADD_UW:
3968 case RISCV::SH3ADD_UW:
3969 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3970 // 32 bits.
3971 if (Use.getOperandNo() == 0 && Bits >= 32)
3972 break;
3973 return false;
3974 case RISCV::SB:
3975 if (Use.getOperandNo() == 0 && Bits >= 8)
3976 break;
3977 return false;
3978 case RISCV::SH:
3979 if (Use.getOperandNo() == 0 && Bits >= 16)
3980 break;
3981 return false;
3982 case RISCV::SW:
3983 if (Use.getOperandNo() == 0 && Bits >= 32)
3984 break;
3985 return false;
3986 }
3987 }
3988
3989 return true;
3990}
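// Illustrative sketch (not from the upstream file): if every user of an i64 ADD
// is, say, an ADDW or the value operand of an SW, only the low 32 bits of the
// ADD are observed, so hasAllNBitUsers(Node, 32) returns true and the ADD can
// itself be selected as ADDW.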
3991
3992// Select a constant that can be represented as (sign_extend(imm5) << imm2).
3993bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3994 SDValue &Shl2) {
3995 auto *C = dyn_cast<ConstantSDNode>(N);
3996 if (!C)
3997 return false;
3998
3999 int64_t Offset = C->getSExtValue();
4000 for (unsigned Shift = 0; Shift < 4; Shift++) {
4001 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4002 EVT VT = N->getValueType(0);
4003 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
4004 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
4005 return true;
4006 }
4007 }
4008
4009 return false;
4010}
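// Illustrative sketches (not from the upstream file): 96 decomposes as 12 << 3
// (Simm5 = 12, Shl2 = 3), -20 as -10 << 1 (the loop picks the smallest shift),
// and 17 has no such decomposition and is rejected.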
4011
4012// Select VL as a 5 bit immediate or a value that will become a register. This
4013// allows us to choose between VSETIVLI or VSETVLI later.
4014bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
4015 auto *C = dyn_cast<ConstantSDNode>(N);
4016 if (C && isUInt<5>(C->getZExtValue())) {
4017 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
4018 N->getValueType(0));
4019 } else if (C && C->isAllOnes()) {
4020 // Treat all ones as VLMax.
4021 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4022 N->getValueType(0));
4023 } else if (isa<RegisterSDNode>(N) &&
4024 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
4025 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4026 // as the register class. Convert X0 to a special immediate to pass the
4027 // MachineVerifier. This is recognized specially by the vsetvli insertion
4028 // pass.
4029 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4030 N->getValueType(0));
4031 } else {
4032 VL = N;
4033 }
4034
4035 return true;
4036}
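// Illustrative sketch (not from the upstream file): a constant AVL in [0, 31]
// is kept as an immediate so a VSETIVLI can be formed later; an all-ones
// constant or the X0 register becomes the VLMAX sentinel; anything else is
// passed through as a register operand.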
4037
4038static SDValue findVSplat(SDValue N) {
4039 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4040 if (!N.getOperand(0).isUndef())
4041 return SDValue();
4042 N = N.getOperand(1);
4043 }
4044 SDValue Splat = N;
4045 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4046 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4047 !Splat.getOperand(0).isUndef())
4048 return SDValue();
4049 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4050 return Splat;
4051}
4052
4053bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
4054 SDValue Splat = findVSplat(N);
4055 if (!Splat)
4056 return false;
4057
4058 SplatVal = Splat.getOperand(1);
4059 return true;
4060}
4061
4062static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
4063 SelectionDAG &DAG,
4064 const RISCVSubtarget &Subtarget,
4065 std::function<bool(int64_t)> ValidateImm,
4066 bool Decrement = false) {
4067 SDValue Splat = findVSplat(N);
4068 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
4069 return false;
4070
4071 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4072 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4073 "Unexpected splat operand type");
4074
4075 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4076 // type is wider than the resulting vector element type: an implicit
4077 // truncation first takes place. Therefore, perform a manual
4078 // truncation/sign-extension in order to ignore any truncated bits and catch
4079 // any zero-extended immediate.
4080 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4081 // sign-extending to (XLenVT -1).
4082 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
4083
4084 int64_t SplatImm = SplatConst.getSExtValue();
4085
4086 if (!ValidateImm(SplatImm))
4087 return false;
4088
4089 if (Decrement)
4090 SplatImm -= 1;
4091
4092 SplatVal =
4093 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
4094 return true;
4095}
4096
4097bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
4098 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
4099 [](int64_t Imm) { return isInt<5>(Imm); });
4100}
4101
4102bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
4103 return selectVSplatImmHelper(
4104 N, SplatVal, *CurDAG, *Subtarget,
4105 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
4106 /*Decrement=*/true);
4107}
4108
4110 return selectVSplatImmHelper(
4111 N, SplatVal, *CurDAG, *Subtarget,
4112 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
4113 /*Decrement=*/false);
4114}
4115
4116bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
4117 SDValue &SplatVal) {
4118 return selectVSplatImmHelper(
4119 N, SplatVal, *CurDAG, *Subtarget,
4120 [](int64_t Imm) {
4121 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
4122 },
4123 /*Decrement=*/true);
4124}
4125
4126bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
4127 SDValue &SplatVal) {
4128 return selectVSplatImmHelper(
4129 N, SplatVal, *CurDAG, *Subtarget,
4130 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
4131}
4132
4133bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
4134 SDValue Splat = findVSplat(N);
4135 return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
4136}
4137
4138bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
4139 auto IsExtOrTrunc = [](SDValue N) {
4140 switch (N->getOpcode()) {
4141 case ISD::SIGN_EXTEND:
4142 case ISD::ZERO_EXTEND:
4143 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4144 // inactive elements will be undef.
4145 case RISCVISD::TRUNCATE_VECTOR_VL:
4146 case RISCVISD::VSEXT_VL:
4147 case RISCVISD::VZEXT_VL:
4148 return true;
4149 default:
4150 return false;
4151 }
4152 };
4153
4154 // We can have multiple nested nodes, so unravel them all if needed.
4155 while (IsExtOrTrunc(N)) {
4156 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4157 return false;
4158 N = N->getOperand(0);
4159 }
4160
4161 return selectVSplat(N, SplatVal);
4162}
4163
4164bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
4165 // Allow bitcasts from XLenVT -> FP.
4166 if (N.getOpcode() == ISD::BITCAST &&
4167 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
4168 Imm = N.getOperand(0);
4169 return true;
4170 }
4171 // Allow moves from XLenVT to FP.
4172 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4173 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4174 Imm = N.getOperand(0);
4175 return true;
4176 }
4177
4178 // Otherwise, look for FP constants that can materialized with scalar int.
4179 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
4180 if (!CFP)
4181 return false;
4182 const APFloat &APF = CFP->getValueAPF();
4183 // td can handle +0.0 already.
4184 if (APF.isPosZero())
4185 return false;
4186
4187 MVT VT = CFP->getSimpleValueType(0);
4188
4189 MVT XLenVT = Subtarget->getXLenVT();
4190 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4191 assert(APF.isNegZero() && "Unexpected constant.");
4192 return false;
4193 }
4194 SDLoc DL(N);
4195 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
4196 *Subtarget);
4197 return true;
4198}
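// Illustrative sketch (not from the upstream file): an f32 -0.0 has the bit
// pattern 0x80000000, which is materialized here into a GPR (a single LUI once
// sign-extended) so the consuming pattern can move or splat it; +0.0 is left
// to the existing TableGen patterns.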
4199
4200bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
4201 SDValue &Imm) {
4202 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
4203 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
4204
4205 if (!isInt<5>(ImmVal))
4206 return false;
4207
4208 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
4209 Subtarget->getXLenVT());
4210 return true;
4211 }
4212
4213 return false;
4214}
4215
4216// Try to remove sext.w if the input is a W instruction or can be made into
4217// a W instruction cheaply.
4218bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4219 // Look for the sext.w pattern, addiw rd, rs1, 0.
4220 if (N->getMachineOpcode() != RISCV::ADDIW ||
4221 !isNullConstant(N->getOperand(1)))
4222 return false;
4223
4224 SDValue N0 = N->getOperand(0);
4225 if (!N0.isMachineOpcode())
4226 return false;
4227
4228 switch (N0.getMachineOpcode()) {
4229 default:
4230 break;
4231 case RISCV::ADD:
4232 case RISCV::ADDI:
4233 case RISCV::SUB:
4234 case RISCV::MUL:
4235 case RISCV::SLLI: {
4236 // Convert sext.w+add/sub/mul to their W instructions. This will create
4237 // a new independent instruction. This improves latency.
4238 unsigned Opc;
4239 switch (N0.getMachineOpcode()) {
4240 default:
4241 llvm_unreachable("Unexpected opcode!");
4242 case RISCV::ADD: Opc = RISCV::ADDW; break;
4243 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4244 case RISCV::SUB: Opc = RISCV::SUBW; break;
4245 case RISCV::MUL: Opc = RISCV::MULW; break;
4246 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4247 }
4248
4249 SDValue N00 = N0.getOperand(0);
4250 SDValue N01 = N0.getOperand(1);
4251
4252 // Shift amount needs to be uimm5.
4253 if (N0.getMachineOpcode() == RISCV::SLLI &&
4254 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
4255 break;
4256
4257 SDNode *Result =
4258 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
4259 N00, N01);
4260 ReplaceUses(N, Result);
4261 return true;
4262 }
4263 case RISCV::ADDW:
4264 case RISCV::ADDIW:
4265 case RISCV::SUBW:
4266 case RISCV::MULW:
4267 case RISCV::SLLIW:
4268 case RISCV::PACKW:
4269 case RISCV::TH_MULAW:
4270 case RISCV::TH_MULAH:
4271 case RISCV::TH_MULSW:
4272 case RISCV::TH_MULSH:
4273 if (N0.getValueType() == MVT::i32)
4274 break;
4275
4276 // Result is already sign extended just remove the sext.w.
4277 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4278 ReplaceUses(N, N0.getNode());
4279 return true;
4280 }
4281
4282 return false;
4283}
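// Illustrative sketch (not from the upstream file):
//   t0 = SLLI x, 3
//   t1 = ADDIW t0, 0     ; the sext.w pattern
// is rewritten so t1 becomes SLLIW x, 3; if the input is already a W
// instruction such as ADDW, the ADDIW is simply removed.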
4284
4285static bool usesAllOnesMask(SDValue MaskOp) {
4286 const auto IsVMSet = [](unsigned Opc) {
4287 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4288 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4289 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4290 Opc == RISCV::PseudoVMSET_M_B8;
4291 };
4292
4293 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4294 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4295 // assume that it's all-ones? Same applies to its VL.
4296 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4297}
4298
4299static bool isImplicitDef(SDValue V) {
4300 if (!V.isMachineOpcode())
4301 return false;
4302 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4303 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4304 if (!isImplicitDef(V.getOperand(I)))
4305 return false;
4306 return true;
4307 }
4308 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4309}
4310
4311// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4312// corresponding "unmasked" pseudo versions.
4313bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4314 const RISCV::RISCVMaskedPseudoInfo *I =
4315 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
4316 if (!I)
4317 return false;
4318
4319 unsigned MaskOpIdx = I->MaskOpIdx;
4320 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
4321 return false;
4322
4323 // There are two classes of pseudos in the table - compares and
4324 // everything else. See the comment on RISCVMaskedPseudo for details.
4325 const unsigned Opc = I->UnmaskedPseudo;
4326 const MCInstrDesc &MCID = TII->get(Opc);
4327 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
4328
4329 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
4330 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
4331
4332 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4333 !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
4334 "Unmasked pseudo has policy but masked pseudo doesn't?");
4335 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4336 "Unexpected pseudo structure");
4337 assert(!(HasPassthru && !MaskedHasPassthru) &&
4338 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4339
4340 SmallVector<SDValue, 8> Ops;
4341 // Skip the passthru operand at index 0 if the unmasked pseudo doesn't have one.
4342 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4343 bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
4344 RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
4345 bool HasChainOp =
4346 N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
4347 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4348 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4349 // Skip the mask
4350 SDValue Op = N->getOperand(I);
4351 if (I == MaskOpIdx)
4352 continue;
4353 if (DropPolicy && I == LastOpNum)
4354 continue;
4355 Ops.push_back(Op);
4356 }
4357
4358 MachineSDNode *Result =
4359 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4360
4361 if (!N->memoperands_empty())
4362 CurDAG->setNodeMemRefs(Result, N->memoperands());
4363
4364 Result->setFlags(N->getFlags());
4365 ReplaceUses(N, Result);
4366
4367 return true;
4368}
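// Illustrative sketch (not from the upstream file): a PseudoVADD_VV_*_MASK
// whose mask operand comes from a PseudoVMSET_M_* is rewritten to the unmasked
// PseudoVADD_VV_* form, dropping the mask (and the passthru/policy operands
// when the unmasked pseudo does not take them).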
4369
4370/// If our passthru is an implicit_def, use noreg instead. This side
4371/// steps issues with MachineCSE not being able to CSE expressions with
4372/// IMPLICIT_DEF operands while preserving the semantic intent. See
4373/// pr64282 for context. Note that this transform is the last one
4374/// performed at ISEL DAG to DAG.
4375bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4376 bool MadeChange = false;
4377 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4378
4379 while (Position != CurDAG->allnodes_begin()) {
4380 SDNode *N = &*--Position;
4381 if (N->use_empty() || !N->isMachineOpcode())
4382 continue;
4383
4384 const unsigned Opc = N->getMachineOpcode();
4385 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4386 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
4387 !isImplicitDef(N->getOperand(0)))
4388 continue;
4389
4390 SmallVector<SDValue, 8> Ops;
4391 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4392 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4393 SDValue Op = N->getOperand(I);
4394 Ops.push_back(Op);
4395 }
4396
4397 MachineSDNode *Result =
4398 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4399 Result->setFlags(N->getFlags());
4400 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4401 ReplaceUses(N, Result);
4402 MadeChange = true;
4403 }
4404 return MadeChange;
4405}
4406
4407
4408// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4409// for instruction scheduling.
4410FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4411 CodeGenOptLevel OptLevel) {
4412 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4413}
4414
4415char RISCVDAGToDAGISelLegacy::ID = 0;
4416
4417RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4418 CodeGenOptLevel OptLevel)
4419 : SelectionDAGISelLegacy(
4420 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
4421
4422INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:577
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:856
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.