LLVM 21.0.0git
RISCVISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
20#include "llvm/IR/IntrinsicsRISCV.h"
22#include "llvm/Support/Debug.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "riscv-isel"
29#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30
32 "riscv-use-rematerializable-movimm", cl::Hidden,
33 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34 "constant materialization"),
35 cl::init(false));
36
// Instantiate the TableGen-generated "searchable table" implementations that
// map RVV (segment / strided / indexed) load/store parameters — NF, masked,
// SEW, LMUL, ordered/unordered — to the matching pseudo instructions. The
// selectors below query them via RISCV::getVLSEGPseudo() and friends.
37namespace llvm::RISCV {
38#define GET_RISCVVSSEGTable_IMPL
39#define GET_RISCVVLSEGTable_IMPL
40#define GET_RISCVVLXSEGTable_IMPL
41#define GET_RISCVVSXSEGTable_IMPL
42#define GET_RISCVVLETable_IMPL
43#define GET_RISCVVSETable_IMPL
44#define GET_RISCVVLXTable_IMPL
45#define GET_RISCVVSXTable_IMPL
46#include "RISCVGenSearchableTables.inc"
47} // namespace llvm::RISCV
48
// RISCVDAGToDAGISel::PreprocessISelDAG — pre-isel DAG canonicalization.
// Walks all nodes bottom-up and rewrites a few generic nodes into RISCV
// VL-aware forms so the tablegen patterns see one canonical shape:
//   * SPLAT_VECTOR           -> VMV_V_X_VL / VFMV_V_F_VL
//   * SPLAT_VECTOR_SPLIT_I64 -> two i32 stack stores + zero-stride vlse
//   * vector FP_EXTEND       -> RISCVISD::FP_EXTEND_VL with all-ones mask
// NOTE(review): doc-scrape artifact — the function signature and several
// hyperlinked lines (orig. 49-50, 65, 75, 88, 93, 95, 101, 117, 119, 144,
// 150) were dropped by extraction; remaining code is verbatim with source
// line numbers fused on. Restore from upstream before compiling.
51
52  bool MadeChange = false;
53  while (Position != CurDAG->allnodes_begin()) {
54    SDNode *N = &*--Position;
55    if (N->use_empty())
56      continue;
57
58    SDValue Result;
59    switch (N->getOpcode()) {
60    case ISD::SPLAT_VECTOR: {
61      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
62      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
63      MVT VT = N->getSimpleValueType(0);
64      unsigned Opc =
// NOTE(review): line 65 (the integer-vs-FP opcode selection expression) was
// dropped by the scrape.
66      SDLoc DL(N);
// X0 as the VL operand encodes VLMAX for these VL nodes.
67      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
68      SDValue Src = N->getOperand(0);
69      if (VT.isInteger())
70        Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
71                              N->getOperand(0));
72      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
73      break;
74    }
// NOTE(review): line 75 (the SPLAT_VECTOR_SPLIT_I64_VL case label) was
// dropped by the scrape.
76      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
77      // load. Done after lowering and combining so that we have a chance to
78      // optimize this to VMV_V_X_VL when the upper bits aren't needed.
79      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
80      MVT VT = N->getSimpleValueType(0);
81      SDValue Passthru = N->getOperand(0);
82      SDValue Lo = N->getOperand(1);
83      SDValue Hi = N->getOperand(2);
84      SDValue VL = N->getOperand(3);
85      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
86             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
87             "Unexpected VTs!");
// NOTE(review): line 88 (MachineFunction reference) dropped by the scrape.
89      SDLoc DL(N);
90
91      // Create temporary stack for each expanding node.
92      SDValue StackSlot =
// NOTE(review): lines 93 and 95 (stack-slot creation call tail and the
// MachinePointerInfo 'MPI' declaration) dropped by the scrape.
94      int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
96
97      SDValue Chain = CurDAG->getEntryNode();
98      Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
99
100      SDValue OffsetSlot =
// NOTE(review): line 101 (offset computation for the high half) dropped.
102      Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
103                            Align(8));
104
105      Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
106
107      SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
108      SDValue IntID =
109          CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
// Operands for the riscv_vlse intrinsic: chain, id, passthru, address,
// stride (X0 => stride 0, i.e. broadcast the one i64), and VL.
110      SDValue Ops[] = {Chain,
111                       IntID,
112                       Passthru,
113                       StackSlot,
114                       CurDAG->getRegister(RISCV::X0, MVT::i64),
115                       VL};
116
// NOTE(review): lines 117 and 119 (getMemIntrinsicNode call head and tail)
// dropped by the scrape.
118                                           MVT::i64, MPI, Align(8),
120      break;
121    }
122    case ISD::FP_EXTEND: {
123      // We only have vector patterns for riscv_fpextend_vl in isel.
124      SDLoc DL(N);
125      MVT VT = N->getSimpleValueType(0);
126      if (!VT.isVector())
127        break;
128      SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
129      SDValue TrueMask = CurDAG->getNode(
130          RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
131      Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
132                               TrueMask, VLMAX);
133      break;
134    }
135    }
136
137    if (Result) {
138      LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
139      LLVM_DEBUG(N->dump(CurDAG));
140      LLVM_DEBUG(dbgs() << "\nNew: ");
141      LLVM_DEBUG(Result->dump(CurDAG));
142      LLVM_DEBUG(dbgs() << "\n");
143
// NOTE(review): line 144 (ReplaceUses of N with Result) dropped by the scrape.
145      MadeChange = true;
146    }
147  }
148
// NOTE(review): line 150 (CurDAG->RemoveDeadNodes()) dropped by the scrape.
149  if (MadeChange)
151}
152
// RISCVDAGToDAGISel::PostprocessISelDAG — peephole pass over the already
// selected (machine-opcode) DAG: removes redundant sext.w (doPeepholeSExtW),
// converts masked RVV pseudos with all-ones masks to unmasked forms, folds
// vmerge into its true operand, and normalizes IMPLICIT_DEF passthrus to
// NoRegister. The HandleSDNode pins the root while nodes are replaced.
// NOTE(review): doc-scrape — the signature (orig. 153) and lines 155/185
// were dropped; remaining code is verbatim with line numbers fused on.
154  HandleSDNode Dummy(CurDAG->getRoot());
// NOTE(review): line 155 (the allnodes iterator 'Position' init) dropped.
156
157  bool MadeChange = false;
158  while (Position != CurDAG->allnodes_begin()) {
159    SDNode *N = &*--Position;
160    // Skip dead nodes and any non-machine opcodes.
161    if (N->use_empty() || !N->isMachineOpcode())
162      continue;
163
164    MadeChange |= doPeepholeSExtW(N);
165
166    // FIXME: This is here only because the VMerge transform doesn't
167    // know how to handle masked true inputs. Once that has been moved
168    // to post-ISEL, this can be deleted as well.
169    MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
170  }
171
172  CurDAG->setRoot(Dummy.getValue());
173
174  MadeChange |= doPeepholeMergeVVMFold();
175
176  // After we're done with everything else, convert IMPLICIT_DEF
177  // passthru operands to NoRegister. This is required to workaround
178  // an optimization deficiency in MachineCSE. This really should
179  // be merged back into each of the patterns (i.e. there's no good
180  // reason not to go directly to NoReg), but is being done this way
181  // to allow easy backporting.
182  MadeChange |= doPeepholeNoRegPassThru();
183
// NOTE(review): line 185 (CurDAG->RemoveDeadNodes()) dropped by the scrape.
184  if (MadeChange)
186}
187
// selectImmSeq — materialize an immediate from a precomputed RISCVMatInt
// instruction sequence. Chains machine nodes so each instruction consumes
// the previous result; the first one starts from X0.
// NOTE(review): doc-scrape — the second parameter line (orig. 189, the
// RISCVMatInt::InstSeq &Seq parameter) and the RegX0/RegReg/RegImm case
// labels (orig. 198, 202, 205) were dropped by extraction.
188static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
190  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
191  for (const RISCVMatInt::Inst &Inst : Seq) {
192    SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
193    SDNode *Result = nullptr;
194    switch (Inst.getOpndKind()) {
195    case RISCVMatInt::Imm:
196      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
197      break;
// (dropped case label: operand kind taking the running register plus X0)
199      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
200                                      CurDAG->getRegister(RISCV::X0, VT));
201      break;
// (dropped case label: operand kind taking the running register twice)
203      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
204      break;
// (dropped case label: operand kind taking the running register + immediate)
206      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
207      break;
208    }
209
210    // Only the first instruction has X0 as its source.
211    SrcReg = SDValue(Result, 0);
212  }
213
214  return SrcReg;
215}
216
// selectImm — materialize the 64/32-bit constant Imm into VT, picking the
// cheapest of: the rematerializable PseudoMovImm (2-inst sequences, behind
// the UsePseudoMovImm flag), a two-register shift+add decomposition, or the
// plain RISCVMatInt sequence.
// NOTE(review): doc-scrape — orig. line 219 (the 'Seq' computation via
// RISCVMatInt::generateInstSeq) and line 235 (the 'SeqLo' declaration head)
// were dropped by extraction.
217static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
218                         int64_t Imm, const RISCVSubtarget &Subtarget) {
220
221  // Use a rematerializable pseudo instruction for short sequences if enabled.
222  if (Seq.size() == 2 && UsePseudoMovImm)
223    return SDValue(
224        CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
225                               CurDAG->getSignedTargetConstant(Imm, DL, VT)),
226        0);
227
228  // See if we can create this constant as (ADD (SLLI X, C1), X) where X is at
229  // worst an LUI+ADDIW. This will require an extra register, but avoids a
230  // constant pool.
231  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
232  // low and high 32 bits are the same and bit 31 and 63 are set.
233  if (Seq.size() > 3) {
234    unsigned ShiftAmt, AddOpc;
236        RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
// Only worth it if the low-half sequence plus SLLI+ADD beats the original.
237    if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
238      SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
239
240      SDValue SLLI = SDValue(
241          CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
242                                 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
243          0);
244      return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
245    }
246  }
247
248  // Otherwise, use the original sequence.
249  return selectImmSeq(CurDAG, DL, VT, Seq);
250}
251
// RISCVDAGToDAGISel::addVectorLoadStoreOperands — append the common operand
// tail for an RVV load/store pseudo, in pseudo-operand order: base pointer,
// optional stride/index, optional mask, VL, SEW, optional policy (loads
// only), and finally the chain. CurOp indexes into the intrinsic node's
// operands and is advanced as each one is consumed.
// NOTE(review): doc-scrape — orig. line 252 (the 'void RISCVDAGToDAGISel::'
// signature head) and line 282 (the default 'Policy' initialization) were
// dropped by extraction.
253    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
254    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
255    bool IsLoad, MVT *IndexVT) {
256  SDValue Chain = Node->getOperand(0);
257
258  Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
259
260  if (IsStridedOrIndexed) {
261    Operands.push_back(Node->getOperand(CurOp++)); // Index.
262    if (IndexVT)
263      *IndexVT = Operands.back()->getSimpleValueType(0);
264  }
265
266  if (IsMasked) {
267    SDValue Mask = Node->getOperand(CurOp++);
268    Operands.push_back(Mask);
269  }
// selectVLOp canonicalizes the AVL operand (e.g. immediate vs. register).
270  SDValue VL;
271  selectVLOp(Node->getOperand(CurOp++), VL);
272  Operands.push_back(VL);
273
274  MVT XLenVT = Subtarget->getXLenVT();
275  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
276  Operands.push_back(SEWOp);
277
278  // At the IR layer, all the masked load intrinsics have policy operands,
279  // none of the others do. All have passthru operands. For our pseudos,
280  // all loads have policy operands.
281  if (IsLoad) {
283    if (IsMasked)
284      Policy = Node->getConstantOperandVal(CurOp++);
285    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
286    Operands.push_back(PolicyOp);
287  }
288
289  Operands.push_back(Chain); // Chain.
290}
291
// RISCVDAGToDAGISel::selectVLSEG — select a (possibly masked / strided)
// vector segment load intrinsic into the matching VLSEG pseudo found via the
// searchable table. Result 0 is the tuple value (MVT::Untyped), result 1 the
// chain.
// NOTE(review): doc-scrape — orig. lines 297 (LMUL computation) and 300
// (the 'Operands' SmallVector declaration) were dropped by extraction.
292void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
293                                    bool IsStrided) {
294  SDLoc DL(Node);
295  MVT VT = Node->getSimpleValueType(0);
296  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
298
// Operand 0 is the chain, 1 the intrinsic ID; 2 starts the passthru.
299  unsigned CurOp = 2;
301
302  Operands.push_back(Node->getOperand(CurOp++));
303
304  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
305                             Operands, /*IsLoad=*/true);
306
307  const RISCV::VLSEGPseudo *P =
308      RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
309                            static_cast<unsigned>(LMUL));
310  MachineSDNode *Load =
311      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
312
313  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
314    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
315
316  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
317  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
318  CurDAG->RemoveDeadNode(Node);
319}
320
// RISCVDAGToDAGISel::selectVLSEGFF — select a fault-only-first segment load
// (vlsegNeNff) into its VLSEG FF pseudo. The pseudo produces three results:
// the tuple value, the updated VL (in XLenVT), and the chain.
// NOTE(review): doc-scrape — orig. lines 321 (signature head), 327 (LMUL
// computation) and 330 (the 'Operands' SmallVector declaration) were
// dropped by extraction.
322                                      bool IsMasked) {
323  SDLoc DL(Node);
324  MVT VT = Node->getSimpleValueType(0);
325  MVT XLenVT = Subtarget->getXLenVT();
326  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
328
329  unsigned CurOp = 2;
331
332  Operands.push_back(Node->getOperand(CurOp++));
333
334  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
335                             /*IsStridedOrIndexed*/ false, Operands,
336                             /*IsLoad=*/true);
337
338  const RISCV::VLSEGPseudo *P =
339      RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
340                            Log2SEW, static_cast<unsigned>(LMUL));
341  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
342                                               XLenVT, MVT::Other, Operands);
343
344  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
345    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
346
347  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
348  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
349  ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
350  CurDAG->RemoveDeadNode(Node);
351}
352
// RISCVDAGToDAGISel::selectVLXSEG — select an indexed (ordered/unordered)
// segment load into its VLXSEG pseudo. The index operand's EEW/LMUL are
// derived from the index vector type; EEW=64 indices are rejected on RV32
// per the V spec.
// NOTE(review): doc-scrape — orig. lines 358 (LMUL computation) and 361
// (the 'Operands' SmallVector declaration) were dropped by extraction.
353void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
354                                     bool IsOrdered) {
355  SDLoc DL(Node);
356  MVT VT = Node->getSimpleValueType(0);
357  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
359
360  unsigned CurOp = 2;
362
363  Operands.push_back(Node->getOperand(CurOp++));
364
365  MVT IndexVT;
366  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
367                             /*IsStridedOrIndexed*/ true, Operands,
368                             /*IsLoad=*/true, &IndexVT);
369
370#ifndef NDEBUG
371  // Number of element = RVVBitsPerBlock * LMUL / SEW
372  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
373  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
374  if (DecodedLMUL.second)
375    ContainedTyNumElts /= DecodedLMUL.first;
376  else
377    ContainedTyNumElts *= DecodedLMUL.first;
378  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
379         "Element count mismatch");
380#endif
381
382  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
383  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
384  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
385    report_fatal_error("The V extension does not support EEW=64 for index "
386                       "values when XLEN=32");
387  }
388  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
389      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
390      static_cast<unsigned>(IndexLMUL));
391  MachineSDNode *Load =
392      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
393
394  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
395    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
396
397  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
398  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
399  CurDAG->RemoveDeadNode(Node);
400}
401
// RISCVDAGToDAGISel::selectVSSEG — select a (possibly masked / strided)
// vector segment store into its VSSEG pseudo. Operand 2 is the value tuple
// being stored; the pseudo's only result is the chain.
// NOTE(review): doc-scrape — orig. lines 407 (LMUL computation) and 410
// (the 'Operands' SmallVector declaration) were dropped by extraction.
402void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
403                                    bool IsStrided) {
404  SDLoc DL(Node);
405  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
406  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
408
409  unsigned CurOp = 2;
411
412  Operands.push_back(Node->getOperand(CurOp++));
413
414  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
415                             Operands);
416
417  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
418      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
419  MachineSDNode *Store =
420      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
421
422  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
423    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
424
425  ReplaceNode(Node, Store);
426}
427
// RISCVDAGToDAGISel::selectVSXSEG — select an indexed (ordered/unordered)
// segment store into its VSXSEG pseudo; mirrors selectVLXSEG for the store
// direction, including the RV32 EEW=64 index rejection.
// NOTE(review): doc-scrape — orig. lines 433 (LMUL computation) and 436
// (the 'Operands' SmallVector declaration) were dropped by extraction.
428void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
429                                     bool IsOrdered) {
430  SDLoc DL(Node);
431  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
432  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
434
435  unsigned CurOp = 2;
437
438  Operands.push_back(Node->getOperand(CurOp++));
439
440  MVT IndexVT;
441  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
442                             /*IsStridedOrIndexed*/ true, Operands,
443                             /*IsLoad=*/false, &IndexVT);
444
445#ifndef NDEBUG
446  // Number of element = RVVBitsPerBlock * LMUL / SEW
447  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
448  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
449  if (DecodedLMUL.second)
450    ContainedTyNumElts /= DecodedLMUL.first;
451  else
452    ContainedTyNumElts *= DecodedLMUL.first;
453  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
454         "Element count mismatch");
455#endif
456
457  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
458  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
459  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
460    report_fatal_error("The V extension does not support EEW=64 for index "
461                       "values when XLEN=32");
462  }
463  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
464      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
465      static_cast<unsigned>(IndexLMUL));
466  MachineSDNode *Store =
467      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
468
469  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
470    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
471
472  ReplaceNode(Node, Store);
473}
474
// RISCVDAGToDAGISel::selectVSETVLI — select the riscv_vsetvli /
// riscv_vsetvlimax intrinsics into PseudoVSETVLI / PseudoVSETVLIX0 /
// PseudoVSETIVLI. AVLs that provably equal VLMAX (all-ones, or matching the
// known VLEN/SEW-LMUL ratio) use the X0 form; small constant AVLs (uimm5)
// use the immediate form.
// NOTE(review): doc-scrape — the signature line (orig. 475) was dropped by
// extraction; code below is otherwise verbatim.
476  if (!Subtarget->hasVInstructions())
477    return;
478
479  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
480
481  SDLoc DL(Node);
482  MVT XLenVT = Subtarget->getXLenVT();
483
484  unsigned IntNo = Node->getConstantOperandVal(0);
485
486  assert((IntNo == Intrinsic::riscv_vsetvli ||
487          IntNo == Intrinsic::riscv_vsetvlimax) &&
488         "Unexpected vsetvli intrinsic");
489
490  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
// vsetvlimax has no AVL operand, so SEW/LMUL start one operand earlier.
491  unsigned Offset = (VLMax ? 1 : 2);
492
493  assert(Node->getNumOperands() == Offset + 2 &&
494         "Unexpected number of operands");
495
496  unsigned SEW =
497      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
498  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
499      Node->getConstantOperandVal(Offset + 1) & 0x7);
500
501  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
502                                            /*MaskAgnostic*/ true);
503  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
504
505  SDValue VLOperand;
506  unsigned Opcode = RISCV::PseudoVSETVLI;
507  if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
508    if (auto VLEN = Subtarget->getRealVLen())
// A constant AVL equal to the exact VLMAX for this SEW/LMUL is VLMAX.
509      if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
510        VLMax = true;
511  }
512  if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
513    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
514    Opcode = RISCV::PseudoVSETVLIX0;
515  } else {
516    VLOperand = Node->getOperand(1);
517
518    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
519      uint64_t AVL = C->getZExtValue();
520      if (isUInt<5>(AVL)) {
521        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
522        ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
523                                                 XLenVT, VLImm, VTypeIOp));
524        return;
525      }
526    }
527  }
528
529  ReplaceNode(Node,
530              CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
531}
532
// RISCVDAGToDAGISel::tryShrinkShlLogicImm — rewrite ((x << C1) op C2) as
// ((x op (C2 >> C1)) << C1) when the shifted immediate fits simm12, so the
// logic op can use ANDI/ORI/XORI instead of materializing C2. Returns true
// and replaces Node on success.
// NOTE(review): doc-scrape — the signature line (orig. 533) was dropped by
// extraction; code below is otherwise verbatim.
534  MVT VT = Node->getSimpleValueType(0);
535  unsigned Opcode = Node->getOpcode();
536  assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
537         "Unexpected opcode");
538  SDLoc DL(Node);
539
540  // For operations of the form (x << C1) op C2, check if we can use
541  // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
542  SDValue N0 = Node->getOperand(0);
543  SDValue N1 = Node->getOperand(1);
544
545  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
546  if (!Cst)
547    return false;
548
549  int64_t Val = Cst->getSExtValue();
550
551  // Check if immediate can already use ANDI/ORI/XORI.
552  if (isInt<12>(Val))
553    return false;
554
555  SDValue Shift = N0;
556
557  // If Val is simm32 and we have a sext_inreg from i32, then the binop
558  // produces at least 33 sign bits. We can peek through the sext_inreg and use
559  // a SLLIW at the end.
560  bool SignExt = false;
561  if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
562      N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
563    SignExt = true;
564    Shift = N0.getOperand(0);
565  }
566
567  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
568    return false;
569
570  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
571  if (!ShlCst)
572    return false;
573
574  uint64_t ShAmt = ShlCst->getZExtValue();
575
576  // Make sure that we don't change the operation by removing bits.
577  // This only matters for OR and XOR, AND is unaffected.
578  uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
579  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
580    return false;
581
582  int64_t ShiftedVal = Val >> ShAmt;
583  if (!isInt<12>(ShiftedVal))
584    return false;
585
586  // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
587  if (SignExt && ShAmt >= 32)
588    return false;
589
590  // Ok, we can reorder to get a smaller immediate.
591  unsigned BinOpc;
592  switch (Opcode) {
593  default: llvm_unreachable("Unexpected opcode");
594  case ISD::AND: BinOpc = RISCV::ANDI; break;
595  case ISD::OR:  BinOpc = RISCV::ORI;  break;
596  case ISD::XOR: BinOpc = RISCV::XORI; break;
597  }
598
599  unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
600
601  SDNode *BinOp = CurDAG->getMachineNode(
602      BinOpc, DL, VT, Shift.getOperand(0),
603      CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
604  SDNode *SLLI =
605      CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
606                             CurDAG->getTargetConstant(ShAmt, DL, VT));
607  ReplaceNode(Node, SLLI);
608  return true;
609}
610
// RISCVDAGToDAGISel::trySignedBitfieldExtract — fold (sra (shl X, C1), C2)
// and (sra (sext_inreg X, VT), C) into XTHeadBb's TH.EXT bitfield-extract
// instruction. Returns true and replaces Node on success.
// NOTE(review): doc-scrape — the signature line (orig. 611) was dropped by
// extraction; code below is otherwise verbatim.
612  // Only supported with XTHeadBb at the moment.
613  if (!Subtarget->hasVendorXTHeadBb())
614    return false;
615
616  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
617  if (!N1C)
618    return false;
619
620  SDValue N0 = Node->getOperand(0);
621  if (!N0.hasOneUse())
622    return false;
623
624  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
625                             MVT VT) {
626    return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
627                                  CurDAG->getTargetConstant(Msb, DL, VT),
628                                  CurDAG->getTargetConstant(Lsb, DL, VT));
629  };
630
631  SDLoc DL(Node);
632  MVT VT = Node->getSimpleValueType(0);
633  const unsigned RightShAmt = N1C->getZExtValue();
634
635  // Transform (sra (shl X, C1) C2) with C1 < C2
636  //        -> (TH.EXT X, msb, lsb)
637  if (N0.getOpcode() == ISD::SHL) {
638    auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
639    if (!N01C)
640      return false;
641
642    const unsigned LeftShAmt = N01C->getZExtValue();
643    // Make sure that this is a bitfield extraction (i.e., the shift-right
644    // amount can not be less than the left-shift).
645    if (LeftShAmt > RightShAmt)
646      return false;
647
648    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
649    const unsigned Msb = MsbPlusOne - 1;
650    const unsigned Lsb = RightShAmt - LeftShAmt;
651
652    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
653    ReplaceNode(Node, TH_EXT);
654    return true;
655  }
656
657  // Transform (sra (sext_inreg X, _), C) ->
658  //           (TH.EXT X, msb, lsb)
659  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
660    unsigned ExtSize =
661        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
662
663    // ExtSize of 32 should use sraiw via tablegen pattern.
664    if (ExtSize == 32)
665      return false;
666
667    const unsigned Msb = ExtSize - 1;
668    const unsigned Lsb = RightShAmt;
669
670    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
671    ReplaceNode(Node, TH_EXT);
672    return true;
673  }
674
675  return false;
676}
677
// RISCVDAGToDAGISel::tryIndexedLoad — select a pre/post-indexed load into
// one of XTHeadMemIdx's TH.L*IB/TH.L*IA instructions when the offset fits
// the (sign_extend(imm5) << imm2) encoding. Returns true and replaces Node
// on success.
// NOTE(review): doc-scrape — orig. lines 678 (signature) and 684 (the
// 'AM = Ld->getAddressingMode()' line) were dropped by extraction.
679  // Target does not support indexed loads.
680  if (!Subtarget->hasVendorXTHeadMemIdx())
681    return false;
682
683  LoadSDNode *Ld = cast<LoadSDNode>(Node);
685  if (AM == ISD::UNINDEXED)
686    return false;
687
688  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
689  if (!C)
690    return false;
691
692  EVT LoadVT = Ld->getMemoryVT();
693  assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
694         "Unexpected addressing mode");
695  bool IsPre = AM == ISD::PRE_INC;
696  bool IsPost = AM == ISD::POST_INC;
697  int64_t Offset = C->getSExtValue();
698
699  // The constants that can be encoded in the THeadMemIdx instructions
700  // are of the form (sign_extend(imm5) << imm2).
701  unsigned Shift;
702  for (Shift = 0; Shift < 4; Shift++)
703    if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
704      break;
705
706  // Constant cannot be encoded.
707  if (Shift == 4)
708    return false;
709
710  bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
711  unsigned Opcode;
712  if (LoadVT == MVT::i8 && IsPre)
713    Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
714  else if (LoadVT == MVT::i8 && IsPost)
715    Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
716  else if (LoadVT == MVT::i16 && IsPre)
717    Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
718  else if (LoadVT == MVT::i16 && IsPost)
719    Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
720  else if (LoadVT == MVT::i32 && IsPre)
721    Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
722  else if (LoadVT == MVT::i32 && IsPost)
723    Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
724  else if (LoadVT == MVT::i64 && IsPre)
725    Opcode = RISCV::TH_LDIB;
726  else if (LoadVT == MVT::i64 && IsPost)
727    Opcode = RISCV::TH_LDIA;
728  else
729    return false;
730
731  EVT Ty = Ld->getOffset().getValueType();
732  SDValue Ops[] = {
733      Ld->getBasePtr(),
734      CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
735      CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
736  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
737                                       Ld->getValueType(1), MVT::Other, Ops);
738
739  MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
740  CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
741
742  ReplaceNode(Node, New);
743
744  return true;
745}
746
// RISCVDAGToDAGISel::selectSF_VC_X_SE — select the SiFive VCIX
// riscv_sf_vc_x_se / riscv_sf_vc_i_se side-effecting intrinsics into the
// PseudoVC_{X,I}_SE_* pseudo matching the encoded LMUL operand.
// NOTE(review): doc-scrape — orig. lines 747 (signature) and 802 (the
// final 'ReplaceNode(Node, CurDAG->getMachineNode(' head) were dropped by
// extraction.
748  if (!Subtarget->hasVInstructions())
749    return;
750
751  assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
752
753  SDLoc DL(Node);
754  unsigned IntNo = Node->getConstantOperandVal(1);
755
756  assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
757          IntNo == Intrinsic::riscv_sf_vc_i_se) &&
758         "Unexpected vsetvli intrinsic");
759
760  // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
761  unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
762  SDValue SEWOp =
763      CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
764  SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
765                                      Node->getOperand(4), Node->getOperand(5),
766                                      Node->getOperand(8), SEWOp,
767                                      Node->getOperand(0)};
768
769  unsigned Opcode;
770  auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
// The log2lmul operand encodes fractional LMULs as 5/6/7 (MF8/MF4/MF2)
// and integer LMULs as 0-3 (M1..M8).
771  switch (LMulSDNode->getSExtValue()) {
772  case 5:
773    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
774                                                  : RISCV::PseudoVC_I_SE_MF8;
775    break;
776  case 6:
777    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
778                                                  : RISCV::PseudoVC_I_SE_MF4;
779    break;
780  case 7:
781    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
782                                                  : RISCV::PseudoVC_I_SE_MF2;
783    break;
784  case 0:
785    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
786                                                  : RISCV::PseudoVC_I_SE_M1;
787    break;
788  case 1:
789    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
790                                                  : RISCV::PseudoVC_I_SE_M2;
791    break;
792  case 2:
793    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
794                                                  : RISCV::PseudoVC_I_SE_M4;
795    break;
796  case 3:
797    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
798                                                  : RISCV::PseudoVC_I_SE_M8;
799    break;
800  }
801
803      Opcode, DL, Node->getSimpleValueType(0), Operands));
804}
805
// getSegInstNF — map a RVV segment load/store intrinsic ID to its NF
// (number of fields, 2..8). The macros expand one switch case per
// NF/masked/ff variant of each intrinsic family.
// NOTE(review): doc-scrape — one case line (orig. 837,
// the vlseg family with FF variants) was dropped by extraction.
806static unsigned getSegInstNF(unsigned Intrinsic) {
807#define INST_NF_CASE(NAME, NF)                                                 \
808  case Intrinsic::riscv_##NAME##NF:                                            \
809    return NF;
810#define INST_NF_CASE_MASK(NAME, NF)                                            \
811  case Intrinsic::riscv_##NAME##NF##_mask:                                     \
812    return NF;
813#define INST_NF_CASE_FF(NAME, NF)                                              \
814  case Intrinsic::riscv_##NAME##NF##ff:                                        \
815    return NF;
816#define INST_NF_CASE_FF_MASK(NAME, NF)                                         \
817  case Intrinsic::riscv_##NAME##NF##ff_mask:                                   \
818    return NF;
819#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME)                                \
820  MACRO_NAME(NAME, 2)                                                          \
821  MACRO_NAME(NAME, 3)                                                          \
822  MACRO_NAME(NAME, 4)                                                          \
823  MACRO_NAME(NAME, 5)                                                          \
824  MACRO_NAME(NAME, 6)                                                          \
825  MACRO_NAME(NAME, 7)                                                          \
826  MACRO_NAME(NAME, 8)
827#define INST_ALL_NF_CASE(NAME)                                                 \
828  INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME)                                    \
829  INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
830#define INST_ALL_NF_CASE_WITH_FF(NAME)                                         \
831  INST_ALL_NF_CASE(NAME)                                                       \
832  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME)                                 \
833  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
834  switch (Intrinsic) {
835  default:
836    llvm_unreachable("Unexpected segment load/store intrinsic");
838    INST_ALL_NF_CASE(vlsseg)
839    INST_ALL_NF_CASE(vloxseg)
840    INST_ALL_NF_CASE(vluxseg)
841    INST_ALL_NF_CASE(vsseg)
842    INST_ALL_NF_CASE(vssseg)
843    INST_ALL_NF_CASE(vsoxseg)
844    INST_ALL_NF_CASE(vsuxseg)
845  }
846}
847
849 // If we have a custom node, we have already selected.
850 if (Node->isMachineOpcode()) {
851 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
852 Node->setNodeId(-1);
853 return;
854 }
855
856 // Instruction Selection not handled by the auto-generated tablegen selection
857 // should be handled here.
858 unsigned Opcode = Node->getOpcode();
859 MVT XLenVT = Subtarget->getXLenVT();
860 SDLoc DL(Node);
861 MVT VT = Node->getSimpleValueType(0);
862
863 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
864
865 switch (Opcode) {
866 case ISD::Constant: {
867 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
868 auto *ConstNode = cast<ConstantSDNode>(Node);
869 if (ConstNode->isZero()) {
870 SDValue New =
871 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
872 ReplaceNode(Node, New.getNode());
873 return;
874 }
875 int64_t Imm = ConstNode->getSExtValue();
876 // If only the lower 8 bits are used, try to convert this to a simm6 by
877 // sign-extending bit 7. This is neutral without the C extension, and
878 // allows C.LI to be used if C is present.
879 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
880 Imm = SignExtend64<8>(Imm);
881 // If the upper XLen-16 bits are not used, try to convert this to a simm12
882 // by sign extending bit 15.
883 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
884 hasAllHUsers(Node))
885 Imm = SignExtend64<16>(Imm);
886 // If the upper 32-bits are not used try to convert this into a simm32 by
887 // sign extending bit 32.
888 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
889 Imm = SignExtend64<32>(Imm);
890
891 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
892 return;
893 }
894 case ISD::ConstantFP: {
895 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
896
897 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
898 SDValue Imm;
899 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
900 // create an integer immediate.
901 if (APF.isPosZero() || NegZeroF64)
902 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
903 else
904 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
905 *Subtarget);
906
907 bool HasZdinx = Subtarget->hasStdExtZdinx();
908 bool Is64Bit = Subtarget->is64Bit();
909 unsigned Opc;
910 switch (VT.SimpleTy) {
911 default:
912 llvm_unreachable("Unexpected size");
913 case MVT::bf16:
914 assert(Subtarget->hasStdExtZfbfmin());
915 Opc = RISCV::FMV_H_X;
916 break;
917 case MVT::f16:
918 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
919 break;
920 case MVT::f32:
921 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
922 break;
923 case MVT::f64:
924 // For RV32, we can't move from a GPR, we need to convert instead. This
925 // should only happen for +0.0 and -0.0.
926 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
927 if (Is64Bit)
928 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
929 else
930 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
931 break;
932 }
933
934 SDNode *Res;
935 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
936 Res =
937 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
938 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
939 Res =
940 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
941 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
942 Res = CurDAG->getMachineNode(
943 Opc, DL, VT, Imm,
945 else
946 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
947
948 // For f64 -0.0, we need to insert a fneg.d idiom.
949 if (NegZeroF64) {
950 Opc = RISCV::FSGNJN_D;
951 if (HasZdinx)
952 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
953 Res =
954 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
955 }
956
957 ReplaceNode(Node, Res);
958 return;
959 }
962 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
963 break;
964
965 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
966 "BuildPairF64 only handled here on rv32i_zdinx");
967
968 SDValue Ops[] = {
969 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
970 Node->getOperand(0),
971 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
972 Node->getOperand(1),
973 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
974
975 SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
976 ReplaceNode(Node, N);
977 return;
978 }
980 case RISCVISD::SplitF64: {
981 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
982 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
983 "SplitF64 only handled here on rv32i_zdinx");
984
985 if (!SDValue(Node, 0).use_empty()) {
986 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
987 Node->getValueType(0),
988 Node->getOperand(0));
989 ReplaceUses(SDValue(Node, 0), Lo);
990 }
991
992 if (!SDValue(Node, 1).use_empty()) {
994 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
995 ReplaceUses(SDValue(Node, 1), Hi);
996 }
997
998 CurDAG->RemoveDeadNode(Node);
999 return;
1000 }
1001
1002 assert(Opcode != RISCVISD::SplitGPRPair &&
1003 "SplitGPRPair should already be handled");
1004
1005 if (!Subtarget->hasStdExtZfa())
1006 break;
1007 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1008 "Unexpected subtarget");
1009
1010 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1011 if (!SDValue(Node, 0).use_empty()) {
1012 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1013 Node->getOperand(0));
1014 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1015 }
1016 if (!SDValue(Node, 1).use_empty()) {
1017 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1018 Node->getOperand(0));
1019 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1020 }
1021
1022 CurDAG->RemoveDeadNode(Node);
1023 return;
1024 }
1025 case ISD::SHL: {
1026 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1027 if (!N1C)
1028 break;
1029 SDValue N0 = Node->getOperand(0);
1030 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1031 !isa<ConstantSDNode>(N0.getOperand(1)))
1032 break;
1033 unsigned ShAmt = N1C->getZExtValue();
1034 uint64_t Mask = N0.getConstantOperandVal(1);
1035
1036 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1037 unsigned XLen = Subtarget->getXLen();
1038 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1039 unsigned TrailingZeros = llvm::countr_zero(Mask);
1040 if (TrailingZeros > 0 && LeadingZeros == 32) {
1041 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1042 // where C2 has 32 leading zeros and C3 trailing zeros.
1043 SDNode *SRLIW = CurDAG->getMachineNode(
1044 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1045 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1046 SDNode *SLLI = CurDAG->getMachineNode(
1047 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1048 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1049 ReplaceNode(Node, SLLI);
1050 return;
1051 }
1052 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1053 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1054 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1055 // where C2 has C4 leading zeros and no trailing zeros.
1056 // This is profitable if the "and" was to be lowered to
1057 // (srli (slli X, C4), C4) and not (andi X, C2).
1058 // For "LeadingZeros == 32":
1059 // - with Zba it's just (slli.uw X, C)
1060 // - without Zba a tablegen pattern applies the very same
1061 // transform as we would have done here
1062 SDNode *SLLI = CurDAG->getMachineNode(
1063 RISCV::SLLI, DL, VT, N0->getOperand(0),
1064 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1065 SDNode *SRLI = CurDAG->getMachineNode(
1066 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1067 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1068 ReplaceNode(Node, SRLI);
1069 return;
1070 }
1071 }
1072 break;
1073 }
1074 case ISD::SRL: {
1075 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1076 if (!N1C)
1077 break;
1078 SDValue N0 = Node->getOperand(0);
1079 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1080 break;
1081 unsigned ShAmt = N1C->getZExtValue();
1082 uint64_t Mask = N0.getConstantOperandVal(1);
1083
1084 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1085 // 32 leading zeros and C3 trailing zeros.
1086 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1087 unsigned XLen = Subtarget->getXLen();
1088 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1089 unsigned TrailingZeros = llvm::countr_zero(Mask);
1090 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1091 SDNode *SRLIW = CurDAG->getMachineNode(
1092 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1093 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1094 SDNode *SLLI = CurDAG->getMachineNode(
1095 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1096 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1097 ReplaceNode(Node, SLLI);
1098 return;
1099 }
1100 }
1101
1102 // Optimize (srl (and X, C2), C) ->
1103 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1104 // Where C2 is a mask with C3 trailing ones.
1105 // Taking into account that the C2 may have had lower bits unset by
1106 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1107 // This pattern occurs when type legalizing right shifts for types with
1108 // less than XLen bits.
1109 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1110 if (!isMask_64(Mask))
1111 break;
1112 unsigned TrailingOnes = llvm::countr_one(Mask);
1113 if (ShAmt >= TrailingOnes)
1114 break;
1115 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1116 if (TrailingOnes == 32) {
1117 SDNode *SRLI = CurDAG->getMachineNode(
1118 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1119 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1120 ReplaceNode(Node, SRLI);
1121 return;
1122 }
1123
1124 // Only do the remaining transforms if the AND has one use.
1125 if (!N0.hasOneUse())
1126 break;
1127
1128 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1129 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1130 SDNode *BEXTI = CurDAG->getMachineNode(
1131 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1132 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1133 ReplaceNode(Node, BEXTI);
1134 return;
1135 }
1136
1137 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1138 if (Subtarget->hasVendorXTHeadBb()) {
1139 SDNode *THEXTU = CurDAG->getMachineNode(
1140 RISCV::TH_EXTU, DL, VT, N0->getOperand(0),
1141 CurDAG->getTargetConstant(TrailingOnes - 1, DL, VT),
1142 CurDAG->getTargetConstant(ShAmt, DL, VT));
1143 ReplaceNode(Node, THEXTU);
1144 return;
1145 }
1146
1147 SDNode *SLLI =
1148 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1149 CurDAG->getTargetConstant(LShAmt, DL, VT));
1150 SDNode *SRLI = CurDAG->getMachineNode(
1151 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1152 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1153 ReplaceNode(Node, SRLI);
1154 return;
1155 }
1156 case ISD::SRA: {
1157 if (trySignedBitfieldExtract(Node))
1158 return;
1159
1160 // Optimize (sra (sext_inreg X, i16), C) ->
1161 // (srai (slli X, (XLen-16), (XLen-16) + C)
1162 // And (sra (sext_inreg X, i8), C) ->
1163 // (srai (slli X, (XLen-8), (XLen-8) + C)
1164 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1165 // This transform matches the code we get without Zbb. The shifts are more
1166 // compressible, and this can help expose CSE opportunities in the sdiv by
1167 // constant optimization.
1168 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1169 if (!N1C)
1170 break;
1171 SDValue N0 = Node->getOperand(0);
1172 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1173 break;
1174 unsigned ShAmt = N1C->getZExtValue();
1175 unsigned ExtSize =
1176 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1177 // ExtSize of 32 should use sraiw via tablegen pattern.
1178 if (ExtSize >= 32 || ShAmt >= ExtSize)
1179 break;
1180 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1181 SDNode *SLLI =
1182 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1183 CurDAG->getTargetConstant(LShAmt, DL, VT));
1184 SDNode *SRAI = CurDAG->getMachineNode(
1185 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1186 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1187 ReplaceNode(Node, SRAI);
1188 return;
1189 }
1190 case ISD::OR:
1191 case ISD::XOR:
1192 if (tryShrinkShlLogicImm(Node))
1193 return;
1194
1195 break;
1196 case ISD::AND: {
1197 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1198 if (!N1C)
1199 break;
1200
1201 SDValue N0 = Node->getOperand(0);
1202
1203 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1204 SDValue X, unsigned Msb,
1205 unsigned Lsb) {
1206 if (!Subtarget->hasVendorXTHeadBb())
1207 return false;
1208
1209 SDNode *TH_EXTU = CurDAG->getMachineNode(
1210 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1211 CurDAG->getTargetConstant(Lsb, DL, VT));
1212 ReplaceNode(Node, TH_EXTU);
1213 return true;
1214 };
1215
1216 bool LeftShift = N0.getOpcode() == ISD::SHL;
1217 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1218 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1219 if (!C)
1220 break;
1221 unsigned C2 = C->getZExtValue();
1222 unsigned XLen = Subtarget->getXLen();
1223 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1224
1225 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1226 // shift pair might offer more compression opportunities.
1227 // TODO: We could check for C extension here, but we don't have many lit
1228 // tests with the C extension enabled so not checking gets better
1229 // coverage.
1230 // TODO: What if ANDI faster than shift?
1231 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1232
1233 uint64_t C1 = N1C->getZExtValue();
1234
1235 // Clear irrelevant bits in the mask.
1236 if (LeftShift)
1237 C1 &= maskTrailingZeros<uint64_t>(C2);
1238 else
1239 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1240
1241 // Some transforms should only be done if the shift has a single use or
1242 // the AND would become (srli (slli X, 32), 32)
1243 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1244
1245 SDValue X = N0.getOperand(0);
1246
1247 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1248 // with c3 leading zeros.
1249 if (!LeftShift && isMask_64(C1)) {
1250 unsigned Leading = XLen - llvm::bit_width(C1);
1251 if (C2 < Leading) {
1252 // If the number of leading zeros is C2+32 this can be SRLIW.
1253 if (C2 + 32 == Leading) {
1254 SDNode *SRLIW = CurDAG->getMachineNode(
1255 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1256 ReplaceNode(Node, SRLIW);
1257 return;
1258 }
1259
1260 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1261 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1262 //
1263 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1264 // legalized and goes through DAG combine.
1265 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1266 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1267 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1268 SDNode *SRAIW =
1269 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1270 CurDAG->getTargetConstant(31, DL, VT));
1271 SDNode *SRLIW = CurDAG->getMachineNode(
1272 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1273 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1274 ReplaceNode(Node, SRLIW);
1275 return;
1276 }
1277
1278 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1279 // available.
1280 // Transform (and (srl x, C2), C1)
1281 // -> (<bfextract> x, msb, lsb)
1282 //
1283 // Make sure to keep this below the SRLIW cases, as we always want to
1284 // prefer the more common instruction.
1285 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1286 const unsigned Lsb = C2;
1287 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1288 return;
1289
1290 // (srli (slli x, c3-c2), c3).
1291 // Skip if we could use (zext.w (sraiw X, C2)).
1292 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1293 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1294 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1295 // Also Skip if we can use bexti or th.tst.
1296 Skip |= HasBitTest && Leading == XLen - 1;
1297 if (OneUseOrZExtW && !Skip) {
1298 SDNode *SLLI = CurDAG->getMachineNode(
1299 RISCV::SLLI, DL, VT, X,
1300 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1301 SDNode *SRLI = CurDAG->getMachineNode(
1302 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1303 CurDAG->getTargetConstant(Leading, DL, VT));
1304 ReplaceNode(Node, SRLI);
1305 return;
1306 }
1307 }
1308 }
1309
1310 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1311 // shifted by c2 bits with c3 leading zeros.
1312 if (LeftShift && isShiftedMask_64(C1)) {
1313 unsigned Leading = XLen - llvm::bit_width(C1);
1314
1315 if (C2 + Leading < XLen &&
1316 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1317 // Use slli.uw when possible.
1318 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1319 SDNode *SLLI_UW =
1320 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1321 CurDAG->getTargetConstant(C2, DL, VT));
1322 ReplaceNode(Node, SLLI_UW);
1323 return;
1324 }
1325
1326 // (srli (slli c2+c3), c3)
1327 if (OneUseOrZExtW && !IsCANDI) {
1328 SDNode *SLLI = CurDAG->getMachineNode(
1329 RISCV::SLLI, DL, VT, X,
1330 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1331 SDNode *SRLI = CurDAG->getMachineNode(
1332 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1333 CurDAG->getTargetConstant(Leading, DL, VT));
1334 ReplaceNode(Node, SRLI);
1335 return;
1336 }
1337 }
1338 }
1339
1340 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1341 // shifted mask with c2 leading zeros and c3 trailing zeros.
1342 if (!LeftShift && isShiftedMask_64(C1)) {
1343 unsigned Leading = XLen - llvm::bit_width(C1);
1344 unsigned Trailing = llvm::countr_zero(C1);
1345 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1346 !IsCANDI) {
1347 unsigned SrliOpc = RISCV::SRLI;
1348 // If the input is zexti32 we should use SRLIW.
1349 if (X.getOpcode() == ISD::AND &&
1350 isa<ConstantSDNode>(X.getOperand(1)) &&
1351 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1352 SrliOpc = RISCV::SRLIW;
1353 X = X.getOperand(0);
1354 }
1355 SDNode *SRLI = CurDAG->getMachineNode(
1356 SrliOpc, DL, VT, X,
1357 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1358 SDNode *SLLI = CurDAG->getMachineNode(
1359 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1360 CurDAG->getTargetConstant(Trailing, DL, VT));
1361 ReplaceNode(Node, SLLI);
1362 return;
1363 }
1364 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1365 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1366 OneUseOrZExtW && !IsCANDI) {
1367 SDNode *SRLIW = CurDAG->getMachineNode(
1368 RISCV::SRLIW, DL, VT, X,
1369 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1370 SDNode *SLLI = CurDAG->getMachineNode(
1371 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1372 CurDAG->getTargetConstant(Trailing, DL, VT));
1373 ReplaceNode(Node, SLLI);
1374 return;
1375 }
1376 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1377 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1378 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1379 SDNode *SRLI = CurDAG->getMachineNode(
1380 RISCV::SRLI, DL, VT, X,
1381 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1382 SDNode *SLLI_UW = CurDAG->getMachineNode(
1383 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1384 CurDAG->getTargetConstant(Trailing, DL, VT));
1385 ReplaceNode(Node, SLLI_UW);
1386 return;
1387 }
1388 }
1389
1390 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1391 // shifted mask with no leading zeros and c3 trailing zeros.
1392 if (LeftShift && isShiftedMask_64(C1)) {
1393 unsigned Leading = XLen - llvm::bit_width(C1);
1394 unsigned Trailing = llvm::countr_zero(C1);
1395 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1396 SDNode *SRLI = CurDAG->getMachineNode(
1397 RISCV::SRLI, DL, VT, X,
1398 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1399 SDNode *SLLI = CurDAG->getMachineNode(
1400 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1401 CurDAG->getTargetConstant(Trailing, DL, VT));
1402 ReplaceNode(Node, SLLI);
1403 return;
1404 }
1405 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1406 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1407 SDNode *SRLIW = CurDAG->getMachineNode(
1408 RISCV::SRLIW, DL, VT, X,
1409 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1410 SDNode *SLLI = CurDAG->getMachineNode(
1411 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1412 CurDAG->getTargetConstant(Trailing, DL, VT));
1413 ReplaceNode(Node, SLLI);
1414 return;
1415 }
1416
1417 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1418 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1419 Subtarget->hasStdExtZba()) {
1420 SDNode *SRLI = CurDAG->getMachineNode(
1421 RISCV::SRLI, DL, VT, X,
1422 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1423 SDNode *SLLI_UW = CurDAG->getMachineNode(
1424 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1425 CurDAG->getTargetConstant(Trailing, DL, VT));
1426 ReplaceNode(Node, SLLI_UW);
1427 return;
1428 }
1429 }
1430 }
1431
1432 const uint64_t C1 = N1C->getZExtValue();
1433
1434 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1435 N0.hasOneUse()) {
1436 unsigned C2 = N0.getConstantOperandVal(1);
1437 unsigned XLen = Subtarget->getXLen();
1438 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1439
1440 SDValue X = N0.getOperand(0);
1441
1442 // Prefer SRAIW + ANDI when possible.
1443 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1444 X.getOpcode() == ISD::SHL &&
1445 isa<ConstantSDNode>(X.getOperand(1)) &&
1446 X.getConstantOperandVal(1) == 32;
1447 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1448 // mask with c3 leading zeros and c2 is larger than c3.
1449 if (isMask_64(C1) && !Skip) {
1450 unsigned Leading = XLen - llvm::bit_width(C1);
1451 if (C2 > Leading) {
1452 SDNode *SRAI = CurDAG->getMachineNode(
1453 RISCV::SRAI, DL, VT, X,
1454 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1455 SDNode *SRLI = CurDAG->getMachineNode(
1456 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1457 CurDAG->getTargetConstant(Leading, DL, VT));
1458 ReplaceNode(Node, SRLI);
1459 return;
1460 }
1461 }
1462
1463 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1464 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1465 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1466 if (isShiftedMask_64(C1) && !Skip) {
1467 unsigned Leading = XLen - llvm::bit_width(C1);
1468 unsigned Trailing = llvm::countr_zero(C1);
1469 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1470 SDNode *SRAI = CurDAG->getMachineNode(
1471 RISCV::SRAI, DL, VT, N0.getOperand(0),
1472 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1473 SDNode *SRLI = CurDAG->getMachineNode(
1474 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1475 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1476 SDNode *SLLI = CurDAG->getMachineNode(
1477 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1478 CurDAG->getTargetConstant(Trailing, DL, VT));
1479 ReplaceNode(Node, SLLI);
1480 return;
1481 }
1482 }
1483 }
1484
1485 // If C1 masks off the upper bits only (but can't be formed as an
1486 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1487 // available.
1488 // Transform (and x, C1)
1489 // -> (<bfextract> x, msb, lsb)
1490 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) {
1491 const unsigned Msb = llvm::bit_width(C1) - 1;
1492 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1493 return;
1494 }
1495
1496 if (tryShrinkShlLogicImm(Node))
1497 return;
1498
1499 break;
1500 }
1501 case ISD::MUL: {
1502 // Special case for calculating (mul (and X, C2), C1) where the full product
1503 // fits in XLen bits. We can shift X left by the number of leading zeros in
1504 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1505 // product has XLen trailing zeros, putting it in the output of MULHU. This
1506 // can avoid materializing a constant in a register for C2.
1507
1508 // RHS should be a constant.
1509 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1510 if (!N1C || !N1C->hasOneUse())
1511 break;
1512
1513 // LHS should be an AND with constant.
1514 SDValue N0 = Node->getOperand(0);
1515 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1516 break;
1517
1519
1520 // Constant should be a mask.
1521 if (!isMask_64(C2))
1522 break;
1523
1524 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1525 // multiple users or the constant is a simm12. This prevents inserting a
1526 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1527 // make it more costly to materialize. Otherwise, using a SLLI might allow
1528 // it to be compressed.
1529 bool IsANDIOrZExt =
1530 isInt<12>(C2) ||
1531 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1532 // With XTHeadBb, we can use TH.EXTU.
1533 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1534 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1535 break;
1536 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1537 // the constant is a simm32.
1538 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1539 // With XTHeadBb, we can use TH.EXTU.
1540 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1541 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1542 break;
1543
1544 // We need to shift left the AND input and C1 by a total of XLen bits.
1545
1546 // How far left do we need to shift the AND input?
1547 unsigned XLen = Subtarget->getXLen();
1548 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1549
1550 // The constant gets shifted by the remaining amount unless that would
1551 // shift bits out.
1552 uint64_t C1 = N1C->getZExtValue();
1553 unsigned ConstantShift = XLen - LeadingZeros;
1554 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1555 break;
1556
1557 uint64_t ShiftedC1 = C1 << ConstantShift;
1558 // If this RV32, we need to sign extend the constant.
1559 if (XLen == 32)
1560 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1561
1562 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1563 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1564 SDNode *SLLI =
1565 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1566 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1567 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1568 SDValue(SLLI, 0), SDValue(Imm, 0));
1569 ReplaceNode(Node, MULHU);
1570 return;
1571 }
1572 case ISD::LOAD: {
1573 if (tryIndexedLoad(Node))
1574 return;
1575
1576 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1577 // We match post-incrementing load here
1578 LoadSDNode *Load = cast<LoadSDNode>(Node);
1579 if (Load->getAddressingMode() != ISD::POST_INC)
1580 break;
1581
1582 SDValue Chain = Node->getOperand(0);
1583 SDValue Base = Node->getOperand(1);
1584 SDValue Offset = Node->getOperand(2);
1585
1586 bool Simm12 = false;
1587 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1588
1589 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1590 int ConstantVal = ConstantOffset->getSExtValue();
1591 Simm12 = isInt<12>(ConstantVal);
1592 if (Simm12)
1593 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1594 Offset.getValueType());
1595 }
1596
1597 unsigned Opcode = 0;
1598 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1599 case MVT::i8:
1600 if (Simm12 && SignExtend)
1601 Opcode = RISCV::CV_LB_ri_inc;
1602 else if (Simm12 && !SignExtend)
1603 Opcode = RISCV::CV_LBU_ri_inc;
1604 else if (!Simm12 && SignExtend)
1605 Opcode = RISCV::CV_LB_rr_inc;
1606 else
1607 Opcode = RISCV::CV_LBU_rr_inc;
1608 break;
1609 case MVT::i16:
1610 if (Simm12 && SignExtend)
1611 Opcode = RISCV::CV_LH_ri_inc;
1612 else if (Simm12 && !SignExtend)
1613 Opcode = RISCV::CV_LHU_ri_inc;
1614 else if (!Simm12 && SignExtend)
1615 Opcode = RISCV::CV_LH_rr_inc;
1616 else
1617 Opcode = RISCV::CV_LHU_rr_inc;
1618 break;
1619 case MVT::i32:
1620 if (Simm12)
1621 Opcode = RISCV::CV_LW_ri_inc;
1622 else
1623 Opcode = RISCV::CV_LW_rr_inc;
1624 break;
1625 default:
1626 break;
1627 }
1628 if (!Opcode)
1629 break;
1630
1631 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1632 Chain.getSimpleValueType(), Base,
1633 Offset, Chain));
1634 return;
1635 }
1636 break;
1637 }
1639 unsigned IntNo = Node->getConstantOperandVal(0);
1640 switch (IntNo) {
1641 // By default we do not custom select any intrinsic.
1642 default:
1643 break;
1644 case Intrinsic::riscv_vmsgeu:
1645 case Intrinsic::riscv_vmsge: {
1646 SDValue Src1 = Node->getOperand(1);
1647 SDValue Src2 = Node->getOperand(2);
1648 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1649 bool IsCmpConstant = false;
1650 bool IsCmpMinimum = false;
1651 // Only custom select scalar second operand.
1652 if (Src2.getValueType() != XLenVT)
1653 break;
1654 // Small constants are handled with patterns.
1655 int64_t CVal = 0;
1656 MVT Src1VT = Src1.getSimpleValueType();
1657 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1658 IsCmpConstant = true;
1659 CVal = C->getSExtValue();
1660 if (CVal >= -15 && CVal <= 16) {
1661 if (!IsUnsigned || CVal != 0)
1662 break;
1663 IsCmpMinimum = true;
1664 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1665 Src1VT.getScalarSizeInBits())
1666 .getSExtValue()) {
1667 IsCmpMinimum = true;
1668 }
1669 }
1670 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1671 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1672 default:
1673 llvm_unreachable("Unexpected LMUL!");
1674#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1675 case RISCVII::VLMUL::lmulenum: \
1676 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1677 : RISCV::PseudoVMSLT_VX_##suffix; \
1678 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
1679 : RISCV::PseudoVMSGT_VX_##suffix; \
1680 break;
1681 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1682 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1683 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1684 CASE_VMSLT_OPCODES(LMUL_1, M1)
1685 CASE_VMSLT_OPCODES(LMUL_2, M2)
1686 CASE_VMSLT_OPCODES(LMUL_4, M4)
1687 CASE_VMSLT_OPCODES(LMUL_8, M8)
1688#undef CASE_VMSLT_OPCODES
1689 }
1690 // Mask operations use the LMUL from the mask type.
1691 switch (RISCVTargetLowering::getLMUL(VT)) {
1692 default:
1693 llvm_unreachable("Unexpected LMUL!");
1694#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
1695 case RISCVII::VLMUL::lmulenum: \
1696 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1697 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
1698 break;
1699 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1700 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1701 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1702 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1703 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1704 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1705 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1706#undef CASE_VMNAND_VMSET_OPCODES
1707 }
1709 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1710 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1711 SDValue VL;
1712 selectVLOp(Node->getOperand(3), VL);
1713
1714 // If vmsge(u) with minimum value, expand it to vmset.
1715 if (IsCmpMinimum) {
1716 ReplaceNode(Node,
1717 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
1718 return;
1719 }
1720
1721 if (IsCmpConstant) {
1722 SDValue Imm =
1723 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1724
1725 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
1726 {Src1, Imm, VL, SEW}));
1727 return;
1728 }
1729
1730 // Expand to
1731 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1732 SDValue Cmp = SDValue(
1733 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1734 0);
1735 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1736 {Cmp, Cmp, VL, MaskSEW}));
1737 return;
1738 }
1739 case Intrinsic::riscv_vmsgeu_mask:
1740 case Intrinsic::riscv_vmsge_mask: {
1741 SDValue Src1 = Node->getOperand(2);
1742 SDValue Src2 = Node->getOperand(3);
1743 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1744 bool IsCmpConstant = false;
1745 bool IsCmpMinimum = false;
1746 // Only custom select scalar second operand.
1747 if (Src2.getValueType() != XLenVT)
1748 break;
1749 // Small constants are handled with patterns.
1750 MVT Src1VT = Src1.getSimpleValueType();
1751 int64_t CVal = 0;
1752 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1753 IsCmpConstant = true;
1754 CVal = C->getSExtValue();
1755 if (CVal >= -15 && CVal <= 16) {
1756 if (!IsUnsigned || CVal != 0)
1757 break;
1758 IsCmpMinimum = true;
1759 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1760 Src1VT.getScalarSizeInBits())
1761 .getSExtValue()) {
1762 IsCmpMinimum = true;
1763 }
1764 }
1765 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1766 VMOROpcode, VMSGTMaskOpcode;
1767 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1768 default:
1769 llvm_unreachable("Unexpected LMUL!");
1770#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1771 case RISCVII::VLMUL::lmulenum: \
1772 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1773 : RISCV::PseudoVMSLT_VX_##suffix; \
1774 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1775 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1776 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
1777 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
1778 break;
1779 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1780 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1781 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1782 CASE_VMSLT_OPCODES(LMUL_1, M1)
1783 CASE_VMSLT_OPCODES(LMUL_2, M2)
1784 CASE_VMSLT_OPCODES(LMUL_4, M4)
1785 CASE_VMSLT_OPCODES(LMUL_8, M8)
1786#undef CASE_VMSLT_OPCODES
1787 }
1788 // Mask operations use the LMUL from the mask type.
1789 switch (RISCVTargetLowering::getLMUL(VT)) {
1790 default:
1791 llvm_unreachable("Unexpected LMUL!");
1792#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1793 case RISCVII::VLMUL::lmulenum: \
1794 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1795 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1796 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1797 break;
1798 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
1799 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
1800 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
1805#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1806 }
1808 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1809 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1810 SDValue VL;
1811 selectVLOp(Node->getOperand(5), VL);
1812 SDValue MaskedOff = Node->getOperand(1);
1813 SDValue Mask = Node->getOperand(4);
1814
1815 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
1816 if (IsCmpMinimum) {
1817 // We don't need vmor if the MaskedOff and the Mask are the same
1818 // value.
1819 if (Mask == MaskedOff) {
1820 ReplaceUses(Node, Mask.getNode());
1821 return;
1822 }
1823 ReplaceNode(Node,
1824 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1825 {Mask, MaskedOff, VL, MaskSEW}));
1826 return;
1827 }
1828
1829 // If the MaskedOff value and the Mask are the same value use
1830 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1831 // This avoids needing to copy v0 to vd before starting the next sequence.
1832 if (Mask == MaskedOff) {
1833 SDValue Cmp = SDValue(
1834 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1835 0);
1836 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1837 {Mask, Cmp, VL, MaskSEW}));
1838 return;
1839 }
1840
1841 if (IsCmpConstant) {
1842 SDValue Imm =
1843 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1844
1846 VMSGTMaskOpcode, DL, VT,
1847 {MaskedOff, Src1, Imm, Mask, VL, SEW}));
1848 return;
1849 }
1850
1851 // Otherwise use
1852 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1853 // The result is mask undisturbed.
1854 // We use the same instructions to emulate mask agnostic behavior, because
1855 // the agnostic result can be either undisturbed or all 1.
1856 SDValue Cmp = SDValue(
1857 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1858 {MaskedOff, Src1, Src2, Mask, VL, SEW}),
1859 0);
1860 // vmxor.mm vd, vd, v0 is used to update active value.
1861 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1862 {Cmp, Mask, VL, MaskSEW}));
1863 return;
1864 }
1865 case Intrinsic::riscv_vsetvli:
1866 case Intrinsic::riscv_vsetvlimax:
1867 return selectVSETVLI(Node);
1868 }
1869 break;
1870 }
1872 unsigned IntNo = Node->getConstantOperandVal(1);
1873 switch (IntNo) {
1874 // By default we do not custom select any intrinsic.
1875 default:
1876 break;
1877 case Intrinsic::riscv_vlseg2:
1878 case Intrinsic::riscv_vlseg3:
1879 case Intrinsic::riscv_vlseg4:
1880 case Intrinsic::riscv_vlseg5:
1881 case Intrinsic::riscv_vlseg6:
1882 case Intrinsic::riscv_vlseg7:
1883 case Intrinsic::riscv_vlseg8: {
1884 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1885 /*IsStrided*/ false);
1886 return;
1887 }
1888 case Intrinsic::riscv_vlseg2_mask:
1889 case Intrinsic::riscv_vlseg3_mask:
1890 case Intrinsic::riscv_vlseg4_mask:
1891 case Intrinsic::riscv_vlseg5_mask:
1892 case Intrinsic::riscv_vlseg6_mask:
1893 case Intrinsic::riscv_vlseg7_mask:
1894 case Intrinsic::riscv_vlseg8_mask: {
1895 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1896 /*IsStrided*/ false);
1897 return;
1898 }
1899 case Intrinsic::riscv_vlsseg2:
1900 case Intrinsic::riscv_vlsseg3:
1901 case Intrinsic::riscv_vlsseg4:
1902 case Intrinsic::riscv_vlsseg5:
1903 case Intrinsic::riscv_vlsseg6:
1904 case Intrinsic::riscv_vlsseg7:
1905 case Intrinsic::riscv_vlsseg8: {
1906 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1907 /*IsStrided*/ true);
1908 return;
1909 }
1910 case Intrinsic::riscv_vlsseg2_mask:
1911 case Intrinsic::riscv_vlsseg3_mask:
1912 case Intrinsic::riscv_vlsseg4_mask:
1913 case Intrinsic::riscv_vlsseg5_mask:
1914 case Intrinsic::riscv_vlsseg6_mask:
1915 case Intrinsic::riscv_vlsseg7_mask:
1916 case Intrinsic::riscv_vlsseg8_mask: {
1917 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1918 /*IsStrided*/ true);
1919 return;
1920 }
1921 case Intrinsic::riscv_vloxseg2:
1922 case Intrinsic::riscv_vloxseg3:
1923 case Intrinsic::riscv_vloxseg4:
1924 case Intrinsic::riscv_vloxseg5:
1925 case Intrinsic::riscv_vloxseg6:
1926 case Intrinsic::riscv_vloxseg7:
1927 case Intrinsic::riscv_vloxseg8:
1928 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1929 /*IsOrdered*/ true);
1930 return;
1931 case Intrinsic::riscv_vluxseg2:
1932 case Intrinsic::riscv_vluxseg3:
1933 case Intrinsic::riscv_vluxseg4:
1934 case Intrinsic::riscv_vluxseg5:
1935 case Intrinsic::riscv_vluxseg6:
1936 case Intrinsic::riscv_vluxseg7:
1937 case Intrinsic::riscv_vluxseg8:
1938 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1939 /*IsOrdered*/ false);
1940 return;
1941 case Intrinsic::riscv_vloxseg2_mask:
1942 case Intrinsic::riscv_vloxseg3_mask:
1943 case Intrinsic::riscv_vloxseg4_mask:
1944 case Intrinsic::riscv_vloxseg5_mask:
1945 case Intrinsic::riscv_vloxseg6_mask:
1946 case Intrinsic::riscv_vloxseg7_mask:
1947 case Intrinsic::riscv_vloxseg8_mask:
1948 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1949 /*IsOrdered*/ true);
1950 return;
1951 case Intrinsic::riscv_vluxseg2_mask:
1952 case Intrinsic::riscv_vluxseg3_mask:
1953 case Intrinsic::riscv_vluxseg4_mask:
1954 case Intrinsic::riscv_vluxseg5_mask:
1955 case Intrinsic::riscv_vluxseg6_mask:
1956 case Intrinsic::riscv_vluxseg7_mask:
1957 case Intrinsic::riscv_vluxseg8_mask:
1958 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1959 /*IsOrdered*/ false);
1960 return;
1961 case Intrinsic::riscv_vlseg8ff:
1962 case Intrinsic::riscv_vlseg7ff:
1963 case Intrinsic::riscv_vlseg6ff:
1964 case Intrinsic::riscv_vlseg5ff:
1965 case Intrinsic::riscv_vlseg4ff:
1966 case Intrinsic::riscv_vlseg3ff:
1967 case Intrinsic::riscv_vlseg2ff: {
1968 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
1969 return;
1970 }
1971 case Intrinsic::riscv_vlseg8ff_mask:
1972 case Intrinsic::riscv_vlseg7ff_mask:
1973 case Intrinsic::riscv_vlseg6ff_mask:
1974 case Intrinsic::riscv_vlseg5ff_mask:
1975 case Intrinsic::riscv_vlseg4ff_mask:
1976 case Intrinsic::riscv_vlseg3ff_mask:
1977 case Intrinsic::riscv_vlseg2ff_mask: {
1978 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
1979 return;
1980 }
1981 case Intrinsic::riscv_vloxei:
1982 case Intrinsic::riscv_vloxei_mask:
1983 case Intrinsic::riscv_vluxei:
1984 case Intrinsic::riscv_vluxei_mask: {
1985 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1986 IntNo == Intrinsic::riscv_vluxei_mask;
1987 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1988 IntNo == Intrinsic::riscv_vloxei_mask;
1989
1990 MVT VT = Node->getSimpleValueType(0);
1991 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1992
1993 unsigned CurOp = 2;
1995 Operands.push_back(Node->getOperand(CurOp++));
1996
1997 MVT IndexVT;
1998 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1999 /*IsStridedOrIndexed*/ true, Operands,
2000 /*IsLoad=*/true, &IndexVT);
2001
2003 "Element count mismatch");
2004
2006 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2007 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2008 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2009 report_fatal_error("The V extension does not support EEW=64 for index "
2010 "values when XLEN=32");
2011 }
2012 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2013 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2014 static_cast<unsigned>(IndexLMUL));
2015 MachineSDNode *Load =
2016 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2017
2018 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2019 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2020
2021 ReplaceNode(Node, Load);
2022 return;
2023 }
2024 case Intrinsic::riscv_vlm:
2025 case Intrinsic::riscv_vle:
2026 case Intrinsic::riscv_vle_mask:
2027 case Intrinsic::riscv_vlse:
2028 case Intrinsic::riscv_vlse_mask: {
2029 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2030 IntNo == Intrinsic::riscv_vlse_mask;
2031 bool IsStrided =
2032 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2033
2034 MVT VT = Node->getSimpleValueType(0);
2035 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2036
2037 // The riscv_vlm intrinsic are always tail agnostic and no passthru
2038 // operand at the IR level. In pseudos, they have both policy and
2039 // passthru operand. The passthru operand is needed to track the
2040 // "tail undefined" state, and the policy is there just for
2041 // for consistency - it will always be "don't care" for the
2042 // unmasked form.
2043 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2044 unsigned CurOp = 2;
2046 if (HasPassthruOperand)
2047 Operands.push_back(Node->getOperand(CurOp++));
2048 else {
2049 // We eagerly lower to implicit_def (instead of undef), as we
2050 // otherwise fail to select nodes such as: nxv1i1 = undef
2051 SDNode *Passthru =
2052 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2053 Operands.push_back(SDValue(Passthru, 0));
2054 }
2055 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2056 Operands, /*IsLoad=*/true);
2057
2059 const RISCV::VLEPseudo *P =
2060 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2061 static_cast<unsigned>(LMUL));
2062 MachineSDNode *Load =
2063 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2064
2065 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2066 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2067
2068 ReplaceNode(Node, Load);
2069 return;
2070 }
2071 case Intrinsic::riscv_vleff:
2072 case Intrinsic::riscv_vleff_mask: {
2073 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2074
2075 MVT VT = Node->getSimpleValueType(0);
2076 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2077
2078 unsigned CurOp = 2;
2080 Operands.push_back(Node->getOperand(CurOp++));
2081 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2082 /*IsStridedOrIndexed*/ false, Operands,
2083 /*IsLoad=*/true);
2084
2086 const RISCV::VLEPseudo *P =
2087 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2088 Log2SEW, static_cast<unsigned>(LMUL));
2090 P->Pseudo, DL, Node->getVTList(), Operands);
2091 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2092 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2093
2094 ReplaceNode(Node, Load);
2095 return;
2096 }
2097 }
2098 break;
2099 }
2100 case ISD::INTRINSIC_VOID: {
2101 unsigned IntNo = Node->getConstantOperandVal(1);
2102 switch (IntNo) {
2103 case Intrinsic::riscv_vsseg2:
2104 case Intrinsic::riscv_vsseg3:
2105 case Intrinsic::riscv_vsseg4:
2106 case Intrinsic::riscv_vsseg5:
2107 case Intrinsic::riscv_vsseg6:
2108 case Intrinsic::riscv_vsseg7:
2109 case Intrinsic::riscv_vsseg8: {
2110 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2111 /*IsStrided*/ false);
2112 return;
2113 }
2114 case Intrinsic::riscv_vsseg2_mask:
2115 case Intrinsic::riscv_vsseg3_mask:
2116 case Intrinsic::riscv_vsseg4_mask:
2117 case Intrinsic::riscv_vsseg5_mask:
2118 case Intrinsic::riscv_vsseg6_mask:
2119 case Intrinsic::riscv_vsseg7_mask:
2120 case Intrinsic::riscv_vsseg8_mask: {
2121 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2122 /*IsStrided*/ false);
2123 return;
2124 }
2125 case Intrinsic::riscv_vssseg2:
2126 case Intrinsic::riscv_vssseg3:
2127 case Intrinsic::riscv_vssseg4:
2128 case Intrinsic::riscv_vssseg5:
2129 case Intrinsic::riscv_vssseg6:
2130 case Intrinsic::riscv_vssseg7:
2131 case Intrinsic::riscv_vssseg8: {
2132 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2133 /*IsStrided*/ true);
2134 return;
2135 }
2136 case Intrinsic::riscv_vssseg2_mask:
2137 case Intrinsic::riscv_vssseg3_mask:
2138 case Intrinsic::riscv_vssseg4_mask:
2139 case Intrinsic::riscv_vssseg5_mask:
2140 case Intrinsic::riscv_vssseg6_mask:
2141 case Intrinsic::riscv_vssseg7_mask:
2142 case Intrinsic::riscv_vssseg8_mask: {
2143 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2144 /*IsStrided*/ true);
2145 return;
2146 }
2147 case Intrinsic::riscv_vsoxseg2:
2148 case Intrinsic::riscv_vsoxseg3:
2149 case Intrinsic::riscv_vsoxseg4:
2150 case Intrinsic::riscv_vsoxseg5:
2151 case Intrinsic::riscv_vsoxseg6:
2152 case Intrinsic::riscv_vsoxseg7:
2153 case Intrinsic::riscv_vsoxseg8:
2154 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2155 /*IsOrdered*/ true);
2156 return;
2157 case Intrinsic::riscv_vsuxseg2:
2158 case Intrinsic::riscv_vsuxseg3:
2159 case Intrinsic::riscv_vsuxseg4:
2160 case Intrinsic::riscv_vsuxseg5:
2161 case Intrinsic::riscv_vsuxseg6:
2162 case Intrinsic::riscv_vsuxseg7:
2163 case Intrinsic::riscv_vsuxseg8:
2164 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2165 /*IsOrdered*/ false);
2166 return;
2167 case Intrinsic::riscv_vsoxseg2_mask:
2168 case Intrinsic::riscv_vsoxseg3_mask:
2169 case Intrinsic::riscv_vsoxseg4_mask:
2170 case Intrinsic::riscv_vsoxseg5_mask:
2171 case Intrinsic::riscv_vsoxseg6_mask:
2172 case Intrinsic::riscv_vsoxseg7_mask:
2173 case Intrinsic::riscv_vsoxseg8_mask:
2174 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2175 /*IsOrdered*/ true);
2176 return;
2177 case Intrinsic::riscv_vsuxseg2_mask:
2178 case Intrinsic::riscv_vsuxseg3_mask:
2179 case Intrinsic::riscv_vsuxseg4_mask:
2180 case Intrinsic::riscv_vsuxseg5_mask:
2181 case Intrinsic::riscv_vsuxseg6_mask:
2182 case Intrinsic::riscv_vsuxseg7_mask:
2183 case Intrinsic::riscv_vsuxseg8_mask:
2184 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2185 /*IsOrdered*/ false);
2186 return;
2187 case Intrinsic::riscv_vsoxei:
2188 case Intrinsic::riscv_vsoxei_mask:
2189 case Intrinsic::riscv_vsuxei:
2190 case Intrinsic::riscv_vsuxei_mask: {
2191 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2192 IntNo == Intrinsic::riscv_vsuxei_mask;
2193 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2194 IntNo == Intrinsic::riscv_vsoxei_mask;
2195
2196 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2197 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2198
2199 unsigned CurOp = 2;
2201 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2202
2203 MVT IndexVT;
2204 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2205 /*IsStridedOrIndexed*/ true, Operands,
2206 /*IsLoad=*/false, &IndexVT);
2207
2209 "Element count mismatch");
2210
2212 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2213 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2214 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2215 report_fatal_error("The V extension does not support EEW=64 for index "
2216 "values when XLEN=32");
2217 }
2218 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2219 IsMasked, IsOrdered, IndexLog2EEW,
2220 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2221 MachineSDNode *Store =
2222 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2223
2224 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2225 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2226
2227 ReplaceNode(Node, Store);
2228 return;
2229 }
2230 case Intrinsic::riscv_vsm:
2231 case Intrinsic::riscv_vse:
2232 case Intrinsic::riscv_vse_mask:
2233 case Intrinsic::riscv_vsse:
2234 case Intrinsic::riscv_vsse_mask: {
2235 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2236 IntNo == Intrinsic::riscv_vsse_mask;
2237 bool IsStrided =
2238 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2239
2240 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2241 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2242
2243 unsigned CurOp = 2;
2245 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2246
2247 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2248 Operands);
2249
2251 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2252 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2253 MachineSDNode *Store =
2254 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2255 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2256 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2257
2258 ReplaceNode(Node, Store);
2259 return;
2260 }
2261 case Intrinsic::riscv_sf_vc_x_se:
2262 case Intrinsic::riscv_sf_vc_i_se:
2263 selectSF_VC_X_SE(Node);
2264 return;
2265 }
2266 break;
2267 }
2268 case ISD::BITCAST: {
2269 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2270 // Just drop bitcasts between vectors if both are fixed or both are
2271 // scalable.
2272 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2273 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2274 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2275 CurDAG->RemoveDeadNode(Node);
2276 return;
2277 }
2278 break;
2279 }
2282 SDValue V = Node->getOperand(0);
2283 SDValue SubV = Node->getOperand(1);
2284 SDLoc DL(SubV);
2285 auto Idx = Node->getConstantOperandVal(2);
2286 MVT SubVecVT = SubV.getSimpleValueType();
2287
2288 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2289 MVT SubVecContainerVT = SubVecVT;
2290 // Establish the correct scalable-vector types for any fixed-length type.
2291 if (SubVecVT.isFixedLengthVector()) {
2292 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2294 [[maybe_unused]] bool ExactlyVecRegSized =
2295 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2296 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2297 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2298 .getKnownMinValue()));
2299 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2300 }
2301 MVT ContainerVT = VT;
2302 if (VT.isFixedLengthVector())
2303 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2304
2305 const auto *TRI = Subtarget->getRegisterInfo();
2306 unsigned SubRegIdx;
2307 std::tie(SubRegIdx, Idx) =
2309 ContainerVT, SubVecContainerVT, Idx, TRI);
2310
2311 // If the Idx hasn't been completely eliminated then this is a subvector
2312 // insert which doesn't naturally align to a vector register. These must
2313 // be handled using instructions to manipulate the vector registers.
2314 if (Idx != 0)
2315 break;
2316
2317 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2318 [[maybe_unused]] bool IsSubVecPartReg =
2319 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2320 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2321 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2322 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2323 V.isUndef()) &&
2324 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2325 "the subvector is smaller than a full-sized register");
2326
2327 // If we haven't set a SubRegIdx, then we must be going between
2328 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2329 if (SubRegIdx == RISCV::NoSubRegister) {
2330 unsigned InRegClassID =
2333 InRegClassID &&
2334 "Unexpected subvector extraction");
2335 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2336 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2337 DL, VT, SubV, RC);
2338 ReplaceNode(Node, NewNode);
2339 return;
2340 }
2341
2342 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2343 ReplaceNode(Node, Insert.getNode());
2344 return;
2345 }
2348 SDValue V = Node->getOperand(0);
2349 auto Idx = Node->getConstantOperandVal(1);
2350 MVT InVT = V.getSimpleValueType();
2351 SDLoc DL(V);
2352
2353 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2354 MVT SubVecContainerVT = VT;
2355 // Establish the correct scalable-vector types for any fixed-length type.
2356 if (VT.isFixedLengthVector()) {
2357 assert(Idx == 0);
2358 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2359 }
2360 if (InVT.isFixedLengthVector())
2361 InVT = TLI.getContainerForFixedLengthVector(InVT);
2362
2363 const auto *TRI = Subtarget->getRegisterInfo();
2364 unsigned SubRegIdx;
2365 std::tie(SubRegIdx, Idx) =
2367 InVT, SubVecContainerVT, Idx, TRI);
2368
2369 // If the Idx hasn't been completely eliminated then this is a subvector
2370 // extract which doesn't naturally align to a vector register. These must
2371 // be handled using instructions to manipulate the vector registers.
2372 if (Idx != 0)
2373 break;
2374
2375 // If we haven't set a SubRegIdx, then we must be going between
2376 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2377 if (SubRegIdx == RISCV::NoSubRegister) {
2378 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2380 InRegClassID &&
2381 "Unexpected subvector extraction");
2382 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2383 SDNode *NewNode =
2384 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2385 ReplaceNode(Node, NewNode);
2386 return;
2387 }
2388
2389 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2390 ReplaceNode(Node, Extract.getNode());
2391 return;
2392 }
2396 case RISCVISD::VFMV_V_F_VL: {
2397 // Try to match splat of a scalar load to a strided load with stride of x0.
2398 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2399 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2400 if (!Node->getOperand(0).isUndef())
2401 break;
2402 SDValue Src = Node->getOperand(1);
2403 auto *Ld = dyn_cast<LoadSDNode>(Src);
2404 // Can't fold load update node because the second
2405 // output is used so that load update node can't be removed.
2406 if (!Ld || Ld->isIndexed())
2407 break;
2408 EVT MemVT = Ld->getMemoryVT();
2409 // The memory VT should be the same size as the element type.
2410 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2411 break;
2412 if (!IsProfitableToFold(Src, Node, Node) ||
2413 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2414 break;
2415
2416 SDValue VL;
2417 if (IsScalarMove) {
2418 // We could deal with more VL if we update the VSETVLI insert pass to
2419 // avoid introducing more VSETVLI.
2420 if (!isOneConstant(Node->getOperand(2)))
2421 break;
2422 selectVLOp(Node->getOperand(2), VL);
2423 } else
2424 selectVLOp(Node->getOperand(2), VL);
2425
2426 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2427 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2428
2429 // If VL=1, then we don't need to do a strided load and can just do a
2430 // regular load.
2431 bool IsStrided = !isOneConstant(VL);
2432
2433 // Only do a strided load if we have optimized zero-stride vector load.
2434 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2435 break;
2436
2438 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2439 Ld->getBasePtr()};
2440 if (IsStrided)
2441 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2443 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2444 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2445
2447 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2448 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2449 Log2SEW, static_cast<unsigned>(LMUL));
2450 MachineSDNode *Load =
2451 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2452 // Update the chain.
2453 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2454 // Record the mem-refs
2455 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2456 // Replace the splat with the vlse.
2457 ReplaceNode(Node, Load);
2458 return;
2459 }
2460 case ISD::PREFETCH:
2461 unsigned Locality = Node->getConstantOperandVal(3);
2462 if (Locality > 2)
2463 break;
2464
2465 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2466 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2468
2469 int NontemporalLevel = 0;
2470 switch (Locality) {
2471 case 0:
2472 NontemporalLevel = 3; // NTL.ALL
2473 break;
2474 case 1:
2475 NontemporalLevel = 1; // NTL.PALL
2476 break;
2477 case 2:
2478 NontemporalLevel = 0; // NTL.P1
2479 break;
2480 default:
2481 llvm_unreachable("unexpected locality value.");
2482 }
2483
2484 if (NontemporalLevel & 0b1)
2486 if (NontemporalLevel & 0b10)
2488 }
2489 break;
2490 }
2491
2492 // Select the default instruction.
2493 SelectCode(Node);
2494}
2495
2497 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2498 std::vector<SDValue> &OutOps) {
2499 // Always produce a register and immediate operand, as expected by
2500 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2501 switch (ConstraintID) {
// Memory constraint: decompose the address into a base register plus a
// simm12 immediate via SelectAddrRegImm.  That helper cannot fail because
// it ultimately falls back to (Addr, 0).
2504 SDValue Op0, Op1;
2505 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2506 assert(Found && "SelectAddrRegImm should always succeed");
2507 OutOps.push_back(Op0);
2508 OutOps.push_back(Op1);
2509 return false;
2510 }
// Pass the operand straight through as the base register and append an
// explicit zero immediate, so the printer always sees a (reg, imm) pair.
2512 OutOps.push_back(Op);
2513 OutOps.push_back(
2514 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2515 return false;
2516 default:
2517 report_fatal_error("Unexpected asm memory constraint " +
2518 InlineAsm::getMemConstraintName(ConstraintID));
2519 }
2520
// Unreachable: every case above either returns false (success) or aborts
// via report_fatal_error.
2521 return true;
2522}
2523
2525 SDValue &Offset) {
// Match a frame-index address: return the target frame index as Base and a
// zero XLen-sized immediate as Offset.
2526 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2527 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2528 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2529 return true;
2530 }
2531
// Not a bare frame index; let other addressing patterns try.
2532 return false;
2533}
2534
2535// Fold constant addresses.
// Select a constant pointer Addr as a (Base, simm12 Offset) pair:
//  - If the constant minus its sign-extended low 12 bits fits in 32 bits,
//    materialize the high part with a single LUI (or use X0 when it is
//    zero) and fold the low 12 bits into Offset.
//  - Otherwise ask RISCVMatInt how the constant would be materialized; if
//    the final step is an ADDI, emit the preceding sequence as Base and
//    fold the ADDI immediate into Offset.
// IsPrefetch requires the low 5 bits of the folded offset to be zero;
// IsRV32Zdinx requires Offset+4 to also be a valid simm12 (the 64-bit
// access is split into two 32-bit word accesses).
2536static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2537 const MVT VT, const RISCVSubtarget *Subtarget,
2539 bool IsPrefetch = false,
2540 bool IsRV32Zdinx = false) {
2541 if (!isa<ConstantSDNode>(Addr))
2542 return false;
2543
2544 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2545
2546 // If the constant is a simm12, we can fold the whole constant and use X0 as
2547 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2548 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
2549 int64_t Lo12 = SignExtend64<12>(CVal);
2550 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2551 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2552 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2553 return false;
2554 if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
2555 return false;
2556
2557 if (Hi) {
// Hi20 is the LUI immediate (bits 31:12 of the adjusted constant).
2558 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2559 Base = SDValue(
2560 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2561 CurDAG->getTargetConstant(Hi20, DL, VT)),
2562 0);
2563 } else {
2564 Base = CurDAG->getRegister(RISCV::X0, VT);
2565 }
2566 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2567 return true;
2568 }
2569
2570 // Ask how constant materialization would handle this constant.
2571 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2572
2573 // If the last instruction would be an ADDI, we can fold its immediate and
2574 // emit the rest of the sequence as the base.
2575 if (Seq.back().getOpcode() != RISCV::ADDI)
2576 return false;
2577 Lo12 = Seq.back().getImm();
2578 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2579 return false;
2580 if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
2581 return false;
2582
2583 // Drop the last instruction.
2584 Seq.pop_back();
2585 assert(!Seq.empty() && "Expected more instructions in sequence");
2586
2587 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2588 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2589 return true;
2590}
2591
2592// Is this ADD instruction only used as the base pointer of scalar loads and
2593// stores?
// Folding the ADD's low 12 bits into each memory access only pays off when
// every user is a plain or atomic scalar load/store of an integer or
// f16/f32/f64 value, and the ADD feeds the address operand rather than the
// stored data.
2595 for (auto *User : Add->users()) {
2596 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
2597 User->getOpcode() != ISD::ATOMIC_LOAD &&
2598 User->getOpcode() != ISD::ATOMIC_STORE)
2599 return false;
// Only scalar memory types can fold a reg+imm address.
2600 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
2601 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2602 VT != MVT::f64)
2603 return false;
2604 // Don't allow stores of the value. It must be used as the address.
2605 if (User->getOpcode() == ISD::STORE &&
2606 cast<StoreSDNode>(User)->getValue() == Add)
2607 return false;
2608 if (User->getOpcode() == ISD::ATOMIC_STORE &&
2609 cast<AtomicSDNode>(User)->getVal() == Add)
2610 return false;
2611 }
2612
2613 return true;
2614}
2615
// Match Addr as Base + (Index << Scale) for register-register scaled
// addressing.  UnwrapShl peels a left shift by a constant in
// [0, MaxShiftAmount] off a subexpression, returning the shifted value in
// Index and the shift amount (0 when no shift matched) as Scale.
2617 unsigned MaxShiftAmount,
2618 SDValue &Base, SDValue &Index,
2619 SDValue &Scale) {
2620 EVT VT = Addr.getSimpleValueType();
2621 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2622 SDValue &Shift) {
2623 uint64_t ShiftAmt = 0;
2624 Index = N;
2625
2626 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2627 // Only match shifts by a value in range [0, MaxShiftAmount].
2628 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2629 Index = N.getOperand(0);
2630 ShiftAmt = N.getConstantOperandVal(1);
2631 }
2632 }
2633
// Always produce a Shift operand; returns whether a real shift was found.
2634 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2635 return ShiftAmt != 0;
2636 };
2637
2638 if (Addr.getOpcode() == ISD::ADD) {
2639 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2640 SDValue AddrB = Addr.getOperand(0);
2641 if (AddrB.getOpcode() == ISD::ADD &&
2642 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2643 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2644 isInt<12>(C1->getSExtValue())) {
2645 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2646 SDValue C1Val =
2647 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2648 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2649 AddrB.getOperand(1), C1Val),
2650 0);
2651 return true;
2652 }
// Shift found on the LHS: the RHS is the base.
2653 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2654 Base = Addr.getOperand(1);
2655 return true;
2656 } else {
// No shift on the LHS; try the RHS (Scale becomes 0 if absent) and use
// the LHS as the base.  An unscaled reg+reg match still succeeds.
2657 UnwrapShl(Addr.getOperand(1), Index, Scale);
2658 Base = Addr.getOperand(0);
2659 return true;
2660 }
// Bare (shl A, C) with no addend: use X0 as the base register.
2661 } else if (UnwrapShl(Addr, Index, Scale)) {
2662 EVT VT = Addr.getValueType();
2663 Base = CurDAG->getRegister(RISCV::X0, VT);
2664 return true;
2665 }
2666
2667 return false;
2668}
2669
// Select Addr as a (Base, simm12 Offset) pair for reg+imm addressing.
// Tries, in order: frame index, ADD_LO (hi/lo) folding, ADD with a simm12
// immediate (possibly combined with a preceding ADD_LO), ADD with a larger
// immediate split across an ADDI or materialized base, a bare constant
// address, and finally the (Addr, 0) fallback — so this always succeeds.
// For RV32Zdinx, Offset+4 must also remain a valid simm12 because the
// 64-bit access is expanded into two 32-bit word accesses.
2671 SDValue &Offset, bool IsRV32Zdinx) {
2673 return true;
2674
2675 SDLoc DL(Addr);
2676 MVT VT = Addr.getSimpleValueType();
2677
2678 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2679 // If this is non RV32Zdinx we can always fold.
2680 if (!IsRV32Zdinx) {
2681 Base = Addr.getOperand(0);
2682 Offset = Addr.getOperand(1);
2683 return true;
2684 }
2685
2686 // For RV32Zdinx we need to have more than 4 byte alignment so we can add 4
2687 // to the offset when we expand in RISCVExpandPseudoInsts.
2688 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
2689 const DataLayout &DL = CurDAG->getDataLayout();
2690 Align Alignment = commonAlignment(
2691 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2692 if (Alignment > 4) {
2693 Base = Addr.getOperand(0);
2694 Offset = Addr.getOperand(1);
2695 return true;
2696 }
2697 }
// Constant-pool entries get the same alignment-based treatment.
2698 if (auto *CP = dyn_cast<ConstantPoolSDNode>(Addr.getOperand(1))) {
2699 Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
2700 if (Alignment > 4) {
2701 Base = Addr.getOperand(0);
2702 Offset = Addr.getOperand(1);
2703 return true;
2704 }
2705 }
2706 }
2707
// Headroom on top of the simm12 range so Offset+4 (the second word of a
// Zdinx access) still fits.
2708 int64_t RV32ZdinxRange = IsRV32Zdinx ? 4 : 0;
2710 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2711 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2712 Base = Addr.getOperand(0);
2713 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2714 SDValue LoOperand = Base.getOperand(1);
2715 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2716 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2717 // (its low part, really), then we can rely on the alignment of that
2718 // variable to provide a margin of safety before low part can overflow
2719 // the 12 bits of the load/store offset. Check if CVal falls within
2720 // that margin; if so (low part + CVal) can't overflow.
2721 const DataLayout &DL = CurDAG->getDataLayout();
2722 Align Alignment = commonAlignment(
2723 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2724 if ((CVal == 0 || Alignment > CVal) &&
2725 (!IsRV32Zdinx || commonAlignment(Alignment, CVal) > 4)) {
// Fold CVal into the global's offset and keep only the hi part as Base.
2726 int64_t CombinedOffset = CVal + GA->getOffset();
2727 Base = Base.getOperand(0);
2729 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2730 CombinedOffset, GA->getTargetFlags());
2731 return true;
2732 }
2733 }
2734 }
2735
2736 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2737 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2739 return true;
2740 }
2741 }
2742
2743 // Handle ADD with large immediates.
2744 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2745 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2746 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2747 "simm12 not already handled?");
2748
2749 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2750 // an ADDI for part of the offset and fold the rest into the load/store.
2751 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
2752 if (CVal >= -4096 && CVal <= (4094 - RV32ZdinxRange)) {
// Split as Adj (one ADDI) + (CVal - Adj) (folded simm12 offset).
2753 int64_t Adj = CVal < 0 ? -2048 : 2047;
2754 Base = SDValue(
2755 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2756 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
2757 0);
2758 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
2759 return true;
2760 }
2761
2762 // For larger immediates, we might be able to save one instruction from
2763 // constant materialization by folding the Lo12 bits of the immediate into
2764 // the address. We should only do this if the ADD is only used by loads and
2765 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2766 // separately with the full materialized immediate creating extra
2767 // instructions.
2768 if (isWorthFoldingAdd(Addr) &&
2769 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2770 Offset, /*IsPrefetch=*/false, RV32ZdinxRange)) {
2771 // Insert an ADD instruction with the materialized Hi52 bits.
2772 Base = SDValue(
2773 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2774 0);
2775 return true;
2776 }
2777 }
2778
// A bare constant pointer: fold it directly.
2779 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
2780 /*IsPrefetch=*/false, RV32ZdinxRange))
2781 return true;
2782
// Fallback that always succeeds: the whole expression as Base, offset 0.
2783 Base = Addr;
2784 Offset = CurDAG->getTargetConstant(0, DL, VT);
2785 return true;
2786}
2787
2788/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2789/// Offset should be all zeros.
2791 SDValue &Offset) {
2793 return true;
2794
2795 SDLoc DL(Addr);
2796 MVT VT = Addr.getSimpleValueType();
2797
2799 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2800 if (isInt<12>(CVal)) {
2801 Base = Addr.getOperand(0);
2802
2803 // Early-out if not a valid offset.
2804 if ((CVal & 0b11111) != 0) {
2805 Base = Addr;
2806 Offset = CurDAG->getTargetConstant(0, DL, VT);
2807 return true;
2808 }
2809
2810 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2811 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2813 return true;
2814 }
2815 }
2816
2817 // Handle ADD with large immediates.
2818 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2819 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2820 assert(!(isInt<12>(CVal) && isInt<12>(CVal)) &&
2821 "simm12 not already handled?");
2822
2823 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2824 // one instruction by folding adjustment (-2048 or 2016) into the address.
2825 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2826 int64_t Adj = CVal < 0 ? -2048 : 2016;
2827 int64_t AdjustedOffset = CVal - Adj;
2828 Base =
2830 RISCV::ADDI, DL, VT, Addr.getOperand(0),
2831 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
2832 0);
2834 return true;
2835 }
2836
2837 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2838 Offset, /*IsPrefetch=*/true)) {
2839 // Insert an ADD instruction with the materialized Hi52 bits.
2840 Base = SDValue(
2841 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2842 0);
2843 return true;
2844 }
2845 }
2846
2847 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
2848 /*IsPrefetch=*/true))
2849 return true;
2850
2851 Base = Addr;
2852 Offset = CurDAG->getTargetConstant(0, DL, VT);
2853 return true;
2854}
2855
2857 SDValue &Offset) {
2858 if (Addr.getOpcode() != ISD::ADD)
2859 return false;
2860
2861 if (isa<ConstantSDNode>(Addr.getOperand(1)))
2862 return false;
2863
2864 Base = Addr.getOperand(1);
2865 Offset = Addr.getOperand(0);
2866 return true;
2867}
2868
2870 SDValue &ShAmt) {
2871 ShAmt = N;
2872
2873 // Peek through zext.
2874 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2875 ShAmt = ShAmt.getOperand(0);
2876
2877 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2878 // amount. If there is an AND on the shift amount, we can bypass it if it
2879 // doesn't affect any of those bits.
2880 if (ShAmt.getOpcode() == ISD::AND &&
2881 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2882 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2883
2884 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2885 // mask that covers the bits needed to represent all shift amounts.
2886 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2887 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2888
2889 if (ShMask.isSubsetOf(AndMask)) {
2890 ShAmt = ShAmt.getOperand(0);
2891 } else {
2892 // SimplifyDemandedBits may have optimized the mask so try restoring any
2893 // bits that are known zero.
2894 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2895 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2896 return true;
2897 ShAmt = ShAmt.getOperand(0);
2898 }
2899 }
2900
2901 if (ShAmt.getOpcode() == ISD::ADD &&
2902 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2903 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2904 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2905 // to avoid the ADD.
2906 if (Imm != 0 && Imm % ShiftWidth == 0) {
2907 ShAmt = ShAmt.getOperand(0);
2908 return true;
2909 }
2910 } else if (ShAmt.getOpcode() == ISD::SUB &&
2911 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2912 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2913 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2914 // generate a NEG instead of a SUB of a constant.
2915 if (Imm != 0 && Imm % ShiftWidth == 0) {
2916 SDLoc DL(ShAmt);
2917 EVT VT = ShAmt.getValueType();
2918 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2919 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2920 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2921 ShAmt.getOperand(1));
2922 ShAmt = SDValue(Neg, 0);
2923 return true;
2924 }
2925 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2926 // to generate a NOT instead of a SUB of a constant.
2927 if (Imm % ShiftWidth == ShiftWidth - 1) {
2928 SDLoc DL(ShAmt);
2929 EVT VT = ShAmt.getValueType();
2931 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2932 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
2933 ShAmt = SDValue(Not, 0);
2934 return true;
2935 }
2936 }
2937
2938 return true;
2939}
2940
2941/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2942/// check for equality with 0. This function emits instructions that convert the
2943/// seteq/setne into something that can be compared with 0.
2944/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2945/// ISD::SETNE).
2947 SDValue &Val) {
2948 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2949 "Unexpected condition code!");
2950
2951 // We're looking for a setcc.
2952 if (N->getOpcode() != ISD::SETCC)
2953 return false;
2954
2955 // Must be an equality comparison.
2956 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2957 if (CCVal != ExpectedCCVal)
2958 return false;
2959
2960 SDValue LHS = N->getOperand(0);
2961 SDValue RHS = N->getOperand(1);
2962
2963 if (!LHS.getValueType().isScalarInteger())
2964 return false;
2965
2966 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
2967 if (isNullConstant(RHS)) {
2968 Val = LHS;
2969 return true;
2970 }
2971
2972 SDLoc DL(N);
2973
2974 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2975 int64_t CVal = C->getSExtValue();
2976 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2977 // non-zero otherwise.
2978 if (CVal == -2048) {
2979 Val = SDValue(
2981 RISCV::XORI, DL, N->getValueType(0), LHS,
2982 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
2983 0);
2984 return true;
2985 }
2986 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2987 // LHS is equal to the RHS and non-zero otherwise.
2988 if (isInt<12>(CVal) || CVal == 2048) {
2989 Val = SDValue(
2991 RISCV::ADDI, DL, N->getValueType(0), LHS,
2992 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
2993 0);
2994 return true;
2995 }
2996 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
2997 Val = SDValue(
2999 RISCV::BINVI, DL, N->getValueType(0), LHS,
3000 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3001 0);
3002 return true;
3003 }
3004 }
3005
3006 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3007 // equal and a non-zero value if they aren't.
3008 Val = SDValue(
3009 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3010 return true;
3011}
3012
3014 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3015 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3016 Val = N.getOperand(0);
3017 return true;
3018 }
3019
3020 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3021 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3022 return N;
3023
3024 SDValue N0 = N.getOperand(0);
3025 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3026 N.getConstantOperandVal(1) == ShiftAmt &&
3027 N0.getConstantOperandVal(1) == ShiftAmt)
3028 return N0.getOperand(0);
3029
3030 return N;
3031 };
3032
3033 MVT VT = N.getSimpleValueType();
3034 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3035 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3036 return true;
3037 }
3038
3039 return false;
3040}
3041
3043 if (N.getOpcode() == ISD::AND) {
3044 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3045 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3046 Val = N.getOperand(0);
3047 return true;
3048 }
3049 }
3050 MVT VT = N.getSimpleValueType();
3051 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3052 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3053 Val = N;
3054 return true;
3055 }
3056
3057 return false;
3058}
3059
3060/// Look for various patterns that can be done with a SHL that can be folded
3061/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3062/// SHXADD we are trying to match.
3064 SDValue &Val) {
3065 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3066 SDValue N0 = N.getOperand(0);
3067
3068 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3069 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3070 isa<ConstantSDNode>(N0.getOperand(1))) {
3071 uint64_t Mask = N.getConstantOperandVal(1);
3072 unsigned C2 = N0.getConstantOperandVal(1);
3073
3074 unsigned XLen = Subtarget->getXLen();
3075 if (LeftShift)
3076 Mask &= maskTrailingZeros<uint64_t>(C2);
3077 else
3078 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3079
3080 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3081 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3082 // followed by a SHXADD with c3 for the X amount.
3083 if (isShiftedMask_64(Mask)) {
3084 unsigned Leading = XLen - llvm::bit_width(Mask);
3085 unsigned Trailing = llvm::countr_zero(Mask);
3086 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
3087 SDLoc DL(N);
3088 EVT VT = N.getValueType();
3090 RISCV::SRLI, DL, VT, N0.getOperand(0),
3091 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
3092 0);
3093 return true;
3094 }
3095 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3096 // leading zeros and c3 trailing zeros. We can use an SRLI by C3
3097 // followed by a SHXADD using c3 for the X amount.
3098 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
3099 SDLoc DL(N);
3100 EVT VT = N.getValueType();
3101 Val = SDValue(
3103 RISCV::SRLI, DL, VT, N0.getOperand(0),
3104 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
3105 0);
3106 return true;
3107 }
3108 }
3109 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3110 isa<ConstantSDNode>(N0.getOperand(1))) {
3111 uint64_t Mask = N.getConstantOperandVal(1);
3112 unsigned C2 = N0.getConstantOperandVal(1);
3113
3114 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3115 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3116 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3117 // the X amount.
3118 if (isShiftedMask_64(Mask)) {
3119 unsigned XLen = Subtarget->getXLen();
3120 unsigned Leading = XLen - llvm::bit_width(Mask);
3121 unsigned Trailing = llvm::countr_zero(Mask);
3122 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3123 SDLoc DL(N);
3124 EVT VT = N.getValueType();
3126 RISCV::SRAI, DL, VT, N0.getOperand(0),
3127 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
3128 0);
3130 RISCV::SRLI, DL, VT, Val,
3131 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
3132 0);
3133 return true;
3134 }
3135 }
3136 }
3137 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3138 (LeftShift || N.getOpcode() == ISD::SRL) &&
3139 isa<ConstantSDNode>(N.getOperand(1))) {
3140 SDValue N0 = N.getOperand(0);
3141 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3142 isa<ConstantSDNode>(N0.getOperand(1))) {
3143 uint64_t Mask = N0.getConstantOperandVal(1);
3144 if (isShiftedMask_64(Mask)) {
3145 unsigned C1 = N.getConstantOperandVal(1);
3146 unsigned XLen = Subtarget->getXLen();
3147 unsigned Leading = XLen - llvm::bit_width(Mask);
3148 unsigned Trailing = llvm::countr_zero(Mask);
3149 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3150 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3151 if (LeftShift && Leading == 32 && Trailing > 0 &&
3152 (Trailing + C1) == ShAmt) {
3153 SDLoc DL(N);
3154 EVT VT = N.getValueType();
3156 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3157 CurDAG->getTargetConstant(Trailing, DL, VT)),
3158 0);
3159 return true;
3160 }
3161 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3162 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3163 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3164 (Trailing - C1) == ShAmt) {
3165 SDLoc DL(N);
3166 EVT VT = N.getValueType();
3168 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3169 CurDAG->getTargetConstant(Trailing, DL, VT)),
3170 0);
3171 return true;
3172 }
3173 }
3174 }
3175 }
3176
3177 return false;
3178}
3179
3180/// Look for various patterns that can be done with a SHL that can be folded
3181/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3182/// SHXADD_UW we are trying to match.
3184 SDValue &Val) {
3185 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3186 N.hasOneUse()) {
3187 SDValue N0 = N.getOperand(0);
3188 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3189 N0.hasOneUse()) {
3190 uint64_t Mask = N.getConstantOperandVal(1);
3191 unsigned C2 = N0.getConstantOperandVal(1);
3192
3193 Mask &= maskTrailingZeros<uint64_t>(C2);
3194
3195 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3196 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3197 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3198 if (isShiftedMask_64(Mask)) {
3199 unsigned Leading = llvm::countl_zero(Mask);
3200 unsigned Trailing = llvm::countr_zero(Mask);
3201 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3202 SDLoc DL(N);
3203 EVT VT = N.getValueType();
3205 RISCV::SLLI, DL, VT, N0.getOperand(0),
3206 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3207 0);
3208 return true;
3209 }
3210 }
3211 }
3212 }
3213
3214 return false;
3215}
3216
3218 if (!isa<ConstantSDNode>(N))
3219 return false;
3220 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3221
3222 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
3223 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
3224 return false;
3225
3226 // Abandon this transform if the constant is needed elsewhere.
3227 for (const SDNode *U : N->users()) {
3228 switch (U->getOpcode()) {
3229 case ISD::AND:
3230 case ISD::OR:
3231 case ISD::XOR:
3232 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
3233 return false;
3234 break;
3236 if (!Subtarget->hasStdExtZvkb())
3237 return false;
3238 if (!all_of(U->users(), [](const SDNode *V) {
3239 return V->getOpcode() == ISD::AND ||
3240 V->getOpcode() == RISCVISD::AND_VL;
3241 }))
3242 return false;
3243 break;
3244 default:
3245 return false;
3246 }
3247 }
3248
3249 // For 64-bit constants, the instruction sequences get complex,
3250 // so we select inverted only if it's cheaper.
3251 if (!isInt<32>(Imm)) {
3252 int OrigImmCost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
3253 /*CompressionCost=*/true);
3254 int NegImmCost = RISCVMatInt::getIntMatCost(APInt(64, ~Imm), 64, *Subtarget,
3255 /*CompressionCost=*/true);
3256 if (OrigImmCost <= NegImmCost)
3257 return false;
3258 }
3259
3260 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
3261 return true;
3262}
3263
3264static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3265 unsigned Bits,
3266 const TargetInstrInfo *TII) {
3267 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3268
3269 if (!MCOpcode)
3270 return false;
3271
3272 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3273 const uint64_t TSFlags = MCID.TSFlags;
3274 if (!RISCVII::hasSEWOp(TSFlags))
3275 return false;
3276 assert(RISCVII::hasVLOp(TSFlags));
3277
3278 unsigned ChainOpIdx = User->getNumOperands() - 1;
3279 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3280 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3281 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
3282 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3283
3284 if (UserOpNo == VLIdx)
3285 return false;
3286
3287 auto NumDemandedBits =
3288 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3289 return NumDemandedBits && Bits >= *NumDemandedBits;
3290}
3291
3292// Return true if all users of this SDNode* only consume the lower \p Bits.
3293// This can be used to form W instructions for add/sub/mul/shl even when the
3294// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3295// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3296// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3297// the add/sub/mul/shl to become non-W instructions. By checking the users we
3298// may be able to use a W instruction and CSE with the other instruction if
3299// this has happened. We could try to detect that the CSE opportunity exists
3300// before doing this, but that would be more complicated.
3302 const unsigned Depth) const {
3303 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3304 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3305 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3306 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3307 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3308 isa<ConstantSDNode>(Node) || Depth != 0) &&
3309 "Unexpected opcode");
3310
3312 return false;
3313
3314 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3315 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3316 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3317 return false;
3318
3319 for (SDUse &Use : Node->uses()) {
3320 SDNode *User = Use.getUser();
3321 // Users of this node should have already been instruction selected
3322 if (!User->isMachineOpcode())
3323 return false;
3324
3325 // TODO: Add more opcodes?
3326 switch (User->getMachineOpcode()) {
3327 default:
3329 break;
3330 return false;
3331 case RISCV::ADDW:
3332 case RISCV::ADDIW:
3333 case RISCV::SUBW:
3334 case RISCV::MULW:
3335 case RISCV::SLLW:
3336 case RISCV::SLLIW:
3337 case RISCV::SRAW:
3338 case RISCV::SRAIW:
3339 case RISCV::SRLW:
3340 case RISCV::SRLIW:
3341 case RISCV::DIVW:
3342 case RISCV::DIVUW:
3343 case RISCV::REMW:
3344 case RISCV::REMUW:
3345 case RISCV::ROLW:
3346 case RISCV::RORW:
3347 case RISCV::RORIW:
3348 case RISCV::CLZW:
3349 case RISCV::CTZW:
3350 case RISCV::CPOPW:
3351 case RISCV::SLLI_UW:
3352 case RISCV::FMV_W_X:
3353 case RISCV::FCVT_H_W:
3354 case RISCV::FCVT_H_W_INX:
3355 case RISCV::FCVT_H_WU:
3356 case RISCV::FCVT_H_WU_INX:
3357 case RISCV::FCVT_S_W:
3358 case RISCV::FCVT_S_W_INX:
3359 case RISCV::FCVT_S_WU:
3360 case RISCV::FCVT_S_WU_INX:
3361 case RISCV::FCVT_D_W:
3362 case RISCV::FCVT_D_W_INX:
3363 case RISCV::FCVT_D_WU:
3364 case RISCV::FCVT_D_WU_INX:
3365 case RISCV::TH_REVW:
3366 case RISCV::TH_SRRIW:
3367 if (Bits >= 32)
3368 break;
3369 return false;
3370 case RISCV::SLL:
3371 case RISCV::SRA:
3372 case RISCV::SRL:
3373 case RISCV::ROL:
3374 case RISCV::ROR:
3375 case RISCV::BSET:
3376 case RISCV::BCLR:
3377 case RISCV::BINV:
3378 // Shift amount operands only use log2(Xlen) bits.
3379 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
3380 break;
3381 return false;
3382 case RISCV::SLLI:
3383 // SLLI only uses the lower (XLen - ShAmt) bits.
3384 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
3385 break;
3386 return false;
3387 case RISCV::ANDI:
3388 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3389 break;
3390 goto RecCheck;
3391 case RISCV::ORI: {
3392 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3393 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3394 break;
3395 [[fallthrough]];
3396 }
3397 case RISCV::AND:
3398 case RISCV::OR:
3399 case RISCV::XOR:
3400 case RISCV::XORI:
3401 case RISCV::ANDN:
3402 case RISCV::ORN:
3403 case RISCV::XNOR:
3404 case RISCV::SH1ADD:
3405 case RISCV::SH2ADD:
3406 case RISCV::SH3ADD:
3407 RecCheck:
3408 if (hasAllNBitUsers(User, Bits, Depth + 1))
3409 break;
3410 return false;
3411 case RISCV::SRLI: {
3412 unsigned ShAmt = User->getConstantOperandVal(1);
3413 // If we are shifting right by less than Bits, and users don't demand any
3414 // bits that were shifted into [Bits-1:0], then we can consider this as an
3415 // N-Bit user.
3416 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3417 break;
3418 return false;
3419 }
3420 case RISCV::SEXT_B:
3421 case RISCV::PACKH:
3422 if (Bits >= 8)
3423 break;
3424 return false;
3425 case RISCV::SEXT_H:
3426 case RISCV::FMV_H_X:
3427 case RISCV::ZEXT_H_RV32:
3428 case RISCV::ZEXT_H_RV64:
3429 case RISCV::PACKW:
3430 if (Bits >= 16)
3431 break;
3432 return false;
3433 case RISCV::PACK:
3434 if (Bits >= (Subtarget->getXLen() / 2))
3435 break;
3436 return false;
3437 case RISCV::ADD_UW:
3438 case RISCV::SH1ADD_UW:
3439 case RISCV::SH2ADD_UW:
3440 case RISCV::SH3ADD_UW:
3441 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3442 // 32 bits.
3443 if (Use.getOperandNo() == 0 && Bits >= 32)
3444 break;
3445 return false;
3446 case RISCV::SB:
3447 if (Use.getOperandNo() == 0 && Bits >= 8)
3448 break;
3449 return false;
3450 case RISCV::SH:
3451 if (Use.getOperandNo() == 0 && Bits >= 16)
3452 break;
3453 return false;
3454 case RISCV::SW:
3455 if (Use.getOperandNo() == 0 && Bits >= 32)
3456 break;
3457 return false;
3458 }
3459 }
3460
3461 return true;
3462}
3463
3464// Select a constant that can be represented as (sign_extend(imm5) << imm2).
3466 SDValue &Shl2) {
3467 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3468 int64_t Offset = C->getSExtValue();
3469 unsigned Shift;
3470 for (Shift = 0; Shift < 4; Shift++)
3471 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3472 break;
3473
3474 // Constant cannot be encoded.
3475 if (Shift == 4)
3476 return false;
3477
3478 EVT Ty = N->getValueType(0);
3479 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3480 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3481 return true;
3482 }
3483
3484 return false;
3485}
3486
3487// Select VL as a 5 bit immediate or a value that will become a register. This
3488// allows us to choose between VSETIVLI or VSETVLI later.
3490 auto *C = dyn_cast<ConstantSDNode>(N);
3491 if (C && isUInt<5>(C->getZExtValue())) {
3492 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3493 N->getValueType(0));
3494 } else if (C && C->isAllOnes()) {
3495 // Treat all ones as VLMax.
3497 N->getValueType(0));
3498 } else if (isa<RegisterSDNode>(N) &&
3499 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3500 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3501 // as the register class. Convert X0 to a special immediate to pass the
3502 // MachineVerifier. This is recognized specially by the vsetvli insertion
3503 // pass.
3505 N->getValueType(0));
3506 } else {
3507 VL = N;
3508 }
3509
3510 return true;
3511}
3512
3514 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3515 if (!N.getOperand(0).isUndef())
3516 return SDValue();
3517 N = N.getOperand(1);
3518 }
3519 SDValue Splat = N;
3520 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3521 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3522 !Splat.getOperand(0).isUndef())
3523 return SDValue();
3524 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3525 return Splat;
3526}
3527
3530 if (!Splat)
3531 return false;
3532
3533 SplatVal = Splat.getOperand(1);
3534 return true;
3535}
3536
3538 SelectionDAG &DAG,
3539 const RISCVSubtarget &Subtarget,
3540 std::function<bool(int64_t)> ValidateImm,
3541 bool Decrement = false) {
3543 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3544 return false;
3545
3546 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3547 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3548 "Unexpected splat operand type");
3549
3550 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3551 // type is wider than the resulting vector element type: an implicit
3552 // truncation first takes place. Therefore, perform a manual
3553 // truncation/sign-extension in order to ignore any truncated bits and catch
3554 // any zero-extended immediate.
3555 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3556 // sign-extending to (XLenVT -1).
3557 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3558
3559 int64_t SplatImm = SplatConst.getSExtValue();
3560
3561 if (!ValidateImm(SplatImm))
3562 return false;
3563
3564 if (Decrement)
3565 SplatImm -= 1;
3566
3567 SplatVal =
3568 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3569 return true;
3570}
3571
3573 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3574 [](int64_t Imm) { return isInt<5>(Imm); });
3575}
3576
3578 return selectVSplatImmHelper(
3579 N, SplatVal, *CurDAG, *Subtarget,
3580 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
3581 /*Decrement=*/true);
3582}
3583
3585 SDValue &SplatVal) {
3586 return selectVSplatImmHelper(
3587 N, SplatVal, *CurDAG, *Subtarget,
3588 [](int64_t Imm) {
3589 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3590 },
3591 /*Decrement=*/true);
3592}
3593
3595 SDValue &SplatVal) {
3596 return selectVSplatImmHelper(
3597 N, SplatVal, *CurDAG, *Subtarget,
3598 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3599}
3600
3602 auto IsExtOrTrunc = [](SDValue N) {
3603 switch (N->getOpcode()) {
3604 case ISD::SIGN_EXTEND:
3605 case ISD::ZERO_EXTEND:
3606 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3607 // inactive elements will be undef.
3609 case RISCVISD::VSEXT_VL:
3610 case RISCVISD::VZEXT_VL:
3611 return true;
3612 default:
3613 return false;
3614 }
3615 };
3616
3617 // We can have multiple nested nodes, so unravel them all if needed.
3618 while (IsExtOrTrunc(N)) {
3619 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3620 return false;
3621 N = N->getOperand(0);
3622 }
3623
3624 return selectVSplat(N, SplatVal);
3625}
3626
3628 // Allow bitcasts from XLenVT -> FP.
3629 if (N.getOpcode() == ISD::BITCAST &&
3630 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
3631 Imm = N.getOperand(0);
3632 return true;
3633 }
3634 // Allow moves from XLenVT to FP.
3635 if (N.getOpcode() == RISCVISD::FMV_H_X ||
3636 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
3637 Imm = N.getOperand(0);
3638 return true;
3639 }
3640
3641 // Otherwise, look for FP constants that can materialized with scalar int.
3642 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3643 if (!CFP)
3644 return false;
3645 const APFloat &APF = CFP->getValueAPF();
3646 // td can handle +0.0 already.
3647 if (APF.isPosZero())
3648 return false;
3649
3650 MVT VT = CFP->getSimpleValueType(0);
3651
3652 MVT XLenVT = Subtarget->getXLenVT();
3653 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3654 assert(APF.isNegZero() && "Unexpected constant.");
3655 return false;
3656 }
3657 SDLoc DL(N);
3658 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3659 *Subtarget);
3660 return true;
3661}
3662
3664 SDValue &Imm) {
3665 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3666 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3667
3668 if (!isInt<5>(ImmVal))
3669 return false;
3670
3671 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
3672 Subtarget->getXLenVT());
3673 return true;
3674 }
3675
3676 return false;
3677}
3678
// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
// Returns true if \p N was replaced (callers must not use N afterwards).
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  // The value being sign-extended must itself be a selected machine node.
  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD: Opc = RISCV::ADDW; break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB: Opc = RISCV::SUBW; break;
    case RISCV::MUL: Opc = RISCV::MULW; break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    // Build the W form with the same operands and replace the sext.w.
    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                               N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::PACKW:
  case RISCV::TH_MULAW:
  case RISCV::TH_MULAH:
  case RISCV::TH_MULSW:
  case RISCV::TH_MULSH:
    if (N0.getValueType() == MVT::i32)
      break;

    // Result is already sign extended just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}
3747
3748static bool usesAllOnesMask(SDValue MaskOp) {
3749 const auto IsVMSet = [](unsigned Opc) {
3750 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3751 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3752 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3753 Opc == RISCV::PseudoVMSET_M_B8;
3754 };
3755
3756 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3757 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3758 // assume that it's all-ones? Same applies to its VL.
3759 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
3760}
3761
3762static bool isImplicitDef(SDValue V) {
3763 if (!V.isMachineOpcode())
3764 return false;
3765 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
3766 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
3767 if (!isImplicitDef(V.getOperand(I)))
3768 return false;
3769 return true;
3770 }
3771 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3772}
3773
3774// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3775// corresponding "unmasked" pseudo versions.
3776bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3778 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3779 if (!I)
3780 return false;
3781
3782 unsigned MaskOpIdx = I->MaskOpIdx;
3783 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
3784 return false;
3785
3786 // There are two classes of pseudos in the table - compares and
3787 // everything else. See the comment on RISCVMaskedPseudo for details.
3788 const unsigned Opc = I->UnmaskedPseudo;
3789 const MCInstrDesc &MCID = TII->get(Opc);
3790 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
3791
3792 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3793 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
3794
3797 "Masked and unmasked pseudos are inconsistent");
3798 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
3799 "Unexpected pseudo structure");
3800 assert(!(HasPassthru && !MaskedHasPassthru) &&
3801 "Unmasked pseudo has passthru but masked pseudo doesn't?");
3802
3804 // Skip the passthru operand at index 0 if the unmasked don't have one.
3805 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
3806 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
3807 // Skip the mask
3808 SDValue Op = N->getOperand(I);
3809 if (I == MaskOpIdx)
3810 continue;
3811 Ops.push_back(Op);
3812 }
3813
3815 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3816
3817 if (!N->memoperands_empty())
3818 CurDAG->setNodeMemRefs(Result, N->memoperands());
3819
3820 Result->setFlags(N->getFlags());
3821 ReplaceUses(N, Result);
3822
3823 return true;
3824}
3825
3826static bool IsVMerge(SDNode *N) {
3827 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3828}
3829
3830// Try to fold away VMERGE_VVM instructions into their true operands:
3831//
3832// %true = PseudoVADD_VV ...
3833// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3834// ->
3835// %x = PseudoVADD_VV_MASK %false, ..., %mask
3836//
3837// We can only fold if vmerge's passthru operand, vmerge's false operand and
3838// %true's passthru operand (if it has one) are the same. This is because we
3839// have to consolidate them into one passthru operand in the result.
3840//
3841// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3842// mask is all ones.
3843//
3844// The resulting VL is the minimum of the two VLs.
3845//
3846// The resulting policy is the effective policy the vmerge would have had,
3847// i.e. whether or not it's passthru operand was implicit-def.
3848bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3849 SDValue Passthru, False, True, VL, Mask;
3850 assert(IsVMerge(N));
3851 Passthru = N->getOperand(0);
3852 False = N->getOperand(1);
3853 True = N->getOperand(2);
3854 Mask = N->getOperand(3);
3855 VL = N->getOperand(4);
3856
3857 // If the EEW of True is different from vmerge's SEW, then we can't fold.
3858 if (True.getSimpleValueType() != N->getSimpleValueType(0))
3859 return false;
3860
3861 // We require that either passthru and false are the same, or that passthru
3862 // is undefined.
3863 if (Passthru != False && !isImplicitDef(Passthru))
3864 return false;
3865
3866 assert(True.getResNo() == 0 &&
3867 "Expect True is the first output of an instruction.");
3868
3869 // Need N is the exactly one using True.
3870 if (!True.hasOneUse())
3871 return false;
3872
3873 if (!True.isMachineOpcode())
3874 return false;
3875
3876 unsigned TrueOpc = True.getMachineOpcode();
3877 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3878 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3879 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3880
3882 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3883 if (!Info)
3884 return false;
3885
3886 // If True has a passthru operand then it needs to be the same as vmerge's
3887 // False, since False will be used for the result's passthru operand.
3888 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3889 SDValue PassthruOpTrue = True->getOperand(0);
3890 if (False != PassthruOpTrue)
3891 return false;
3892 }
3893
3894 // Skip if True has side effect.
3895 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3896 return false;
3897
3898 unsigned TrueChainOpIdx = True.getNumOperands() - 1;
3899 bool HasChainOp =
3900 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3901
3902 if (HasChainOp) {
3903 // Avoid creating cycles in the DAG. We must ensure that none of the other
3904 // operands depend on True through it's Chain.
3905 SmallVector<const SDNode *, 4> LoopWorklist;
3907 LoopWorklist.push_back(False.getNode());
3908 LoopWorklist.push_back(Mask.getNode());
3909 LoopWorklist.push_back(VL.getNode());
3910 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3911 return false;
3912 }
3913
3914 // The vector policy operand may be present for masked intrinsics
3915 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3916 unsigned TrueVLIndex =
3917 True.getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
3918 SDValue TrueVL = True.getOperand(TrueVLIndex);
3919 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3920
3921 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3922 if (LHS == RHS)
3923 return LHS;
3924 if (isAllOnesConstant(LHS))
3925 return RHS;
3926 if (isAllOnesConstant(RHS))
3927 return LHS;
3928 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3929 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3930 if (!CLHS || !CRHS)
3931 return SDValue();
3932 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3933 };
3934
3935 // Because N and True must have the same passthru operand (or True's operand
3936 // is implicit_def), the "effective" body is the minimum of their VLs.
3937 SDValue OrigVL = VL;
3938 VL = GetMinVL(TrueVL, VL);
3939 if (!VL)
3940 return false;
3941
3942 // Some operations produce different elementwise results depending on the
3943 // active elements, like viota.m or vredsum. This transformation is illegal
3944 // for these if we change the active elements (i.e. mask or VL).
3945 const MCInstrDesc &TrueBaseMCID = TII->get(RISCV::getRVVMCOpcode(TrueOpc));
3946 if (RISCVII::elementsDependOnVL(TrueBaseMCID.TSFlags) && (TrueVL != VL))
3947 return false;
3948 if (RISCVII::elementsDependOnMask(TrueBaseMCID.TSFlags) &&
3949 (Mask && !usesAllOnesMask(Mask)))
3950 return false;
3951
3952 // Make sure it doesn't raise any observable fp exceptions, since changing the
3953 // active elements will affect how fflags is set.
3954 if (mayRaiseFPException(True.getNode()) && !True->getFlags().hasNoFPExcept())
3955 return false;
3956
3957 SDLoc DL(N);
3958
3959 unsigned MaskedOpc = Info->MaskedPseudo;
3960#ifndef NDEBUG
3961 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3963 "Expected instructions with mask have policy operand.");
3964 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3965 MCOI::TIED_TO) == 0 &&
3966 "Expected instructions with mask have a tied dest.");
3967#endif
3968
3969 // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
3970 // operand is undefined.
3971 //
3972 // However, if the VL became smaller than what the vmerge had originally, then
3973 // elements past VL that were previously in the vmerge's body will have moved
3974 // to the tail. In that case we always need to use tail undisturbed to
3975 // preserve them.
3976 bool MergeVLShrunk = VL != OrigVL;
3977 uint64_t Policy = (isImplicitDef(Passthru) && !MergeVLShrunk)
3979 : /*TUMU*/ 0;
3980 SDValue PolicyOp =
3981 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3982
3983
3985 Ops.push_back(False);
3986
3987 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3988 const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
3989 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3990
3991 Ops.push_back(Mask);
3992
3993 // For unmasked "VOp" with rounding mode operand, that is interfaces like
3994 // (..., rm, vl) or (..., rm, vl, policy).
3995 // Its masked version is (..., vm, rm, vl, policy).
3996 // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
3997 if (HasRoundingMode)
3998 Ops.push_back(True->getOperand(TrueVLIndex - 1));
3999
4000 Ops.append({VL, SEW, PolicyOp});
4001
4002 // Result node should have chain operand of True.
4003 if (HasChainOp)
4004 Ops.push_back(True.getOperand(TrueChainOpIdx));
4005
4007 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
4008 Result->setFlags(True->getFlags());
4009
4010 if (!cast<MachineSDNode>(True)->memoperands_empty())
4011 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
4012
4013 // Replace vmerge.vvm node by Result.
4014 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
4015
4016 // Replace another value of True. E.g. chain and VL.
4017 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
4018 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
4019
4020 return true;
4021}
4022
4023bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
4024 bool MadeChange = false;
4026
4027 while (Position != CurDAG->allnodes_begin()) {
4028 SDNode *N = &*--Position;
4029 if (N->use_empty() || !N->isMachineOpcode())
4030 continue;
4031
4032 if (IsVMerge(N))
4033 MadeChange |= performCombineVMergeAndVOps(N);
4034 }
4035 return MadeChange;
4036}
4037
4038/// If our passthru is an implicit_def, use noreg instead. This side
4039/// steps issues with MachineCSE not being able to CSE expressions with
4040/// IMPLICIT_DEF operands while preserving the semantic intent. See
4041/// pr64282 for context. Note that this transform is the last one
4042/// performed at ISEL DAG to DAG.
4043bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4044 bool MadeChange = false;
4046
4047 while (Position != CurDAG->allnodes_begin()) {
4048 SDNode *N = &*--Position;
4049 if (N->use_empty() || !N->isMachineOpcode())
4050 continue;
4051
4052 const unsigned Opc = N->getMachineOpcode();
4053 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4055 !isImplicitDef(N->getOperand(0)))
4056 continue;
4057
4059 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4060 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4061 SDValue Op = N->getOperand(I);
4062 Ops.push_back(Op);
4063 }
4064
4066 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4067 Result->setFlags(N->getFlags());
4068 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4069 ReplaceUses(N, Result);
4070 MadeChange = true;
4071 }
4072 return MadeChange;
4073}
4074
4075
4076// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4077// for instruction scheduling.
4079 CodeGenOptLevel OptLevel) {
4080 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4081}
4082
4084
4086 CodeGenOptLevel OptLevel)
4088 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
4089
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
static bool usesAllOnesMask(SDValue MaskOp)
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, int64_t Imm, const RISCVSubtarget &Subtarget)
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, bool IsPrefetch=false, bool IsRV32Zdinx=false)
#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)
static bool isWorthFoldingAdd(SDValue Add)
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq &Seq)
static bool isImplicitDef(SDValue V)
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, std::function< bool(int64_t)> ValidateImm, bool Decrement=false)
static unsigned getSegInstNF(unsigned Intrinsic)
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, unsigned Bits, const TargetInstrInfo *TII)
#define INST_ALL_NF_CASE_WITH_FF(NAME)
#define CASE_VMSLT_OPCODES(lmulenum, suffix)
static cl::opt< bool > UsePseudoMovImm("riscv-use-rematerializable-movimm", cl::Hidden, cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " "constant materialization"), cl::init(false))
static SDValue findVSplat(SDValue N)
#define INST_ALL_NF_CASE(NAME)
static bool IsVMerge(SDNode *N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define PASS_NAME
Value * RHS
Value * LHS
bool isZero() const
Definition: APFloat.h:1441
APInt bitcastToAPInt() const
Definition: APFloat.h:1351
bool isPosZero() const
Definition: APFloat.h:1456
bool isNegZero() const
Definition: APFloat.h:1457
Class for arbitrary precision integers.
Definition: APInt.h:78
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
int64_t getSExtValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
This class is used to form a handle around another node that is persistent and is updated across invocations of replaceAllUsesWith on its operand.
static StringRef getMemConstraintName(ConstraintCode C)
Definition: InlineAsm.h:467
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by other flags.
Definition: MCInstrDesc.h:463
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
Machine Value Type.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
void setFlags(Flags f)
Bitwise OR the current flags with the given flags.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TargetMachine, CodeGenOptLevel OptLevel)
bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset, bool IsRV32Zdinx=false)
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD_UW.
bool hasAllNBitUsers(SDNode *Node, unsigned Bits, const unsigned Depth=0) const
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the least significant 5 bits of Offset should be all zeros.
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset)
void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
void selectVLSEGFF(SDNode *Node, unsigned NF, bool IsMasked)
bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2)
void selectSF_VC_X_SE(SDNode *Node)
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal)
bool hasAllHUsers(SDNode *Node) const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal)
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset)
bool hasAllWUsers(SDNode *Node) const
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool selectInvLogicImm(SDValue N, SDValue &Val)
void Select(SDNode *Node) override
Main hook for targets to transform nodes into machine nodes.
bool selectVSplat(SDValue N, SDValue &SplatVal)
void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, const SDLoc &DL, unsigned CurOp, bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl< SDValue > &Operands, bool IsLoad=false, MVT *IndexVT=nullptr)
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool selectScalarFPAsInt(SDValue N, SDValue &Imm)
bool hasAllBUsers(SDNode *Node) const
void selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool tryShrinkShlLogicImm(SDNode *Node)
void selectVSETVLI(SDNode *Node)
bool selectVLOp(SDValue N, SDValue &VL)
bool trySignedBitfieldExtract(SDNode *Node)
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal)
void selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal)
void selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt)
bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD.
bool tryIndexedLoad(SDNode *Node)
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale)
bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal)
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
unsigned getXLen() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static unsigned getRegClassIDForVecVT(MVT VT)
static RISCVII::VLMUL getLMUL(MVT VT)
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
const TargetLowering * TLI
MachineFunction * MF
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const
IsProfitableToFold - Returns true if it's profitable to fold the specific operand node N of U during ...
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOptLevel OptLevel, bool IgnoreChains=false)
IsLegalToFold - Returns true if the specific operand node N of U can be folded during instruction sel...
bool mayRaiseFPException(SDNode *Node) const
Return whether the node may raise an FP exception.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:751
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:577
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:458
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:557
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:558
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:828
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:756
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:713
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:701
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:586
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
ilist< SDNode >::iterator allnodes_iterator
Definition: SelectionDAG.h:560
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:573
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
TargetInstrInfo - Interface to description of machine instruction set.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:64
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1320
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1110
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1300
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1316
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1567
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1618
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1663
static bool hasRoundModeOp(uint64_t TSFlags)
static bool hasVLOp(uint64_t TSFlags)
static bool elementsDependOnMask(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool elementsDependOnVL(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
@ SplitF64
Turns a f64 into a pair of i32s.
@ BuildPairF64
Turns a pair of i32s into an f64.
@ BuildGPRPair
Turn a pair of i<xlen>s into an even-odd register pair (untyped).
@ SplitGPRPair
Turn an even-odd register pair (untyped) into a pair of i<xlen>s.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul)
unsigned encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic)
std::optional< unsigned > getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
static const MachineMemOperand::Flags MONontemporalBit1
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:256
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:297
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:347
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:286
unsigned M1(unsigned Val)
Definition: VE.h:376
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:341
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:274
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
@ Add
Sum of integers.
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:582
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
bool hasNoFPExcept() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.