1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
44STATISTIC(NumTailCalls, "Number of tail calls");
45
54
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(false));
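// Note (illustrative): as cl::opt flags, the options above can be set on the
// llc command line, or via -mllvm when driving clang, e.g.
//   llc -mtriple=loongarch64 -loongarch-check-zero-division ...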
77
78 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
123 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
124 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
125 setOperationAction(ISD::VASTART, MVT::Other, Custom);
126 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
127
128 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
135 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
140 // we know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
158 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
159 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
160 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
177 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
188
192 Custom);
193 setOperationAction(ISD::LROUND, MVT::i32, Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
205 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
206 }
207
208 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
209
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
225 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
227 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
228 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
229 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
230 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
235 setOperationAction(ISD::FSIN, MVT::f32, Expand);
236 setOperationAction(ISD::FCOS, MVT::f32, Expand);
237 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
238 setOperationAction(ISD::FPOW, MVT::f32, Expand);
240 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
242 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
244 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
245 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit()) {
249 setOperationAction(ISD::FRINT, MVT::f32, Legal);
250 setOperationAction(ISD::FLOG2, MVT::f32, Legal);
251 }
252
253 if (!Subtarget.hasBasicD()) {
255 if (Subtarget.is64Bit()) {
258 }
259 }
260 }
261
262 // Set operations for 'D' feature.
263
264 if (Subtarget.hasBasicD()) {
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
266 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
267 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
269 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
270 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
271 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
272
275 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
279 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
280 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
281 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
283 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
285 setOperationAction(ISD::FSIN, MVT::f64, Expand);
286 setOperationAction(ISD::FCOS, MVT::f64, Expand);
287 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
288 setOperationAction(ISD::FPOW, MVT::f64, Expand);
290 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
291 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
292 Subtarget.isSoftFPABI() ? LibCall : Custom);
293 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
294 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
295 Subtarget.isSoftFPABI() ? LibCall : Custom);
296
297 if (Subtarget.is64Bit()) {
298 setOperationAction(ISD::FRINT, MVT::f64, Legal);
299 setOperationAction(ISD::FLOG2, MVT::f64, Legal);
300 }
301 }
302
303 // Set operations for 'LSX' feature.
304
305 if (Subtarget.hasExtLSX()) {
307 // Expand all truncating stores and extending loads.
308 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
309 setTruncStoreAction(VT, InnerVT, Expand);
312 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
313 }
314 // By default everything must be expanded. Then we will selectively turn
315 // on ones that can be effectively codegen'd.
316 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
318 }
319
320 for (MVT VT : LSXVTs) {
321 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
322 setOperationAction(ISD::BITCAST, VT, Legal);
324
328
333 }
334 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
337 Legal);
339 VT, Legal);
346 Expand);
358 }
359 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
361 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
363 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
366 }
367 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
371 setOperationAction(ISD::FSQRT, VT, Legal);
372 setOperationAction(ISD::FNEG, VT, Legal);
373 setOperationAction(ISD::FLOG2, VT, Legal);
376 VT, Expand);
378 setOperationAction(ISD::FCEIL, VT, Legal);
379 setOperationAction(ISD::FFLOOR, VT, Legal);
380 setOperationAction(ISD::FTRUNC, VT, Legal);
381 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
382 setOperationAction(ISD::FMINNUM, VT, Legal);
383 setOperationAction(ISD::FMAXNUM, VT, Legal);
384 }
386 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
387 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
388 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
389 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
390
391 for (MVT VT :
392 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
393 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
395 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
396 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
397 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
398 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
399 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
400 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
401 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
402 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
403 }
404 }
405
406 // Set operations for 'LASX' feature.
407
408 if (Subtarget.hasExtLASX()) {
409 for (MVT VT : LASXVTs) {
410 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
411 setOperationAction(ISD::BITCAST, VT, Legal);
413
419
423 }
424 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
427 Legal);
429 VT, Legal);
436 Expand);
445 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
449 }
450 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
452 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
454 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
457 }
458 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
462 setOperationAction(ISD::FSQRT, VT, Legal);
463 setOperationAction(ISD::FNEG, VT, Legal);
464 setOperationAction(ISD::FLOG2, VT, Legal);
467 VT, Expand);
469 setOperationAction(ISD::FCEIL, VT, Legal);
470 setOperationAction(ISD::FFLOOR, VT, Legal);
471 setOperationAction(ISD::FTRUNC, VT, Legal);
472 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
473 setOperationAction(ISD::FMINNUM, VT, Legal);
474 setOperationAction(ISD::FMAXNUM, VT, Legal);
475 }
476 }
477
478 // Set DAG combine for LA32 and LA64.
479
484
485 // Set DAG combine for 'LSX' feature.
486
487 if (Subtarget.hasExtLSX()) {
489 setTargetDAGCombine(ISD::BITCAST);
490 }
491
492 // Set DAG combine for 'LASX' feature.
493
494 if (Subtarget.hasExtLASX())
496
497 // Compute derived properties from the register classes.
498 computeRegisterProperties(Subtarget.getRegisterInfo());
499
501
504
505 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
506
508
509 // Function alignments.
511 // Set preferred alignments.
512 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
513 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
514 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
515
516 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
517 if (Subtarget.hasLAMCAS())
519
520 if (Subtarget.hasSCQ()) {
522 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
523 }
524}
525
526 bool LoongArchTargetLowering::isOffsetFoldingLegal(
527 const GlobalAddressSDNode *GA) const {
528 // In order to maximise the opportunity for common subexpression elimination,
529 // keep a separate ADD node for the global address offset instead of folding
530 // it in the global address node. Later peephole optimisations may choose to
531 // fold it back in when profitable.
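  // E.g. (illustrative): an access to @g+8 is kept as (add (address of @g), 8)
  // so that later accesses to @g+16, @g+24, ... can reuse the same base
  // address materialization.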
532 return false;
533}
534
535 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
536 SelectionDAG &DAG) const {
537 switch (Op.getOpcode()) {
538 case ISD::ATOMIC_FENCE:
539 return lowerATOMIC_FENCE(Op, DAG);
541 return lowerEH_DWARF_CFA(Op, DAG);
543 return lowerGlobalAddress(Op, DAG);
545 return lowerGlobalTLSAddress(Op, DAG);
547 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
549 return lowerINTRINSIC_W_CHAIN(Op, DAG);
551 return lowerINTRINSIC_VOID(Op, DAG);
553 return lowerBlockAddress(Op, DAG);
554 case ISD::JumpTable:
555 return lowerJumpTable(Op, DAG);
556 case ISD::SHL_PARTS:
557 return lowerShiftLeftParts(Op, DAG);
558 case ISD::SRA_PARTS:
559 return lowerShiftRightParts(Op, DAG, true);
560 case ISD::SRL_PARTS:
561 return lowerShiftRightParts(Op, DAG, false);
563 return lowerConstantPool(Op, DAG);
564 case ISD::FP_TO_SINT:
565 return lowerFP_TO_SINT(Op, DAG);
566 case ISD::BITCAST:
567 return lowerBITCAST(Op, DAG);
568 case ISD::UINT_TO_FP:
569 return lowerUINT_TO_FP(Op, DAG);
570 case ISD::SINT_TO_FP:
571 return lowerSINT_TO_FP(Op, DAG);
572 case ISD::VASTART:
573 return lowerVASTART(Op, DAG);
574 case ISD::FRAMEADDR:
575 return lowerFRAMEADDR(Op, DAG);
576 case ISD::RETURNADDR:
577 return lowerRETURNADDR(Op, DAG);
579 return lowerWRITE_REGISTER(Op, DAG);
581 return lowerINSERT_VECTOR_ELT(Op, DAG);
583 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
585 return lowerBUILD_VECTOR(Op, DAG);
587 return lowerCONCAT_VECTORS(Op, DAG);
589 return lowerVECTOR_SHUFFLE(Op, DAG);
590 case ISD::BITREVERSE:
591 return lowerBITREVERSE(Op, DAG);
593 return lowerSCALAR_TO_VECTOR(Op, DAG);
594 case ISD::PREFETCH:
595 return lowerPREFETCH(Op, DAG);
596 case ISD::SELECT:
597 return lowerSELECT(Op, DAG);
598 case ISD::BRCOND:
599 return lowerBRCOND(Op, DAG);
600 case ISD::FP_TO_FP16:
601 return lowerFP_TO_FP16(Op, DAG);
602 case ISD::FP16_TO_FP:
603 return lowerFP16_TO_FP(Op, DAG);
604 case ISD::FP_TO_BF16:
605 return lowerFP_TO_BF16(Op, DAG);
606 case ISD::BF16_TO_FP:
607 return lowerBF16_TO_FP(Op, DAG);
608 case ISD::VECREDUCE_ADD:
609 return lowerVECREDUCE_ADD(Op, DAG);
610 case ISD::ROTL:
611 case ISD::ROTR:
612 return lowerRotate(Op, DAG);
613 case ISD::VECREDUCE_AND:
614 case ISD::VECREDUCE_OR:
615 case ISD::VECREDUCE_XOR:
616 case ISD::VECREDUCE_SMAX:
617 case ISD::VECREDUCE_SMIN:
618 case ISD::VECREDUCE_UMAX:
619 case ISD::VECREDUCE_UMIN:
620 return lowerVECREDUCE(Op, DAG);
621 case ISD::ConstantFP:
622 return lowerConstantFP(Op, DAG);
623 case ISD::SRL:
624 return lowerVectorSRL(Op, DAG);
625 }
626 return SDValue();
627}
628
629/// getVShiftAmt - Check if this is a valid build_vector for the immediate
630/// operand of a vector shift operation, where all the elements of the
631/// build_vector must have the same constant integer value.
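/// For example (illustrative): a splat build_vector
/// <4 x i32> <i32 3, i32 3, i32 3, i32 3> used as a shift-amount operand
/// yields Amt = 3 and returns true.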
632static bool getVShiftAmt(SDValue Op, unsigned ElementBits, int64_t &Amt) {
633 // Ignore bit_converts.
634 while (Op.getOpcode() == ISD::BITCAST)
635 Op = Op.getOperand(0);
636 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
637 APInt SplatBits, SplatUndef;
638 unsigned SplatBitSize;
639 bool HasAnyUndefs;
640 if (!BVN ||
641 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
642 ElementBits) ||
643 SplatBitSize > ElementBits)
644 return false;
645 Amt = SplatBits.getSExtValue();
646 return true;
647}
648
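// Lower a vector SRL: when the shift amount is a constant splat that fits the
// element width, emit the immediate form, e.g. (illustrative)
//   (srl v4i32:$x, splat 3) --> (LoongArchISD::VSRLI $x, 3)
// otherwise fall back to the register form LoongArchISD::VSRL.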
649SDValue LoongArchTargetLowering::lowerVectorSRL(SDValue Op,
650 SelectionDAG &DAG) const {
651 EVT VT = Op.getValueType();
652 SDLoc DL(Op);
653 int64_t Amt;
654
655 if (!Op.getOperand(1).getValueType().isVector())
656 return Op;
657 unsigned EltSize = VT.getScalarSizeInBits();
658 MVT GRLenVT = Subtarget.getGRLenVT();
659
660 assert(Op.getOpcode() == ISD::SRL && "unexpected shift opcode");
661 if (getVShiftAmt(Op.getOperand(1), EltSize, Amt) && Amt >= 0 && Amt < EltSize)
662 return DAG.getNode(LoongArchISD::VSRLI, DL, VT, Op.getOperand(0),
663 DAG.getConstant(Amt, DL, GRLenVT));
664 return DAG.getNode(LoongArchISD::VSRL, DL, VT, Op.getOperand(0),
665 Op.getOperand(1));
666}
667
668// Helper to attempt to return a cheaper, bit-inverted version of \p V.
669 static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
670 // TODO: don't always ignore oneuse constraints.
671 V = peekThroughBitcasts(V);
672 EVT VT = V.getValueType();
673
674 // Match not(xor X, -1) -> X.
675 if (V.getOpcode() == ISD::XOR &&
676 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
677 isAllOnesConstant(V.getOperand(1))))
678 return V.getOperand(0);
679
680 // Match not(extract_subvector(not(X)) -> extract_subvector(X).
681 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
682 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
683 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
684 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
685 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
686 V.getOperand(1));
687 }
688 }
689
690 // Match not(SplatVector(not(X)) -> SplatVector(X).
691 if (V.getOpcode() == ISD::BUILD_VECTOR) {
692 if (SDValue SplatValue =
693 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
694 if (!V->isOnlyUserOf(SplatValue.getNode()))
695 return SDValue();
696
697 if (SDValue Not = isNOT(SplatValue, DAG)) {
698 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
699 return DAG.getSplat(VT, SDLoc(Not), Not);
700 }
701 }
702 }
703
704 // Match not(or(not(X),not(Y))) -> and(X, Y).
705 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
706 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
707 // TODO: Handle cases with single NOT operand -> VANDN
708 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
709 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
710 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
711 DAG.getBitcast(VT, Op1));
712 }
713
714 // TODO: Add more matching patterns. Such as,
715 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
716 // not(slt(C, X)) -> slt(X - 1, C)
717
718 return SDValue();
719}
720
721SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
722 SelectionDAG &DAG) const {
723 EVT VT = Op.getValueType();
724 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
725 const APFloat &FPVal = CFP->getValueAPF();
726 SDLoc DL(CFP);
727
728 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
729 (VT == MVT::f64 && Subtarget.hasBasicD()));
730
731 // If value is 0.0 or -0.0, just ignore it.
732 if (FPVal.isZero())
733 return SDValue();
734
735 // If LSX is enabled, use the cheaper 'vldi' instruction if possible.
736 if (isFPImmVLDILegal(FPVal, VT))
737 return SDValue();
738
739 // Construct as integer, and move to float register.
740 APInt INTVal = FPVal.bitcastToAPInt();
741
742 // If more than MaterializeFPImmInsNum instructions would be used to
743 // generate the INTVal and move it to a float register, fall back to a
744 // floating-point load from the constant pool.
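  // E.g. (illustrative): on LA32 an f64 immediate needs the GPR
  // materialization sequence plus two GPR-to-FPR moves (low and high halves),
  // hence the "+ 2" below; otherwise a single move suffices.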
746 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
747 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
748 return SDValue();
749
750 switch (VT.getSimpleVT().SimpleTy) {
751 default:
752 llvm_unreachable("Unexpected floating point type!");
753 break;
754 case MVT::f32: {
755 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
756 if (Subtarget.is64Bit())
757 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
758 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
759 : LoongArchISD::MOVGR2FR_W,
760 DL, VT, NewVal);
761 }
762 case MVT::f64: {
763 if (Subtarget.is64Bit()) {
764 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
765 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
766 }
767 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
768 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
769 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
770 }
771 }
772
773 return SDValue();
774}
775
776// Lower vecreduce_add using vhaddw instructions.
777// For Example:
778// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
779// can be lowered to:
780// VHADDW_D_W vr0, vr0, vr0
781// VHADDW_Q_D vr0, vr0, vr0
782// VPICKVE2GR_D a0, vr0, 0
783// ADDI_W a0, a0, 0
784SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
785 SelectionDAG &DAG) const {
786
787 SDLoc DL(Op);
788 MVT OpVT = Op.getSimpleValueType();
789 SDValue Val = Op.getOperand(0);
790
791 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
792 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
793 unsigned ResBits = OpVT.getScalarSizeInBits();
794
795 unsigned LegalVecSize = 128;
796 bool isLASX256Vector =
797 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
798
799 // Ensure the operand type is legal, widening the vector until it is.
800 while (!isTypeLegal(Val.getSimpleValueType())) {
801 Val = DAG.WidenVector(Val, DL);
802 }
803
804 // NumEles determines the number of halving iterations; v4i32 for LSX
805 // and v8i32 for LASX should use the same count.
806 if (isLASX256Vector) {
807 NumEles /= 2;
808 LegalVecSize = 256;
809 }
810
811 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
812 MVT IntTy = MVT::getIntegerVT(EleBits);
813 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
814 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
815 }
816
817 if (isLASX256Vector) {
818 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
819 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
820 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
821 }
822
823 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
824 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
825 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
826}
827
828// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
829// For Example:
830// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
831// can be lowered to:
832// VBSRL_V vr1, vr0, 8
833// VMAX_W vr0, vr1, vr0
834// VBSRL_V vr1, vr0, 4
835// VMAX_W vr0, vr1, vr0
836// VPICKVE2GR_W a0, vr0, 0
837 // For a 256-bit vector, the operation is illegal and is split into two
838 // 128-bit vectors by default, which are then processed here.
839SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
840 SelectionDAG &DAG) const {
841 SDLoc DL(Op);
842
843 MVT OpVT = Op.getSimpleValueType();
844 SDValue Val = Op.getOperand(0);
845
846 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
847 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
848
849 // Ensure the operand type is legal, widening the vector until it is.
850 while (!isTypeLegal(Val.getSimpleValueType())) {
851 Val = DAG.WidenVector(Val, DL);
852 }
853
854 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
855 MVT VecTy = Val.getSimpleValueType();
856 MVT GRLenVT = Subtarget.getGRLenVT();
857
858 for (int i = NumEles; i > 1; i /= 2) {
859 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
860 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
861 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
862 }
863
864 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
865 DAG.getConstant(0, DL, GRLenVT));
866}
867
868SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
869 SelectionDAG &DAG) const {
870 unsigned IsData = Op.getConstantOperandVal(4);
871
872 // We don't support non-data prefetch.
873 // Just preserve the chain.
874 if (!IsData)
875 return Op.getOperand(0);
876
877 return Op;
878}
879
880SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
881 SelectionDAG &DAG) const {
882 MVT VT = Op.getSimpleValueType();
883 assert(VT.isVector() && "Unexpected type");
884
885 SDLoc DL(Op);
886 SDValue R = Op.getOperand(0);
887 SDValue Amt = Op.getOperand(1);
888 unsigned Opcode = Op.getOpcode();
889 unsigned EltSizeInBits = VT.getScalarSizeInBits();
890
891 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
892 if (V.getOpcode() != ISD::BUILD_VECTOR)
893 return false;
894 if (SDValue SplatValue =
895 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
896 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
897 CstSplatValue = C->getAPIntValue();
898 return true;
899 }
900 }
901 return false;
902 };
903
904 // Check for constant splat rotation amount.
905 APInt CstSplatValue;
906 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
907 bool isROTL = Opcode == ISD::ROTL;
908
909 // Check for splat rotate by zero.
910 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
911 return R;
912
913 // LoongArch targets always prefer ISD::ROTR.
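  // E.g. (illustrative): (rotl $x, $amt) becomes (rotr $x, (sub 0, $amt)),
  // which can then be matched by the vrotr[i] family of instructions.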
914 if (isROTL) {
915 SDValue Zero = DAG.getConstant(0, DL, VT);
916 return DAG.getNode(ISD::ROTR, DL, VT, R,
917 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
918 }
919
920 // Rotate by an immediate.
921 if (IsCstSplat) {
922 // ISD::ROTR: Attempt to rotate by a positive immediate.
923 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
924 if (SDValue Urem =
925 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
926 return DAG.getNode(Opcode, DL, VT, R, Urem);
927 }
928
929 return Op;
930}
931
932// Return true if Val is equal to (setcc LHS, RHS, CC).
933// Return false if Val is the inverse of (setcc LHS, RHS, CC).
934// Otherwise, return std::nullopt.
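// E.g. (illustrative): matchSetCC(a, b, SETLT, (setcc a, b, SETGE)) returns
// false, because SETGE is the inverse of SETLT for the same operands.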
935static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
936 ISD::CondCode CC, SDValue Val) {
937 assert(Val->getOpcode() == ISD::SETCC);
938 SDValue LHS2 = Val.getOperand(0);
939 SDValue RHS2 = Val.getOperand(1);
940 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
941
942 if (LHS == LHS2 && RHS == RHS2) {
943 if (CC == CC2)
944 return true;
945 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
946 return false;
947 } else if (LHS == RHS2 && RHS == LHS2) {
949 if (CC == CC2)
950 return true;
951 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
952 return false;
953 }
954
955 return std::nullopt;
956}
957
958 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
959 const LoongArchSubtarget &Subtarget) {
960 SDValue CondV = N->getOperand(0);
961 SDValue TrueV = N->getOperand(1);
962 SDValue FalseV = N->getOperand(2);
963 MVT VT = N->getSimpleValueType(0);
964 SDLoc DL(N);
965
966 // (select c, -1, y) -> -c | y
967 if (isAllOnesConstant(TrueV)) {
968 SDValue Neg = DAG.getNegative(CondV, DL, VT);
969 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
970 }
971 // (select c, y, -1) -> (c-1) | y
972 if (isAllOnesConstant(FalseV)) {
973 SDValue Neg =
974 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
975 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
976 }
977
978 // (select c, 0, y) -> (c-1) & y
979 if (isNullConstant(TrueV)) {
980 SDValue Neg =
981 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
982 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
983 }
984 // (select c, y, 0) -> -c & y
985 if (isNullConstant(FalseV)) {
986 SDValue Neg = DAG.getNegative(CondV, DL, VT);
987 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
988 }
989
990 // select c, ~x, x --> xor -c, x
991 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
992 const APInt &TrueVal = TrueV->getAsAPIntVal();
993 const APInt &FalseVal = FalseV->getAsAPIntVal();
994 if (~TrueVal == FalseVal) {
995 SDValue Neg = DAG.getNegative(CondV, DL, VT);
996 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
997 }
998 }
999
1000 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
1001 // when both truev and falsev are also setcc.
1002 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
1003 FalseV.getOpcode() == ISD::SETCC) {
1004 SDValue LHS = CondV.getOperand(0);
1005 SDValue RHS = CondV.getOperand(1);
1006 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1007
1008 // (select x, x, y) -> x | y
1009 // (select !x, x, y) -> x & y
1010 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
1011 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
1012 DAG.getFreeze(FalseV));
1013 }
1014 // (select x, y, x) -> x & y
1015 // (select !x, y, x) -> x | y
1016 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
1017 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
1018 DAG.getFreeze(TrueV), FalseV);
1019 }
1020 }
1021
1022 return SDValue();
1023}
1024
1025// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1026// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1027 // For now we only consider the transformation profitable if `binOp(c0, c1)` ends up
1028// being `0` or `-1`. In such cases we can replace `select` with `and`.
1029// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1030// than `c0`?
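// E.g. (illustrative): (and (select cond, x, 0), 1) becomes
// (select cond, (and x, 1), 0), since and(0, 1) == 0.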
1031static SDValue
1032 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
1033 const LoongArchSubtarget &Subtarget) {
1034 unsigned SelOpNo = 0;
1035 SDValue Sel = BO->getOperand(0);
1036 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1037 SelOpNo = 1;
1038 Sel = BO->getOperand(1);
1039 }
1040
1041 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1042 return SDValue();
1043
1044 unsigned ConstSelOpNo = 1;
1045 unsigned OtherSelOpNo = 2;
1046 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
1047 ConstSelOpNo = 2;
1048 OtherSelOpNo = 1;
1049 }
1050 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1051 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
1052 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1053 return SDValue();
1054
1055 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1056 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1057 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1058 return SDValue();
1059
1060 SDLoc DL(Sel);
1061 EVT VT = BO->getValueType(0);
1062
1063 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1064 if (SelOpNo == 1)
1065 std::swap(NewConstOps[0], NewConstOps[1]);
1066
1067 SDValue NewConstOp =
1068 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1069 if (!NewConstOp)
1070 return SDValue();
1071
1072 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1073 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1074 return SDValue();
1075
1076 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1077 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1078 if (SelOpNo == 1)
1079 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1080 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1081
1082 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1083 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1084 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1085}
1086
1087// Changes the condition code and swaps operands if necessary, so the SetCC
1088// operation matches one of the comparisons supported directly by branches
1089// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1090// compare with 1/-1.
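// E.g. (illustrative, LA64): (seteq (and $x, 0x1000), 0) has a mask that does
// not fit ANDI, so it is rewritten as (setge (shl $x, 51), 0), moving the
// tested bit into the sign position.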
1091 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1092 ISD::CondCode &CC, SelectionDAG &DAG) {
1093 // If this is a single bit test that can't be handled by ANDI, shift the
1094 // bit to be tested to the MSB and perform a signed compare with 0.
1095 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1096 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1097 isa<ConstantSDNode>(LHS.getOperand(1))) {
1098 uint64_t Mask = LHS.getConstantOperandVal(1);
1099 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1100 unsigned ShAmt = 0;
1101 if (isPowerOf2_64(Mask)) {
1102 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1103 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1104 } else {
1105 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1106 }
1107
1108 LHS = LHS.getOperand(0);
1109 if (ShAmt != 0)
1110 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1111 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1112 return;
1113 }
1114 }
1115
1116 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1117 int64_t C = RHSC->getSExtValue();
1118 switch (CC) {
1119 default:
1120 break;
1121 case ISD::SETGT:
1122 // Convert X > -1 to X >= 0.
1123 if (C == -1) {
1124 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1125 CC = ISD::SETGE;
1126 return;
1127 }
1128 break;
1129 case ISD::SETLT:
1130 // Convert X < 1 to 0 >= X.
1131 if (C == 1) {
1132 RHS = LHS;
1133 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1134 CC = ISD::SETGE;
1135 return;
1136 }
1137 break;
1138 }
1139 }
1140
1141 switch (CC) {
1142 default:
1143 break;
1144 case ISD::SETGT:
1145 case ISD::SETLE:
1146 case ISD::SETUGT:
1147 case ISD::SETULE:
1149 std::swap(LHS, RHS);
1150 break;
1151 }
1152}
1153
1154SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
1155 SelectionDAG &DAG) const {
1156 SDValue CondV = Op.getOperand(0);
1157 SDValue TrueV = Op.getOperand(1);
1158 SDValue FalseV = Op.getOperand(2);
1159 SDLoc DL(Op);
1160 MVT VT = Op.getSimpleValueType();
1161 MVT GRLenVT = Subtarget.getGRLenVT();
1162
1163 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
1164 return V;
1165
1166 if (Op.hasOneUse()) {
1167 unsigned UseOpc = Op->user_begin()->getOpcode();
1168 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
1169 SDNode *BinOp = *Op->user_begin();
1170 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
1171 DAG, Subtarget)) {
1172 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
1173 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
1174 // may return a constant node and cause a crash in lowerSELECT.
1175 if (NewSel.getOpcode() == ISD::SELECT)
1176 return lowerSELECT(NewSel, DAG);
1177 return NewSel;
1178 }
1179 }
1180 }
1181
1182 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1183 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1184 // (select condv, truev, falsev)
1185 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1186 if (CondV.getOpcode() != ISD::SETCC ||
1187 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1188 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1189 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1190
1191 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1192
1193 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1194 }
1195
1196 // If the CondV is the output of a SETCC node which operates on GRLenVT
1197 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1198 // to take advantage of the integer compare+branch instructions. i.e.: (select
1199 // (setcc lhs, rhs, cc), truev, falsev)
1200 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1201 SDValue LHS = CondV.getOperand(0);
1202 SDValue RHS = CondV.getOperand(1);
1203 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1204
1205 // Special case for a select of 2 constants that have a difference of 1.
1206 // Normally this is done by DAGCombine, but if the select is introduced by
1207 // type legalization or op legalization, we miss it. Restricting to SETLT
1208 // case for now because that is what signed saturating add/sub need.
1209 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1210 // but we would probably want to swap the true/false values if the condition
1211 // is SETGE/SETLE to avoid an XORI.
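  // E.g. (illustrative): (select (setlt a, b), 5, 4) becomes
  // (add (setlt a, b), 4).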
1212 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1213 CCVal == ISD::SETLT) {
1214 const APInt &TrueVal = TrueV->getAsAPIntVal();
1215 const APInt &FalseVal = FalseV->getAsAPIntVal();
1216 if (TrueVal - 1 == FalseVal)
1217 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1218 if (TrueVal + 1 == FalseVal)
1219 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1220 }
1221
1222 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1223 // 1 < x ? x : 1 -> 0 < x ? x : 1
1224 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1225 RHS == TrueV && LHS == FalseV) {
1226 LHS = DAG.getConstant(0, DL, VT);
1227 // 0 <u x is the same as x != 0.
1228 if (CCVal == ISD::SETULT) {
1229 std::swap(LHS, RHS);
1230 CCVal = ISD::SETNE;
1231 }
1232 }
1233
1234 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1235 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1236 RHS == FalseV) {
1237 RHS = DAG.getConstant(0, DL, VT);
1238 }
1239
1240 SDValue TargetCC = DAG.getCondCode(CCVal);
1241
1242 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1243 // (select (setcc lhs, rhs, CC), constant, falsev)
1244 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1245 std::swap(TrueV, FalseV);
1246 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1247 }
1248
1249 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1250 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1251}
1252
1253SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1254 SelectionDAG &DAG) const {
1255 SDValue CondV = Op.getOperand(1);
1256 SDLoc DL(Op);
1257 MVT GRLenVT = Subtarget.getGRLenVT();
1258
1259 if (CondV.getOpcode() == ISD::SETCC) {
1260 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1261 SDValue LHS = CondV.getOperand(0);
1262 SDValue RHS = CondV.getOperand(1);
1263 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1264
1265 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1266
1267 SDValue TargetCC = DAG.getCondCode(CCVal);
1268 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1269 Op.getOperand(0), LHS, RHS, TargetCC,
1270 Op.getOperand(2));
1271 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1272 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1273 Op.getOperand(0), CondV, Op.getOperand(2));
1274 }
1275 }
1276
1277 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1278 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1279 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1280}
1281
1282SDValue
1283LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1284 SelectionDAG &DAG) const {
1285 SDLoc DL(Op);
1286 MVT OpVT = Op.getSimpleValueType();
1287
1288 SDValue Vector = DAG.getUNDEF(OpVT);
1289 SDValue Val = Op.getOperand(0);
1290 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1291
1292 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1293}
1294
1295SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1296 SelectionDAG &DAG) const {
1297 EVT ResTy = Op->getValueType(0);
1298 SDValue Src = Op->getOperand(0);
1299 SDLoc DL(Op);
1300
1301 // LoongArchISD::BITREV_8B is not supported on LA32.
1302 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1303 return SDValue();
1304
1305 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1306 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1307 unsigned int NewEltNum = NewVT.getVectorNumElements();
1308
1309 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1310
1312 for (unsigned int i = 0; i < NewEltNum; i++) {
1313 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1314 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1315 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1316 ? (unsigned)LoongArchISD::BITREV_8B
1317 : (unsigned)ISD::BITREVERSE;
1318 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1319 }
1320 SDValue Res =
1321 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1322
1323 switch (ResTy.getSimpleVT().SimpleTy) {
1324 default:
1325 return SDValue();
1326 case MVT::v16i8:
1327 case MVT::v32i8:
1328 return Res;
1329 case MVT::v8i16:
1330 case MVT::v16i16:
1331 case MVT::v4i32:
1332 case MVT::v8i32: {
1334 for (unsigned int i = 0; i < NewEltNum; i++)
1335 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1336 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1337 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1338 }
1339 }
1340}
1341
1342// Widen element type to get a new mask value (if possible).
1343// For example:
1344// shufflevector <4 x i32> %a, <4 x i32> %b,
1345// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1346// is equivalent to:
1347// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1348// can be lowered to:
1349// VPACKOD_D vr0, vr0, vr1
1351 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1352 unsigned EltBits = VT.getScalarSizeInBits();
1353
1354 if (EltBits > 32 || EltBits == 1)
1355 return SDValue();
1356
1357 SmallVector<int, 8> NewMask;
1358 if (widenShuffleMaskElts(Mask, NewMask)) {
1359 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1360 : MVT::getIntegerVT(EltBits * 2);
1361 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1362 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1363 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1364 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1365 return DAG.getBitcast(
1366 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1367 }
1368 }
1369
1370 return SDValue();
1371}
1372
1373/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1374 /// instructions.
1375 // The function matches elements from one of the input vectors shuffled to the
1376 // left or right with zeroable elements 'shifted in'. It handles both the
1377 // strictly bit-wise element shifts and the byte shift across an entire 128-bit
1378 // lane.
1379// Mostly copied from X86.
1380static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1381 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1382 int MaskOffset, const APInt &Zeroable) {
1383 int Size = Mask.size();
1384 unsigned SizeInBits = Size * ScalarSizeInBits;
1385
1386 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1387 for (int i = 0; i < Size; i += Scale)
1388 for (int j = 0; j < Shift; ++j)
1389 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1390 return false;
1391
1392 return true;
1393 };
1394
1395 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1396 int Step = 1) {
1397 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1398 if (!(Mask[i] == -1 || Mask[i] == Low))
1399 return false;
1400 return true;
1401 };
1402
1403 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1404 for (int i = 0; i != Size; i += Scale) {
1405 unsigned Pos = Left ? i + Shift : i;
1406 unsigned Low = Left ? i : i + Shift;
1407 unsigned Len = Scale - Shift;
1408 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1409 return -1;
1410 }
1411
1412 int ShiftEltBits = ScalarSizeInBits * Scale;
1413 bool ByteShift = ShiftEltBits > 64;
1414 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1415 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1416 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1417
1418 // Normalize the scale for byte shifts to still produce an i64 element
1419 // type.
1420 Scale = ByteShift ? Scale / 2 : Scale;
1421
1422 // We need to round trip through the appropriate type for the shift.
1423 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1424 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1425 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1426 return (int)ShiftAmt;
1427 };
1428
1429 unsigned MaxWidth = 128;
1430 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1431 for (int Shift = 1; Shift != Scale; ++Shift)
1432 for (bool Left : {true, false})
1433 if (CheckZeros(Shift, Scale, Left)) {
1434 int ShiftAmt = MatchShift(Shift, Scale, Left);
1435 if (0 < ShiftAmt)
1436 return ShiftAmt;
1437 }
1438
1439 // no match
1440 return -1;
1441}
1442
1443/// Lower VECTOR_SHUFFLE as shift (if possible).
1444///
1445/// For example:
1446/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1447/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1448/// is lowered to:
1449/// (VBSLL_V $v0, $v0, 4)
1450///
1451/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1452/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1453/// is lowered to:
1454/// (VSLLI_D $v0, $v0, 32)
1456 MVT VT, SDValue V1, SDValue V2,
1457 SelectionDAG &DAG,
1458 const LoongArchSubtarget &Subtarget,
1459 const APInt &Zeroable) {
1460 int Size = Mask.size();
1461 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1462
1463 MVT ShiftVT;
1464 SDValue V = V1;
1465 unsigned Opcode;
1466
1467 // Try to match shuffle against V1 shift.
1468 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1469 Mask, 0, Zeroable);
1470
1471 // If V1 failed, try to match shuffle against V2 shift.
1472 if (ShiftAmt < 0) {
1473 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1474 Mask, Size, Zeroable);
1475 V = V2;
1476 }
1477
1478 if (ShiftAmt < 0)
1479 return SDValue();
1480
1481 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1482 "Illegal integer vector type");
1483 V = DAG.getBitcast(ShiftVT, V);
1484 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1485 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1486 return DAG.getBitcast(VT, V);
1487}
1488
1489/// Determine whether a range fits a regular pattern of values.
1490/// This function accounts for the possibility of jumping over the End iterator.
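/// E.g. (illustrative): the mask <0, -1, 2, 3> fits with CheckStride = 1,
/// ExpectedIndex = 0 and ExpectedIndexStride = 1, since the undef (-1) entry
/// is ignored.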
1491template <typename ValType>
1492static bool
1494 unsigned CheckStride,
1496 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1497 auto &I = Begin;
1498
1499 while (I != End) {
1500 if (*I != -1 && *I != ExpectedIndex)
1501 return false;
1502 ExpectedIndex += ExpectedIndexStride;
1503
1504 // Incrementing past End is undefined behaviour so we must increment one
1505 // step at a time and check for End at each step.
1506 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1507 ; // Empty loop body.
1508 }
1509 return true;
1510}
1511
1512/// Compute whether each element of a shuffle is zeroable.
1513///
1514/// A "zeroable" vector shuffle element is one which can be lowered to zero.
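/// E.g. (illustrative): shuffling %v with zeroinitializer using mask
/// <0, 4, 1, 5> marks elements 1 and 3 (taken from the all-zeros operand) as
/// zeroable.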
1516 SDValue V2, APInt &KnownUndef,
1517 APInt &KnownZero) {
1518 int Size = Mask.size();
1519 KnownUndef = KnownZero = APInt::getZero(Size);
1520
1521 V1 = peekThroughBitcasts(V1);
1522 V2 = peekThroughBitcasts(V2);
1523
1524 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1525 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1526
1527 int VectorSizeInBits = V1.getValueSizeInBits();
1528 int ScalarSizeInBits = VectorSizeInBits / Size;
1529 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1530 (void)ScalarSizeInBits;
1531
1532 for (int i = 0; i < Size; ++i) {
1533 int M = Mask[i];
1534 if (M < 0) {
1535 KnownUndef.setBit(i);
1536 continue;
1537 }
1538 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1539 KnownZero.setBit(i);
1540 continue;
1541 }
1542 }
1543}
1544
1545/// Test whether a shuffle mask is equivalent within each sub-lane.
1546///
1547/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1548/// non-trivial to compute in the face of undef lanes. The representation is
1549/// suitable for use with existing 128-bit shuffles as entries from the second
1550/// vector have been remapped to [LaneSize, 2*LaneSize).
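/// E.g. (illustrative): for v8i32 with 128-bit lanes, the mask
/// <0, 9, 2, 11, 4, 13, 6, 15> repeats as <0, 5, 2, 7> within each lane.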
1551static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1552 ArrayRef<int> Mask,
1553 SmallVectorImpl<int> &RepeatedMask) {
1554 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1555 RepeatedMask.assign(LaneSize, -1);
1556 int Size = Mask.size();
1557 for (int i = 0; i < Size; ++i) {
1558 assert(Mask[i] == -1 || Mask[i] >= 0);
1559 if (Mask[i] < 0)
1560 continue;
1561 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1562 // This entry crosses lanes, so there is no way to model this shuffle.
1563 return false;
1564
1565 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1566 // Adjust second vector indices to start at LaneSize instead of Size.
1567 int LocalM =
1568 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1569 if (RepeatedMask[i % LaneSize] < 0)
1570 // This is the first non-undef entry in this slot of a 128-bit lane.
1571 RepeatedMask[i % LaneSize] = LocalM;
1572 else if (RepeatedMask[i % LaneSize] != LocalM)
1573 // Found a mismatch with the repeated mask.
1574 return false;
1575 }
1576 return true;
1577}
1578
1579/// Attempts to match vector shuffle as byte rotation.
1581 ArrayRef<int> Mask) {
1582
1583 SDValue Lo, Hi;
1584 SmallVector<int, 16> RepeatedMask;
1585
1586 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1587 return -1;
1588
1589 int NumElts = RepeatedMask.size();
1590 int Rotation = 0;
1591 int Scale = 16 / NumElts;
1592
1593 for (int i = 0; i < NumElts; ++i) {
1594 int M = RepeatedMask[i];
1595 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1596 "Unexpected mask index.");
1597 if (M < 0)
1598 continue;
1599
1600 // Determine where a rotated vector would have started.
1601 int StartIdx = i - (M % NumElts);
1602 if (StartIdx == 0)
1603 return -1;
1604
1605 // If we found the tail of a vector the rotation must be the missing
1606 // front. If we found the head of a vector, it must be how much of the
1607 // head.
1608 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1609
1610 if (Rotation == 0)
1611 Rotation = CandidateRotation;
1612 else if (Rotation != CandidateRotation)
1613 return -1;
1614
1615 // Compute which value this mask is pointing at.
1616 SDValue MaskV = M < NumElts ? V1 : V2;
1617
1618 // Compute which of the two target values this index should be assigned
1619 // to. This reflects whether the high elements are remaining or the low
1620 // elements are remaining.
1621 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1622
1623 // Either set up this value if we've not encountered it before, or check
1624 // that it remains consistent.
1625 if (!TargetV)
1626 TargetV = MaskV;
1627 else if (TargetV != MaskV)
1628 return -1;
1629 }
1630
1631 // Check that we successfully analyzed the mask, and normalize the results.
1632 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1633 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1634 if (!Lo)
1635 Lo = Hi;
1636 else if (!Hi)
1637 Hi = Lo;
1638
1639 V1 = Lo;
1640 V2 = Hi;
1641
1642 return Rotation * Scale;
1643}
1644
1645/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1646///
1647/// For example:
1648/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1649/// <2 x i32> <i32 3, i32 0>
1650/// is lowered to:
1651/// (VBSRL_V $v1, $v1, 8)
1652/// (VBSLL_V $v0, $v0, 8)
1653/// (VOR_V $v0, $V0, $v1)
1654static SDValue
1656 SDValue V1, SDValue V2, SelectionDAG &DAG,
1657 const LoongArchSubtarget &Subtarget) {
1658
1659 SDValue Lo = V1, Hi = V2;
1660 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1661 if (ByteRotation <= 0)
1662 return SDValue();
1663
1664 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1665 Lo = DAG.getBitcast(ByteVT, Lo);
1666 Hi = DAG.getBitcast(ByteVT, Hi);
1667
1668 int LoByteShift = 16 - ByteRotation;
1669 int HiByteShift = ByteRotation;
1670 MVT GRLenVT = Subtarget.getGRLenVT();
1671
1672 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1673 DAG.getConstant(LoByteShift, DL, GRLenVT));
1674 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1675 DAG.getConstant(HiByteShift, DL, GRLenVT));
1676 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1677}
1678
1679/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1680///
1681/// For example:
1682/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1683/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1684/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1685/// is lowered to:
1686/// (VREPLI $v1, 0)
1687/// (VILVL $v0, $v1, $v0)
1689 ArrayRef<int> Mask, MVT VT,
1690 SDValue V1, SDValue V2,
1691 SelectionDAG &DAG,
1692 const APInt &Zeroable) {
1693 int Bits = VT.getSizeInBits();
1694 int EltBits = VT.getScalarSizeInBits();
1695 int NumElements = VT.getVectorNumElements();
1696
1697 if (Zeroable.isAllOnes())
1698 return DAG.getConstant(0, DL, VT);
1699
1700 // Define a helper function to check a particular ext-scale and lower to it if
1701 // valid.
1702 auto Lower = [&](int Scale) -> SDValue {
1703 SDValue InputV;
1704 bool AnyExt = true;
1705 int Offset = 0;
1706 for (int i = 0; i < NumElements; i++) {
1707 int M = Mask[i];
1708 if (M < 0)
1709 continue;
1710 if (i % Scale != 0) {
1711 // Each of the extended elements needs to be zeroable.
1712 if (!Zeroable[i])
1713 return SDValue();
1714
1715 AnyExt = false;
1716 continue;
1717 }
1718
1719 // Each of the base elements needs to be consecutive indices into the
1720 // same input vector.
1721 SDValue V = M < NumElements ? V1 : V2;
1722 M = M % NumElements;
1723 if (!InputV) {
1724 InputV = V;
1725 Offset = M - (i / Scale);
1726
1727 // These offsets can't be handled.
1728 if (Offset % (NumElements / Scale))
1729 return SDValue();
1730 } else if (InputV != V)
1731 return SDValue();
1732
1733 if (M != (Offset + (i / Scale)))
1734 return SDValue(); // Non-consecutive strided elements.
1735 }
1736
1737 // If we fail to find an input, we have a zero-shuffle which should always
1738 // have already been handled.
1739 if (!InputV)
1740 return SDValue();
1741
1742 do {
1743 unsigned VilVLoHi = LoongArchISD::VILVL;
1744 if (Offset >= (NumElements / 2)) {
1745 VilVLoHi = LoongArchISD::VILVH;
1746 Offset -= (NumElements / 2);
1747 }
1748
1749 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1750 SDValue Ext =
1751 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1752 InputV = DAG.getBitcast(InputVT, InputV);
1753 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1754 Scale /= 2;
1755 EltBits *= 2;
1756 NumElements /= 2;
1757 } while (Scale > 1);
1758 return DAG.getBitcast(VT, InputV);
1759 };
1760
1761 // Each iteration, try extending the elements half as much, but into twice as
1762 // many elements.
1763 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1764 NumExtElements *= 2) {
1765 if (SDValue V = Lower(NumElements / NumExtElements))
1766 return V;
1767 }
1768 return SDValue();
1769}
1770
1771/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1772///
1773/// VREPLVEI performs vector broadcast based on an element specified by an
1774/// integer immediate, with its mask being similar to:
1775/// <x, x, x, ...>
1776/// where x is any valid index.
1777///
1778/// When undef's appear in the mask they are treated as if they were whatever
1779/// value is necessary in order to fit the above form.
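///
/// For example (illustrative):
/// %2 = shufflevector <4 x i32> %0, <4 x i32> undef,
/// <4 x i32> <i32 1, i32 1, i32 1, i32 1>
/// is lowered to something like:
/// (VREPLVEI_W $v0, $v0, 1)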
1780static SDValue
1782 SDValue V1, SelectionDAG &DAG,
1783 const LoongArchSubtarget &Subtarget) {
1784 int SplatIndex = -1;
1785 for (const auto &M : Mask) {
1786 if (M != -1) {
1787 SplatIndex = M;
1788 break;
1789 }
1790 }
1791
1792 if (SplatIndex == -1)
1793 return DAG.getUNDEF(VT);
1794
1795 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1796 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1797 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1798 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1799 }
1800
1801 return SDValue();
1802}
1803
1804/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1805///
1806/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1807/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1808///
1809/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1810/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1811/// When undef's appear they are treated as if they were whatever value is
1812/// necessary in order to fit the above forms.
1813///
1814/// For example:
1815/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1816/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1817/// i32 7, i32 6, i32 5, i32 4>
1818/// is lowered to:
1819/// (VSHUF4I_H $v0, $v1, 27)
1820/// where the 27 comes from:
1821/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1822static SDValue
1824 SDValue V1, SDValue V2, SelectionDAG &DAG,
1825 const LoongArchSubtarget &Subtarget) {
1826
1827 unsigned SubVecSize = 4;
1828 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1829 SubVecSize = 2;
1830
1831 int SubMask[4] = {-1, -1, -1, -1};
1832 for (unsigned i = 0; i < SubVecSize; ++i) {
1833 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1834 int M = Mask[j];
1835
1836 // Convert from vector index to 4-element subvector index
1837 // If an index refers to an element outside of the subvector then give up
1838 if (M != -1) {
1839 M -= 4 * (j / SubVecSize);
1840 if (M < 0 || M >= 4)
1841 return SDValue();
1842 }
1843
1844 // If the mask has an undef, replace it with the current index.
1845 // Note that it might still be undef if the current index is also undef
1846 if (SubMask[i] == -1)
1847 SubMask[i] = M;
1848 // Check that non-undef values are the same as in the mask. If they
1849 // aren't then give up
1850 else if (M != -1 && M != SubMask[i])
1851 return SDValue();
1852 }
1853 }
1854
1855 // Calculate the immediate. Replace any remaining undefs with zero
1856 int Imm = 0;
1857 for (int i = SubVecSize - 1; i >= 0; --i) {
1858 int M = SubMask[i];
1859
1860 if (M == -1)
1861 M = 0;
1862
1863 Imm <<= 2;
1864 Imm |= M & 0x3;
1865 }
1866
1867 MVT GRLenVT = Subtarget.getGRLenVT();
1868
1869 // Return vshuf4i.d
1870 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1871 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
1872 DAG.getConstant(Imm, DL, GRLenVT));
1873
1874 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1875 DAG.getConstant(Imm, DL, GRLenVT));
1876}
1877
1878/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
1879///
1880/// It is possible to optimize a VECTOR_SHUFFLE that performs a vector
1881/// reverse, i.e. whose mask looks like:
1882/// <7, 6, 5, 4, 3, 2, 1, 0>
1883///
1884/// When undef's appear in the mask they are treated as if they were whatever
1885/// value is necessary in order to fit the above forms.
1886static SDValue
1887lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1888 SDValue V1, SelectionDAG &DAG,
1889 const LoongArchSubtarget &Subtarget) {
1890  // Only vectors with i8/i16 elements, which cannot match other patterns
1891  // directly, need to do this.
1892 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
1893 VT != MVT::v16i16)
1894 return SDValue();
1895
1896 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
1897 return SDValue();
1898
1899 int WidenNumElts = VT.getVectorNumElements() / 4;
1900 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
1901 for (int i = 0; i < WidenNumElts; ++i)
1902 WidenMask[i] = WidenNumElts - 1 - i;
1903
1904 MVT WidenVT = MVT::getVectorVT(
1905 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
1906 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
1907 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
1908 DAG.getUNDEF(WidenVT), WidenMask);
1909
1910 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
1911 DAG.getBitcast(VT, WidenRev),
1912 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
1913}
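// Worked example (illustrative): for v16i8 with the full reverse mask
// <15, 14, ..., 1, 0>, WidenNumElts is 4, so the vector is bitcast to v4i32
// and shuffled with <3, 2, 1, 0> to reverse the 32-bit words; the final
// VSHUF4I with immediate 27 (3 + (2 << 2) + (1 << 4) + (0 << 6)) then reverses
// the four bytes inside each word, completing the byte-wise reversal.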
1914
1915/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1916///
1917/// VPACKEV interleaves the even elements from each vector.
1918///
1919/// It is possible to lower into VPACKEV when the mask consists of two of the
1920/// following forms interleaved:
1921/// <0, 2, 4, ...>
1922/// <n, n+2, n+4, ...>
1923/// where n is the number of elements in the vector.
1924/// For example:
1925/// <0, 0, 2, 2, 4, 4, ...>
1926/// <0, n, 2, n+2, 4, n+4, ...>
1927///
1928/// When undef's appear in the mask they are treated as if they were whatever
1929/// value is necessary in order to fit the above forms.
1930static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1931 MVT VT, SDValue V1, SDValue V2,
1932 SelectionDAG &DAG) {
1933
1934 const auto &Begin = Mask.begin();
1935 const auto &End = Mask.end();
1936 SDValue OriV1 = V1, OriV2 = V2;
1937
1938 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1939 V1 = OriV1;
1940 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1941 V1 = OriV2;
1942 else
1943 return SDValue();
1944
1945 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1946 V2 = OriV1;
1947 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1948 V2 = OriV2;
1949 else
1950 return SDValue();
1951
1952 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1953}
1954
1955/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1956///
1957/// VPACKOD interleaves the odd elements from each vector.
1958///
1959/// It is possible to lower into VPACKOD when the mask consists of two of the
1960/// following forms interleaved:
1961/// <1, 3, 5, ...>
1962/// <n+1, n+3, n+5, ...>
1963/// where n is the number of elements in the vector.
1964/// For example:
1965/// <1, 1, 3, 3, 5, 5, ...>
1966/// <1, n+1, 3, n+3, 5, n+5, ...>
1967///
1968/// When undef's appear in the mask they are treated as if they were whatever
1969/// value is necessary in order to fit the above forms.
1970static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1971 MVT VT, SDValue V1, SDValue V2,
1972 SelectionDAG &DAG) {
1973
1974 const auto &Begin = Mask.begin();
1975 const auto &End = Mask.end();
1976 SDValue OriV1 = V1, OriV2 = V2;
1977
1978 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1979 V1 = OriV1;
1980 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1981 V1 = OriV2;
1982 else
1983 return SDValue();
1984
1985 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1986 V2 = OriV1;
1987 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1988 V2 = OriV2;
1989 else
1990 return SDValue();
1991
1992 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1993}
1994
1995/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1996///
1997/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1998/// of each vector.
1999///
2000/// It is possible to lower into VILVH when the mask consists of two of the
2001/// following forms interleaved:
2002/// <x, x+1, x+2, ...>
2003/// <n+x, n+x+1, n+x+2, ...>
2004/// where n is the number of elements in the vector and x is half n.
2005/// For example:
2006/// <x, x, x+1, x+1, x+2, x+2, ...>
2007/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2008///
2009/// When undef's appear in the mask they are treated as if they were whatever
2010/// value is necessary in order to fit the above forms.
2011static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
2012 MVT VT, SDValue V1, SDValue V2,
2013 SelectionDAG &DAG) {
2014
2015 const auto &Begin = Mask.begin();
2016 const auto &End = Mask.end();
2017 unsigned HalfSize = Mask.size() / 2;
2018 SDValue OriV1 = V1, OriV2 = V2;
2019
2020 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2021 V1 = OriV1;
2022 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
2023 V1 = OriV2;
2024 else
2025 return SDValue();
2026
2027 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2028 V2 = OriV1;
2029 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
2030 1))
2031 V2 = OriV2;
2032 else
2033 return SDValue();
2034
2035 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2036}
2037
2038/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2039///
2040/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2041/// of each vector.
2042///
2043/// It is possible to lower into VILVL when the mask consists of two of the
2044/// following forms interleaved:
2045/// <0, 1, 2, ...>
2046/// <n, n+1, n+2, ...>
2047/// where n is the number of elements in the vector.
2048/// For example:
2049/// <0, 0, 1, 1, 2, 2, ...>
2050/// <0, n, 1, n+1, 2, n+2, ...>
2051///
2052/// When undef's appear in the mask they are treated as if they were whatever
2053/// value is necessary in order to fit the above forms.
2054static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
2055 MVT VT, SDValue V1, SDValue V2,
2056 SelectionDAG &DAG) {
2057
2058 const auto &Begin = Mask.begin();
2059 const auto &End = Mask.end();
2060 SDValue OriV1 = V1, OriV2 = V2;
2061
2062 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2063 V1 = OriV1;
2064 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2065 V1 = OriV2;
2066 else
2067 return SDValue();
2068
2069 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2070 V2 = OriV1;
2071 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2072 V2 = OriV2;
2073 else
2074 return SDValue();
2075
2076 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2077}
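// Worked example (illustrative): for v8i16 with mask <0, 8, 1, 9, 2, 10, 3, 11>,
// the even mask positions are <0, 1, 2, 3> (start 0, step 1) and the odd
// positions are <8, 9, 10, 11> (start n = 8, step 1), so V1 keeps the first
// source, V2 becomes the second source, and the node (VILVL V2, V1) interleaves
// the low halves of the two vectors.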
2078
2079/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2080///
2081/// VPICKEV copies the even elements of each vector into the result vector.
2082///
2083/// It is possible to lower into VPICKEV when the mask consists of two of the
2084/// following forms concatenated:
2085/// <0, 2, 4, ...>
2086/// <n, n+2, n+4, ...>
2087/// where n is the number of elements in the vector.
2088/// For example:
2089/// <0, 2, 4, ..., 0, 2, 4, ...>
2090/// <0, 2, 4, ..., n, n+2, n+4, ...>
2091///
2092/// When undef's appear in the mask they are treated as if they were whatever
2093/// value is necessary in order to fit the above forms.
2094static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2095 MVT VT, SDValue V1, SDValue V2,
2096 SelectionDAG &DAG) {
2097
2098 const auto &Begin = Mask.begin();
2099 const auto &Mid = Mask.begin() + Mask.size() / 2;
2100 const auto &End = Mask.end();
2101 SDValue OriV1 = V1, OriV2 = V2;
2102
2103 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2104 V1 = OriV1;
2105 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2106 V1 = OriV2;
2107 else
2108 return SDValue();
2109
2110 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2111 V2 = OriV1;
2112 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2113 V2 = OriV2;
2114
2115 else
2116 return SDValue();
2117
2118 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2119}
2120
2121/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2122///
2123/// VPICKOD copies the odd elements of each vector into the result vector.
2124///
2125/// It is possible to lower into VPICKOD when the mask consists of two of the
2126/// following forms concatenated:
2127/// <1, 3, 5, ...>
2128/// <n+1, n+3, n+5, ...>
2129/// where n is the number of elements in the vector.
2130/// For example:
2131/// <1, 3, 5, ..., 1, 3, 5, ...>
2132/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2133///
2134/// When undef's appear in the mask they are treated as if they were whatever
2135/// value is necessary in order to fit the above forms.
2136static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2137 MVT VT, SDValue V1, SDValue V2,
2138 SelectionDAG &DAG) {
2139
2140 const auto &Begin = Mask.begin();
2141 const auto &Mid = Mask.begin() + Mask.size() / 2;
2142 const auto &End = Mask.end();
2143 SDValue OriV1 = V1, OriV2 = V2;
2144
2145 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2146 V1 = OriV1;
2147 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2148 V1 = OriV2;
2149 else
2150 return SDValue();
2151
2152 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2153 V2 = OriV1;
2154 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2155 V2 = OriV2;
2156 else
2157 return SDValue();
2158
2159 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2160}
2161
2162/// Lower VECTOR_SHUFFLE into VSHUF.
2163///
2164/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2165/// adding it as an operand to the resulting VSHUF.
2166static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2167 MVT VT, SDValue V1, SDValue V2,
2168 SelectionDAG &DAG,
2169 const LoongArchSubtarget &Subtarget) {
2170
2171  SmallVector<SDValue, 16> Ops;
2172 for (auto M : Mask)
2173 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2174
2175 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2176 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2177
2178  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2179 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2180  // VSHUF concatenates the vectors in a bitwise fashion:
2181 // <0b00, 0b01> + <0b10, 0b11> ->
2182 // 0b0100 + 0b1110 -> 0b01001110
2183 // <0b10, 0b11, 0b00, 0b01>
2184 // We must therefore swap the operands to get the correct result.
2185 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2186}
2187
2188/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2189///
2190/// This routine breaks down the specific type of 128-bit shuffle and
2191/// dispatches to the lowering routines accordingly.
2192static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2193 SDValue V1, SDValue V2, SelectionDAG &DAG,
2194 const LoongArchSubtarget &Subtarget) {
2195 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2196 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2197 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2198         "Vector type is unsupported for lsx!");
2199  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2200 "Two operands have different types!");
2201 assert(VT.getVectorNumElements() == Mask.size() &&
2202 "Unexpected mask size for shuffle!");
2203 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2204
2205 APInt KnownUndef, KnownZero;
2206 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2207 APInt Zeroable = KnownUndef | KnownZero;
2208
2209 SDValue Result;
2210 // TODO: Add more comparison patterns.
2211 if (V2.isUndef()) {
2212 if ((Result =
2213 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2214 return Result;
2215 if ((Result =
2216 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2217 return Result;
2218 if ((Result =
2219 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2220 return Result;
2221
2222 // TODO: This comment may be enabled in the future to better match the
2223 // pattern for instruction selection.
2224 /* V2 = V1; */
2225 }
2226
2227 // It is recommended not to change the pattern comparison order for better
2228 // performance.
2229 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2230 return Result;
2231 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2232 return Result;
2233 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2234 return Result;
2235 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2236 return Result;
2237 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2238 return Result;
2239 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2240 return Result;
2241 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2242 (Result =
2243 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2244 return Result;
2245 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2246 Zeroable)))
2247 return Result;
2248 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2249 Zeroable)))
2250 return Result;
2251 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2252 Subtarget)))
2253 return Result;
2254 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2255 return NewShuffle;
2256 if ((Result =
2257 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2258 return Result;
2259 return SDValue();
2260}
2261
2262/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2263///
2264/// It is an XVREPLVEI when the mask is:
2265/// <x, x, x, ..., x+n, x+n, x+n, ...>
2266/// where the number of x's is equal to n and n is half the length of the vector.
2267///
2268/// When undef's appear in the mask they are treated as if they were whatever
2269/// value is necessary in order to fit the above form.
2270static SDValue
2271lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2272 SDValue V1, SelectionDAG &DAG,
2273 const LoongArchSubtarget &Subtarget) {
2274 int SplatIndex = -1;
2275 for (const auto &M : Mask) {
2276 if (M != -1) {
2277 SplatIndex = M;
2278 break;
2279 }
2280 }
2281
2282 if (SplatIndex == -1)
2283 return DAG.getUNDEF(VT);
2284
2285 const auto &Begin = Mask.begin();
2286 const auto &End = Mask.end();
2287 int HalfSize = Mask.size() / 2;
2288
2289 if (SplatIndex >= HalfSize)
2290 return SDValue();
2291
2292 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2293 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2294 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2295 0)) {
2296 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2297 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2298 }
2299
2300 return SDValue();
2301}
2302
2303/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2304static SDValue
2305lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2306 SDValue V1, SDValue V2, SelectionDAG &DAG,
2307 const LoongArchSubtarget &Subtarget) {
2308 // When the size is less than or equal to 4, lower cost instructions may be
2309 // used.
2310 if (Mask.size() <= 4)
2311 return SDValue();
2312 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2313}
2314
2315/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2316static SDValue
2317lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2318 SDValue V1, SelectionDAG &DAG,
2319 const LoongArchSubtarget &Subtarget) {
2320 // Only consider XVPERMI_D.
2321 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2322 return SDValue();
2323
2324 unsigned MaskImm = 0;
2325 for (unsigned i = 0; i < Mask.size(); ++i) {
2326 if (Mask[i] == -1)
2327 continue;
2328 MaskImm |= Mask[i] << (i * 2);
2329 }
2330
2331 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2332 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2333}
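// Worked example (illustrative): for v4i64 with mask <2, 3, 0, 1> the
// immediate is 2 + (3 << 2) + (0 << 4) + (1 << 6) = 78 (0b01001110), i.e. an
// xvpermi.d that swaps the two 128-bit halves of V1. This is the same constant
// used below when canonicalizing lane order.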
2334
2335/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2336static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2337 MVT VT, SDValue V1, SelectionDAG &DAG,
2338 const LoongArchSubtarget &Subtarget) {
2339  // LoongArch LASX only has XVPERM_W.
2340 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2341 return SDValue();
2342
2343 unsigned NumElts = VT.getVectorNumElements();
2344 unsigned HalfSize = NumElts / 2;
2345 bool FrontLo = true, FrontHi = true;
2346 bool BackLo = true, BackHi = true;
2347
2348 auto inRange = [](int val, int low, int high) {
2349 return (val == -1) || (val >= low && val < high);
2350 };
2351
2352 for (unsigned i = 0; i < HalfSize; ++i) {
2353 int Fronti = Mask[i];
2354 int Backi = Mask[i + HalfSize];
2355
2356 FrontLo &= inRange(Fronti, 0, HalfSize);
2357 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2358 BackLo &= inRange(Backi, 0, HalfSize);
2359 BackHi &= inRange(Backi, HalfSize, NumElts);
2360 }
2361
2362 // If both the lower and upper 128-bit parts access only one half of the
2363  // vector (either lower or upper), avoid using xvperm.w: the latency of
2364  // xvperm.w (3) is higher than that of xvshuf (1) plus xvori (1).
2365 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2366 return SDValue();
2367
2368  SmallVector<SDValue, 8> Masks;
2369 MVT GRLenVT = Subtarget.getGRLenVT();
2370 for (unsigned i = 0; i < NumElts; ++i)
2371 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2372 : DAG.getConstant(Mask[i], DL, GRLenVT));
2373 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2374
2375 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2376}
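// Worked example (illustrative): for v8i32 with single-input mask
// <0, 4, 1, 5, 2, 6, 3, 7>, both the low and high halves of the result pull
// from both halves of the source, so none of FrontLo/FrontHi/BackLo/BackHi
// hold, the early bail-out is skipped, and the shuffle is lowered to an XVPERM
// whose index operand is the mask materialized as a v8i32 build vector.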
2377
2378/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2379static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2380 MVT VT, SDValue V1, SDValue V2,
2381 SelectionDAG &DAG) {
2382 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2383}
2384
2385/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2386static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2387 MVT VT, SDValue V1, SDValue V2,
2388 SelectionDAG &DAG) {
2389 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2390}
2391
2392/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2393static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2394 MVT VT, SDValue V1, SDValue V2,
2395 SelectionDAG &DAG) {
2396
2397 const auto &Begin = Mask.begin();
2398 const auto &End = Mask.end();
2399 unsigned HalfSize = Mask.size() / 2;
2400 unsigned LeftSize = HalfSize / 2;
2401 SDValue OriV1 = V1, OriV2 = V2;
2402
2403 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2404 1) &&
2405 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2406 V1 = OriV1;
2407 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2408 Mask.size() + HalfSize - LeftSize, 1) &&
2409 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2410 Mask.size() + HalfSize + LeftSize, 1))
2411 V1 = OriV2;
2412 else
2413 return SDValue();
2414
2415 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2416 1) &&
2417 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2418 1))
2419 V2 = OriV1;
2420 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2421 Mask.size() + HalfSize - LeftSize, 1) &&
2422 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2423 Mask.size() + HalfSize + LeftSize, 1))
2424 V2 = OriV2;
2425 else
2426 return SDValue();
2427
2428 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2429}
2430
2431/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2432static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2433 MVT VT, SDValue V1, SDValue V2,
2434 SelectionDAG &DAG) {
2435
2436 const auto &Begin = Mask.begin();
2437 const auto &End = Mask.end();
2438 unsigned HalfSize = Mask.size() / 2;
2439 SDValue OriV1 = V1, OriV2 = V2;
2440
2441 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2442 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2443 V1 = OriV1;
2444 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2445 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2446 Mask.size() + HalfSize, 1))
2447 V1 = OriV2;
2448 else
2449 return SDValue();
2450
2451 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2452 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2453 V2 = OriV1;
2454 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2455 1) &&
2456 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2457 Mask.size() + HalfSize, 1))
2458 V2 = OriV2;
2459 else
2460 return SDValue();
2461
2462 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2463}
2464
2465/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2466static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2467 MVT VT, SDValue V1, SDValue V2,
2468 SelectionDAG &DAG) {
2469
2470 const auto &Begin = Mask.begin();
2471 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2472 const auto &Mid = Mask.begin() + Mask.size() / 2;
2473 const auto &RightMid = Mask.end() - Mask.size() / 4;
2474 const auto &End = Mask.end();
2475 unsigned HalfSize = Mask.size() / 2;
2476 SDValue OriV1 = V1, OriV2 = V2;
2477
2478 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2479 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2480 V1 = OriV1;
2481 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2482 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2483 V1 = OriV2;
2484 else
2485 return SDValue();
2486
2487 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2488 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2489 V2 = OriV1;
2490 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2491 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2492 V2 = OriV2;
2493
2494 else
2495 return SDValue();
2496
2497 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2498}
2499
2500/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2501static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2502 MVT VT, SDValue V1, SDValue V2,
2503 SelectionDAG &DAG) {
2504
2505 const auto &Begin = Mask.begin();
2506 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2507 const auto &Mid = Mask.begin() + Mask.size() / 2;
2508 const auto &RightMid = Mask.end() - Mask.size() / 4;
2509 const auto &End = Mask.end();
2510 unsigned HalfSize = Mask.size() / 2;
2511 SDValue OriV1 = V1, OriV2 = V2;
2512
2513 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2514 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2515 V1 = OriV1;
2516 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2517 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2518 2))
2519 V1 = OriV2;
2520 else
2521 return SDValue();
2522
2523 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2524 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2525 V2 = OriV1;
2526 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2527 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2528 2))
2529 V2 = OriV2;
2530 else
2531 return SDValue();
2532
2533 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2534}
2535
2536/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2537static SDValue
2538lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2539 SDValue V1, SDValue V2, SelectionDAG &DAG,
2540 const LoongArchSubtarget &Subtarget) {
2541 // LoongArch LASX only supports xvinsve0.{w/d}.
2542 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2543 VT != MVT::v4f64)
2544 return SDValue();
2545
2546 MVT GRLenVT = Subtarget.getGRLenVT();
2547 int MaskSize = Mask.size();
2548 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2549
2550 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2551 // all other elements are either 'Base + i' or undef (-1). On success, return
2552 // the index of the replaced element. Otherwise, just return -1.
2553 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2554 int Idx = -1;
2555 for (int i = 0; i < MaskSize; ++i) {
2556 if (Mask[i] == Base + i || Mask[i] == -1)
2557 continue;
2558 if (Mask[i] != Replaced)
2559 return -1;
2560 if (Idx == -1)
2561 Idx = i;
2562 else
2563 return -1;
2564 }
2565 return Idx;
2566 };
2567
2568 // Case 1: the lowest element of V2 replaces one element in V1.
2569 int Idx = checkReplaceOne(0, MaskSize);
2570 if (Idx != -1)
2571 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2572 DAG.getConstant(Idx, DL, GRLenVT));
2573
2574 // Case 2: the lowest element of V1 replaces one element in V2.
2575 Idx = checkReplaceOne(MaskSize, 0);
2576 if (Idx != -1)
2577 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2578 DAG.getConstant(Idx, DL, GRLenVT));
2579
2580 return SDValue();
2581}
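// Worked example (illustrative): for v8i32 with mask <0, 1, 2, 8, 4, 5, 6, 7>,
// every element is 'i' except position 3, which selects element 8, i.e. the
// lowest element of V2. checkReplaceOne(0, 8) therefore returns Idx = 3 and the
// shuffle lowers to (XVINSVE0 V1, V2, 3): element 0 of V2 replaces element 3 of
// V1.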
2582
2583/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2584static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2585 MVT VT, SDValue V1, SDValue V2,
2586 SelectionDAG &DAG) {
2587
2588 int MaskSize = Mask.size();
2589 int HalfSize = Mask.size() / 2;
2590 const auto &Begin = Mask.begin();
2591 const auto &Mid = Mask.begin() + HalfSize;
2592 const auto &End = Mask.end();
2593
2594 // VECTOR_SHUFFLE concatenates the vectors:
2595 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2596 // shuffling ->
2597 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2598 //
2599 // XVSHUF concatenates the vectors:
2600 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2601 // shuffling ->
2602 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2603 SmallVector<SDValue, 8> MaskAlloc;
2604 for (auto it = Begin; it < Mid; it++) {
2605 if (*it < 0) // UNDEF
2606 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2607 else if ((*it >= 0 && *it < HalfSize) ||
2608 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2609 int M = *it < HalfSize ? *it : *it - HalfSize;
2610 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2611 } else
2612 return SDValue();
2613 }
2614 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2615
2616 for (auto it = Mid; it < End; it++) {
2617 if (*it < 0) // UNDEF
2618 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2619 else if ((*it >= HalfSize && *it < MaskSize) ||
2620 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2621 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2622 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2623 } else
2624 return SDValue();
2625 }
2626 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2627
2628 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2629 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2630 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2631}
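// Worked example (illustrative): for v8i32 with mask <0, 1, 8, 9, 4, 5, 12, 13>,
// the first half may only reference elements 0..3 of V1 or 8..11 of V2 and is
// rewritten to <0, 1, 4, 5>; the second half may only reference elements 4..7
// of V1 or 12..15 of V2 and is likewise rewritten to <0, 1, 4, 5>, giving the
// lane-local index vector <0, 1, 4, 5, 0, 1, 4, 5> that is passed to VSHUF.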
2632
2633/// Shuffle vectors by lane to generate more optimized instructions.
2634/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2635///
2636/// Therefore, except for the following four cases, other cases are regarded
2637/// as cross-lane shuffles, where optimization is relatively limited.
2638///
2639/// - Shuffle high, low lanes of two input vectors
2640/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2641/// - Shuffle low, high lanes of two input vectors
2642/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2643/// - Shuffle low, low lanes of two input vectors
2644/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2645/// - Shuffle high, high lanes of two input vectors
2646/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2647///
2648/// The first case is the closest to LoongArch instructions and the other
2649/// cases need to be converted to it for processing.
2650///
2651/// This function will return true for the last three cases above and will
2652/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2653/// cross-lane shuffle cases.
2654static bool canonicalizeShuffleVectorByLane(
2655 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2656 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2657
2658 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2659
2660 int MaskSize = Mask.size();
2661 int HalfSize = Mask.size() / 2;
2662 MVT GRLenVT = Subtarget.getGRLenVT();
2663
2664 HalfMaskType preMask = None, postMask = None;
2665
2666 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2667 return M < 0 || (M >= 0 && M < HalfSize) ||
2668 (M >= MaskSize && M < MaskSize + HalfSize);
2669 }))
2670 preMask = HighLaneTy;
2671 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2672 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2673 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2674 }))
2675 preMask = LowLaneTy;
2676
2677 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2678 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2679 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2680 }))
2681 postMask = LowLaneTy;
2682 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2683 return M < 0 || (M >= 0 && M < HalfSize) ||
2684 (M >= MaskSize && M < MaskSize + HalfSize);
2685 }))
2686 postMask = HighLaneTy;
2687
2688 // The pre-half of mask is high lane type, and the post-half of mask
2689 // is low lane type, which is closest to the LoongArch instructions.
2690 //
2691  // Note: In the LoongArch architecture, the high lane of the mask corresponds
2692  // to the lower 128 bits of the vector register, and the low lane of the mask
2693  // corresponds to the higher 128 bits of the vector register.
2694 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2695 return false;
2696 }
2697 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2698 V1 = DAG.getBitcast(MVT::v4i64, V1);
2699 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2700 DAG.getConstant(0b01001110, DL, GRLenVT));
2701 V1 = DAG.getBitcast(VT, V1);
2702
2703 if (!V2.isUndef()) {
2704 V2 = DAG.getBitcast(MVT::v4i64, V2);
2705 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2706 DAG.getConstant(0b01001110, DL, GRLenVT));
2707 V2 = DAG.getBitcast(VT, V2);
2708 }
2709
2710 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2711 *it = *it < 0 ? *it : *it - HalfSize;
2712 }
2713 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2714 *it = *it < 0 ? *it : *it + HalfSize;
2715 }
2716 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2717 V1 = DAG.getBitcast(MVT::v4i64, V1);
2718 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2719 DAG.getConstant(0b11101110, DL, GRLenVT));
2720 V1 = DAG.getBitcast(VT, V1);
2721
2722 if (!V2.isUndef()) {
2723 V2 = DAG.getBitcast(MVT::v4i64, V2);
2724 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2725 DAG.getConstant(0b11101110, DL, GRLenVT));
2726 V2 = DAG.getBitcast(VT, V2);
2727 }
2728
2729 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2730 *it = *it < 0 ? *it : *it - HalfSize;
2731 }
2732 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2733 V1 = DAG.getBitcast(MVT::v4i64, V1);
2734 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2735 DAG.getConstant(0b01000100, DL, GRLenVT));
2736 V1 = DAG.getBitcast(VT, V1);
2737
2738 if (!V2.isUndef()) {
2739 V2 = DAG.getBitcast(MVT::v4i64, V2);
2740 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2741 DAG.getConstant(0b01000100, DL, GRLenVT));
2742 V2 = DAG.getBitcast(VT, V2);
2743 }
2744
2745 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2746 *it = *it < 0 ? *it : *it + HalfSize;
2747 }
2748 } else { // cross-lane
2749 return false;
2750 }
2751
2752 return true;
2753}
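// Worked example (illustrative): for v8i32 with mask <0, 1, 2, 3, 0, 1, 2, 3>
// both halves reference only the low 128-bit lane, so preMask == postMask ==
// HighLaneTy. V1 (and V2, if used) is permuted with immediate 0b01000100 to
// copy its low lane into both lanes, the second half of the mask is shifted up
// to give <0, 1, 2, 3, 4, 5, 6, 7>, and the caller retries the now lane-local
// shuffle.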
2754
2755/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2756/// Only for 256-bit vector.
2757///
2758/// For example:
2759/// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2760///                    <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2761/// is lowered to:
2762/// (XVPERMI $xr2, $xr0, 78)
2763/// (XVSHUF $xr1, $xr2, $xr0)
2764/// (XVORI $xr0, $xr1, 0)
2765static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2766 ArrayRef<int> Mask,
2767 MVT VT, SDValue V1,
2768 SDValue V2,
2769 SelectionDAG &DAG) {
2770 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2771 int Size = Mask.size();
2772 int LaneSize = Size / 2;
2773
2774 bool LaneCrossing[2] = {false, false};
2775 for (int i = 0; i < Size; ++i)
2776 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2777 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2778
2779  // Ensure that at least one element actually crosses lanes.
2780 if (!LaneCrossing[0] && !LaneCrossing[1])
2781 return SDValue();
2782
2783 SmallVector<int> InLaneMask;
2784 InLaneMask.assign(Mask.begin(), Mask.end());
2785 for (int i = 0; i < Size; ++i) {
2786 int &M = InLaneMask[i];
2787 if (M < 0)
2788 continue;
2789 if (((M % Size) / LaneSize) != (i / LaneSize))
2790 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2791 }
2792
2793 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2794 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2795 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2796 Flipped = DAG.getBitcast(VT, Flipped);
2797 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2798}
2799
2800/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2801///
2802/// This routine breaks down the specific type of 256-bit shuffle and
2803/// dispatches to the lowering routines accordingly.
2804static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2805 SDValue V1, SDValue V2, SelectionDAG &DAG,
2806 const LoongArchSubtarget &Subtarget) {
2807 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2808 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2809 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2810         "Vector type is unsupported for lasx!");
2811  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2812 "Two operands have different types!");
2813 assert(VT.getVectorNumElements() == Mask.size() &&
2814 "Unexpected mask size for shuffle!");
2815 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2816 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2817
2818 APInt KnownUndef, KnownZero;
2819 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2820 APInt Zeroable = KnownUndef | KnownZero;
2821
2822 SDValue Result;
2823 // TODO: Add more comparison patterns.
2824 if (V2.isUndef()) {
2825 if ((Result =
2826 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2827 return Result;
2828 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2829 Subtarget)))
2830 return Result;
2831 // Try to widen vectors to gain more optimization opportunities.
2832 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2833 return NewShuffle;
2834 if ((Result =
2835 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2836 return Result;
2837 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2838 return Result;
2839 if ((Result =
2840 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2841 return Result;
2842
2843 // TODO: This comment may be enabled in the future to better match the
2844 // pattern for instruction selection.
2845 /* V2 = V1; */
2846 }
2847
2848 // It is recommended not to change the pattern comparison order for better
2849 // performance.
2850 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2851 return Result;
2852 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2853 return Result;
2854 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2855 return Result;
2856 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2857 return Result;
2858 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2859 return Result;
2860 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2861 return Result;
2862 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2863 Zeroable)))
2864 return Result;
2865 if ((Result =
2866 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2867 return Result;
2868 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2869 Subtarget)))
2870 return Result;
2871
2872  // Canonicalize non-cross-lane shuffle vectors.
2873 SmallVector<int> NewMask(Mask);
2874 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2875 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2876
2877 // FIXME: Handling the remaining cases earlier can degrade performance
2878 // in some situations. Further analysis is required to enable more
2879 // effective optimizations.
2880 if (V2.isUndef()) {
2881 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2882 V1, V2, DAG)))
2883 return Result;
2884 }
2885
2886 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2887 return NewShuffle;
2888 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2889 return Result;
2890
2891 return SDValue();
2892}
2893
2894SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2895 SelectionDAG &DAG) const {
2896 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2897 ArrayRef<int> OrigMask = SVOp->getMask();
2898 SDValue V1 = Op.getOperand(0);
2899 SDValue V2 = Op.getOperand(1);
2900 MVT VT = Op.getSimpleValueType();
2901 int NumElements = VT.getVectorNumElements();
2902 SDLoc DL(Op);
2903
2904 bool V1IsUndef = V1.isUndef();
2905 bool V2IsUndef = V2.isUndef();
2906 if (V1IsUndef && V2IsUndef)
2907 return DAG.getUNDEF(VT);
2908
2909  // When we create a shuffle node we put the UNDEF node in the second operand,
2910 // but in some cases the first operand may be transformed to UNDEF.
2911 // In this case we should just commute the node.
2912 if (V1IsUndef)
2913 return DAG.getCommutedVectorShuffle(*SVOp);
2914
2915 // Check for non-undef masks pointing at an undef vector and make the masks
2916 // undef as well. This makes it easier to match the shuffle based solely on
2917 // the mask.
2918 if (V2IsUndef &&
2919 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2920 SmallVector<int, 8> NewMask(OrigMask);
2921 for (int &M : NewMask)
2922 if (M >= NumElements)
2923 M = -1;
2924 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2925 }
2926
2927 // Check for illegal shuffle mask element index values.
2928 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2929 (void)MaskUpperLimit;
2930 assert(llvm::all_of(OrigMask,
2931 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2932 "Out of bounds shuffle index");
2933
2934 // For each vector width, delegate to a specialized lowering routine.
2935 if (VT.is128BitVector())
2936 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2937
2938 if (VT.is256BitVector())
2939 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2940
2941 return SDValue();
2942}
2943
2944SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2945 SelectionDAG &DAG) const {
2946 // Custom lower to ensure the libcall return is passed in an FPR on hard
2947 // float ABIs.
2948 SDLoc DL(Op);
2949 MakeLibCallOptions CallOptions;
2950 SDValue Op0 = Op.getOperand(0);
2951 SDValue Chain = SDValue();
2952 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2953 SDValue Res;
2954 std::tie(Res, Chain) =
2955 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2956 if (Subtarget.is64Bit())
2957 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2958 return DAG.getBitcast(MVT::i32, Res);
2959}
2960
2961SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2962 SelectionDAG &DAG) const {
2963 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2964 // float ABIs.
2965 SDLoc DL(Op);
2966 MakeLibCallOptions CallOptions;
2967 SDValue Op0 = Op.getOperand(0);
2968 SDValue Chain = SDValue();
2969 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2970 DL, MVT::f32, Op0)
2971 : DAG.getBitcast(MVT::f32, Op0);
2972 SDValue Res;
2973 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2974 CallOptions, DL, Chain);
2975 return Res;
2976}
2977
2978SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2979 SelectionDAG &DAG) const {
2980 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2981 SDLoc DL(Op);
2982 MakeLibCallOptions CallOptions;
2983 RTLIB::Libcall LC =
2984 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2985 SDValue Res =
2986 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2987 if (Subtarget.is64Bit())
2988 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2989 return DAG.getBitcast(MVT::i32, Res);
2990}
2991
2992SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2993 SelectionDAG &DAG) const {
2994 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2995 MVT VT = Op.getSimpleValueType();
2996 SDLoc DL(Op);
2997 Op = DAG.getNode(
2998 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2999 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
3000 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3001 DL, MVT::f32, Op)
3002 : DAG.getBitcast(MVT::f32, Op);
3003 if (VT != MVT::f32)
3004 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
3005 return Res;
3006}
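// Worked example (illustrative): a bf16 value is just the high 16 bits of an
// f32, so extending it is a 16-bit left shift of the raw bits followed by a
// bitcast/move into an FPR. For bf16 1.0 (bits 0x3F80) the shift yields
// 0x3F800000, which is exactly 1.0f.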
3007
3008// Lower BUILD_VECTOR as broadcast load (if possible).
3009// For example:
3010// %a = load i8, ptr %ptr
3011// %b = build_vector %a, %a, %a, %a
3012// is lowered to:
3013// (VLDREPL_B $a0, 0)
3014static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
3015 const SDLoc &DL,
3016 SelectionDAG &DAG) {
3017 MVT VT = BVOp->getSimpleValueType(0);
3018 int NumOps = BVOp->getNumOperands();
3019
3020 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3021 "Unsupported vector type for broadcast.");
3022
3023 SDValue IdentitySrc;
3024 bool IsIdeneity = true;
3025
3026 for (int i = 0; i != NumOps; i++) {
3027 SDValue Op = BVOp->getOperand(i);
3028 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3029 IsIdeneity = false;
3030 break;
3031 }
3032 IdentitySrc = BVOp->getOperand(0);
3033 }
3034
3035  // Make sure that this load is valid and only has one user.
3036 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
3037 return SDValue();
3038
3039 auto *LN = cast<LoadSDNode>(IdentitySrc);
3040 auto ExtType = LN->getExtensionType();
3041
3042 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3043 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3044 // Indexed loads and stores are not supported on LoongArch.
3045 assert(LN->isUnindexed() && "Unexpected indexed load.");
3046
3047 SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3048 // The offset operand of unindexed load is always undefined, so there is
3049 // no need to pass it to VLDREPL.
3050 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3051 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
3052 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3053 return BCast;
3054 }
3055 return SDValue();
3056}
3057
3058// Sequentially insert elements from Ops into Vector, from low to high indices.
3059// Note: Ops can have fewer elements than Vector.
3060static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
3061 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3062 EVT ResTy) {
3063 assert(Ops.size() <= ResTy.getVectorNumElements());
3064
3065 SDValue Op0 = Ops[0];
3066 if (!Op0.isUndef())
3067 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
3068 for (unsigned i = 1; i < Ops.size(); ++i) {
3069 SDValue Opi = Ops[i];
3070 if (Opi.isUndef())
3071 continue;
3072 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3073 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3074 }
3075}
3076
3077// Build a ResTy subvector from Node, taking NumElts elements starting at index
3078// 'first'.
3079static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
3080 SelectionDAG &DAG, SDLoc DL,
3081 const LoongArchSubtarget &Subtarget,
3082 EVT ResTy, unsigned first) {
3083 unsigned NumElts = ResTy.getVectorNumElements();
3084
3085 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3086
3087 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3088 Node->op_begin() + first + NumElts);
3089 SDValue Vector = DAG.getUNDEF(ResTy);
3090 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3091 return Vector;
3092}
3093
3094SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3095 SelectionDAG &DAG) const {
3096 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
3097 MVT VT = Node->getSimpleValueType(0);
3098 EVT ResTy = Op->getValueType(0);
3099 unsigned NumElts = ResTy.getVectorNumElements();
3100 SDLoc DL(Op);
3101 APInt SplatValue, SplatUndef;
3102 unsigned SplatBitSize;
3103 bool HasAnyUndefs;
3104 bool IsConstant = false;
3105 bool UseSameConstant = true;
3106 SDValue ConstantValue;
3107 bool Is128Vec = ResTy.is128BitVector();
3108 bool Is256Vec = ResTy.is256BitVector();
3109
3110 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3111 (!Subtarget.hasExtLASX() || !Is256Vec))
3112 return SDValue();
3113
3114 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
3115 return Result;
3116
3117 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3118 /*MinSplatBits=*/8) &&
3119 SplatBitSize <= 64) {
3120 // We can only cope with 8, 16, 32, or 64-bit elements.
3121 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3122 SplatBitSize != 64)
3123 return SDValue();
3124
3125 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3126 // We can only handle 64-bit elements that are within
3127 // the signed 10-bit range or match vldi patterns on 32-bit targets.
3128 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3129 if (!SplatValue.isSignedIntN(10) &&
3130 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3131 return SDValue();
3132 if ((Is128Vec && ResTy == MVT::v4i32) ||
3133 (Is256Vec && ResTy == MVT::v8i32))
3134 return Op;
3135 }
3136
3137 EVT ViaVecTy;
3138
3139 switch (SplatBitSize) {
3140 default:
3141 return SDValue();
3142 case 8:
3143 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3144 break;
3145 case 16:
3146 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3147 break;
3148 case 32:
3149 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3150 break;
3151 case 64:
3152 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3153 break;
3154 }
3155
3156 // SelectionDAG::getConstant will promote SplatValue appropriately.
3157 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3158
3159 // Bitcast to the type we originally wanted.
3160 if (ViaVecTy != ResTy)
3161 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3162
3163 return Result;
3164 }
3165
3166 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3167 return Op;
3168
3169 for (unsigned i = 0; i < NumElts; ++i) {
3170 SDValue Opi = Node->getOperand(i);
3171 if (isIntOrFPConstant(Opi)) {
3172 IsConstant = true;
3173 if (!ConstantValue.getNode())
3174 ConstantValue = Opi;
3175 else if (ConstantValue != Opi)
3176 UseSameConstant = false;
3177 }
3178 }
3179
3180 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3181 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
3182 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3183 for (unsigned i = 0; i < NumElts; ++i) {
3184 SDValue Opi = Node->getOperand(i);
3185 if (!isIntOrFPConstant(Opi))
3186 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3187 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3188 }
3189 return Result;
3190 }
3191
3192 if (!IsConstant) {
3193 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3194 // the sub-sequence of the vector and then broadcast the sub-sequence.
3195 //
3196 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3197 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3198 // generates worse code in some cases. This could be further optimized
3199    // with more consideration.
3200    SmallVector<SDValue> Sequence;
3201 BitVector UndefElements;
3202 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3203 UndefElements.count() == 0) {
3204      // Use LSX instructions to fill the sub-sequence of a 256-bit vector,
3205      // because the high part can simply be treated as undef.
3206 SDValue Vector = DAG.getUNDEF(ResTy);
3207      EVT FillTy = Is256Vec
3208                       ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
3209 : ResTy;
3210 SDValue FillVec =
3211 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3212
3213 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3214
3215 unsigned SeqLen = Sequence.size();
3216 unsigned SplatLen = NumElts / SeqLen;
3217 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3218 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3219
3220      // If the size of the sub-sequence is half of a 256-bit vector, bitcast the
3221      // vector to v4i64 in order to match the pattern of XVREPLVE0Q.
3222 if (SplatEltTy == MVT::i128)
3223 SplatTy = MVT::v4i64;
3224
3225 SDValue SplatVec;
3226 SDValue SrcVec = DAG.getBitcast(
3227 SplatTy,
3228 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3229 if (Is256Vec) {
3230 SplatVec =
3231 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3232 : LoongArchISD::XVREPLVE0,
3233 DL, SplatTy, SrcVec);
3234 } else {
3235 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3236 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3237 }
3238
3239 return DAG.getBitcast(ResTy, SplatVec);
3240 }
3241
3242    // Use INSERT_VECTOR_ELT operations rather than expanding to stores, because
3243    // using memory operations is much slower.
3244 //
3245 // For 256-bit vectors, normally split into two halves and concatenate.
3246 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3247    // one non-undef element, skip splitting to avoid a worse result.
3248 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3249 ResTy == MVT::v4f64) {
3250 unsigned NonUndefCount = 0;
3251 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3252 if (!Node->getOperand(i).isUndef()) {
3253 ++NonUndefCount;
3254 if (NonUndefCount > 1)
3255 break;
3256 }
3257 }
3258 if (NonUndefCount == 1)
3259 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3260 }
3261
3262 EVT VecTy =
3263 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3264 SDValue Vector =
3265 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3266
3267 if (Is128Vec)
3268 return Vector;
3269
3270 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3271 VecTy, NumElts / 2);
3272
3273 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3274 }
3275
3276 return SDValue();
3277}
3278
3279SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3280 SelectionDAG &DAG) const {
3281 SDLoc DL(Op);
3282 MVT ResVT = Op.getSimpleValueType();
3283 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3284
3285 unsigned NumOperands = Op.getNumOperands();
3286 unsigned NumFreezeUndef = 0;
3287 unsigned NumZero = 0;
3288 unsigned NumNonZero = 0;
3289 unsigned NonZeros = 0;
3290 SmallSet<SDValue, 4> Undefs;
3291 for (unsigned i = 0; i != NumOperands; ++i) {
3292 SDValue SubVec = Op.getOperand(i);
3293 if (SubVec.isUndef())
3294 continue;
3295 if (ISD::isFreezeUndef(SubVec.getNode())) {
3296 // If the freeze(undef) has multiple uses then we must fold to zero.
3297 if (SubVec.hasOneUse()) {
3298 ++NumFreezeUndef;
3299 } else {
3300 ++NumZero;
3301 Undefs.insert(SubVec);
3302 }
3303 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3304 ++NumZero;
3305 else {
3306 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3307 NonZeros |= 1 << i;
3308 ++NumNonZero;
3309 }
3310 }
3311
3312 // If we have more than 2 non-zeros, build each half separately.
3313 if (NumNonZero > 2) {
3314 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3315 ArrayRef<SDUse> Ops = Op->ops();
3316 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3317 Ops.slice(0, NumOperands / 2));
3318 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3319 Ops.slice(NumOperands / 2));
3320 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3321 }
3322
3323 // Otherwise, build it up through insert_subvectors.
3324 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3325 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3326 : DAG.getUNDEF(ResVT));
3327
3328 // Replace Undef operands with ZeroVector.
3329 for (SDValue U : Undefs)
3330 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3331
3332 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3333 unsigned NumSubElems = SubVT.getVectorNumElements();
3334 for (unsigned i = 0; i != NumOperands; ++i) {
3335 if ((NonZeros & (1 << i)) == 0)
3336 continue;
3337
3338 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3339 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3340 }
3341
3342 return Vec;
3343}
3344
3345SDValue
3346LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3347 SelectionDAG &DAG) const {
3348 MVT EltVT = Op.getSimpleValueType();
3349 SDValue Vec = Op->getOperand(0);
3350 EVT VecTy = Vec->getValueType(0);
3351 SDValue Idx = Op->getOperand(1);
3352 SDLoc DL(Op);
3353 MVT GRLenVT = Subtarget.getGRLenVT();
3354
3355 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3356
3357 if (isa<ConstantSDNode>(Idx))
3358 return Op;
3359
3360 switch (VecTy.getSimpleVT().SimpleTy) {
3361 default:
3362 llvm_unreachable("Unexpected type");
3363 case MVT::v32i8:
3364 case MVT::v16i16:
3365 case MVT::v4i64:
3366 case MVT::v4f64: {
3367    // Extract the high half subvector and place it in the low half of a new
3368 // vector. It doesn't matter what the high half of the new vector is.
3369 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3370 SDValue VecHi =
3371 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3372 SDValue TmpVec =
3373 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3374 VecHi, DAG.getConstant(0, DL, GRLenVT));
3375
3376    // Shuffle the original Vec and TmpVec using MaskVec; the lowest element
3377    // of MaskVec is Idx and the rest do not matter. ResVec[0] will hold the
3378 // desired element.
3379 SDValue IdxCp =
3380 Subtarget.is64Bit()
3381 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3382 : DAG.getBitcast(MVT::f32, Idx);
3383 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3384 SDValue MaskVec =
3385 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3386 SDValue ResVec =
3387 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3388
3389 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3390 DAG.getConstant(0, DL, GRLenVT));
3391 }
3392 case MVT::v8i32:
3393 case MVT::v8f32: {
3394 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3395 SDValue SplatValue =
3396 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3397
3398 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3399 DAG.getConstant(0, DL, GRLenVT));
3400 }
3401 }
3402}
3403
3404SDValue
3405LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3406 SelectionDAG &DAG) const {
3407 MVT VT = Op.getSimpleValueType();
3408 MVT EltVT = VT.getVectorElementType();
3409 unsigned NumElts = VT.getVectorNumElements();
3410 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3411 SDLoc DL(Op);
3412 SDValue Op0 = Op.getOperand(0);
3413 SDValue Op1 = Op.getOperand(1);
3414 SDValue Op2 = Op.getOperand(2);
3415
3416 if (isa<ConstantSDNode>(Op2))
3417 return Op;
3418
3419 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3420 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3421
3422 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3423 return SDValue();
3424
3425 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3426 SmallVector<SDValue, 32> RawIndices;
3427 SDValue SplatIdx;
3428 SDValue Indices;
3429
3430 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3431 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3432 for (unsigned i = 0; i < NumElts; ++i) {
3433 RawIndices.push_back(Op2);
3434 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3435 }
3436 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3437 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3438
3439 RawIndices.clear();
3440 for (unsigned i = 0; i < NumElts; ++i) {
3441 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3442 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3443 }
3444 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3445 Indices = DAG.getBitcast(IdxVTy, Indices);
3446 } else {
3447 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3448
3449 for (unsigned i = 0; i < NumElts; ++i)
3450 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3451 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3452 }
3453
3454 // insert vec, elt, idx
3455 // =>
3456 // select (splatidx == {0,1,2...}) ? splatelt : vec
3457 SDValue SelectCC =
3458 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3459 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3460}
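// Worked example of the vselect-based insert above (sketch, VT = v4i32,
// element value e, runtime index 2):
//   SplatElt = {e, e, e, e}
//   SplatIdx = {2, 2, 2, 2}
//   Indices  = {0, 1, 2, 3}
//   Mask     = seteq(SplatIdx, Indices) = {false, false, true, false}
//   Result   = vselect(Mask, SplatElt, Vec) = {Vec[0], Vec[1], e, Vec[3]}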
3461
3462SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3463 SelectionDAG &DAG) const {
3464 SDLoc DL(Op);
3465 SyncScope::ID FenceSSID =
3466 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3467
3468 // singlethread fences only synchronize with signal handlers on the same
3469 // thread and thus only need to preserve instruction order, not actually
3470 // enforce memory ordering.
3471 if (FenceSSID == SyncScope::SingleThread)
3472 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3473 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3474
3475 return Op;
3476}
3477
3478SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3479 SelectionDAG &DAG) const {
3480
3481 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3482 DAG.getContext()->emitError(
3483 "On LA64, only 64-bit registers can be written.");
3484 return Op.getOperand(0);
3485 }
3486
3487 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3488 DAG.getContext()->emitError(
3489 "On LA32, only 32-bit registers can be written.");
3490 return Op.getOperand(0);
3491 }
3492
3493 return Op;
3494}
3495
3496SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3497 SelectionDAG &DAG) const {
3498 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3499 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3500 "be a constant integer");
3501 return SDValue();
3502 }
3503
3504 MachineFunction &MF = DAG.getMachineFunction();
3505 MF.getFrameInfo().setFrameAddressIsTaken(true);
3506 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3507 EVT VT = Op.getValueType();
3508 SDLoc DL(Op);
3509 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3510 unsigned Depth = Op.getConstantOperandVal(0);
3511 int GRLenInBytes = Subtarget.getGRLen() / 8;
3512
3513 while (Depth--) {
3514 int Offset = -(GRLenInBytes * 2);
3515 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3516 DAG.getSignedConstant(Offset, DL, VT));
3517 FrameAddr =
3518 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3519 }
3520 return FrameAddr;
3521}
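// For illustration: on LA64 GRLenInBytes is 8, so each loop iteration above
// reloads the previous frame pointer from [FrameAddr - 16]. A request such as
// __builtin_frame_address(2) therefore lowers to roughly
//   fp1 = load (fp0 - 16)
//   fp2 = load (fp1 - 16)
// assuming the caller's frame pointer is always spilled at that fixed offset,
// which is what the loop relies on.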
3522
3523SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3524 SelectionDAG &DAG) const {
3525 // Currently only support lowering return address for current frame.
3526 if (Op.getConstantOperandVal(0) != 0) {
3527 DAG.getContext()->emitError(
3528 "return address can only be determined for the current frame");
3529 return SDValue();
3530 }
3531
3532 MachineFunction &MF = DAG.getMachineFunction();
3533 MF.getFrameInfo().setReturnAddressIsTaken(true);
3534 MVT GRLenVT = Subtarget.getGRLenVT();
3535
3536 // Return the value of the return address register, marking it an implicit
3537 // live-in.
3538 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3539 getRegClassFor(GRLenVT));
3540 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3541}
3542
3543SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3544 SelectionDAG &DAG) const {
3545 MachineFunction &MF = DAG.getMachineFunction();
3546 auto Size = Subtarget.getGRLen() / 8;
3547 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3548 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3549}
3550
3551SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3552 SelectionDAG &DAG) const {
3553 MachineFunction &MF = DAG.getMachineFunction();
3554 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3555
3556 SDLoc DL(Op);
3557 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3558 getPointerTy(MF.getDataLayout()));
3559
3560 // vastart just stores the address of the VarArgsFrameIndex slot into the
3561 // memory location argument.
3562 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3563 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3564 MachinePointerInfo(SV));
3565}
3566
3567SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3568 SelectionDAG &DAG) const {
3569 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3570 !Subtarget.hasBasicD() && "unexpected target features");
3571
3572 SDLoc DL(Op);
3573 SDValue Op0 = Op.getOperand(0);
3574 if (Op0->getOpcode() == ISD::AND) {
3575 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3576 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3577 return Op;
3578 }
3579
3580 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3581 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3582 Op0.getConstantOperandVal(2) == UINT64_C(0))
3583 return Op;
3584
3585 if (Op0.getOpcode() == ISD::AssertZext &&
3586 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3587 return Op;
3588
3589 EVT OpVT = Op0.getValueType();
3590 EVT RetVT = Op.getValueType();
3591 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3592 MakeLibCallOptions CallOptions;
3593 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3594 SDValue Chain = SDValue();
3595 SDValue Result;
3596 std::tie(Result, Chain) =
3597 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3598 return Result;
3599}
3600
3601SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3602 SelectionDAG &DAG) const {
3603 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3604 !Subtarget.hasBasicD() && "unexpected target features");
3605
3606 SDLoc DL(Op);
3607 SDValue Op0 = Op.getOperand(0);
3608
3609 if ((Op0.getOpcode() == ISD::AssertSext ||
3610 Op0.getOpcode() == ISD::AssertZext) &&
3611 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3612 return Op;
3613
3614 EVT OpVT = Op0.getValueType();
3615 EVT RetVT = Op.getValueType();
3616 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3617 MakeLibCallOptions CallOptions;
3618 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3619 SDValue Chain = SDValue();
3620 SDValue Result;
3621 std::tie(Result, Chain) =
3622 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3623 return Result;
3624}
3625
3626SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3627 SelectionDAG &DAG) const {
3628
3629 SDLoc DL(Op);
3630 EVT VT = Op.getValueType();
3631 SDValue Op0 = Op.getOperand(0);
3632 EVT Op0VT = Op0.getValueType();
3633
3634 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3635 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3636 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3637 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3638 }
3639 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3640 SDValue Lo, Hi;
3641 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3642 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3643 }
3644 return Op;
3645}
3646
3647SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3648 SelectionDAG &DAG) const {
3649
3650 SDLoc DL(Op);
3651 SDValue Op0 = Op.getOperand(0);
3652
3653 if (Op0.getValueType() == MVT::f16)
3654 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3655
3656 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3657 !Subtarget.hasBasicD()) {
3658 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3659 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3660 }
3661
3662 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3663 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3664 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3665}
3666
3667static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3668 SelectionDAG &DAG, unsigned Flags) {
3669 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3670}
3671
3672static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3673 SelectionDAG &DAG, unsigned Flags) {
3674 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3675 Flags);
3676}
3677
3678static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3679 SelectionDAG &DAG, unsigned Flags) {
3680 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3681 N->getOffset(), Flags);
3682}
3683
3684static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3685 SelectionDAG &DAG, unsigned Flags) {
3686 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3687}
3688
3689template <class NodeTy>
3690SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3691 CodeModel::Model M,
3692 bool IsLocal) const {
3693 SDLoc DL(N);
3694 EVT Ty = getPointerTy(DAG.getDataLayout());
3695 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3696 SDValue Load;
3697
3698 switch (M) {
3699 default:
3700 report_fatal_error("Unsupported code model");
3701
3702 case CodeModel::Large: {
3703 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3704
3705 // This is not actually used, but is necessary for successfully matching
3706 // the PseudoLA_*_LARGE nodes.
3707 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3708 if (IsLocal) {
3709 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3710 // eventually becomes the desired 5-insn code sequence.
3711 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3712 Tmp, Addr),
3713 0);
3714 } else {
3715 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3716 // eventually becomes the desired 5-insn code sequence.
3717 Load = SDValue(
3718 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3719 0);
3720 }
3721 break;
3722 }
3723
3724 case CodeModel::Small:
3725 case CodeModel::Medium:
3726 if (IsLocal) {
3727 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3728 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3729 Load = SDValue(
3730 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3731 } else {
3732 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3733 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3734 Load =
3735 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3736 }
3737 }
3738
3739 if (!IsLocal) {
3740 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3741 MachineFunction &MF = DAG.getMachineFunction();
3742 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3743 MachinePointerInfo::getGOT(MF),
3744 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3745 MachineMemOperand::MOInvariant,
3746 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3747 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3748 }
3749
3750 return Load;
3751}
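// Concrete (sketch) examples of the expansions described above, using $a0 as
// a placeholder register. A DSO-local symbol under the small/medium code
// model becomes
//   pcalau12i $a0, %pc_hi20(sym)
//   addi.d    $a0, $a0, %pc_lo12(sym)
// while a preemptible symbol is loaded from the GOT:
//   pcalau12i $a0, %got_pc_hi20(sym)
//   ld.d      $a0, $a0, %got_pc_lo12(sym)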
3752
3753SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3754 SelectionDAG &DAG) const {
3755 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3756 DAG.getTarget().getCodeModel());
3757}
3758
3759SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3760 SelectionDAG &DAG) const {
3761 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3762 DAG.getTarget().getCodeModel());
3763}
3764
3765SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3766 SelectionDAG &DAG) const {
3767 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3768 DAG.getTarget().getCodeModel());
3769}
3770
3771SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3772 SelectionDAG &DAG) const {
3773 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3774 assert(N->getOffset() == 0 && "unexpected offset in global node");
3775 auto CM = DAG.getTarget().getCodeModel();
3776 const GlobalValue *GV = N->getGlobal();
3777
3778 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3779 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3780 CM = *GCM;
3781 }
3782
3783 return getAddr(N, DAG, CM, GV->isDSOLocal());
3784}
3785
3786SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3787 SelectionDAG &DAG,
3788 unsigned Opc, bool UseGOT,
3789 bool Large) const {
3790 SDLoc DL(N);
3791 EVT Ty = getPointerTy(DAG.getDataLayout());
3792 MVT GRLenVT = Subtarget.getGRLenVT();
3793
3794 // This is not actually used, but is necessary for successfully matching the
3795 // PseudoLA_*_LARGE nodes.
3796 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3797 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3798
3799 // Only IE needs an extra argument for large code model.
3800 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3801 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3802 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3803
3804 // If it is LE for normal/medium code model, the add tp operation will occur
3805 // during the pseudo-instruction expansion.
3806 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3807 return Offset;
3808
3809 if (UseGOT) {
3810 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3811 MachineFunction &MF = DAG.getMachineFunction();
3812 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3813 MachinePointerInfo::getGOT(MF),
3814 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3815 MachineMemOperand::MOInvariant,
3816 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3817 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3818 }
3819
3820 // Add the thread pointer.
3821 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3822 DAG.getRegister(LoongArch::R2, GRLenVT));
3823}
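// In short (sketch): for initial-exec the GOT slot holding the symbol's TLS
// offset is loaded via PseudoLA_TLS_IE and then added to the thread pointer
// $tp (R2); for local-exec outside the large code model the PseudoLA_TLS_LE
// expansion already includes the $tp add, which is why Offset is returned
// directly in that case.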
3824
3825SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3826 SelectionDAG &DAG,
3827 unsigned Opc,
3828 bool Large) const {
3829 SDLoc DL(N);
3830 EVT Ty = getPointerTy(DAG.getDataLayout());
3831 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3832
3833 // This is not actually used, but is necessary for successfully matching the
3834 // PseudoLA_*_LARGE nodes.
3835 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3836
3837 // Use a PC-relative addressing mode to access the dynamic GOT address.
3838 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3839 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3840 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3841
3842 // Prepare argument list to generate call.
3843 ArgListTy Args;
3844 Args.emplace_back(Load, CallTy);
3845
3846 // Setup call to __tls_get_addr.
3847 TargetLowering::CallLoweringInfo CLI(DAG);
3848 CLI.setDebugLoc(DL)
3849 .setChain(DAG.getEntryNode())
3850 .setLibCallee(CallingConv::C, CallTy,
3851 DAG.getExternalSymbol("__tls_get_addr", Ty),
3852 std::move(Args));
3853
3854 return LowerCallTo(CLI).first;
3855}
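// Effectively (sketch), a general-/local-dynamic access becomes a libcall:
//   addr = __tls_get_addr(&GOT_entry(sym))
// where the GOT entry address is the value produced by the PseudoLA_TLS_GD or
// PseudoLA_TLS_LD node built above.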
3856
3857SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3858 SelectionDAG &DAG, unsigned Opc,
3859 bool Large) const {
3860 SDLoc DL(N);
3861 EVT Ty = getPointerTy(DAG.getDataLayout());
3862 const GlobalValue *GV = N->getGlobal();
3863
3864 // This is not actually used, but is necessary for successfully matching the
3865 // PseudoLA_*_LARGE nodes.
3866 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3867
3868 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3869 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3870 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3871 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3872 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3873}
3874
3875SDValue
3876LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3877 SelectionDAG &DAG) const {
3878 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3879 CallingConv::GHC)
3880 report_fatal_error("In GHC calling convention TLS is not supported");
3881
3882 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3883 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3884
3885 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3886 assert(N->getOffset() == 0 && "unexpected offset in global node");
3887
3888 if (DAG.getTarget().useEmulatedTLS())
3889 reportFatalUsageError("the emulated TLS is prohibited");
3890
3891 bool IsDesc = DAG.getTarget().useTLSDESC();
3892
3893 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3894 case TLSModel::GeneralDynamic:
3895 // In this model, application code calls the dynamic linker function
3896 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3897 // runtime.
3898 if (!IsDesc)
3899 return getDynamicTLSAddr(N, DAG,
3900 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3901 : LoongArch::PseudoLA_TLS_GD,
3902 Large);
3903 break;
3904 case TLSModel::LocalDynamic:
3905 // Same as GeneralDynamic, except for assembly modifiers and relocation
3906 // records.
3907 if (!IsDesc)
3908 return getDynamicTLSAddr(N, DAG,
3909 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3910 : LoongArch::PseudoLA_TLS_LD,
3911 Large);
3912 break;
3913 case TLSModel::InitialExec:
3914 // This model uses the GOT to resolve TLS offsets.
3915 return getStaticTLSAddr(N, DAG,
3916 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3917 : LoongArch::PseudoLA_TLS_IE,
3918 /*UseGOT=*/true, Large);
3919 case TLSModel::LocalExec:
3920 // This model is used when static linking as the TLS offsets are resolved
3921 // during program linking.
3922 //
3923 // This node doesn't need an extra argument for the large code model.
3924 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3925 /*UseGOT=*/false, Large);
3926 }
3927
3928 return getTLSDescAddr(N, DAG,
3929 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3930 : LoongArch::PseudoLA_TLS_DESC,
3931 Large);
3932}
3933
3934template <unsigned N>
3935static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3936 SelectionDAG &DAG, bool IsSigned = false) {
3937 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3938 // Check the ImmArg.
3939 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3940 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3941 DAG.getContext()->emitError(Op->getOperationName(0) +
3942 ": argument out of range.");
3943 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3944 }
3945 return SDValue();
3946}
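// Usage sketch: checkIntrinsicImmArg<5>(Op, 2, DAG) accepts operand 2 only if
// it fits in an unsigned 5-bit immediate (0..31); with IsSigned=true the
// signed range is checked instead (-16..15 for N = 5). On a violation the
// error above is emitted and an UNDEF node is returned so the caller can
// simply return it.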
3947
3948SDValue
3949LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3950 SelectionDAG &DAG) const {
3951 switch (Op.getConstantOperandVal(0)) {
3952 default:
3953 return SDValue(); // Don't custom lower most intrinsics.
3954 case Intrinsic::thread_pointer: {
3955 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3956 return DAG.getRegister(LoongArch::R2, PtrVT);
3957 }
3958 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3959 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3960 case Intrinsic::loongarch_lsx_vreplvei_d:
3961 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3962 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3963 case Intrinsic::loongarch_lsx_vreplvei_w:
3964 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3965 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3966 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3967 case Intrinsic::loongarch_lasx_xvpickve_d:
3968 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3969 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3970 case Intrinsic::loongarch_lasx_xvinsve0_d:
3971 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3972 case Intrinsic::loongarch_lsx_vsat_b:
3973 case Intrinsic::loongarch_lsx_vsat_bu:
3974 case Intrinsic::loongarch_lsx_vrotri_b:
3975 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3976 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3977 case Intrinsic::loongarch_lsx_vsrlri_b:
3978 case Intrinsic::loongarch_lsx_vsrari_b:
3979 case Intrinsic::loongarch_lsx_vreplvei_h:
3980 case Intrinsic::loongarch_lasx_xvsat_b:
3981 case Intrinsic::loongarch_lasx_xvsat_bu:
3982 case Intrinsic::loongarch_lasx_xvrotri_b:
3983 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3984 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3985 case Intrinsic::loongarch_lasx_xvsrlri_b:
3986 case Intrinsic::loongarch_lasx_xvsrari_b:
3987 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3988 case Intrinsic::loongarch_lasx_xvpickve_w:
3989 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3990 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3991 case Intrinsic::loongarch_lasx_xvinsve0_w:
3992 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3993 case Intrinsic::loongarch_lsx_vsat_h:
3994 case Intrinsic::loongarch_lsx_vsat_hu:
3995 case Intrinsic::loongarch_lsx_vrotri_h:
3996 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3997 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3998 case Intrinsic::loongarch_lsx_vsrlri_h:
3999 case Intrinsic::loongarch_lsx_vsrari_h:
4000 case Intrinsic::loongarch_lsx_vreplvei_b:
4001 case Intrinsic::loongarch_lasx_xvsat_h:
4002 case Intrinsic::loongarch_lasx_xvsat_hu:
4003 case Intrinsic::loongarch_lasx_xvrotri_h:
4004 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
4005 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
4006 case Intrinsic::loongarch_lasx_xvsrlri_h:
4007 case Intrinsic::loongarch_lasx_xvsrari_h:
4008 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
4009 return checkIntrinsicImmArg<4>(Op, 2, DAG);
4010 case Intrinsic::loongarch_lsx_vsrlni_b_h:
4011 case Intrinsic::loongarch_lsx_vsrani_b_h:
4012 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
4013 case Intrinsic::loongarch_lsx_vsrarni_b_h:
4014 case Intrinsic::loongarch_lsx_vssrlni_b_h:
4015 case Intrinsic::loongarch_lsx_vssrani_b_h:
4016 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
4017 case Intrinsic::loongarch_lsx_vssrani_bu_h:
4018 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
4019 case Intrinsic::loongarch_lsx_vssrarni_b_h:
4020 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
4021 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
4022 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
4023 case Intrinsic::loongarch_lasx_xvsrani_b_h:
4024 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
4025 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
4026 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
4027 case Intrinsic::loongarch_lasx_xvssrani_b_h:
4028 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
4029 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
4030 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
4031 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
4032 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
4033 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
4034 return checkIntrinsicImmArg<4>(Op, 3, DAG);
4035 case Intrinsic::loongarch_lsx_vsat_w:
4036 case Intrinsic::loongarch_lsx_vsat_wu:
4037 case Intrinsic::loongarch_lsx_vrotri_w:
4038 case Intrinsic::loongarch_lsx_vsllwil_d_w:
4039 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4040 case Intrinsic::loongarch_lsx_vsrlri_w:
4041 case Intrinsic::loongarch_lsx_vsrari_w:
4042 case Intrinsic::loongarch_lsx_vslei_bu:
4043 case Intrinsic::loongarch_lsx_vslei_hu:
4044 case Intrinsic::loongarch_lsx_vslei_wu:
4045 case Intrinsic::loongarch_lsx_vslei_du:
4046 case Intrinsic::loongarch_lsx_vslti_bu:
4047 case Intrinsic::loongarch_lsx_vslti_hu:
4048 case Intrinsic::loongarch_lsx_vslti_wu:
4049 case Intrinsic::loongarch_lsx_vslti_du:
4050 case Intrinsic::loongarch_lsx_vbsll_v:
4051 case Intrinsic::loongarch_lsx_vbsrl_v:
4052 case Intrinsic::loongarch_lasx_xvsat_w:
4053 case Intrinsic::loongarch_lasx_xvsat_wu:
4054 case Intrinsic::loongarch_lasx_xvrotri_w:
4055 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4056 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4057 case Intrinsic::loongarch_lasx_xvsrlri_w:
4058 case Intrinsic::loongarch_lasx_xvsrari_w:
4059 case Intrinsic::loongarch_lasx_xvslei_bu:
4060 case Intrinsic::loongarch_lasx_xvslei_hu:
4061 case Intrinsic::loongarch_lasx_xvslei_wu:
4062 case Intrinsic::loongarch_lasx_xvslei_du:
4063 case Intrinsic::loongarch_lasx_xvslti_bu:
4064 case Intrinsic::loongarch_lasx_xvslti_hu:
4065 case Intrinsic::loongarch_lasx_xvslti_wu:
4066 case Intrinsic::loongarch_lasx_xvslti_du:
4067 case Intrinsic::loongarch_lasx_xvbsll_v:
4068 case Intrinsic::loongarch_lasx_xvbsrl_v:
4069 return checkIntrinsicImmArg<5>(Op, 2, DAG);
4070 case Intrinsic::loongarch_lsx_vseqi_b:
4071 case Intrinsic::loongarch_lsx_vseqi_h:
4072 case Intrinsic::loongarch_lsx_vseqi_w:
4073 case Intrinsic::loongarch_lsx_vseqi_d:
4074 case Intrinsic::loongarch_lsx_vslei_b:
4075 case Intrinsic::loongarch_lsx_vslei_h:
4076 case Intrinsic::loongarch_lsx_vslei_w:
4077 case Intrinsic::loongarch_lsx_vslei_d:
4078 case Intrinsic::loongarch_lsx_vslti_b:
4079 case Intrinsic::loongarch_lsx_vslti_h:
4080 case Intrinsic::loongarch_lsx_vslti_w:
4081 case Intrinsic::loongarch_lsx_vslti_d:
4082 case Intrinsic::loongarch_lasx_xvseqi_b:
4083 case Intrinsic::loongarch_lasx_xvseqi_h:
4084 case Intrinsic::loongarch_lasx_xvseqi_w:
4085 case Intrinsic::loongarch_lasx_xvseqi_d:
4086 case Intrinsic::loongarch_lasx_xvslei_b:
4087 case Intrinsic::loongarch_lasx_xvslei_h:
4088 case Intrinsic::loongarch_lasx_xvslei_w:
4089 case Intrinsic::loongarch_lasx_xvslei_d:
4090 case Intrinsic::loongarch_lasx_xvslti_b:
4091 case Intrinsic::loongarch_lasx_xvslti_h:
4092 case Intrinsic::loongarch_lasx_xvslti_w:
4093 case Intrinsic::loongarch_lasx_xvslti_d:
4094 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
4095 case Intrinsic::loongarch_lsx_vsrlni_h_w:
4096 case Intrinsic::loongarch_lsx_vsrani_h_w:
4097 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4098 case Intrinsic::loongarch_lsx_vsrarni_h_w:
4099 case Intrinsic::loongarch_lsx_vssrlni_h_w:
4100 case Intrinsic::loongarch_lsx_vssrani_h_w:
4101 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4102 case Intrinsic::loongarch_lsx_vssrani_hu_w:
4103 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4104 case Intrinsic::loongarch_lsx_vssrarni_h_w:
4105 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4106 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4107 case Intrinsic::loongarch_lsx_vfrstpi_b:
4108 case Intrinsic::loongarch_lsx_vfrstpi_h:
4109 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4110 case Intrinsic::loongarch_lasx_xvsrani_h_w:
4111 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4112 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4113 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4114 case Intrinsic::loongarch_lasx_xvssrani_h_w:
4115 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4116 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4117 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4118 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4119 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4120 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4121 case Intrinsic::loongarch_lasx_xvfrstpi_b:
4122 case Intrinsic::loongarch_lasx_xvfrstpi_h:
4123 return checkIntrinsicImmArg<5>(Op, 3, DAG);
4124 case Intrinsic::loongarch_lsx_vsat_d:
4125 case Intrinsic::loongarch_lsx_vsat_du:
4126 case Intrinsic::loongarch_lsx_vrotri_d:
4127 case Intrinsic::loongarch_lsx_vsrlri_d:
4128 case Intrinsic::loongarch_lsx_vsrari_d:
4129 case Intrinsic::loongarch_lasx_xvsat_d:
4130 case Intrinsic::loongarch_lasx_xvsat_du:
4131 case Intrinsic::loongarch_lasx_xvrotri_d:
4132 case Intrinsic::loongarch_lasx_xvsrlri_d:
4133 case Intrinsic::loongarch_lasx_xvsrari_d:
4134 return checkIntrinsicImmArg<6>(Op, 2, DAG);
4135 case Intrinsic::loongarch_lsx_vsrlni_w_d:
4136 case Intrinsic::loongarch_lsx_vsrani_w_d:
4137 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4138 case Intrinsic::loongarch_lsx_vsrarni_w_d:
4139 case Intrinsic::loongarch_lsx_vssrlni_w_d:
4140 case Intrinsic::loongarch_lsx_vssrani_w_d:
4141 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4142 case Intrinsic::loongarch_lsx_vssrani_wu_d:
4143 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4144 case Intrinsic::loongarch_lsx_vssrarni_w_d:
4145 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4146 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4147 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4148 case Intrinsic::loongarch_lasx_xvsrani_w_d:
4149 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4150 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4151 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4152 case Intrinsic::loongarch_lasx_xvssrani_w_d:
4153 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4154 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4155 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4156 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4157 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4158 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4159 return checkIntrinsicImmArg<6>(Op, 3, DAG);
4160 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4161 case Intrinsic::loongarch_lsx_vsrani_d_q:
4162 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4163 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4164 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4165 case Intrinsic::loongarch_lsx_vssrani_d_q:
4166 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4167 case Intrinsic::loongarch_lsx_vssrani_du_q:
4168 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4169 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4170 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4171 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4172 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4173 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4174 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4175 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4176 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4177 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4178 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4179 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4180 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4181 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4182 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4183 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4184 return checkIntrinsicImmArg<7>(Op, 3, DAG);
4185 case Intrinsic::loongarch_lsx_vnori_b:
4186 case Intrinsic::loongarch_lsx_vshuf4i_b:
4187 case Intrinsic::loongarch_lsx_vshuf4i_h:
4188 case Intrinsic::loongarch_lsx_vshuf4i_w:
4189 case Intrinsic::loongarch_lasx_xvnori_b:
4190 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4191 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4192 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4193 case Intrinsic::loongarch_lasx_xvpermi_d:
4194 return checkIntrinsicImmArg<8>(Op, 2, DAG);
4195 case Intrinsic::loongarch_lsx_vshuf4i_d:
4196 case Intrinsic::loongarch_lsx_vpermi_w:
4197 case Intrinsic::loongarch_lsx_vbitseli_b:
4198 case Intrinsic::loongarch_lsx_vextrins_b:
4199 case Intrinsic::loongarch_lsx_vextrins_h:
4200 case Intrinsic::loongarch_lsx_vextrins_w:
4201 case Intrinsic::loongarch_lsx_vextrins_d:
4202 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4203 case Intrinsic::loongarch_lasx_xvpermi_w:
4204 case Intrinsic::loongarch_lasx_xvpermi_q:
4205 case Intrinsic::loongarch_lasx_xvbitseli_b:
4206 case Intrinsic::loongarch_lasx_xvextrins_b:
4207 case Intrinsic::loongarch_lasx_xvextrins_h:
4208 case Intrinsic::loongarch_lasx_xvextrins_w:
4209 case Intrinsic::loongarch_lasx_xvextrins_d:
4210 return checkIntrinsicImmArg<8>(Op, 3, DAG);
4211 case Intrinsic::loongarch_lsx_vrepli_b:
4212 case Intrinsic::loongarch_lsx_vrepli_h:
4213 case Intrinsic::loongarch_lsx_vrepli_w:
4214 case Intrinsic::loongarch_lsx_vrepli_d:
4215 case Intrinsic::loongarch_lasx_xvrepli_b:
4216 case Intrinsic::loongarch_lasx_xvrepli_h:
4217 case Intrinsic::loongarch_lasx_xvrepli_w:
4218 case Intrinsic::loongarch_lasx_xvrepli_d:
4219 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
4220 case Intrinsic::loongarch_lsx_vldi:
4221 case Intrinsic::loongarch_lasx_xvldi:
4222 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4223 }
4224}
4225
4226// Helper function that emits an error message for intrinsics with a chain and
4227// returns the merge values of a UNDEF and the chain.
4228static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
4229 StringRef ErrorMsg,
4230 SelectionDAG &DAG) {
4231 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4232 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4233 SDLoc(Op));
4234}
4235
4236SDValue
4237LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4238 SelectionDAG &DAG) const {
4239 SDLoc DL(Op);
4240 MVT GRLenVT = Subtarget.getGRLenVT();
4241 EVT VT = Op.getValueType();
4242 SDValue Chain = Op.getOperand(0);
4243 const StringRef ErrorMsgOOR = "argument out of range";
4244 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4245 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4246
4247 switch (Op.getConstantOperandVal(1)) {
4248 default:
4249 return Op;
4250 case Intrinsic::loongarch_crc_w_b_w:
4251 case Intrinsic::loongarch_crc_w_h_w:
4252 case Intrinsic::loongarch_crc_w_w_w:
4253 case Intrinsic::loongarch_crc_w_d_w:
4254 case Intrinsic::loongarch_crcc_w_b_w:
4255 case Intrinsic::loongarch_crcc_w_h_w:
4256 case Intrinsic::loongarch_crcc_w_w_w:
4257 case Intrinsic::loongarch_crcc_w_d_w:
4258 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4259 case Intrinsic::loongarch_csrrd_w:
4260 case Intrinsic::loongarch_csrrd_d: {
4261 unsigned Imm = Op.getConstantOperandVal(2);
4262 return !isUInt<14>(Imm)
4263 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4264 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4265 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4266 }
4267 case Intrinsic::loongarch_csrwr_w:
4268 case Intrinsic::loongarch_csrwr_d: {
4269 unsigned Imm = Op.getConstantOperandVal(3);
4270 return !isUInt<14>(Imm)
4271 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4272 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4273 {Chain, Op.getOperand(2),
4274 DAG.getConstant(Imm, DL, GRLenVT)});
4275 }
4276 case Intrinsic::loongarch_csrxchg_w:
4277 case Intrinsic::loongarch_csrxchg_d: {
4278 unsigned Imm = Op.getConstantOperandVal(4);
4279 return !isUInt<14>(Imm)
4280 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4281 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4282 {Chain, Op.getOperand(2), Op.getOperand(3),
4283 DAG.getConstant(Imm, DL, GRLenVT)});
4284 }
4285 case Intrinsic::loongarch_iocsrrd_d: {
4286 return DAG.getNode(
4287 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4288 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4289 }
4290#define IOCSRRD_CASE(NAME, NODE) \
4291 case Intrinsic::loongarch_##NAME: { \
4292 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4293 {Chain, Op.getOperand(2)}); \
4294 }
4295 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4296 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4297 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4298#undef IOCSRRD_CASE
4299 case Intrinsic::loongarch_cpucfg: {
4300 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4301 {Chain, Op.getOperand(2)});
4302 }
4303 case Intrinsic::loongarch_lddir_d: {
4304 unsigned Imm = Op.getConstantOperandVal(3);
4305 return !isUInt<8>(Imm)
4306 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4307 : Op;
4308 }
4309 case Intrinsic::loongarch_movfcsr2gr: {
4310 if (!Subtarget.hasBasicF())
4311 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4312 unsigned Imm = Op.getConstantOperandVal(2);
4313 return !isUInt<2>(Imm)
4314 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4315 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4316 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4317 }
4318 case Intrinsic::loongarch_lsx_vld:
4319 case Intrinsic::loongarch_lsx_vldrepl_b:
4320 case Intrinsic::loongarch_lasx_xvld:
4321 case Intrinsic::loongarch_lasx_xvldrepl_b:
4322 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4323 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4324 : SDValue();
4325 case Intrinsic::loongarch_lsx_vldrepl_h:
4326 case Intrinsic::loongarch_lasx_xvldrepl_h:
4327 return !isShiftedInt<11, 1>(
4328 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4330 Op, "argument out of range or not a multiple of 2", DAG)
4331 : SDValue();
4332 case Intrinsic::loongarch_lsx_vldrepl_w:
4333 case Intrinsic::loongarch_lasx_xvldrepl_w:
4334 return !isShiftedInt<10, 2>(
4335 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4337 Op, "argument out of range or not a multiple of 4", DAG)
4338 : SDValue();
4339 case Intrinsic::loongarch_lsx_vldrepl_d:
4340 case Intrinsic::loongarch_lasx_xvldrepl_d:
4341 return !isShiftedInt<9, 3>(
4342 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4344 Op, "argument out of range or not a multiple of 8", DAG)
4345 : SDValue();
4346 }
4347}
4348
4349// Helper function that emits an error message for intrinsics with a void
4350// return value and returns the chain.
4351static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4352 SelectionDAG &DAG) {
4353
4354 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4355 return Op.getOperand(0);
4356}
4357
4358SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4359 SelectionDAG &DAG) const {
4360 SDLoc DL(Op);
4361 MVT GRLenVT = Subtarget.getGRLenVT();
4362 SDValue Chain = Op.getOperand(0);
4363 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4364 SDValue Op2 = Op.getOperand(2);
4365 const StringRef ErrorMsgOOR = "argument out of range";
4366 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4367 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4368 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4369
4370 switch (IntrinsicEnum) {
4371 default:
4372 // TODO: Add more Intrinsics.
4373 return SDValue();
4374 case Intrinsic::loongarch_cacop_d:
4375 case Intrinsic::loongarch_cacop_w: {
4376 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4377 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4378 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4379 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4380 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4381 unsigned Imm1 = Op2->getAsZExtVal();
4382 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4383 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4384 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4385 return Op;
4386 }
4387 case Intrinsic::loongarch_dbar: {
4388 unsigned Imm = Op2->getAsZExtVal();
4389 return !isUInt<15>(Imm)
4390 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4391 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4392 DAG.getConstant(Imm, DL, GRLenVT));
4393 }
4394 case Intrinsic::loongarch_ibar: {
4395 unsigned Imm = Op2->getAsZExtVal();
4396 return !isUInt<15>(Imm)
4397 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4398 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4399 DAG.getConstant(Imm, DL, GRLenVT));
4400 }
4401 case Intrinsic::loongarch_break: {
4402 unsigned Imm = Op2->getAsZExtVal();
4403 return !isUInt<15>(Imm)
4404 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4405 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4406 DAG.getConstant(Imm, DL, GRLenVT));
4407 }
4408 case Intrinsic::loongarch_movgr2fcsr: {
4409 if (!Subtarget.hasBasicF())
4410 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4411 unsigned Imm = Op2->getAsZExtVal();
4412 return !isUInt<2>(Imm)
4413 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4414 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4415 DAG.getConstant(Imm, DL, GRLenVT),
4416 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4417 Op.getOperand(3)));
4418 }
4419 case Intrinsic::loongarch_syscall: {
4420 unsigned Imm = Op2->getAsZExtVal();
4421 return !isUInt<15>(Imm)
4422 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4423 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4424 DAG.getConstant(Imm, DL, GRLenVT));
4425 }
4426#define IOCSRWR_CASE(NAME, NODE) \
4427 case Intrinsic::loongarch_##NAME: { \
4428 SDValue Op3 = Op.getOperand(3); \
4429 return Subtarget.is64Bit() \
4430 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4431 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4432 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4433 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4434 Op3); \
4435 }
4436 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4437 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4438 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4439#undef IOCSRWR_CASE
4440 case Intrinsic::loongarch_iocsrwr_d: {
4441 return !Subtarget.is64Bit()
4442 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4443 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4444 Op2,
4445 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4446 Op.getOperand(3)));
4447 }
4448#define ASRT_LE_GT_CASE(NAME) \
4449 case Intrinsic::loongarch_##NAME: { \
4450 return !Subtarget.is64Bit() \
4451 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4452 : Op; \
4453 }
4454 ASRT_LE_GT_CASE(asrtle_d)
4455 ASRT_LE_GT_CASE(asrtgt_d)
4456#undef ASRT_LE_GT_CASE
4457 case Intrinsic::loongarch_ldpte_d: {
4458 unsigned Imm = Op.getConstantOperandVal(3);
4459 return !Subtarget.is64Bit()
4460 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4461 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4462 : Op;
4463 }
4464 case Intrinsic::loongarch_lsx_vst:
4465 case Intrinsic::loongarch_lasx_xvst:
4466 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4467 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4468 : SDValue();
4469 case Intrinsic::loongarch_lasx_xvstelm_b:
4470 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4471 !isUInt<5>(Op.getConstantOperandVal(5)))
4472 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4473 : SDValue();
4474 case Intrinsic::loongarch_lsx_vstelm_b:
4475 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4476 !isUInt<4>(Op.getConstantOperandVal(5)))
4477 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4478 : SDValue();
4479 case Intrinsic::loongarch_lasx_xvstelm_h:
4480 return (!isShiftedInt<8, 1>(
4481 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4482 !isUInt<4>(Op.getConstantOperandVal(5)))
4484 Op, "argument out of range or not a multiple of 2", DAG)
4485 : SDValue();
4486 case Intrinsic::loongarch_lsx_vstelm_h:
4487 return (!isShiftedInt<8, 1>(
4488 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4489 !isUInt<3>(Op.getConstantOperandVal(5)))
4491 Op, "argument out of range or not a multiple of 2", DAG)
4492 : SDValue();
4493 case Intrinsic::loongarch_lasx_xvstelm_w:
4494 return (!isShiftedInt<8, 2>(
4495 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4496 !isUInt<3>(Op.getConstantOperandVal(5)))
4498 Op, "argument out of range or not a multiple of 4", DAG)
4499 : SDValue();
4500 case Intrinsic::loongarch_lsx_vstelm_w:
4501 return (!isShiftedInt<8, 2>(
4502 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4503 !isUInt<2>(Op.getConstantOperandVal(5)))
4505 Op, "argument out of range or not a multiple of 4", DAG)
4506 : SDValue();
4507 case Intrinsic::loongarch_lasx_xvstelm_d:
4508 return (!isShiftedInt<8, 3>(
4509 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4510 !isUInt<2>(Op.getConstantOperandVal(5)))
4512 Op, "argument out of range or not a multiple of 8", DAG)
4513 : SDValue();
4514 case Intrinsic::loongarch_lsx_vstelm_d:
4515 return (!isShiftedInt<8, 3>(
4516 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4517 !isUInt<1>(Op.getConstantOperandVal(5)))
4519 Op, "argument out of range or not a multiple of 8", DAG)
4520 : SDValue();
4521 }
4522}
4523
4524SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4525 SelectionDAG &DAG) const {
4526 SDLoc DL(Op);
4527 SDValue Lo = Op.getOperand(0);
4528 SDValue Hi = Op.getOperand(1);
4529 SDValue Shamt = Op.getOperand(2);
4530 EVT VT = Lo.getValueType();
4531
4532 // if Shamt-GRLen < 0: // Shamt < GRLen
4533 // Lo = Lo << Shamt
4534 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4535 // else:
4536 // Lo = 0
4537 // Hi = Lo << (Shamt-GRLen)
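  // Worked example (GRLen = 32, i.e. i64 shifts on LA32), as a sketch: for
  // Shamt = 8, Lo = Lo << 8 and Hi = (Hi << 8) | (Lo >> 24); the
  // (Lo >>u 1) >>u (31 ^ Shamt) form is used so that no single shift ever
  // needs an amount of GRLen. For Shamt = 40, Lo = 0 and Hi = Lo << 8.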
4538
4539 SDValue Zero = DAG.getConstant(0, DL, VT);
4540 SDValue One = DAG.getConstant(1, DL, VT);
4541 SDValue MinusGRLen =
4542 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4543 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4544 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4545 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4546
4547 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4548 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4549 SDValue ShiftRightLo =
4550 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4551 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4552 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4553 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4554
4555 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4556
4557 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4558 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4559
4560 SDValue Parts[2] = {Lo, Hi};
4561 return DAG.getMergeValues(Parts, DL);
4562}
4563
4564SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4565 SelectionDAG &DAG,
4566 bool IsSRA) const {
4567 SDLoc DL(Op);
4568 SDValue Lo = Op.getOperand(0);
4569 SDValue Hi = Op.getOperand(1);
4570 SDValue Shamt = Op.getOperand(2);
4571 EVT VT = Lo.getValueType();
4572
4573 // SRA expansion:
4574 // if Shamt-GRLen < 0: // Shamt < GRLen
4575 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4576 // Hi = Hi >>s Shamt
4577 // else:
4578 // Lo = Hi >>s (Shamt-GRLen);
4579 // Hi = Hi >>s (GRLen-1)
4580 //
4581 // SRL expansion:
4582 // if Shamt-GRLen < 0: // Shamt < GRLen
4583 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4584 // Hi = Hi >>u Shamt
4585 // else:
4586 // Lo = Hi >>u (Shamt-GRLen);
4587 // Hi = 0;
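  // Worked example (GRLen = 32, SRL), as a sketch: for Shamt = 8,
  // Lo = (Lo >> 8) | (Hi << 24) and Hi = Hi >> 8; for Shamt = 40,
  // Lo = Hi >> 8 and Hi = 0 (for SRA, Hi = Hi >> 31 instead, replicating the
  // sign bit).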
4588
4589 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4590
4591 SDValue Zero = DAG.getConstant(0, DL, VT);
4592 SDValue One = DAG.getConstant(1, DL, VT);
4593 SDValue MinusGRLen =
4594 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4595 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4596 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4597 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4598
4599 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4600 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4601 SDValue ShiftLeftHi =
4602 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4603 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4604 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4605 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4606 SDValue HiFalse =
4607 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4608
4609 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4610
4611 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4612 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4613
4614 SDValue Parts[2] = {Lo, Hi};
4615 return DAG.getMergeValues(Parts, DL);
4616}
4617
4618// Returns the opcode of the target-specific SDNode that implements the 32-bit
4619// form of the given Opcode.
4620static unsigned getLoongArchWOpcode(unsigned Opcode) {
4621 switch (Opcode) {
4622 default:
4623 llvm_unreachable("Unexpected opcode");
4624 case ISD::SDIV:
4625 return LoongArchISD::DIV_W;
4626 case ISD::UDIV:
4627 return LoongArchISD::DIV_WU;
4628 case ISD::SREM:
4629 return LoongArchISD::MOD_W;
4630 case ISD::UREM:
4631 return LoongArchISD::MOD_WU;
4632 case ISD::SHL:
4633 return LoongArchISD::SLL_W;
4634 case ISD::SRA:
4635 return LoongArchISD::SRA_W;
4636 case ISD::SRL:
4637 return LoongArchISD::SRL_W;
4638 case ISD::ROTL:
4639 case ISD::ROTR:
4640 return LoongArchISD::ROTR_W;
4641 case ISD::CTTZ:
4642 return LoongArchISD::CTZ_W;
4643 case ISD::CTLZ:
4644 return LoongArchISD::CLZ_W;
4645 }
4646}
4647
4648// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4649// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4650// otherwise be promoted to i64, making it difficult to select the
4651// SLL_W/.../*W form later, because the fact that the operation was
4652// originally of type i8/i16/i32 is lost.
4653static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4654 unsigned ExtOpc = ISD::ANY_EXTEND) {
4655 SDLoc DL(N);
4656 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
4657 SDValue NewOp0, NewRes;
4658
4659 switch (NumOp) {
4660 default:
4661 llvm_unreachable("Unexpected NumOp");
4662 case 1: {
4663 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4664 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4665 break;
4666 }
4667 case 2: {
4668 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4669 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4670 if (N->getOpcode() == ISD::ROTL) {
4671 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4672 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4673 }
4674 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4675 break;
4676 }
4677 // TODO: Handle more NumOp.
4678 }
4679
4680 // ReplaceNodeResults requires we maintain the same type for the return
4681 // value.
4682 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4683}
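// Usage sketch: for an i32 SRA on LA64 whose shift amount is not a constant,
// customLegalizeToWOp(N, DAG, 2) any-extends both operands to i64, emits
// LoongArchISD::SRA_W on i64 and truncates the result back to i32, so the
// sra.w form can still be selected even though i32 is not a legal type there.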
4684
4685// Converts the given 32-bit operation to an i64 operation with sign-extension
4686// semantics, so that redundant sign-extension instructions can be avoided.
4687static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4688 SDLoc DL(N);
4689 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4690 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4691 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4692 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4693 DAG.getValueType(MVT::i32));
4694 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4695}
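// E.g. an i32 ADD on LA64 becomes, roughly,
//   trunc(sext_inreg(add(anyext(x), anyext(y)), i32))
// which matches add.w and lets users of the value rely on it being
// sign-extended in the 64-bit register.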
4696
4697// Helper function that emits an error message for intrinsics with or without a
4698// chain and pushes a UNDEF value (plus the chain, when present) as the results.
4699static void emitErrorAndReplaceIntrinsicResults(
4700 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4701 StringRef ErrorMsg, bool WithChain = true) {
4702 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4703 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4704 if (!WithChain)
4705 return;
4706 Results.push_back(N->getOperand(0));
4707}
4708
4709template <unsigned N>
4710static void
4711replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4712 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4713 unsigned ResOp) {
4714 const StringRef ErrorMsgOOR = "argument out of range";
4715 unsigned Imm = Node->getConstantOperandVal(2);
4716 if (!isUInt<N>(Imm)) {
4717 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4718 /*WithChain=*/false);
4719 return;
4720 }
4721 SDLoc DL(Node);
4722 SDValue Vec = Node->getOperand(1);
4723
4724 SDValue PickElt =
4725 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4726 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4727 DAG.getValueType(Vec.getValueType().getVectorElementType()));
4728 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4729 PickElt.getValue(0)));
4730}
4731
4732static void replaceVecCondBranchResults(SDNode *N,
4733 SmallVectorImpl<SDValue> &Results,
4734 SelectionDAG &DAG,
4735 const LoongArchSubtarget &Subtarget,
4736 unsigned ResOp) {
4737 SDLoc DL(N);
4738 SDValue Vec = N->getOperand(1);
4739
4740 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4741 Results.push_back(
4742 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4743}
4744
4745static void
4746replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4747 SelectionDAG &DAG,
4748 const LoongArchSubtarget &Subtarget) {
4749 switch (N->getConstantOperandVal(0)) {
4750 default:
4751 llvm_unreachable("Unexpected Intrinsic.");
4752 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4753 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4754 LoongArchISD::VPICK_SEXT_ELT);
4755 break;
4756 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4757 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4758 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4759 LoongArchISD::VPICK_SEXT_ELT);
4760 break;
4761 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4762 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4763 LoongArchISD::VPICK_SEXT_ELT);
4764 break;
4765 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4766 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4767 LoongArchISD::VPICK_ZEXT_ELT);
4768 break;
4769 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4770 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4771 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4772 LoongArchISD::VPICK_ZEXT_ELT);
4773 break;
4774 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4775 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4776 LoongArchISD::VPICK_ZEXT_ELT);
4777 break;
4778 case Intrinsic::loongarch_lsx_bz_b:
4779 case Intrinsic::loongarch_lsx_bz_h:
4780 case Intrinsic::loongarch_lsx_bz_w:
4781 case Intrinsic::loongarch_lsx_bz_d:
4782 case Intrinsic::loongarch_lasx_xbz_b:
4783 case Intrinsic::loongarch_lasx_xbz_h:
4784 case Intrinsic::loongarch_lasx_xbz_w:
4785 case Intrinsic::loongarch_lasx_xbz_d:
4786 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4787 LoongArchISD::VALL_ZERO);
4788 break;
4789 case Intrinsic::loongarch_lsx_bz_v:
4790 case Intrinsic::loongarch_lasx_xbz_v:
4791 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4792 LoongArchISD::VANY_ZERO);
4793 break;
4794 case Intrinsic::loongarch_lsx_bnz_b:
4795 case Intrinsic::loongarch_lsx_bnz_h:
4796 case Intrinsic::loongarch_lsx_bnz_w:
4797 case Intrinsic::loongarch_lsx_bnz_d:
4798 case Intrinsic::loongarch_lasx_xbnz_b:
4799 case Intrinsic::loongarch_lasx_xbnz_h:
4800 case Intrinsic::loongarch_lasx_xbnz_w:
4801 case Intrinsic::loongarch_lasx_xbnz_d:
4802 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4803 LoongArchISD::VALL_NONZERO);
4804 break;
4805 case Intrinsic::loongarch_lsx_bnz_v:
4806 case Intrinsic::loongarch_lasx_xbnz_v:
4807 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4808 LoongArchISD::VANY_NONZERO);
4809 break;
4810 }
4811}
4812
4813static void replaceCMP_XCHG_128Results(SDNode *N,
4814 SmallVectorImpl<SDValue> &Results,
4815 SelectionDAG &DAG) {
4816 assert(N->getValueType(0) == MVT::i128 &&
4817 "AtomicCmpSwap on types less than 128 should be legal");
4818 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4819
4820 unsigned Opcode;
4821 switch (MemOp->getMergedOrdering()) {
4822 case AtomicOrdering::Acquire:
4823 case AtomicOrdering::AcquireRelease:
4824 case AtomicOrdering::SequentiallyConsistent:
4825 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4826 break;
4827 case AtomicOrdering::Monotonic:
4828 case AtomicOrdering::Release:
4829 Opcode = LoongArch::PseudoCmpXchg128;
4830 break;
4831 default:
4832 llvm_unreachable("Unexpected ordering!");
4833 }
4834
4835 SDLoc DL(N);
4836 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4837 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4838 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4839 NewVal.first, NewVal.second, N->getOperand(0)};
4840
4841 SDNode *CmpSwap = DAG.getMachineNode(
4842 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4843 Ops);
4844 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4845 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4846 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4847 Results.push_back(SDValue(CmpSwap, 3));
4848}
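// Summary (sketch): the i128 expected and new values are split into i64
// halves, a PseudoCmpXchg128[Acquire] machine node is emitted over them, and
// the two i64 result registers are recombined with BUILD_PAIR into the i128
// result, with the node's last result providing the output chain.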
4849
4850void LoongArchTargetLowering::ReplaceNodeResults(
4851 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4852 SDLoc DL(N);
4853 EVT VT = N->getValueType(0);
4854 switch (N->getOpcode()) {
4855 default:
4856 llvm_unreachable("Don't know how to legalize this operation");
4857 case ISD::ADD:
4858 case ISD::SUB:
4859 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4860 "Unexpected custom legalisation");
4861 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4862 break;
4863 case ISD::SDIV:
4864 case ISD::UDIV:
4865 case ISD::SREM:
4866 case ISD::UREM:
4867 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4868 "Unexpected custom legalisation");
4869 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4870 Subtarget.hasDiv32() && VT == MVT::i32
4871 ? ISD::ANY_EXTEND
4872 : ISD::SIGN_EXTEND));
4873 break;
4874 case ISD::SHL:
4875 case ISD::SRA:
4876 case ISD::SRL:
4877 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4878 "Unexpected custom legalisation");
4879 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4880 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4881 break;
4882 }
4883 break;
4884 case ISD::ROTL:
4885 case ISD::ROTR:
4886 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4887 "Unexpected custom legalisation");
4888 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4889 break;
4890 case ISD::FP_TO_SINT: {
4891 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4892 "Unexpected custom legalisation");
4893 SDValue Src = N->getOperand(0);
4894 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4895 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4896 TargetLowering::TypeSoftenFloat) {
4897 if (!isTypeLegal(Src.getValueType()))
4898 return;
4899 if (Src.getValueType() == MVT::f16)
4900 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4901 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4902 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4903 return;
4904 }
4905 // If the FP type needs to be softened, emit a library call using the 'si'
4906 // version. If we left it to default legalization we'd end up with 'di'.
4907 RTLIB::Libcall LC;
4908 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4909 MakeLibCallOptions CallOptions;
4910 EVT OpVT = Src.getValueType();
4911 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4912 SDValue Chain = SDValue();
4913 SDValue Result;
4914 std::tie(Result, Chain) =
4915 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4916 Results.push_back(Result);
4917 break;
4918 }
4919 case ISD::BITCAST: {
4920 SDValue Src = N->getOperand(0);
4921 EVT SrcVT = Src.getValueType();
4922 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4923 Subtarget.hasBasicF()) {
4924 SDValue Dst =
4925 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4926 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4927 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4928 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4929 DAG.getVTList(MVT::i32, MVT::i32), Src);
4930 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4931 NewReg.getValue(0), NewReg.getValue(1));
4932 Results.push_back(RetReg);
4933 }
4934 break;
4935 }
4936 case ISD::FP_TO_UINT: {
4937 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4938 "Unexpected custom legalisation");
4939 auto &TLI = DAG.getTargetLoweringInfo();
4940 SDValue Tmp1, Tmp2;
4941 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4942 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4943 break;
4944 }
4945 case ISD::BSWAP: {
4946 SDValue Src = N->getOperand(0);
4947 assert((VT == MVT::i16 || VT == MVT::i32) &&
4948 "Unexpected custom legalization");
4949 MVT GRLenVT = Subtarget.getGRLenVT();
4950 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4951 SDValue Tmp;
4952 switch (VT.getSizeInBits()) {
4953 default:
4954 llvm_unreachable("Unexpected operand width");
4955 case 16:
4956 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4957 break;
4958 case 32:
4959 // Only LA64 will get here due to the size mismatch between VT and
4960 // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
4961 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4962 break;
4963 }
4964 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4965 break;
4966 }
4967 case ISD::BITREVERSE: {
4968 SDValue Src = N->getOperand(0);
4969 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4970 "Unexpected custom legalization");
4971 MVT GRLenVT = Subtarget.getGRLenVT();
4972 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4973 SDValue Tmp;
4974 switch (VT.getSizeInBits()) {
4975 default:
4976 llvm_unreachable("Unexpected operand width");
4977 case 8:
4978 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4979 break;
4980 case 32:
4981 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4982 break;
4983 }
4984 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4985 break;
4986 }
4987 case ISD::CTLZ:
4988 case ISD::CTTZ: {
4989 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4990 "Unexpected custom legalisation");
4991 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4992 break;
4993 }
4994 case ISD::INTRINSIC_W_CHAIN: {
4995 SDValue Chain = N->getOperand(0);
4996 SDValue Op2 = N->getOperand(2);
4997 MVT GRLenVT = Subtarget.getGRLenVT();
4998 const StringRef ErrorMsgOOR = "argument out of range";
4999 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
5000 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
5001
5002 switch (N->getConstantOperandVal(1)) {
5003 default:
5004 llvm_unreachable("Unexpected Intrinsic.");
5005 case Intrinsic::loongarch_movfcsr2gr: {
5006 if (!Subtarget.hasBasicF()) {
5007 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
5008 return;
5009 }
5010 unsigned Imm = Op2->getAsZExtVal();
5011 if (!isUInt<2>(Imm)) {
5012 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5013 return;
5014 }
5015 SDValue MOVFCSR2GRResults = DAG.getNode(
5016 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
5017 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5018 Results.push_back(
5019 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
5020 Results.push_back(MOVFCSR2GRResults.getValue(1));
5021 break;
5022 }
5023#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5024 case Intrinsic::loongarch_##NAME: { \
5025 SDValue NODE = DAG.getNode( \
5026 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5027 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5028 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5029 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5030 Results.push_back(NODE.getValue(1)); \
5031 break; \
5032 }
5033 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5034 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5035 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5036 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5037 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5038 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5039#undef CRC_CASE_EXT_BINARYOP
5040
5041#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5042 case Intrinsic::loongarch_##NAME: { \
5043 SDValue NODE = DAG.getNode( \
5044 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5045 {Chain, Op2, \
5046 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5047 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5048 Results.push_back(NODE.getValue(1)); \
5049 break; \
5050 }
5051 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5052 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5053#undef CRC_CASE_EXT_UNARYOP
5054#define CSR_CASE(ID) \
5055 case Intrinsic::loongarch_##ID: { \
5056 if (!Subtarget.is64Bit()) \
5057 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5058 break; \
5059 }
5060 CSR_CASE(csrrd_d);
5061 CSR_CASE(csrwr_d);
5062 CSR_CASE(csrxchg_d);
5063 CSR_CASE(iocsrrd_d);
5064#undef CSR_CASE
5065 case Intrinsic::loongarch_csrrd_w: {
5066 unsigned Imm = Op2->getAsZExtVal();
5067 if (!isUInt<14>(Imm)) {
5068 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5069 return;
5070 }
5071 SDValue CSRRDResults =
5072 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5073 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5074 Results.push_back(
5075 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5076 Results.push_back(CSRRDResults.getValue(1));
5077 break;
5078 }
5079 case Intrinsic::loongarch_csrwr_w: {
5080 unsigned Imm = N->getConstantOperandVal(3);
5081 if (!isUInt<14>(Imm)) {
5082 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5083 return;
5084 }
5085 SDValue CSRWRResults =
5086 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5087 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5088 DAG.getConstant(Imm, DL, GRLenVT)});
5089 Results.push_back(
5090 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5091 Results.push_back(CSRWRResults.getValue(1));
5092 break;
5093 }
5094 case Intrinsic::loongarch_csrxchg_w: {
5095 unsigned Imm = N->getConstantOperandVal(4);
5096 if (!isUInt<14>(Imm)) {
5097 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5098 return;
5099 }
5100 SDValue CSRXCHGResults = DAG.getNode(
5101 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5102 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5103 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5104 DAG.getConstant(Imm, DL, GRLenVT)});
5105 Results.push_back(
5106 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5107 Results.push_back(CSRXCHGResults.getValue(1));
5108 break;
5109 }
5110#define IOCSRRD_CASE(NAME, NODE) \
5111 case Intrinsic::loongarch_##NAME: { \
5112 SDValue IOCSRRDResults = \
5113 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5114 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5115 Results.push_back( \
5116 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5117 Results.push_back(IOCSRRDResults.getValue(1)); \
5118 break; \
5119 }
5120 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5121 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5122 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5123#undef IOCSRRD_CASE
5124 case Intrinsic::loongarch_cpucfg: {
5125 SDValue CPUCFGResults =
5126 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5127 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5128 Results.push_back(
5129 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5130 Results.push_back(CPUCFGResults.getValue(1));
5131 break;
5132 }
5133 case Intrinsic::loongarch_lddir_d: {
5134 if (!Subtarget.is64Bit()) {
5135 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5136 return;
5137 }
5138 break;
5139 }
5140 }
5141 break;
5142 }
5143 case ISD::READ_REGISTER: {
5144 if (Subtarget.is64Bit())
5145 DAG.getContext()->emitError(
5146 "On LA64, only 64-bit registers can be read.");
5147 else
5148 DAG.getContext()->emitError(
5149 "On LA32, only 32-bit registers can be read.");
5150 Results.push_back(DAG.getUNDEF(VT));
5151 Results.push_back(N->getOperand(0));
5152 break;
5153 }
5154 case ISD::INTRINSIC_WO_CHAIN: {
5155 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5156 break;
5157 }
5158 case ISD::LROUND: {
5159 SDValue Op0 = N->getOperand(0);
5160 EVT OpVT = Op0.getValueType();
5161 RTLIB::Libcall LC =
5162 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5163 MakeLibCallOptions CallOptions;
5164 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5165 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5166 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5167 Results.push_back(Result);
5168 break;
5169 }
5170 case ISD::ATOMIC_CMP_SWAP: {
5172 break;
5173 }
5174 case ISD::TRUNCATE: {
5175 MVT VT = N->getSimpleValueType(0);
5176 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5177 return;
5178
5179 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5180 SDValue In = N->getOperand(0);
5181 EVT InVT = In.getValueType();
5182 EVT InEltVT = InVT.getVectorElementType();
5183 EVT EltVT = VT.getVectorElementType();
5184 unsigned MinElts = VT.getVectorNumElements();
5185 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5186 unsigned InBits = InVT.getSizeInBits();
5187
5188 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5189 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5190 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5191 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5192 for (unsigned I = 0; I < MinElts; ++I)
5193 TruncMask[I] = Scale * I;
5194
5195 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5196 MVT SVT = In.getSimpleValueType().getScalarType();
5197 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5198 SDValue WidenIn =
5199 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5200 DAG.getVectorIdxConstant(0, DL));
5201 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5202 "Illegal vector type in truncation");
5203 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5204 Results.push_back(
5205 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5206 return;
5207 }
5208 }
5209
5210 break;
5211 }
5212 }
5213}
5214
5215/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
5216static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,
5217 SelectionDAG &DAG) {
5218 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5219
5220 MVT VT = N->getSimpleValueType(0);
5221 if (!VT.is128BitVector() && !VT.is256BitVector())
5222 return SDValue();
5223
5224 SDValue X, Y;
5225 SDValue N0 = N->getOperand(0);
5226 SDValue N1 = N->getOperand(1);
5227
5228 if (SDValue Not = isNOT(N0, DAG)) {
5229 X = Not;
5230 Y = N1;
5231 } else if (SDValue Not = isNOT(N1, DAG)) {
5232 X = Not;
5233 Y = N0;
5234 } else
5235 return SDValue();
5236
5237 X = DAG.getBitcast(VT, X);
5238 Y = DAG.getBitcast(VT, Y);
5239 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5240}
5241
5242static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
5243 TargetLowering::DAGCombinerInfo &DCI,
5244 const LoongArchSubtarget &Subtarget) {
5245 if (DCI.isBeforeLegalizeOps())
5246 return SDValue();
5247
5248 SDValue FirstOperand = N->getOperand(0);
5249 SDValue SecondOperand = N->getOperand(1);
5250 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5251 EVT ValTy = N->getValueType(0);
5252 SDLoc DL(N);
5253 uint64_t lsb, msb;
5254 unsigned SMIdx, SMLen;
5255 ConstantSDNode *CN;
5256 SDValue NewOperand;
5257 MVT GRLenVT = Subtarget.getGRLenVT();
5258
5259 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
5260 return R;
5261
5262 // BSTRPICK requires the 32S feature.
5263 if (!Subtarget.has32S())
5264 return SDValue();
5265
5266 // Op's second operand must be a shifted mask.
5267 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5268 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5269 return SDValue();
5270
5271 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5272 // Pattern match BSTRPICK.
5273 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
5274 // => BSTRPICK $dst, $src, msb, lsb
5275 // where msb = lsb + len - 1
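// e.g. $dst = and (srl $src, 8), 0xffff  =>  BSTRPICK $dst, $src, 23, 8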
5276
5277 // The second operand of the shift must be an immediate.
5278 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5279 return SDValue();
5280
5281 lsb = CN->getZExtValue();
5282
5283 // Return if the shifted mask does not start at bit 0 or the sum of its
5284 // length and lsb exceeds the word's size.
5285 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5286 return SDValue();
5287
5288 NewOperand = FirstOperand.getOperand(0);
5289 } else {
5290 // Pattern match BSTRPICK.
5291 // $dst = and $src, (2**len - 1), if len > 12
5292 // => BSTRPICK $dst, $src, msb, lsb
5293 // where lsb = 0 and msb = len - 1
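// e.g. $dst = and $src, 0xfffff (len = 20)  =>  BSTRPICK $dst, $src, 19, 0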
5294
5295 // If the mask is <= 0xfff, andi can be used instead.
5296 if (CN->getZExtValue() <= 0xfff)
5297 return SDValue();
5298
5299 // Return if the MSB would exceed the operand width.
5300 if (SMIdx + SMLen > ValTy.getSizeInBits())
5301 return SDValue();
5302
5303 if (SMIdx > 0) {
5304 // Omit if the constant has more than 2 uses. This is a conservative
5305 // decision. Whether it is a win depends on the HW microarchitecture.
5306 // However it should always be better for 1 and 2 uses.
5307 if (CN->use_size() > 2)
5308 return SDValue();
5309 // Return if the constant can be composed by a single LU12I.W.
5310 if ((CN->getZExtValue() & 0xfff) == 0)
5311 return SDValue();
5312 // Return if the constant can be composed by a single ADDI with
5313 // the zero register.
5314 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5315 return SDValue();
5316 }
5317
5318 lsb = SMIdx;
5319 NewOperand = FirstOperand;
5320 }
5321
5322 msb = lsb + SMLen - 1;
5323 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5324 DAG.getConstant(msb, DL, GRLenVT),
5325 DAG.getConstant(lsb, DL, GRLenVT));
5326 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5327 return NR0;
5328 // Try to optimize to
5329 // bstrpick $Rd, $Rs, msb, lsb
5330 // slli $Rd, $Rd, lsb
5331 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5332 DAG.getConstant(lsb, DL, GRLenVT));
5333}
5334
5335static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
5336 TargetLowering::DAGCombinerInfo &DCI,
5337 const LoongArchSubtarget &Subtarget) {
5338 // BSTRPICK requires the 32S feature.
5339 if (!Subtarget.has32S())
5340 return SDValue();
5341
5342 if (DCI.isBeforeLegalizeOps())
5343 return SDValue();
5344
5345 // $dst = srl (and $src, Mask), Shamt
5346 // =>
5347 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5348 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5349 //
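// e.g. $dst = srl (and $src, 0xff00), 8  =>  BSTRPICK $dst, $src, 15, 8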
5350
5351 SDValue FirstOperand = N->getOperand(0);
5352 ConstantSDNode *CN;
5353 EVT ValTy = N->getValueType(0);
5354 SDLoc DL(N);
5355 MVT GRLenVT = Subtarget.getGRLenVT();
5356 unsigned MaskIdx, MaskLen;
5357 uint64_t Shamt;
5358
5359 // The first operand must be an AND and the second operand of the AND must be
5360 // a shifted mask.
5361 if (FirstOperand.getOpcode() != ISD::AND ||
5362 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5363 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5364 return SDValue();
5365
5366 // The second operand (shift amount) must be an immediate.
5367 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5368 return SDValue();
5369
5370 Shamt = CN->getZExtValue();
5371 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5372 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5373 FirstOperand->getOperand(0),
5374 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5375 DAG.getConstant(Shamt, DL, GRLenVT));
5376
5377 return SDValue();
5378}
5379
5380// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5381// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
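// e.g. a v8i1 mask produced by comparing two v8i32 vectors has a 256-bit
// source, which lets the bitcast combine select a 256-bit (LASX) mask form.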
5382static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5383 unsigned Depth) {
5384 // Limit recursion.
5385 if (Depth >= SelectionDAG::MaxRecursionDepth)
5386 return false;
5387 switch (Src.getOpcode()) {
5388 case ISD::SETCC:
5389 case ISD::TRUNCATE:
5390 return Src.getOperand(0).getValueSizeInBits() == Size;
5391 case ISD::FREEZE:
5392 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
5393 case ISD::AND:
5394 case ISD::XOR:
5395 case ISD::OR:
5396 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5397 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
5398 case ISD::SELECT:
5399 case ISD::VSELECT:
5400 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5401 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5402 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
5403 case ISD::BUILD_VECTOR:
5404 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5405 ISD::isBuildVectorAllOnes(Src.getNode());
5406 }
5407 return false;
5408}
5409
5410// Helper to push sign extension of vXi1 SETCC result through bitops.
5411static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
5412 SDValue Src, const SDLoc &DL) {
5413 switch (Src.getOpcode()) {
5414 case ISD::SETCC:
5415 case ISD::FREEZE:
5416 case ISD::TRUNCATE:
5417 case ISD::BUILD_VECTOR:
5418 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5419 case ISD::AND:
5420 case ISD::XOR:
5421 case ISD::OR:
5422 return DAG.getNode(
5423 Src.getOpcode(), DL, SExtVT,
5424 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5425 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5426 case ISD::SELECT:
5427 case ISD::VSELECT:
5428 return DAG.getSelect(
5429 DL, SExtVT, Src.getOperand(0),
5430 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5431 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5432 }
5433 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5434}
5435
5436static SDValue
5437performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
5438 TargetLowering::DAGCombinerInfo &DCI,
5439 const LoongArchSubtarget &Subtarget) {
5440 SDLoc DL(N);
5441 EVT VT = N->getValueType(0);
5442 SDValue Src = N->getOperand(0);
5443 EVT SrcVT = Src.getValueType();
5444
5445 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5446 return SDValue();
5447
5448 bool UseLASX;
5449 unsigned Opc = ISD::DELETED_NODE;
5450 EVT CmpVT = Src.getOperand(0).getValueType();
5451 EVT EltVT = CmpVT.getVectorElementType();
5452
5453 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5454 UseLASX = false;
5455 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5456 CmpVT.getSizeInBits() == 256)
5457 UseLASX = true;
5458 else
5459 return SDValue();
5460
5461 SDValue SrcN1 = Src.getOperand(1);
5462 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5463 default:
5464 break;
5465 case ISD::SETEQ:
5466 // x == 0 => not (vmsknez.b x)
5467 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5468 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
5469 break;
5470 case ISD::SETGT:
5471 // x > -1 => vmskgez.b x
5472 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5473 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5474 break;
5475 case ISD::SETGE:
5476 // x >= 0 => vmskgez.b x
5477 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5478 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5479 break;
5480 case ISD::SETLT:
5481 // x < 0 => vmskltz.{b,h,w,d} x
5482 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5483 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5484 EltVT == MVT::i64))
5485 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5486 break;
5487 case ISD::SETLE:
5488 // x <= -1 => vmskltz.{b,h,w,d} x
5489 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5490 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5491 EltVT == MVT::i64))
5492 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5493 break;
5494 case ISD::SETNE:
5495 // x != 0 => vmsknez.b x
5496 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5497 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
5498 break;
5499 }
5500
5501 if (Opc == ISD::DELETED_NODE)
5502 return SDValue();
5503
5504 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
5505 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5506 V = DAG.getZExtOrTrunc(V, DL, T);
5507 return DAG.getBitcast(VT, V);
5508}
5509
5510static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
5511 TargetLowering::DAGCombinerInfo &DCI,
5512 const LoongArchSubtarget &Subtarget) {
5513 SDLoc DL(N);
5514 EVT VT = N->getValueType(0);
5515 SDValue Src = N->getOperand(0);
5516 EVT SrcVT = Src.getValueType();
5517 MVT GRLenVT = Subtarget.getGRLenVT();
5518
5519 if (!DCI.isBeforeLegalizeOps())
5520 return SDValue();
5521
5522 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5523 return SDValue();
5524
5525 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5526 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5527 if (Res)
5528 return Res;
5529
5530 // Generate vXi1 using [X]VMSKLTZ
5531 MVT SExtVT;
5532 unsigned Opc;
5533 bool UseLASX = false;
5534 bool PropagateSExt = false;
5535
5536 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5537 EVT CmpVT = Src.getOperand(0).getValueType();
5538 if (CmpVT.getSizeInBits() > 256)
5539 return SDValue();
5540 }
5541
5542 switch (SrcVT.getSimpleVT().SimpleTy) {
5543 default:
5544 return SDValue();
5545 case MVT::v2i1:
5546 SExtVT = MVT::v2i64;
5547 break;
5548 case MVT::v4i1:
5549 SExtVT = MVT::v4i32;
5550 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5551 SExtVT = MVT::v4i64;
5552 UseLASX = true;
5553 PropagateSExt = true;
5554 }
5555 break;
5556 case MVT::v8i1:
5557 SExtVT = MVT::v8i16;
5558 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5559 SExtVT = MVT::v8i32;
5560 UseLASX = true;
5561 PropagateSExt = true;
5562 }
5563 break;
5564 case MVT::v16i1:
5565 SExtVT = MVT::v16i8;
5566 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5567 SExtVT = MVT::v16i16;
5568 UseLASX = true;
5569 PropagateSExt = true;
5570 }
5571 break;
5572 case MVT::v32i1:
5573 SExtVT = MVT::v32i8;
5574 UseLASX = true;
5575 break;
5576 };
5577 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5578 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5579
5580 SDValue V;
5581 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5582 if (Src.getSimpleValueType() == MVT::v32i8) {
5583 SDValue Lo, Hi;
5584 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5585 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5586 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5587 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5588 DAG.getConstant(16, DL, MVT::i8));
5589 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5590 } else if (UseLASX) {
5591 return SDValue();
5592 }
5593 }
5594
5595 if (!V) {
5596 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5597 V = DAG.getNode(Opc, DL, GRLenVT, Src);
5598 }
5599
5600 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5601 V = DAG.getZExtOrTrunc(V, DL, T);
5602 return DAG.getBitcast(VT, V);
5603}
5604
5605static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5606 TargetLowering::DAGCombinerInfo &DCI,
5607 const LoongArchSubtarget &Subtarget) {
5608 MVT GRLenVT = Subtarget.getGRLenVT();
5609 EVT ValTy = N->getValueType(0);
5610 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5611 ConstantSDNode *CN0, *CN1;
5612 SDLoc DL(N);
5613 unsigned ValBits = ValTy.getSizeInBits();
5614 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5615 unsigned Shamt;
5616 bool SwapAndRetried = false;
5617
5618 // BSTRPICK requires the 32S feature.
5619 if (!Subtarget.has32S())
5620 return SDValue();
5621
5622 if (DCI.isBeforeLegalizeOps())
5623 return SDValue();
5624
5625 if (ValBits != 32 && ValBits != 64)
5626 return SDValue();
5627
5628Retry:
5629 // 1st pattern to match BSTRINS:
5630 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5631 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5632 // =>
5633 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
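// e.g. R = or (and X, ~0xff00), (and (shl Y, 8), 0xff00)
//      =>  BSTRINS X, Y, 15, 8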
5634 if (N0.getOpcode() == ISD::AND &&
5635 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5636 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5637 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5638 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5639 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5640 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5641 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5642 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5643 (MaskIdx0 + MaskLen0 <= ValBits)) {
5644 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5645 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5646 N1.getOperand(0).getOperand(0),
5647 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5648 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5649 }
5650
5651 // 2nd pattern to match BSTRINS:
5652 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5653 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5654 // =>
5655 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
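// e.g. R = or (and X, ~0xff00), (shl (and Y, 0xff), 8)
//      =>  BSTRINS X, Y, 15, 8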
5656 if (N0.getOpcode() == ISD::AND &&
5657 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5658 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5659 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5660 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5661 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5662 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5663 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5664 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5665 (MaskIdx0 + MaskLen0 <= ValBits)) {
5666 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5667 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5668 N1.getOperand(0).getOperand(0),
5669 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5670 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5671 }
5672
5673 // 3rd pattern to match BSTRINS:
5674 // R = or (and X, mask0), (and Y, mask1)
5675 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5676 // =>
5677 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5678 // where msb = lsb + size - 1
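// e.g. R = or (and X, ~0xff00), (and Y, 0xff00)
//      =>  BSTRINS X, (srl (and Y, 0xff00), 8), 15, 8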
5679 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5680 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5681 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5682 (MaskIdx0 + MaskLen0 <= 64) &&
5683 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5684 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5685 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5686 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5687 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5688 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5689 DAG.getConstant(ValBits == 32
5690 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5691 : (MaskIdx0 + MaskLen0 - 1),
5692 DL, GRLenVT),
5693 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5694 }
5695
5696 // 4th pattern to match BSTRINS:
5697 // R = or (and X, mask), (shl Y, shamt)
5698 // where mask = (2**shamt - 1)
5699 // =>
5700 // R = BSTRINS X, Y, ValBits - 1, shamt
5701 // where ValBits = 32 or 64
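// e.g. on LA64: R = or (and X, 0xff), (shl Y, 8)  =>  BSTRINS X, Y, 63, 8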
5702 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5703 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5704 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5705 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5706 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5707 (MaskIdx0 + MaskLen0 <= ValBits)) {
5708 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5709 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5710 N1.getOperand(0),
5711 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5712 DAG.getConstant(Shamt, DL, GRLenVT));
5713 }
5714
5715 // 5th pattern to match BSTRINS:
5716 // R = or (and X, mask), const
5717 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5718 // =>
5719 // R = BSTRINS X, (const >> lsb), msb, lsb
5720 // where msb = lsb + size - 1
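// e.g. R = or (and X, ~0xff00), 0x5600  =>  BSTRINS X, 0x56, 15, 8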
5721 if (N0.getOpcode() == ISD::AND &&
5722 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5723 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5724 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5725 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5726 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5727 return DAG.getNode(
5728 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5729 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5730 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5731 : (MaskIdx0 + MaskLen0 - 1),
5732 DL, GRLenVT),
5733 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5734 }
5735
5736 // 6th pattern.
5737 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5738 // by the incoming bits are known to be zero.
5739 // =>
5740 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5741 //
5742 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5743 // pattern is more common than the 1st. So we put the 1st before the 6th in
5744 // order to match as many nodes as possible.
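// e.g. a = b | ((c & 0xff) << 8), with bits 15:8 of b known zero
//      =>  a = BSTRINS b, c, 15, 8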
5745 ConstantSDNode *CNMask, *CNShamt;
5746 unsigned MaskIdx, MaskLen;
5747 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5748 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5749 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5750 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5751 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5752 Shamt = CNShamt->getZExtValue();
5753 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5754 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5755 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5756 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5757 N1.getOperand(0).getOperand(0),
5758 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5759 DAG.getConstant(Shamt, DL, GRLenVT));
5760 }
5761 }
5762
5763 // 7th pattern.
5764 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5765 // overwritten by the incoming bits are known to be zero.
5766 // =>
5767 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5768 //
5769 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5770 // before the 7th in order to match as many nodes as possible.
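// e.g. a = b | ((c << 8) & 0xff00), with bits 15:8 of b known zero
//      =>  a = BSTRINS b, c, 15, 8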
5771 if (N1.getOpcode() == ISD::AND &&
5772 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5773 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5774 N1.getOperand(0).getOpcode() == ISD::SHL &&
5775 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5776 CNShamt->getZExtValue() == MaskIdx) {
5777 APInt ShMask(ValBits, CNMask->getZExtValue());
5778 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5779 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5780 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5781 N1.getOperand(0).getOperand(0),
5782 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5783 DAG.getConstant(MaskIdx, DL, GRLenVT));
5784 }
5785 }
5786
5787 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5788 if (!SwapAndRetried) {
5789 std::swap(N0, N1);
5790 SwapAndRetried = true;
5791 goto Retry;
5792 }
5793
5794 SwapAndRetried = false;
5795Retry2:
5796 // 8th pattern.
5797 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5798 // the incoming bits are known to be zero.
5799 // =>
5800 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5801 //
5802 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5803 // we put it here in order to match as many nodes as possible or generate
5804 // fewer instructions.
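// e.g. a = b | (c & 0xff00), with bits 15:8 of b known zero
//      =>  a = BSTRINS b, (c >> 8), 15, 8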
5805 if (N1.getOpcode() == ISD::AND &&
5806 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5807 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5808 APInt ShMask(ValBits, CNMask->getZExtValue());
5809 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5810 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5811 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5812 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5813 N1->getOperand(0),
5814 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5815 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5816 DAG.getConstant(MaskIdx, DL, GRLenVT));
5817 }
5818 }
5819 // Swap N0/N1 and retry.
5820 if (!SwapAndRetried) {
5821 std::swap(N0, N1);
5822 SwapAndRetried = true;
5823 goto Retry2;
5824 }
5825
5826 return SDValue();
5827}
5828
5829static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5830 ExtType = ISD::NON_EXTLOAD;
5831
5832 switch (V.getNode()->getOpcode()) {
5833 case ISD::LOAD: {
5834 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5835 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5836 (LoadNode->getMemoryVT() == MVT::i16)) {
5837 ExtType = LoadNode->getExtensionType();
5838 return true;
5839 }
5840 return false;
5841 }
5842 case ISD::AssertSext: {
5843 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5844 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5845 ExtType = ISD::SEXTLOAD;
5846 return true;
5847 }
5848 return false;
5849 }
5850 case ISD::AssertZext: {
5851 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5852 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5853 ExtType = ISD::ZEXTLOAD;
5854 return true;
5855 }
5856 return false;
5857 }
5858 default:
5859 return false;
5860 }
5861
5862 return false;
5863}
5864
5865// Eliminate redundant truncation and zero-extension nodes.
5866// * Case 1:
5867// +------------+ +------------+ +------------+
5868// | Input1 | | Input2 | | CC |
5869// +------------+ +------------+ +------------+
5870// | | |
5871// V V +----+
5872// +------------+ +------------+ |
5873// | TRUNCATE | | TRUNCATE | |
5874// +------------+ +------------+ |
5875// | | |
5876// V V |
5877// +------------+ +------------+ |
5878// | ZERO_EXT | | ZERO_EXT | |
5879// +------------+ +------------+ |
5880// | | |
5881// | +-------------+ |
5882// V V | |
5883// +----------------+ | |
5884// | AND | | |
5885// +----------------+ | |
5886// | | |
5887// +---------------+ | |
5888// | | |
5889// V V V
5890// +-------------+
5891// | CMP |
5892// +-------------+
5893// * Case 2:
5894// +------------+ +------------+ +-------------+ +------------+ +------------+
5895// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5896// +------------+ +------------+ +-------------+ +------------+ +------------+
5897// | | | | |
5898// V | | | |
5899// +------------+ | | | |
5900// | XOR |<---------------------+ | |
5901// +------------+ | | |
5902// | | | |
5903// V V +---------------+ |
5904// +------------+ +------------+ | |
5905// | TRUNCATE | | TRUNCATE | | +-------------------------+
5906// +------------+ +------------+ | |
5907// | | | |
5908// V V | |
5909// +------------+ +------------+ | |
5910// | ZERO_EXT | | ZERO_EXT | | |
5911// +------------+ +------------+ | |
5912// | | | |
5913// V V | |
5914// +----------------+ | |
5915// | AND | | |
5916// +----------------+ | |
5917// | | |
5918// +---------------+ | |
5919// | | |
5920// V V V
5921// +-------------+
5922// | CMP |
5923// +-------------+
5924static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5925 TargetLowering::DAGCombinerInfo &DCI,
5926 const LoongArchSubtarget &Subtarget) {
5927 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5928
5929 SDNode *AndNode = N->getOperand(0).getNode();
5930 if (AndNode->getOpcode() != ISD::AND)
5931 return SDValue();
5932
5933 SDValue AndInputValue2 = AndNode->getOperand(1);
5934 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5935 return SDValue();
5936
5937 SDValue CmpInputValue = N->getOperand(1);
5938 SDValue AndInputValue1 = AndNode->getOperand(0);
5939 if (AndInputValue1.getOpcode() == ISD::XOR) {
5940 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5941 return SDValue();
5942 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5943 if (!CN || CN->getSExtValue() != -1)
5944 return SDValue();
5945 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5946 if (!CN || CN->getSExtValue() != 0)
5947 return SDValue();
5948 AndInputValue1 = AndInputValue1.getOperand(0);
5949 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5950 return SDValue();
5951 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5952 if (AndInputValue2 != CmpInputValue)
5953 return SDValue();
5954 } else {
5955 return SDValue();
5956 }
5957
5958 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5959 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5960 return SDValue();
5961
5962 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5963 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5964 return SDValue();
5965
5966 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5967 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5968 ISD::LoadExtType ExtType1;
5969 ISD::LoadExtType ExtType2;
5970
5971 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5972 !checkValueWidth(TruncInputValue2, ExtType2))
5973 return SDValue();
5974
5975 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5976 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5977 return SDValue();
5978
5979 if ((ExtType2 != ISD::ZEXTLOAD) &&
5980 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5981 return SDValue();
5982
5983 // These truncation and zero-extension nodes are not necessary; remove them.
5984 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5985 TruncInputValue1, TruncInputValue2);
5986 SDValue NewSetCC =
5987 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5988 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5989 return SDValue(N, 0);
5990}
5991
5992// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5993static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5994 TargetLowering::DAGCombinerInfo &DCI,
5995 const LoongArchSubtarget &Subtarget) {
5996 if (DCI.isBeforeLegalizeOps())
5997 return SDValue();
5998
5999 SDValue Src = N->getOperand(0);
6000 if (Src.getOpcode() != LoongArchISD::REVB_2W)
6001 return SDValue();
6002
6003 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
6004 Src.getOperand(0));
6005}
6006
6007// Perform common combines for BR_CC and SELECT_CC conditions.
6008static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
6009 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
6010 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6011
6012 // Since an arithmetic right shift always preserves the sign bit,
6013 // the shift can be omitted.
6014 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
6015 // setge (sra X, N), 0 -> setge X, 0
6016 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
6017 LHS.getOpcode() == ISD::SRA) {
6018 LHS = LHS.getOperand(0);
6019 return true;
6020 }
6021
6022 if (!ISD::isIntEqualitySetCC(CCVal))
6023 return false;
6024
6025 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
6026 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
6027 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
6028 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
6029 // If we're looking for eq 0 instead of ne 0, we need to invert the
6030 // condition.
6031 bool Invert = CCVal == ISD::SETEQ;
6032 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6033 if (Invert)
6034 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6035
6036 RHS = LHS.getOperand(1);
6037 LHS = LHS.getOperand(0);
6038 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6039
6040 CC = DAG.getCondCode(CCVal);
6041 return true;
6042 }
6043
6044 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
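// e.g. ((srl (and X, 1 << 5), 5), 0, ne)  =>  ((shl X, GRLen - 6), 0, lt)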
6045 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
6046 LHS.getOperand(1).getOpcode() == ISD::Constant) {
6047 SDValue LHS0 = LHS.getOperand(0);
6048 if (LHS0.getOpcode() == ISD::AND &&
6049 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
6050 uint64_t Mask = LHS0.getConstantOperandVal(1);
6051 uint64_t ShAmt = LHS.getConstantOperandVal(1);
6052 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
6053 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
6054 CC = DAG.getCondCode(CCVal);
6055
6056 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
6057 LHS = LHS0.getOperand(0);
6058 if (ShAmt != 0)
6059 LHS =
6060 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
6061 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
6062 return true;
6063 }
6064 }
6065 }
6066
6067 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
6068 // This can occur when legalizing some floating point comparisons.
6069 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
6070 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
6071 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6072 CC = DAG.getCondCode(CCVal);
6073 RHS = DAG.getConstant(0, DL, LHS.getValueType());
6074 return true;
6075 }
6076
6077 return false;
6078}
6079
6080static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
6081 TargetLowering::DAGCombinerInfo &DCI,
6082 const LoongArchSubtarget &Subtarget) {
6083 SDValue LHS = N->getOperand(1);
6084 SDValue RHS = N->getOperand(2);
6085 SDValue CC = N->getOperand(3);
6086 SDLoc DL(N);
6087
6088 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6089 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
6090 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
6091
6092 return SDValue();
6093}
6094
6095static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
6096 TargetLowering::DAGCombinerInfo &DCI,
6097 const LoongArchSubtarget &Subtarget) {
6098 // Transform
6099 SDValue LHS = N->getOperand(0);
6100 SDValue RHS = N->getOperand(1);
6101 SDValue CC = N->getOperand(2);
6102 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6103 SDValue TrueV = N->getOperand(3);
6104 SDValue FalseV = N->getOperand(4);
6105 SDLoc DL(N);
6106 EVT VT = N->getValueType(0);
6107
6108 // If the True and False values are the same, we don't need a select_cc.
6109 if (TrueV == FalseV)
6110 return TrueV;
6111
6112 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
6113 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
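// e.g. (select (x < 0), 3, 7)  =>  ((x >> (GRLEN - 1)) & (3 - 7)) + 7,
// provided both constants and their difference fit in simm12.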
6114 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
6115 isNullConstant(RHS) &&
6116 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
6117 if (CCVal == ISD::CondCode::SETGE)
6118 std::swap(TrueV, FalseV);
6119
6120 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
6121 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
6122 // Only handle simm12; if the value is outside this range, it can be
6123 // materialized in a register instead.
6124 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
6125 isInt<12>(TrueSImm - FalseSImm)) {
6126 SDValue SRA =
6127 DAG.getNode(ISD::SRA, DL, VT, LHS,
6128 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
6129 SDValue AND =
6130 DAG.getNode(ISD::AND, DL, VT, SRA,
6131 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
6132 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
6133 }
6134
6135 if (CCVal == ISD::CondCode::SETGE)
6136 std::swap(TrueV, FalseV);
6137 }
6138
6139 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6140 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
6141 {LHS, RHS, CC, TrueV, FalseV});
6142
6143 return SDValue();
6144}
6145
6146template <unsigned N>
6147static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
6148 SelectionDAG &DAG,
6149 const LoongArchSubtarget &Subtarget,
6150 bool IsSigned = false) {
6151 SDLoc DL(Node);
6152 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6153 // Check the ImmArg.
6154 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6155 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6156 DAG.getContext()->emitError(Node->getOperationName(0) +
6157 ": argument out of range.");
6158 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
6159 }
6160 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
6161}
6162
6163template <unsigned N>
6164static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6165 SelectionDAG &DAG, bool IsSigned = false) {
6166 SDLoc DL(Node);
6167 EVT ResTy = Node->getValueType(0);
6168 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6169
6170 // Check the ImmArg.
6171 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6172 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6173 DAG.getContext()->emitError(Node->getOperationName(0) +
6174 ": argument out of range.");
6175 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6176 }
6177 return DAG.getConstant(
6178 APInt(ResTy.getScalarType().getSizeInBits(),
6179 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6180 DL, ResTy);
6181}
6182
6183static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
6184 SDLoc DL(Node);
6185 EVT ResTy = Node->getValueType(0);
6186 SDValue Vec = Node->getOperand(2);
6187 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6188 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6189}
6190
6191static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
6192 SDLoc DL(Node);
6193 EVT ResTy = Node->getValueType(0);
6194 SDValue One = DAG.getConstant(1, DL, ResTy);
6195 SDValue Bit =
6196 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6197
6198 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6199 DAG.getNOT(DL, Bit, ResTy));
6200}
6201
6202template <unsigned N>
6203static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
6204 SDLoc DL(Node);
6205 EVT ResTy = Node->getValueType(0);
6206 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6207 // Check the unsigned ImmArg.
6208 if (!isUInt<N>(CImm->getZExtValue())) {
6209 DAG.getContext()->emitError(Node->getOperationName(0) +
6210 ": argument out of range.");
6211 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6212 }
6213
6214 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6215 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
6216
6217 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
6218}
6219
6220template <unsigned N>
6221static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
6222 SDLoc DL(Node);
6223 EVT ResTy = Node->getValueType(0);
6224 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6225 // Check the unsigned ImmArg.
6226 if (!isUInt<N>(CImm->getZExtValue())) {
6227 DAG.getContext()->emitError(Node->getOperationName(0) +
6228 ": argument out of range.");
6229 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6230 }
6231
6232 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6233 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6234 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
6235}
6236
6237template <unsigned N>
6238static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
6239 SDLoc DL(Node);
6240 EVT ResTy = Node->getValueType(0);
6241 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6242 // Check the unsigned ImmArg.
6243 if (!isUInt<N>(CImm->getZExtValue())) {
6244 DAG.getContext()->emitError(Node->getOperationName(0) +
6245 ": argument out of range.");
6246 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6247 }
6248
6249 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6250 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6251 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6252}
6253
6254template <unsigned W>
6256 unsigned ResOp) {
6257 unsigned Imm = N->getConstantOperandVal(2);
6258 if (!isUInt<W>(Imm)) {
6259 const StringRef ErrorMsg = "argument out of range";
6260 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6261 return DAG.getUNDEF(N->getValueType(0));
6262 }
6263 SDLoc DL(N);
6264 SDValue Vec = N->getOperand(1);
6265 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6266 SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
6267 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6268}
6269
6270static SDValue
6271performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
6272 TargetLowering::DAGCombinerInfo &DCI,
6273 const LoongArchSubtarget &Subtarget) {
6274 SDLoc DL(N);
6275 switch (N->getConstantOperandVal(0)) {
6276 default:
6277 break;
6278 case Intrinsic::loongarch_lsx_vadd_b:
6279 case Intrinsic::loongarch_lsx_vadd_h:
6280 case Intrinsic::loongarch_lsx_vadd_w:
6281 case Intrinsic::loongarch_lsx_vadd_d:
6282 case Intrinsic::loongarch_lasx_xvadd_b:
6283 case Intrinsic::loongarch_lasx_xvadd_h:
6284 case Intrinsic::loongarch_lasx_xvadd_w:
6285 case Intrinsic::loongarch_lasx_xvadd_d:
6286 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6287 N->getOperand(2));
6288 case Intrinsic::loongarch_lsx_vaddi_bu:
6289 case Intrinsic::loongarch_lsx_vaddi_hu:
6290 case Intrinsic::loongarch_lsx_vaddi_wu:
6291 case Intrinsic::loongarch_lsx_vaddi_du:
6292 case Intrinsic::loongarch_lasx_xvaddi_bu:
6293 case Intrinsic::loongarch_lasx_xvaddi_hu:
6294 case Intrinsic::loongarch_lasx_xvaddi_wu:
6295 case Intrinsic::loongarch_lasx_xvaddi_du:
6296 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6297 lowerVectorSplatImm<5>(N, 2, DAG));
6298 case Intrinsic::loongarch_lsx_vsub_b:
6299 case Intrinsic::loongarch_lsx_vsub_h:
6300 case Intrinsic::loongarch_lsx_vsub_w:
6301 case Intrinsic::loongarch_lsx_vsub_d:
6302 case Intrinsic::loongarch_lasx_xvsub_b:
6303 case Intrinsic::loongarch_lasx_xvsub_h:
6304 case Intrinsic::loongarch_lasx_xvsub_w:
6305 case Intrinsic::loongarch_lasx_xvsub_d:
6306 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6307 N->getOperand(2));
6308 case Intrinsic::loongarch_lsx_vsubi_bu:
6309 case Intrinsic::loongarch_lsx_vsubi_hu:
6310 case Intrinsic::loongarch_lsx_vsubi_wu:
6311 case Intrinsic::loongarch_lsx_vsubi_du:
6312 case Intrinsic::loongarch_lasx_xvsubi_bu:
6313 case Intrinsic::loongarch_lasx_xvsubi_hu:
6314 case Intrinsic::loongarch_lasx_xvsubi_wu:
6315 case Intrinsic::loongarch_lasx_xvsubi_du:
6316 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6317 lowerVectorSplatImm<5>(N, 2, DAG));
6318 case Intrinsic::loongarch_lsx_vneg_b:
6319 case Intrinsic::loongarch_lsx_vneg_h:
6320 case Intrinsic::loongarch_lsx_vneg_w:
6321 case Intrinsic::loongarch_lsx_vneg_d:
6322 case Intrinsic::loongarch_lasx_xvneg_b:
6323 case Intrinsic::loongarch_lasx_xvneg_h:
6324 case Intrinsic::loongarch_lasx_xvneg_w:
6325 case Intrinsic::loongarch_lasx_xvneg_d:
6326 return DAG.getNode(
6327 ISD::SUB, DL, N->getValueType(0),
6328 DAG.getConstant(
6329 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6330 /*isSigned=*/true),
6331 SDLoc(N), N->getValueType(0)),
6332 N->getOperand(1));
6333 case Intrinsic::loongarch_lsx_vmax_b:
6334 case Intrinsic::loongarch_lsx_vmax_h:
6335 case Intrinsic::loongarch_lsx_vmax_w:
6336 case Intrinsic::loongarch_lsx_vmax_d:
6337 case Intrinsic::loongarch_lasx_xvmax_b:
6338 case Intrinsic::loongarch_lasx_xvmax_h:
6339 case Intrinsic::loongarch_lasx_xvmax_w:
6340 case Intrinsic::loongarch_lasx_xvmax_d:
6341 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6342 N->getOperand(2));
6343 case Intrinsic::loongarch_lsx_vmax_bu:
6344 case Intrinsic::loongarch_lsx_vmax_hu:
6345 case Intrinsic::loongarch_lsx_vmax_wu:
6346 case Intrinsic::loongarch_lsx_vmax_du:
6347 case Intrinsic::loongarch_lasx_xvmax_bu:
6348 case Intrinsic::loongarch_lasx_xvmax_hu:
6349 case Intrinsic::loongarch_lasx_xvmax_wu:
6350 case Intrinsic::loongarch_lasx_xvmax_du:
6351 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6352 N->getOperand(2));
6353 case Intrinsic::loongarch_lsx_vmaxi_b:
6354 case Intrinsic::loongarch_lsx_vmaxi_h:
6355 case Intrinsic::loongarch_lsx_vmaxi_w:
6356 case Intrinsic::loongarch_lsx_vmaxi_d:
6357 case Intrinsic::loongarch_lasx_xvmaxi_b:
6358 case Intrinsic::loongarch_lasx_xvmaxi_h:
6359 case Intrinsic::loongarch_lasx_xvmaxi_w:
6360 case Intrinsic::loongarch_lasx_xvmaxi_d:
6361 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6362 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6363 case Intrinsic::loongarch_lsx_vmaxi_bu:
6364 case Intrinsic::loongarch_lsx_vmaxi_hu:
6365 case Intrinsic::loongarch_lsx_vmaxi_wu:
6366 case Intrinsic::loongarch_lsx_vmaxi_du:
6367 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6368 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6369 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6370 case Intrinsic::loongarch_lasx_xvmaxi_du:
6371 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6372 lowerVectorSplatImm<5>(N, 2, DAG));
6373 case Intrinsic::loongarch_lsx_vmin_b:
6374 case Intrinsic::loongarch_lsx_vmin_h:
6375 case Intrinsic::loongarch_lsx_vmin_w:
6376 case Intrinsic::loongarch_lsx_vmin_d:
6377 case Intrinsic::loongarch_lasx_xvmin_b:
6378 case Intrinsic::loongarch_lasx_xvmin_h:
6379 case Intrinsic::loongarch_lasx_xvmin_w:
6380 case Intrinsic::loongarch_lasx_xvmin_d:
6381 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6382 N->getOperand(2));
6383 case Intrinsic::loongarch_lsx_vmin_bu:
6384 case Intrinsic::loongarch_lsx_vmin_hu:
6385 case Intrinsic::loongarch_lsx_vmin_wu:
6386 case Intrinsic::loongarch_lsx_vmin_du:
6387 case Intrinsic::loongarch_lasx_xvmin_bu:
6388 case Intrinsic::loongarch_lasx_xvmin_hu:
6389 case Intrinsic::loongarch_lasx_xvmin_wu:
6390 case Intrinsic::loongarch_lasx_xvmin_du:
6391 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6392 N->getOperand(2));
6393 case Intrinsic::loongarch_lsx_vmini_b:
6394 case Intrinsic::loongarch_lsx_vmini_h:
6395 case Intrinsic::loongarch_lsx_vmini_w:
6396 case Intrinsic::loongarch_lsx_vmini_d:
6397 case Intrinsic::loongarch_lasx_xvmini_b:
6398 case Intrinsic::loongarch_lasx_xvmini_h:
6399 case Intrinsic::loongarch_lasx_xvmini_w:
6400 case Intrinsic::loongarch_lasx_xvmini_d:
6401 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6402 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6403 case Intrinsic::loongarch_lsx_vmini_bu:
6404 case Intrinsic::loongarch_lsx_vmini_hu:
6405 case Intrinsic::loongarch_lsx_vmini_wu:
6406 case Intrinsic::loongarch_lsx_vmini_du:
6407 case Intrinsic::loongarch_lasx_xvmini_bu:
6408 case Intrinsic::loongarch_lasx_xvmini_hu:
6409 case Intrinsic::loongarch_lasx_xvmini_wu:
6410 case Intrinsic::loongarch_lasx_xvmini_du:
6411 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6412 lowerVectorSplatImm<5>(N, 2, DAG));
6413 case Intrinsic::loongarch_lsx_vmul_b:
6414 case Intrinsic::loongarch_lsx_vmul_h:
6415 case Intrinsic::loongarch_lsx_vmul_w:
6416 case Intrinsic::loongarch_lsx_vmul_d:
6417 case Intrinsic::loongarch_lasx_xvmul_b:
6418 case Intrinsic::loongarch_lasx_xvmul_h:
6419 case Intrinsic::loongarch_lasx_xvmul_w:
6420 case Intrinsic::loongarch_lasx_xvmul_d:
6421 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6422 N->getOperand(2));
6423 case Intrinsic::loongarch_lsx_vmadd_b:
6424 case Intrinsic::loongarch_lsx_vmadd_h:
6425 case Intrinsic::loongarch_lsx_vmadd_w:
6426 case Intrinsic::loongarch_lsx_vmadd_d:
6427 case Intrinsic::loongarch_lasx_xvmadd_b:
6428 case Intrinsic::loongarch_lasx_xvmadd_h:
6429 case Intrinsic::loongarch_lasx_xvmadd_w:
6430 case Intrinsic::loongarch_lasx_xvmadd_d: {
6431 EVT ResTy = N->getValueType(0);
6432 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6433 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6434 N->getOperand(3)));
6435 }
6436 case Intrinsic::loongarch_lsx_vmsub_b:
6437 case Intrinsic::loongarch_lsx_vmsub_h:
6438 case Intrinsic::loongarch_lsx_vmsub_w:
6439 case Intrinsic::loongarch_lsx_vmsub_d:
6440 case Intrinsic::loongarch_lasx_xvmsub_b:
6441 case Intrinsic::loongarch_lasx_xvmsub_h:
6442 case Intrinsic::loongarch_lasx_xvmsub_w:
6443 case Intrinsic::loongarch_lasx_xvmsub_d: {
6444 EVT ResTy = N->getValueType(0);
6445 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6446 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6447 N->getOperand(3)));
6448 }
6449 case Intrinsic::loongarch_lsx_vdiv_b:
6450 case Intrinsic::loongarch_lsx_vdiv_h:
6451 case Intrinsic::loongarch_lsx_vdiv_w:
6452 case Intrinsic::loongarch_lsx_vdiv_d:
6453 case Intrinsic::loongarch_lasx_xvdiv_b:
6454 case Intrinsic::loongarch_lasx_xvdiv_h:
6455 case Intrinsic::loongarch_lasx_xvdiv_w:
6456 case Intrinsic::loongarch_lasx_xvdiv_d:
6457 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6458 N->getOperand(2));
6459 case Intrinsic::loongarch_lsx_vdiv_bu:
6460 case Intrinsic::loongarch_lsx_vdiv_hu:
6461 case Intrinsic::loongarch_lsx_vdiv_wu:
6462 case Intrinsic::loongarch_lsx_vdiv_du:
6463 case Intrinsic::loongarch_lasx_xvdiv_bu:
6464 case Intrinsic::loongarch_lasx_xvdiv_hu:
6465 case Intrinsic::loongarch_lasx_xvdiv_wu:
6466 case Intrinsic::loongarch_lasx_xvdiv_du:
6467 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6468 N->getOperand(2));
6469 case Intrinsic::loongarch_lsx_vmod_b:
6470 case Intrinsic::loongarch_lsx_vmod_h:
6471 case Intrinsic::loongarch_lsx_vmod_w:
6472 case Intrinsic::loongarch_lsx_vmod_d:
6473 case Intrinsic::loongarch_lasx_xvmod_b:
6474 case Intrinsic::loongarch_lasx_xvmod_h:
6475 case Intrinsic::loongarch_lasx_xvmod_w:
6476 case Intrinsic::loongarch_lasx_xvmod_d:
6477 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6478 N->getOperand(2));
6479 case Intrinsic::loongarch_lsx_vmod_bu:
6480 case Intrinsic::loongarch_lsx_vmod_hu:
6481 case Intrinsic::loongarch_lsx_vmod_wu:
6482 case Intrinsic::loongarch_lsx_vmod_du:
6483 case Intrinsic::loongarch_lasx_xvmod_bu:
6484 case Intrinsic::loongarch_lasx_xvmod_hu:
6485 case Intrinsic::loongarch_lasx_xvmod_wu:
6486 case Intrinsic::loongarch_lasx_xvmod_du:
6487 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6488 N->getOperand(2));
6489 case Intrinsic::loongarch_lsx_vand_v:
6490 case Intrinsic::loongarch_lasx_xvand_v:
6491 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6492 N->getOperand(2));
6493 case Intrinsic::loongarch_lsx_vor_v:
6494 case Intrinsic::loongarch_lasx_xvor_v:
6495 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6496 N->getOperand(2));
6497 case Intrinsic::loongarch_lsx_vxor_v:
6498 case Intrinsic::loongarch_lasx_xvxor_v:
6499 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6500 N->getOperand(2));
6501 case Intrinsic::loongarch_lsx_vnor_v:
6502 case Intrinsic::loongarch_lasx_xvnor_v: {
6503 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6504 N->getOperand(2));
6505 return DAG.getNOT(DL, Res, Res->getValueType(0));
6506 }
6507 case Intrinsic::loongarch_lsx_vandi_b:
6508 case Intrinsic::loongarch_lasx_xvandi_b:
6509 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6510 lowerVectorSplatImm<8>(N, 2, DAG));
6511 case Intrinsic::loongarch_lsx_vori_b:
6512 case Intrinsic::loongarch_lasx_xvori_b:
6513 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6514 lowerVectorSplatImm<8>(N, 2, DAG));
6515 case Intrinsic::loongarch_lsx_vxori_b:
6516 case Intrinsic::loongarch_lasx_xvxori_b:
6517 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6518 lowerVectorSplatImm<8>(N, 2, DAG));
6519 case Intrinsic::loongarch_lsx_vsll_b:
6520 case Intrinsic::loongarch_lsx_vsll_h:
6521 case Intrinsic::loongarch_lsx_vsll_w:
6522 case Intrinsic::loongarch_lsx_vsll_d:
6523 case Intrinsic::loongarch_lasx_xvsll_b:
6524 case Intrinsic::loongarch_lasx_xvsll_h:
6525 case Intrinsic::loongarch_lasx_xvsll_w:
6526 case Intrinsic::loongarch_lasx_xvsll_d:
6527 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6528 truncateVecElts(N, DAG));
6529 case Intrinsic::loongarch_lsx_vslli_b:
6530 case Intrinsic::loongarch_lasx_xvslli_b:
6531 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6532 lowerVectorSplatImm<3>(N, 2, DAG));
6533 case Intrinsic::loongarch_lsx_vslli_h:
6534 case Intrinsic::loongarch_lasx_xvslli_h:
6535 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6536 lowerVectorSplatImm<4>(N, 2, DAG));
6537 case Intrinsic::loongarch_lsx_vslli_w:
6538 case Intrinsic::loongarch_lasx_xvslli_w:
6539 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6540 lowerVectorSplatImm<5>(N, 2, DAG));
6541 case Intrinsic::loongarch_lsx_vslli_d:
6542 case Intrinsic::loongarch_lasx_xvslli_d:
6543 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6544 lowerVectorSplatImm<6>(N, 2, DAG));
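// Note that the shift-immediate intrinsics (vslli/vsrli/vsrai and their LASX
// equivalents) use 3/4/5/6-bit immediates for the .b/.h/.w/.d element widths,
// i.e. log2 of the element size in bits, which is what the template argument
// of lowerVectorSplatImm<N> encodes.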
6545 case Intrinsic::loongarch_lsx_vsrl_b:
6546 case Intrinsic::loongarch_lsx_vsrl_h:
6547 case Intrinsic::loongarch_lsx_vsrl_w:
6548 case Intrinsic::loongarch_lsx_vsrl_d:
6549 case Intrinsic::loongarch_lasx_xvsrl_b:
6550 case Intrinsic::loongarch_lasx_xvsrl_h:
6551 case Intrinsic::loongarch_lasx_xvsrl_w:
6552 case Intrinsic::loongarch_lasx_xvsrl_d:
6553 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6554 truncateVecElts(N, DAG));
6555 case Intrinsic::loongarch_lsx_vsrli_b:
6556 case Intrinsic::loongarch_lasx_xvsrli_b:
6557 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6558 lowerVectorSplatImm<3>(N, 2, DAG));
6559 case Intrinsic::loongarch_lsx_vsrli_h:
6560 case Intrinsic::loongarch_lasx_xvsrli_h:
6561 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6562 lowerVectorSplatImm<4>(N, 2, DAG));
6563 case Intrinsic::loongarch_lsx_vsrli_w:
6564 case Intrinsic::loongarch_lasx_xvsrli_w:
6565 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6566 lowerVectorSplatImm<5>(N, 2, DAG));
6567 case Intrinsic::loongarch_lsx_vsrli_d:
6568 case Intrinsic::loongarch_lasx_xvsrli_d:
6569 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6570 lowerVectorSplatImm<6>(N, 2, DAG));
6571 case Intrinsic::loongarch_lsx_vsra_b:
6572 case Intrinsic::loongarch_lsx_vsra_h:
6573 case Intrinsic::loongarch_lsx_vsra_w:
6574 case Intrinsic::loongarch_lsx_vsra_d:
6575 case Intrinsic::loongarch_lasx_xvsra_b:
6576 case Intrinsic::loongarch_lasx_xvsra_h:
6577 case Intrinsic::loongarch_lasx_xvsra_w:
6578 case Intrinsic::loongarch_lasx_xvsra_d:
6579 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6580 truncateVecElts(N, DAG));
6581 case Intrinsic::loongarch_lsx_vsrai_b:
6582 case Intrinsic::loongarch_lasx_xvsrai_b:
6583 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6584 lowerVectorSplatImm<3>(N, 2, DAG));
6585 case Intrinsic::loongarch_lsx_vsrai_h:
6586 case Intrinsic::loongarch_lasx_xvsrai_h:
6587 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6588 lowerVectorSplatImm<4>(N, 2, DAG));
6589 case Intrinsic::loongarch_lsx_vsrai_w:
6590 case Intrinsic::loongarch_lasx_xvsrai_w:
6591 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6592 lowerVectorSplatImm<5>(N, 2, DAG));
6593 case Intrinsic::loongarch_lsx_vsrai_d:
6594 case Intrinsic::loongarch_lasx_xvsrai_d:
6595 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6596 lowerVectorSplatImm<6>(N, 2, DAG));
6597 case Intrinsic::loongarch_lsx_vclz_b:
6598 case Intrinsic::loongarch_lsx_vclz_h:
6599 case Intrinsic::loongarch_lsx_vclz_w:
6600 case Intrinsic::loongarch_lsx_vclz_d:
6601 case Intrinsic::loongarch_lasx_xvclz_b:
6602 case Intrinsic::loongarch_lasx_xvclz_h:
6603 case Intrinsic::loongarch_lasx_xvclz_w:
6604 case Intrinsic::loongarch_lasx_xvclz_d:
6605 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6606 case Intrinsic::loongarch_lsx_vpcnt_b:
6607 case Intrinsic::loongarch_lsx_vpcnt_h:
6608 case Intrinsic::loongarch_lsx_vpcnt_w:
6609 case Intrinsic::loongarch_lsx_vpcnt_d:
6610 case Intrinsic::loongarch_lasx_xvpcnt_b:
6611 case Intrinsic::loongarch_lasx_xvpcnt_h:
6612 case Intrinsic::loongarch_lasx_xvpcnt_w:
6613 case Intrinsic::loongarch_lasx_xvpcnt_d:
6614 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6615 case Intrinsic::loongarch_lsx_vbitclr_b:
6616 case Intrinsic::loongarch_lsx_vbitclr_h:
6617 case Intrinsic::loongarch_lsx_vbitclr_w:
6618 case Intrinsic::loongarch_lsx_vbitclr_d:
6619 case Intrinsic::loongarch_lasx_xvbitclr_b:
6620 case Intrinsic::loongarch_lasx_xvbitclr_h:
6621 case Intrinsic::loongarch_lasx_xvbitclr_w:
6622 case Intrinsic::loongarch_lasx_xvbitclr_d:
6623 return lowerVectorBitClear(N, DAG);
6624 case Intrinsic::loongarch_lsx_vbitclri_b:
6625 case Intrinsic::loongarch_lasx_xvbitclri_b:
6626 return lowerVectorBitClearImm<3>(N, DAG);
6627 case Intrinsic::loongarch_lsx_vbitclri_h:
6628 case Intrinsic::loongarch_lasx_xvbitclri_h:
6629 return lowerVectorBitClearImm<4>(N, DAG);
6630 case Intrinsic::loongarch_lsx_vbitclri_w:
6631 case Intrinsic::loongarch_lasx_xvbitclri_w:
6632 return lowerVectorBitClearImm<5>(N, DAG);
6633 case Intrinsic::loongarch_lsx_vbitclri_d:
6634 case Intrinsic::loongarch_lasx_xvbitclri_d:
6635 return lowerVectorBitClearImm<6>(N, DAG);
6636 case Intrinsic::loongarch_lsx_vbitset_b:
6637 case Intrinsic::loongarch_lsx_vbitset_h:
6638 case Intrinsic::loongarch_lsx_vbitset_w:
6639 case Intrinsic::loongarch_lsx_vbitset_d:
6640 case Intrinsic::loongarch_lasx_xvbitset_b:
6641 case Intrinsic::loongarch_lasx_xvbitset_h:
6642 case Intrinsic::loongarch_lasx_xvbitset_w:
6643 case Intrinsic::loongarch_lasx_xvbitset_d: {
6644 EVT VecTy = N->getValueType(0);
6645 SDValue One = DAG.getConstant(1, DL, VecTy);
6646 return DAG.getNode(
6647 ISD::OR, DL, VecTy, N->getOperand(1),
6648 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6649 }
6650 case Intrinsic::loongarch_lsx_vbitseti_b:
6651 case Intrinsic::loongarch_lasx_xvbitseti_b:
6652 return lowerVectorBitSetImm<3>(N, DAG);
6653 case Intrinsic::loongarch_lsx_vbitseti_h:
6654 case Intrinsic::loongarch_lasx_xvbitseti_h:
6655 return lowerVectorBitSetImm<4>(N, DAG);
6656 case Intrinsic::loongarch_lsx_vbitseti_w:
6657 case Intrinsic::loongarch_lasx_xvbitseti_w:
6658 return lowerVectorBitSetImm<5>(N, DAG);
6659 case Intrinsic::loongarch_lsx_vbitseti_d:
6660 case Intrinsic::loongarch_lasx_xvbitseti_d:
6661 return lowerVectorBitSetImm<6>(N, DAG);
6662 case Intrinsic::loongarch_lsx_vbitrev_b:
6663 case Intrinsic::loongarch_lsx_vbitrev_h:
6664 case Intrinsic::loongarch_lsx_vbitrev_w:
6665 case Intrinsic::loongarch_lsx_vbitrev_d:
6666 case Intrinsic::loongarch_lasx_xvbitrev_b:
6667 case Intrinsic::loongarch_lasx_xvbitrev_h:
6668 case Intrinsic::loongarch_lasx_xvbitrev_w:
6669 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6670 EVT VecTy = N->getValueType(0);
6671 SDValue One = DAG.getConstant(1, DL, VecTy);
6672 return DAG.getNode(
6673 ISD::XOR, DL, VecTy, N->getOperand(1),
6674 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6675 }
6676 case Intrinsic::loongarch_lsx_vbitrevi_b:
6677 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6678 return lowerVectorBitRevImm<3>(N, DAG);
6679 case Intrinsic::loongarch_lsx_vbitrevi_h:
6680 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6681 return lowerVectorBitRevImm<4>(N, DAG);
6682 case Intrinsic::loongarch_lsx_vbitrevi_w:
6683 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6684 return lowerVectorBitRevImm<5>(N, DAG);
6685 case Intrinsic::loongarch_lsx_vbitrevi_d:
6686 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6687 return lowerVectorBitRevImm<6>(N, DAG);
6688 case Intrinsic::loongarch_lsx_vfadd_s:
6689 case Intrinsic::loongarch_lsx_vfadd_d:
6690 case Intrinsic::loongarch_lasx_xvfadd_s:
6691 case Intrinsic::loongarch_lasx_xvfadd_d:
6692 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6693 N->getOperand(2));
6694 case Intrinsic::loongarch_lsx_vfsub_s:
6695 case Intrinsic::loongarch_lsx_vfsub_d:
6696 case Intrinsic::loongarch_lasx_xvfsub_s:
6697 case Intrinsic::loongarch_lasx_xvfsub_d:
6698 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6699 N->getOperand(2));
6700 case Intrinsic::loongarch_lsx_vfmul_s:
6701 case Intrinsic::loongarch_lsx_vfmul_d:
6702 case Intrinsic::loongarch_lasx_xvfmul_s:
6703 case Intrinsic::loongarch_lasx_xvfmul_d:
6704 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6705 N->getOperand(2));
6706 case Intrinsic::loongarch_lsx_vfdiv_s:
6707 case Intrinsic::loongarch_lsx_vfdiv_d:
6708 case Intrinsic::loongarch_lasx_xvfdiv_s:
6709 case Intrinsic::loongarch_lasx_xvfdiv_d:
6710 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6711 N->getOperand(2));
6712 case Intrinsic::loongarch_lsx_vfmadd_s:
6713 case Intrinsic::loongarch_lsx_vfmadd_d:
6714 case Intrinsic::loongarch_lasx_xvfmadd_s:
6715 case Intrinsic::loongarch_lasx_xvfmadd_d:
6716 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6717 N->getOperand(2), N->getOperand(3));
6718 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6719 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6720 N->getOperand(1), N->getOperand(2),
6721 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6722 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6723 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6724 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6725 N->getOperand(1), N->getOperand(2),
6726 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6727 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6728 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6729 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6730 N->getOperand(1), N->getOperand(2),
6731 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6732 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6733 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6734 N->getOperand(1), N->getOperand(2),
6735 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6736 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6737 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6738 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6739 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6740 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6741 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6742 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6743 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6744 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6745 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6746 N->getOperand(1)));
6747 case Intrinsic::loongarch_lsx_vreplve_b:
6748 case Intrinsic::loongarch_lsx_vreplve_h:
6749 case Intrinsic::loongarch_lsx_vreplve_w:
6750 case Intrinsic::loongarch_lsx_vreplve_d:
6751 case Intrinsic::loongarch_lasx_xvreplve_b:
6752 case Intrinsic::loongarch_lasx_xvreplve_h:
6753 case Intrinsic::loongarch_lasx_xvreplve_w:
6754 case Intrinsic::loongarch_lasx_xvreplve_d:
6755 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6756 N->getOperand(1),
6757 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6758 N->getOperand(2)));
6759 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6760 if (!Subtarget.is64Bit())
6761 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6762 break;
6763 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6764 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6765 if (!Subtarget.is64Bit())
6766 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6767 break;
6768 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6769 if (!Subtarget.is64Bit())
6770 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6771 break;
6772 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6773 if (!Subtarget.is64Bit())
6774 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6775 break;
6776 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6777 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6778 if (!Subtarget.is64Bit())
6779 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6780 break;
6781 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6782 if (!Subtarget.is64Bit())
6783 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6784 break;
6785 case Intrinsic::loongarch_lsx_bz_b:
6786 case Intrinsic::loongarch_lsx_bz_h:
6787 case Intrinsic::loongarch_lsx_bz_w:
6788 case Intrinsic::loongarch_lsx_bz_d:
6789 case Intrinsic::loongarch_lasx_xbz_b:
6790 case Intrinsic::loongarch_lasx_xbz_h:
6791 case Intrinsic::loongarch_lasx_xbz_w:
6792 case Intrinsic::loongarch_lasx_xbz_d:
6793 if (!Subtarget.is64Bit())
6794 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6795 N->getOperand(1));
6796 break;
6797 case Intrinsic::loongarch_lsx_bz_v:
6798 case Intrinsic::loongarch_lasx_xbz_v:
6799 if (!Subtarget.is64Bit())
6800 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6801 N->getOperand(1));
6802 break;
6803 case Intrinsic::loongarch_lsx_bnz_b:
6804 case Intrinsic::loongarch_lsx_bnz_h:
6805 case Intrinsic::loongarch_lsx_bnz_w:
6806 case Intrinsic::loongarch_lsx_bnz_d:
6807 case Intrinsic::loongarch_lasx_xbnz_b:
6808 case Intrinsic::loongarch_lasx_xbnz_h:
6809 case Intrinsic::loongarch_lasx_xbnz_w:
6810 case Intrinsic::loongarch_lasx_xbnz_d:
6811 if (!Subtarget.is64Bit())
6812 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6813 N->getOperand(1));
6814 break;
6815 case Intrinsic::loongarch_lsx_bnz_v:
6816 case Intrinsic::loongarch_lasx_xbnz_v:
6817 if (!Subtarget.is64Bit())
6818 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6819 N->getOperand(1));
6820 break;
6821 case Intrinsic::loongarch_lasx_concat_128_s:
6822 case Intrinsic::loongarch_lasx_concat_128_d:
6823 case Intrinsic::loongarch_lasx_concat_128:
6824 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
6825 N->getOperand(1), N->getOperand(2));
6826 }
6827 return SDValue();
6828}
6829
6830 static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6831 TargetLowering::DAGCombinerInfo &DCI,
6832 const LoongArchSubtarget &Subtarget) {
6833 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6834 // conversion is unnecessary and can be replaced with the
6835 // MOVFR2GR_S_LA64 operand.
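// That is, (f32 (MOVGR2FR_W_LA64 (i64 (MOVFR2GR_S_LA64 f32:$x)))) folds to $x.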
6836 SDValue Op0 = N->getOperand(0);
6837 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6838 return Op0.getOperand(0);
6839 return SDValue();
6840}
6841
6842 static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6843 TargetLowering::DAGCombinerInfo &DCI,
6844 const LoongArchSubtarget &Subtarget) {
6845 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6846 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6847 // operand.
6848 SDValue Op0 = N->getOperand(0);
6849 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6850 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6851 "Unexpected value type!");
6852 return Op0.getOperand(0);
6853 }
6854 return SDValue();
6855}
6856
6857 static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6858 TargetLowering::DAGCombinerInfo &DCI,
6859 const LoongArchSubtarget &Subtarget) {
6860 MVT VT = N->getSimpleValueType(0);
6861 unsigned NumBits = VT.getScalarSizeInBits();
6862
6863 // Simplify the inputs.
6864 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6865 APInt DemandedMask(APInt::getAllOnes(NumBits));
6866 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6867 return SDValue(N, 0);
6868
6869 return SDValue();
6870}
6871
6872static SDValue
6873 performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6874 TargetLowering::DAGCombinerInfo &DCI,
6875 const LoongArchSubtarget &Subtarget) {
6876 SDValue Op0 = N->getOperand(0);
6877 SDLoc DL(N);
6878
6879 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6880 // redundant. Instead, use BuildPairF64's operands directly.
6881 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6882 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6883
6884 if (Op0->isUndef()) {
6885 SDValue Lo = DAG.getUNDEF(MVT::i32);
6886 SDValue Hi = DAG.getUNDEF(MVT::i32);
6887 return DCI.CombineTo(N, Lo, Hi);
6888 }
6889
6890 // It's cheaper to materialise two 32-bit integers than to load a double
6891 // from the constant pool and transfer it to integer registers through the
6892 // stack.
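// For example, the f64 constant +1.0 (bit pattern 0x3FF0000000000000) is
// materialised below as Lo = 0x00000000 and Hi = 0x3FF00000, both of which are
// cheap to build in GPRs.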
6893 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6894 APInt V = C->getValueAPF().bitcastToAPInt();
6895 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6896 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6897 return DCI.CombineTo(N, Lo, Hi);
6898 }
6899
6900 return SDValue();
6901}
6902
6903static SDValue
6904 performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
6905 TargetLowering::DAGCombinerInfo &DCI,
6906 const LoongArchSubtarget &Subtarget) {
6907 if (!DCI.isBeforeLegalize())
6908 return SDValue();
6909
6910 MVT EltVT = N->getSimpleValueType(0);
6911 SDValue Vec = N->getOperand(0);
6912 EVT VecTy = Vec->getValueType(0);
6913 SDValue Idx = N->getOperand(1);
6914 unsigned IdxOp = Idx.getOpcode();
6915 SDLoc DL(N);
6916
6917 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6918 return SDValue();
6919
6920 // Combine:
6921 // t2 = truncate t1
6922 // t3 = {zero/sign/any}_extend t2
6923 // t4 = extract_vector_elt t0, t3
6924 // to:
6925 // t4 = extract_vector_elt t0, t1
6926 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6927 IdxOp == ISD::ANY_EXTEND) {
6928 SDValue IdxOrig = Idx.getOperand(0);
6929 if (!(IdxOrig.getOpcode() == ISD::TRUNCATE))
6930 return SDValue();
6931
6932 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6933 IdxOrig.getOperand(0));
6934 }
6935
6936 return SDValue();
6937}
6938
6939/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
6940 static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
6941 TargetLowering::DAGCombinerInfo &DCI,
6942 const LoongArchSubtarget &Subtarget) {
6943 SDValue N0 = N->getOperand(0);
6944 SDValue N1 = N->getOperand(1);
6945 MVT VT = N->getSimpleValueType(0);
6946 SDLoc DL(N);
6947
6948 // VANDN(undef, x) -> 0
6949 // VANDN(x, undef) -> 0
6950 if (N0.isUndef() || N1.isUndef())
6951 return DAG.getConstant(0, DL, VT);
6952
6953 // VANDN(0, x) -> x
6954 if (ISD::isBuildVectorAllZeros(N0.getNode()))
6955 return N1;
6956
6957 // VANDN(x, 0) -> 0
6958 if (ISD::isBuildVectorAllZeros(N1.getNode()))
6959 return DAG.getConstant(0, DL, VT);
6960
6961 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
6962 if (ISD::isBuildVectorAllOnes(N1.getNode()))
6963 return DAG.getNOT(DL, N0, VT);
6964
6965 // Turn VANDN back to AND if input is inverted.
6966 if (SDValue Not = isNOT(N0, DAG))
6967 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
6968
6969 // Folds for better commutativity:
6970 if (N1->hasOneUse()) {
6971 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
6972 if (SDValue Not = isNOT(N1, DAG))
6973 return DAG.getNOT(
6974 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
6975
6976 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
6977 // -> NOT(OR(x, SplatVector(-Imm))
6978 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
6979 // gain benefits.
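// For example, with VT = v16i8 and Imm = 0x0f this rewrites
//   VANDN(x, splat(0x0f))  ->  NOT(OR(x, splat(0xf0))),
// and the NOT(OR()) form matches a single vnori.b with immediate 0xf0.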
6980 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
6981 N1.getOpcode() == ISD::BUILD_VECTOR) {
6982 if (SDValue SplatValue =
6983 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
6984 if (!N1->isOnlyUserOf(SplatValue.getNode()))
6985 return SDValue();
6986
6987 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
6988 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
6989 SDValue Not =
6990 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
6991 return DAG.getNOT(
6992 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
6993 VT);
6994 }
6995 }
6996 }
6997 }
6998
6999 return SDValue();
7000}
7001
7002 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
7003 DAGCombinerInfo &DCI) const {
7004 SelectionDAG &DAG = DCI.DAG;
7005 switch (N->getOpcode()) {
7006 default:
7007 break;
7008 case ISD::AND:
7009 return performANDCombine(N, DAG, DCI, Subtarget);
7010 case ISD::OR:
7011 return performORCombine(N, DAG, DCI, Subtarget);
7012 case ISD::SETCC:
7013 return performSETCCCombine(N, DAG, DCI, Subtarget);
7014 case ISD::SRL:
7015 return performSRLCombine(N, DAG, DCI, Subtarget);
7016 case ISD::BITCAST:
7017 return performBITCASTCombine(N, DAG, DCI, Subtarget);
7018 case LoongArchISD::BITREV_W:
7019 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
7020 case LoongArchISD::BR_CC:
7021 return performBR_CCCombine(N, DAG, DCI, Subtarget);
7022 case LoongArchISD::SELECT_CC:
7023 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
7025 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
7026 case LoongArchISD::MOVGR2FR_W_LA64:
7027 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
7028 case LoongArchISD::MOVFR2GR_S_LA64:
7029 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
7030 case LoongArchISD::VMSKLTZ:
7031 case LoongArchISD::XVMSKLTZ:
7032 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
7033 case LoongArchISD::SPLIT_PAIR_F64:
7034 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
7036 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
7037 case LoongArchISD::VANDN:
7038 return performVANDNCombine(N, DAG, DCI, Subtarget);
7039 }
7040 return SDValue();
7041}
7042
7043 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
7044 MachineBasicBlock *MBB) {
7045 if (!ZeroDivCheck)
7046 return MBB;
7047
7048 // Build instructions:
7049 // MBB:
7050 // div(or mod) $dst, $dividend, $divisor
7051 // bne $divisor, $zero, SinkMBB
7052 // BreakMBB:
7053 // break 7 // BRK_DIVZERO
7054 // SinkMBB:
7055 // fallthrough
7056 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
7057 MachineFunction::iterator It = ++MBB->getIterator();
7058 MachineFunction *MF = MBB->getParent();
7059 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7060 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7061 MF->insert(It, BreakMBB);
7062 MF->insert(It, SinkMBB);
7063
7064 // Transfer the remainder of MBB and its successor edges to SinkMBB.
7065 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
7066 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
7067
7068 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
7069 DebugLoc DL = MI.getDebugLoc();
7070 MachineOperand &Divisor = MI.getOperand(2);
7071 Register DivisorReg = Divisor.getReg();
7072
7073 // MBB:
7074 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
7075 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
7076 .addReg(LoongArch::R0)
7077 .addMBB(SinkMBB);
7078 MBB->addSuccessor(BreakMBB);
7079 MBB->addSuccessor(SinkMBB);
7080
7081 // BreakMBB:
7082 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
7083 // definition of BRK_DIVZERO.
7084 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
7085 BreakMBB->addSuccessor(SinkMBB);
7086
7087 // Clear Divisor's kill flag.
7088 Divisor.setIsKill(false);
7089
7090 return SinkMBB;
7091}
7092
7093static MachineBasicBlock *
7094 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
7095 const LoongArchSubtarget &Subtarget) {
7096 unsigned CondOpc;
7097 switch (MI.getOpcode()) {
7098 default:
7099 llvm_unreachable("Unexpected opcode");
7100 case LoongArch::PseudoVBZ:
7101 CondOpc = LoongArch::VSETEQZ_V;
7102 break;
7103 case LoongArch::PseudoVBZ_B:
7104 CondOpc = LoongArch::VSETANYEQZ_B;
7105 break;
7106 case LoongArch::PseudoVBZ_H:
7107 CondOpc = LoongArch::VSETANYEQZ_H;
7108 break;
7109 case LoongArch::PseudoVBZ_W:
7110 CondOpc = LoongArch::VSETANYEQZ_W;
7111 break;
7112 case LoongArch::PseudoVBZ_D:
7113 CondOpc = LoongArch::VSETANYEQZ_D;
7114 break;
7115 case LoongArch::PseudoVBNZ:
7116 CondOpc = LoongArch::VSETNEZ_V;
7117 break;
7118 case LoongArch::PseudoVBNZ_B:
7119 CondOpc = LoongArch::VSETALLNEZ_B;
7120 break;
7121 case LoongArch::PseudoVBNZ_H:
7122 CondOpc = LoongArch::VSETALLNEZ_H;
7123 break;
7124 case LoongArch::PseudoVBNZ_W:
7125 CondOpc = LoongArch::VSETALLNEZ_W;
7126 break;
7127 case LoongArch::PseudoVBNZ_D:
7128 CondOpc = LoongArch::VSETALLNEZ_D;
7129 break;
7130 case LoongArch::PseudoXVBZ:
7131 CondOpc = LoongArch::XVSETEQZ_V;
7132 break;
7133 case LoongArch::PseudoXVBZ_B:
7134 CondOpc = LoongArch::XVSETANYEQZ_B;
7135 break;
7136 case LoongArch::PseudoXVBZ_H:
7137 CondOpc = LoongArch::XVSETANYEQZ_H;
7138 break;
7139 case LoongArch::PseudoXVBZ_W:
7140 CondOpc = LoongArch::XVSETANYEQZ_W;
7141 break;
7142 case LoongArch::PseudoXVBZ_D:
7143 CondOpc = LoongArch::XVSETANYEQZ_D;
7144 break;
7145 case LoongArch::PseudoXVBNZ:
7146 CondOpc = LoongArch::XVSETNEZ_V;
7147 break;
7148 case LoongArch::PseudoXVBNZ_B:
7149 CondOpc = LoongArch::XVSETALLNEZ_B;
7150 break;
7151 case LoongArch::PseudoXVBNZ_H:
7152 CondOpc = LoongArch::XVSETALLNEZ_H;
7153 break;
7154 case LoongArch::PseudoXVBNZ_W:
7155 CondOpc = LoongArch::XVSETALLNEZ_W;
7156 break;
7157 case LoongArch::PseudoXVBNZ_D:
7158 CondOpc = LoongArch::XVSETALLNEZ_D;
7159 break;
7160 }
7161
7162 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7163 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7164 DebugLoc DL = MI.getDebugLoc();
7167
7168 MachineFunction *F = BB->getParent();
7169 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
7170 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
7171 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
7172
7173 F->insert(It, FalseBB);
7174 F->insert(It, TrueBB);
7175 F->insert(It, SinkBB);
7176
7177 // Transfer the remainder of MBB and its successor edges to Sink.
7178 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
7180
7181 // Insert the real instruction to BB.
7182 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
7183 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
7184
7185 // Insert branch.
7186 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
7187 BB->addSuccessor(FalseBB);
7188 BB->addSuccessor(TrueBB);
7189
7190 // FalseBB.
7191 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7192 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
7193 .addReg(LoongArch::R0)
7194 .addImm(0);
7195 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
7196 FalseBB->addSuccessor(SinkBB);
7197
7198 // TrueBB.
7199 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7200 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
7201 .addReg(LoongArch::R0)
7202 .addImm(1);
7203 TrueBB->addSuccessor(SinkBB);
7204
7205 // SinkBB: merge the results.
7206 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
7207 MI.getOperand(0).getReg())
7208 .addReg(RD1)
7209 .addMBB(FalseBB)
7210 .addReg(RD2)
7211 .addMBB(TrueBB);
7212
7213 // The pseudo instruction is gone now.
7214 MI.eraseFromParent();
7215 return SinkBB;
7216}
7217
7218static MachineBasicBlock *
7219 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
7220 const LoongArchSubtarget &Subtarget) {
7221 unsigned InsOp;
7222 unsigned BroadcastOp;
7223 unsigned HalfSize;
7224 switch (MI.getOpcode()) {
7225 default:
7226 llvm_unreachable("Unexpected opcode");
7227 case LoongArch::PseudoXVINSGR2VR_B:
7228 HalfSize = 16;
7229 BroadcastOp = LoongArch::XVREPLGR2VR_B;
7230 InsOp = LoongArch::XVEXTRINS_B;
7231 break;
7232 case LoongArch::PseudoXVINSGR2VR_H:
7233 HalfSize = 8;
7234 BroadcastOp = LoongArch::XVREPLGR2VR_H;
7235 InsOp = LoongArch::XVEXTRINS_H;
7236 break;
7237 }
7238 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7239 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
7240 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
7241 DebugLoc DL = MI.getDebugLoc();
7243 // XDst = vector_insert XSrc, Elt, Idx
7244 Register XDst = MI.getOperand(0).getReg();
7245 Register XSrc = MI.getOperand(1).getReg();
7246 Register Elt = MI.getOperand(2).getReg();
7247 unsigned Idx = MI.getOperand(3).getImm();
7248
7249 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
7250 Idx < HalfSize) {
7251 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
7252 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
7253
7254 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
7255 .addReg(XSrc, 0, LoongArch::sub_128);
7256 BuildMI(*BB, MI, DL,
7257 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
7258 : LoongArch::VINSGR2VR_B),
7259 ScratchSubReg2)
7260 .addReg(ScratchSubReg1)
7261 .addReg(Elt)
7262 .addImm(Idx);
7263
7264 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
7265 .addImm(0)
7266 .addReg(ScratchSubReg2)
7267 .addImm(LoongArch::sub_128);
7268 } else {
7269 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7270 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7271
7272 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
7273
7274 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
7275 .addReg(ScratchReg1)
7276 .addReg(XSrc)
7277 .addImm(Idx >= HalfSize ? 48 : 18);
7278
7279 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
7280 .addReg(XSrc)
7281 .addReg(ScratchReg2)
7282 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
7283 }
7284
7285 MI.eraseFromParent();
7286 return BB;
7287}
7288
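// PseudoCTPOP is expanded here by inserting the scalar operand into element 0
// of a zeroed LSX register, counting bits with vpcnt.{w,d}, and moving the
// result back to a GPR with vpickve2gr.{w,d}.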
7289 static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
7290 MachineBasicBlock *BB,
7291 const LoongArchSubtarget &Subtarget) {
7292 assert(Subtarget.hasExtLSX());
7293 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7294 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7295 DebugLoc DL = MI.getDebugLoc();
7297 Register Dst = MI.getOperand(0).getReg();
7298 Register Src = MI.getOperand(1).getReg();
7299 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7300 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7301 Register ScratchReg3 = MRI.createVirtualRegister(RC);
7302
7303 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
7304 BuildMI(*BB, MI, DL,
7305 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
7306 : LoongArch::VINSGR2VR_W),
7307 ScratchReg2)
7308 .addReg(ScratchReg1)
7309 .addReg(Src)
7310 .addImm(0);
7311 BuildMI(
7312 *BB, MI, DL,
7313 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
7314 ScratchReg3)
7315 .addReg(ScratchReg2);
7316 BuildMI(*BB, MI, DL,
7317 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
7318 : LoongArch::VPICKVE2GR_W),
7319 Dst)
7320 .addReg(ScratchReg3)
7321 .addImm(0);
7322
7323 MI.eraseFromParent();
7324 return BB;
7325}
7326
7327static MachineBasicBlock *
7328 emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
7329 const LoongArchSubtarget &Subtarget) {
7330 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7331 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7332 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7334 Register Dst = MI.getOperand(0).getReg();
7335 Register Src = MI.getOperand(1).getReg();
7336 DebugLoc DL = MI.getDebugLoc();
7337 unsigned EleBits = 8;
7338 unsigned NotOpc = 0;
7339 unsigned MskOpc;
7340
7341 switch (MI.getOpcode()) {
7342 default:
7343 llvm_unreachable("Unexpected opcode");
7344 case LoongArch::PseudoVMSKLTZ_B:
7345 MskOpc = LoongArch::VMSKLTZ_B;
7346 break;
7347 case LoongArch::PseudoVMSKLTZ_H:
7348 MskOpc = LoongArch::VMSKLTZ_H;
7349 EleBits = 16;
7350 break;
7351 case LoongArch::PseudoVMSKLTZ_W:
7352 MskOpc = LoongArch::VMSKLTZ_W;
7353 EleBits = 32;
7354 break;
7355 case LoongArch::PseudoVMSKLTZ_D:
7356 MskOpc = LoongArch::VMSKLTZ_D;
7357 EleBits = 64;
7358 break;
7359 case LoongArch::PseudoVMSKGEZ_B:
7360 MskOpc = LoongArch::VMSKGEZ_B;
7361 break;
7362 case LoongArch::PseudoVMSKEQZ_B:
7363 MskOpc = LoongArch::VMSKNZ_B;
7364 NotOpc = LoongArch::VNOR_V;
7365 break;
7366 case LoongArch::PseudoVMSKNEZ_B:
7367 MskOpc = LoongArch::VMSKNZ_B;
7368 break;
7369 case LoongArch::PseudoXVMSKLTZ_B:
7370 MskOpc = LoongArch::XVMSKLTZ_B;
7371 RC = &LoongArch::LASX256RegClass;
7372 break;
7373 case LoongArch::PseudoXVMSKLTZ_H:
7374 MskOpc = LoongArch::XVMSKLTZ_H;
7375 RC = &LoongArch::LASX256RegClass;
7376 EleBits = 16;
7377 break;
7378 case LoongArch::PseudoXVMSKLTZ_W:
7379 MskOpc = LoongArch::XVMSKLTZ_W;
7380 RC = &LoongArch::LASX256RegClass;
7381 EleBits = 32;
7382 break;
7383 case LoongArch::PseudoXVMSKLTZ_D:
7384 MskOpc = LoongArch::XVMSKLTZ_D;
7385 RC = &LoongArch::LASX256RegClass;
7386 EleBits = 64;
7387 break;
7388 case LoongArch::PseudoXVMSKGEZ_B:
7389 MskOpc = LoongArch::XVMSKGEZ_B;
7390 RC = &LoongArch::LASX256RegClass;
7391 break;
7392 case LoongArch::PseudoXVMSKEQZ_B:
7393 MskOpc = LoongArch::XVMSKNZ_B;
7394 NotOpc = LoongArch::XVNOR_V;
7395 RC = &LoongArch::LASX256RegClass;
7396 break;
7397 case LoongArch::PseudoXVMSKNEZ_B:
7398 MskOpc = LoongArch::XVMSKNZ_B;
7399 RC = &LoongArch::LASX256RegClass;
7400 break;
7401 }
7402
7403 Register Msk = MRI.createVirtualRegister(RC);
7404 if (NotOpc) {
7405 Register Tmp = MRI.createVirtualRegister(RC);
7406 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7407 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7408 .addReg(Tmp, RegState::Kill)
7409 .addReg(Tmp, RegState::Kill);
7410 } else {
7411 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7412 }
7413
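// For the 256-bit register classes the mask has 256/EleBits significant bits;
// the low 128/EleBits bits sit in 32-bit element 0 of the [x]vmsk result and
// the high ones in element 4, so BSTRINS below stitches them into one GPR.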
7414 if (TRI->getRegSizeInBits(*RC) > 128) {
7415 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7416 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7417 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7418 .addReg(Msk)
7419 .addImm(0);
7420 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7421 .addReg(Msk, RegState::Kill)
7422 .addImm(4);
7423 BuildMI(*BB, MI, DL,
7424 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7425 : LoongArch::BSTRINS_W),
7426 Dst)
7429 .addImm(256 / EleBits - 1)
7430 .addImm(128 / EleBits);
7431 } else {
7432 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7433 .addReg(Msk, RegState::Kill)
7434 .addImm(0);
7435 }
7436
7437 MI.eraseFromParent();
7438 return BB;
7439}
7440
7441static MachineBasicBlock *
7442 emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
7443 const LoongArchSubtarget &Subtarget) {
7444 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7445 "Unexpected instruction");
7446
7447 MachineFunction &MF = *BB->getParent();
7448 DebugLoc DL = MI.getDebugLoc();
7450 Register LoReg = MI.getOperand(0).getReg();
7451 Register HiReg = MI.getOperand(1).getReg();
7452 Register SrcReg = MI.getOperand(2).getReg();
7453
7454 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7455 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7456 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7457 MI.eraseFromParent(); // The pseudo instruction is gone now.
7458 return BB;
7459}
7460
7461static MachineBasicBlock *
7462 emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
7463 const LoongArchSubtarget &Subtarget) {
7464 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7465 "Unexpected instruction");
7466
7467 MachineFunction &MF = *BB->getParent();
7468 DebugLoc DL = MI.getDebugLoc();
7471 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7472 Register DstReg = MI.getOperand(0).getReg();
7473 Register LoReg = MI.getOperand(1).getReg();
7474 Register HiReg = MI.getOperand(2).getReg();
7475
7476 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7477 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7478 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7479 .addReg(TmpReg, RegState::Kill)
7480 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7481 MI.eraseFromParent(); // The pseudo instruction is gone now.
7482 return BB;
7483}
7484
7485 static bool isSelectPseudo(MachineInstr &MI) {
7486 switch (MI.getOpcode()) {
7487 default:
7488 return false;
7489 case LoongArch::Select_GPR_Using_CC_GPR:
7490 return true;
7491 }
7492}
7493
7494static MachineBasicBlock *
7495 emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
7496 const LoongArchSubtarget &Subtarget) {
7497 // To "insert" Select_* instructions, we actually have to insert the triangle
7498 // control-flow pattern. The incoming instructions know the destination vreg
7499 // to set, the condition code register to branch on, the true/false values to
7500 // select between, and the condcode to use to select the appropriate branch.
7501 //
7502 // We produce the following control flow:
7503 // HeadMBB
7504 // | \
7505 // | IfFalseMBB
7506 // | /
7507 // TailMBB
7508 //
7509 // When we find a sequence of selects we attempt to optimize their emission
7510 // by sharing the control flow. Currently we only handle cases where we have
7511 // multiple selects with the exact same condition (same LHS, RHS and CC).
7512 // The selects may be interleaved with other instructions if the other
7513 // instructions meet some requirements we deem safe:
7514 // - They are not pseudo instructions.
7515 // - They are debug instructions. Otherwise,
7516 // - They do not have side-effects, do not access memory and their inputs do
7517 // not depend on the results of the select pseudo-instructions.
7518 // The TrueV/FalseV operands of the selects cannot depend on the result of
7519 // previous selects in the sequence.
7520 // These conditions could be further relaxed. See the X86 target for a
7521 // related approach and more information.
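// For example, a single pseudo
//   %dst = Select_GPR_Using_CC_GPR %lhs, %rhs, CC, %trueval, %falseval
// becomes (sketch):
//   HeadMBB:    <CC branch on %lhs, %rhs> to TailMBB
//   IfFalseMBB: (falls through)
//   TailMBB:    %dst = PHI [ %trueval, HeadMBB ], [ %falseval, IfFalseMBB ]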
7522
7523 Register LHS = MI.getOperand(1).getReg();
7524 Register RHS;
7525 if (MI.getOperand(2).isReg())
7526 RHS = MI.getOperand(2).getReg();
7527 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7528
7529 SmallVector<MachineInstr *, 4> SelectDebugValues;
7530 SmallSet<Register, 4> SelectDests;
7531 SelectDests.insert(MI.getOperand(0).getReg());
7532
7533 MachineInstr *LastSelectPseudo = &MI;
7534 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7535 SequenceMBBI != E; ++SequenceMBBI) {
7536 if (SequenceMBBI->isDebugInstr())
7537 continue;
7538 if (isSelectPseudo(*SequenceMBBI)) {
7539 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7540 !SequenceMBBI->getOperand(2).isReg() ||
7541 SequenceMBBI->getOperand(2).getReg() != RHS ||
7542 SequenceMBBI->getOperand(3).getImm() != CC ||
7543 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7544 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7545 break;
7546 LastSelectPseudo = &*SequenceMBBI;
7547 SequenceMBBI->collectDebugValues(SelectDebugValues);
7548 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7549 continue;
7550 }
7551 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7552 SequenceMBBI->mayLoadOrStore() ||
7553 SequenceMBBI->usesCustomInsertionHook())
7554 break;
7555 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7556 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7557 }))
7558 break;
7559 }
7560
7561 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7562 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7563 DebugLoc DL = MI.getDebugLoc();
7565
7566 MachineBasicBlock *HeadMBB = BB;
7567 MachineFunction *F = BB->getParent();
7568 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7569 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7570
7571 F->insert(I, IfFalseMBB);
7572 F->insert(I, TailMBB);
7573
7574 // Set the call frame size on entry to the new basic blocks.
7575 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7576 IfFalseMBB->setCallFrameSize(CallFrameSize);
7577 TailMBB->setCallFrameSize(CallFrameSize);
7578
7579 // Transfer debug instructions associated with the selects to TailMBB.
7580 for (MachineInstr *DebugInstr : SelectDebugValues) {
7581 TailMBB->push_back(DebugInstr->removeFromParent());
7582 }
7583
7584 // Move all instructions after the sequence to TailMBB.
7585 TailMBB->splice(TailMBB->end(), HeadMBB,
7586 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7587 // Update machine-CFG edges by transferring all successors of the current
7588 // block to the new block which will contain the Phi nodes for the selects.
7589 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7590 // Set the successors for HeadMBB.
7591 HeadMBB->addSuccessor(IfFalseMBB);
7592 HeadMBB->addSuccessor(TailMBB);
7593
7594 // Insert appropriate branch.
7595 if (MI.getOperand(2).isImm())
7596 BuildMI(HeadMBB, DL, TII.get(CC))
7597 .addReg(LHS)
7598 .addImm(MI.getOperand(2).getImm())
7599 .addMBB(TailMBB);
7600 else
7601 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7602
7603 // IfFalseMBB just falls through to TailMBB.
7604 IfFalseMBB->addSuccessor(TailMBB);
7605
7606 // Create PHIs for all of the select pseudo-instructions.
7607 auto SelectMBBI = MI.getIterator();
7608 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7609 auto InsertionPoint = TailMBB->begin();
7610 while (SelectMBBI != SelectEnd) {
7611 auto Next = std::next(SelectMBBI);
7612 if (isSelectPseudo(*SelectMBBI)) {
7613 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7614 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7615 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7616 .addReg(SelectMBBI->getOperand(4).getReg())
7617 .addMBB(HeadMBB)
7618 .addReg(SelectMBBI->getOperand(5).getReg())
7619 .addMBB(IfFalseMBB);
7620 SelectMBBI->eraseFromParent();
7621 }
7622 SelectMBBI = Next;
7623 }
7624
7625 F->getProperties().resetNoPHIs();
7626 return TailMBB;
7627}
7628
7629MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7630 MachineInstr &MI, MachineBasicBlock *BB) const {
7631 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7632 DebugLoc DL = MI.getDebugLoc();
7633
7634 switch (MI.getOpcode()) {
7635 default:
7636 llvm_unreachable("Unexpected instr type to insert");
7637 case LoongArch::DIV_W:
7638 case LoongArch::DIV_WU:
7639 case LoongArch::MOD_W:
7640 case LoongArch::MOD_WU:
7641 case LoongArch::DIV_D:
7642 case LoongArch::DIV_DU:
7643 case LoongArch::MOD_D:
7644 case LoongArch::MOD_DU:
7645 return insertDivByZeroTrap(MI, BB);
7646 break;
7647 case LoongArch::WRFCSR: {
7648 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7649 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7650 .addReg(MI.getOperand(1).getReg());
7651 MI.eraseFromParent();
7652 return BB;
7653 }
7654 case LoongArch::RDFCSR: {
7655 MachineInstr *ReadFCSR =
7656 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7657 MI.getOperand(0).getReg())
7658 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7659 ReadFCSR->getOperand(1).setIsUndef();
7660 MI.eraseFromParent();
7661 return BB;
7662 }
7663 case LoongArch::Select_GPR_Using_CC_GPR:
7664 return emitSelectPseudo(MI, BB, Subtarget);
7665 case LoongArch::BuildPairF64Pseudo:
7666 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7667 case LoongArch::SplitPairF64Pseudo:
7668 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7669 case LoongArch::PseudoVBZ:
7670 case LoongArch::PseudoVBZ_B:
7671 case LoongArch::PseudoVBZ_H:
7672 case LoongArch::PseudoVBZ_W:
7673 case LoongArch::PseudoVBZ_D:
7674 case LoongArch::PseudoVBNZ:
7675 case LoongArch::PseudoVBNZ_B:
7676 case LoongArch::PseudoVBNZ_H:
7677 case LoongArch::PseudoVBNZ_W:
7678 case LoongArch::PseudoVBNZ_D:
7679 case LoongArch::PseudoXVBZ:
7680 case LoongArch::PseudoXVBZ_B:
7681 case LoongArch::PseudoXVBZ_H:
7682 case LoongArch::PseudoXVBZ_W:
7683 case LoongArch::PseudoXVBZ_D:
7684 case LoongArch::PseudoXVBNZ:
7685 case LoongArch::PseudoXVBNZ_B:
7686 case LoongArch::PseudoXVBNZ_H:
7687 case LoongArch::PseudoXVBNZ_W:
7688 case LoongArch::PseudoXVBNZ_D:
7689 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7690 case LoongArch::PseudoXVINSGR2VR_B:
7691 case LoongArch::PseudoXVINSGR2VR_H:
7692 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7693 case LoongArch::PseudoCTPOP:
7694 return emitPseudoCTPOP(MI, BB, Subtarget);
7695 case LoongArch::PseudoVMSKLTZ_B:
7696 case LoongArch::PseudoVMSKLTZ_H:
7697 case LoongArch::PseudoVMSKLTZ_W:
7698 case LoongArch::PseudoVMSKLTZ_D:
7699 case LoongArch::PseudoVMSKGEZ_B:
7700 case LoongArch::PseudoVMSKEQZ_B:
7701 case LoongArch::PseudoVMSKNEZ_B:
7702 case LoongArch::PseudoXVMSKLTZ_B:
7703 case LoongArch::PseudoXVMSKLTZ_H:
7704 case LoongArch::PseudoXVMSKLTZ_W:
7705 case LoongArch::PseudoXVMSKLTZ_D:
7706 case LoongArch::PseudoXVMSKGEZ_B:
7707 case LoongArch::PseudoXVMSKEQZ_B:
7708 case LoongArch::PseudoXVMSKNEZ_B:
7709 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7710 case TargetOpcode::STATEPOINT:
7711 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7712 // while bl call instruction (where statepoint will be lowered at the
7713 // end) has implicit def. This def is early-clobber as it will be set at
7714 // the moment of the call and earlier than any use is read.
7715 // Add this implicit dead def here as a workaround.
7716 MI.addOperand(*MI.getMF(),
7718 LoongArch::R1, /*isDef*/ true,
7719 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7720 /*isUndef*/ false, /*isEarlyClobber*/ true));
7721 if (!Subtarget.is64Bit())
7722 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7723 return emitPatchPoint(MI, BB);
7724 }
7725}
7726
7727 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
7728 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7729 unsigned *Fast) const {
7730 if (!Subtarget.hasUAL())
7731 return false;
7732
7733 // TODO: set reasonable speed number.
7734 if (Fast)
7735 *Fast = 1;
7736 return true;
7737}
7738
7739//===----------------------------------------------------------------------===//
7740// Calling Convention Implementation
7741//===----------------------------------------------------------------------===//
7742
7743 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
7744// with a0-a1 reused to return values. Generally, the GPRs are used to pass
7745// fixed-point arguments, and floating-point arguments when no FPR is available
7746// or with soft float ABI.
7747const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7748 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7749 LoongArch::R10, LoongArch::R11};
7750 // Eight floating-point registers fa0-fa7 are used for passing floating-point
7751// arguments, and fa0-fa1 are also used to return values.
7752const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7753 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7754 LoongArch::F6, LoongArch::F7};
7755// FPR32 and FPR64 alias each other.
7756 const MCPhysReg ArgFPR64s[] = {
7757 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7758 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7759
7760const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7761 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7762 LoongArch::VR6, LoongArch::VR7};
7763
7764const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7765 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7766 LoongArch::XR6, LoongArch::XR7};
7767
7768// Pass a 2*GRLen argument that has been split into two GRLen values through
7769// registers or the stack as necessary.
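// For example, on LA32 (GRLen = 32) an i64 argument is split into two i32
// halves: if a GPR is free, the low half goes in it and the high half takes
// the next GPR or a stack slot; otherwise both halves are passed on the stack.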
7770static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7771 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7772 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7773 ISD::ArgFlagsTy ArgFlags2) {
7774 unsigned GRLenInBytes = GRLen / 8;
7775 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7776 // At least one half can be passed via register.
7777 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7778 VA1.getLocVT(), CCValAssign::Full));
7779 } else {
7780 // Both halves must be passed on the stack, with proper alignment.
7781 Align StackAlign =
7782 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7783 State.addLoc(
7785 State.AllocateStack(GRLenInBytes, StackAlign),
7786 VA1.getLocVT(), CCValAssign::Full));
7787 State.addLoc(CCValAssign::getMem(
7788 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7789 LocVT2, CCValAssign::Full));
7790 return false;
7791 }
7792 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7793 // The second half can also be passed via register.
7794 State.addLoc(
7795 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7796 } else {
7797 // The second half is passed via the stack, without additional alignment.
7798 State.addLoc(CCValAssign::getMem(
7799 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7800 LocVT2, CCValAssign::Full));
7801 }
7802 return false;
7803}
7804
7805// Implements the LoongArch calling convention. Returns true upon failure.
7807 unsigned ValNo, MVT ValVT,
7808 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7809 CCState &State, bool IsRet, Type *OrigTy) {
7810 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7811 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7812 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7813 MVT LocVT = ValVT;
7814
7815 // Any return value split into more than two values can't be returned
7816 // directly.
7817 if (IsRet && ValNo > 1)
7818 return true;
7819
7820 // If passing a variadic argument, or if no FPR is available.
7821 bool UseGPRForFloat = true;
7822
7823 switch (ABI) {
7824 default:
7825 llvm_unreachable("Unexpected ABI");
7826 break;
7831 UseGPRForFloat = ArgFlags.isVarArg();
7832 break;
7835 break;
7836 }
7837
7838 // If this is a variadic argument, the LoongArch calling convention requires
7839 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7840 // byte alignment. An aligned register should be used regardless of whether
7841 // the original argument was split during legalisation or not. The argument
7842 // will not be passed by registers if the original type is larger than
7843 // 2*GRLen, so the register alignment rule does not apply.
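// For example, on LA32 a variadic double following a single fixed integer
// argument in $a0 skips $a1 and is assigned to the aligned pair $a2/$a3.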
7844 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7845 if (ArgFlags.isVarArg() &&
7846 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7847 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7848 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7849 // Skip 'odd' register if necessary.
7850 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7851 State.AllocateReg(ArgGPRs);
7852 }
7853
7854 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7855 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7856 State.getPendingArgFlags();
7857
7858 assert(PendingLocs.size() == PendingArgFlags.size() &&
7859 "PendingLocs and PendingArgFlags out of sync");
7860
7861 // FPR32 and FPR64 alias each other.
7862 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7863 UseGPRForFloat = true;
7864
7865 if (UseGPRForFloat && ValVT == MVT::f32) {
7866 LocVT = GRLenVT;
7867 LocInfo = CCValAssign::BCvt;
7868 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7869 LocVT = MVT::i64;
7870 LocInfo = CCValAssign::BCvt;
7871 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7872 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7873 // registers are exhausted.
7874 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7875 // Depending on available argument GPRS, f64 may be passed in a pair of
7876 // GPRs, split between a GPR and the stack, or passed completely on the
7877 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7878 // cases.
7879 MCRegister Reg = State.AllocateReg(ArgGPRs);
7880 if (!Reg) {
7881 int64_t StackOffset = State.AllocateStack(8, Align(8));
7882 State.addLoc(
7883 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7884 return false;
7885 }
7886 LocVT = MVT::i32;
7887 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7888 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7889 if (HiReg) {
7890 State.addLoc(
7891 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7892 } else {
7893 int64_t StackOffset = State.AllocateStack(4, Align(4));
7894 State.addLoc(
7895 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7896 }
7897 return false;
7898 }
7899
7900 // Split arguments might be passed indirectly, so keep track of the pending
7901 // values.
7902 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7903 LocVT = GRLenVT;
7904 LocInfo = CCValAssign::Indirect;
7905 PendingLocs.push_back(
7906 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7907 PendingArgFlags.push_back(ArgFlags);
7908 if (!ArgFlags.isSplitEnd()) {
7909 return false;
7910 }
7911 }
7912
7913 // If the split argument only had two elements, it should be passed directly
7914 // in registers or on the stack.
7915 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7916 PendingLocs.size() <= 2) {
7917 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7918 // Apply the normal calling convention rules to the first half of the
7919 // split argument.
7920 CCValAssign VA = PendingLocs[0];
7921 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7922 PendingLocs.clear();
7923 PendingArgFlags.clear();
7924 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7925 ArgFlags);
7926 }
7927
7928 // Allocate to a register if possible, or else a stack slot.
7929 Register Reg;
7930 unsigned StoreSizeBytes = GRLen / 8;
7931 Align StackAlign = Align(GRLen / 8);
7932
7933 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7934 Reg = State.AllocateReg(ArgFPR32s);
7935 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7936 Reg = State.AllocateReg(ArgFPR64s);
7937 } else if (ValVT.is128BitVector()) {
7938 Reg = State.AllocateReg(ArgVRs);
7939 UseGPRForFloat = false;
7940 StoreSizeBytes = 16;
7941 StackAlign = Align(16);
7942 } else if (ValVT.is256BitVector()) {
7943 Reg = State.AllocateReg(ArgXRs);
7944 UseGPRForFloat = false;
7945 StoreSizeBytes = 32;
7946 StackAlign = Align(32);
7947 } else {
7948 Reg = State.AllocateReg(ArgGPRs);
7949 }
7950
7951 unsigned StackOffset =
7952 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7953
7954 // If we reach this point and PendingLocs is non-empty, we must be at the
7955 // end of a split argument that must be passed indirectly.
7956 if (!PendingLocs.empty()) {
7957 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7958 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7959 for (auto &It : PendingLocs) {
7960 if (Reg)
7961 It.convertToReg(Reg);
7962 else
7963 It.convertToMem(StackOffset);
7964 State.addLoc(It);
7965 }
7966 PendingLocs.clear();
7967 PendingArgFlags.clear();
7968 return false;
7969 }
7970 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7971 "Expected an GRLenVT at this stage");
7972
7973 if (Reg) {
7974 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7975 return false;
7976 }
7977
7978 // When a floating-point value is passed on the stack, no bit-cast is needed.
7979 if (ValVT.isFloatingPoint()) {
7980 LocVT = ValVT;
7981 LocInfo = CCValAssign::Full;
7982 }
7983
7984 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7985 return false;
7986}
7987
7988void LoongArchTargetLowering::analyzeInputArgs(
7989 MachineFunction &MF, CCState &CCInfo,
7990 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7991 LoongArchCCAssignFn Fn) const {
7992 FunctionType *FType = MF.getFunction().getFunctionType();
7993 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7994 MVT ArgVT = Ins[i].VT;
7995 Type *ArgTy = nullptr;
7996 if (IsRet)
7997 ArgTy = FType->getReturnType();
7998 else if (Ins[i].isOrigArg())
7999 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
8001 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8002 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
8003 CCInfo, IsRet, ArgTy)) {
8004 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
8005 << '\n');
8006 llvm_unreachable("");
8007 }
8008 }
8009}
8010
8011void LoongArchTargetLowering::analyzeOutputArgs(
8012 MachineFunction &MF, CCState &CCInfo,
8013 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
8014 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
8015 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8016 MVT ArgVT = Outs[i].VT;
8017 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
 8018 LoongArchABI::ABI ABI =
 8019 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8020 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
8021 CCInfo, IsRet, OrigTy)) {
8022 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
8023 << "\n");
8024 llvm_unreachable("");
8025 }
8026 }
8027}
8028
8029// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
8030// values.
 8031 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
 8032 const CCValAssign &VA, const SDLoc &DL) {
8033 switch (VA.getLocInfo()) {
8034 default:
8035 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8036 case CCValAssign::Full:
8038 break;
8039 case CCValAssign::BCvt:
8040 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8041 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
8042 else
8043 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
8044 break;
8045 }
8046 return Val;
8047}
8048
 8049 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
 8050 const CCValAssign &VA, const SDLoc &DL,
8051 const ISD::InputArg &In,
8052 const LoongArchTargetLowering &TLI) {
8055 EVT LocVT = VA.getLocVT();
8056 SDValue Val;
8057 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
8058 Register VReg = RegInfo.createVirtualRegister(RC);
8059 RegInfo.addLiveIn(VA.getLocReg(), VReg);
8060 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
8061
8062 // If input is sign extended from 32 bits, note it for the OptW pass.
8063 if (In.isOrigArg()) {
8064 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
8065 if (OrigArg->getType()->isIntegerTy()) {
8066 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
8067 // An input zero extended from i31 can also be considered sign extended.
8068 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
8069 (BitWidth < 32 && In.Flags.isZExt())) {
 8070 LoongArchMachineFunctionInfo *LAFI =
 8071 MF.getInfo<LoongArchMachineFunctionInfo>();
 8072 LAFI->addSExt32Register(VReg);
8073 }
8074 }
8075 }
8076
8077 return convertLocVTToValVT(DAG, Val, VA, DL);
8078}
8079
8080// The caller is responsible for loading the full value if the argument is
8081// passed with CCValAssign::Indirect.
 8082 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
 8083 const CCValAssign &VA, const SDLoc &DL) {
8085 MachineFrameInfo &MFI = MF.getFrameInfo();
8086 EVT ValVT = VA.getValVT();
8087 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
8088 /*IsImmutable=*/true);
8089 SDValue FIN = DAG.getFrameIndex(
8091
8092 ISD::LoadExtType ExtType;
8093 switch (VA.getLocInfo()) {
8094 default:
8095 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8096 case CCValAssign::Full:
8098 case CCValAssign::BCvt:
8099 ExtType = ISD::NON_EXTLOAD;
8100 break;
8101 }
8102 return DAG.getExtLoad(
8103 ExtType, DL, VA.getLocVT(), Chain, FIN,
8105}
8106
 8107 static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
 8108 const CCValAssign &VA,
8109 const CCValAssign &HiVA,
8110 const SDLoc &DL) {
8111 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
8112 "Unexpected VA");
8114 MachineFrameInfo &MFI = MF.getFrameInfo();
8116
8117 assert(VA.isRegLoc() && "Expected register VA assignment");
8118
8119 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
8120 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
8121 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
8122 SDValue Hi;
8123 if (HiVA.isMemLoc()) {
8124 // Second half of f64 is passed on the stack.
8125 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
8126 /*IsImmutable=*/true);
8127 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
8128 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
8130 } else {
8131 // Second half of f64 is passed in another GPR.
8132 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
8133 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
8134 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
8135 }
8136 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
8137}
8138
 8139 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
 8140 const CCValAssign &VA, const SDLoc &DL) {
8141 EVT LocVT = VA.getLocVT();
8142
8143 switch (VA.getLocInfo()) {
8144 default:
8145 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8146 case CCValAssign::Full:
8147 break;
8148 case CCValAssign::BCvt:
8149 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8150 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
8151 else
8152 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
8153 break;
8154 }
8155 return Val;
8156}
8157
8158static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8159 CCValAssign::LocInfo LocInfo,
8160 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8161 CCState &State) {
8162 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8163 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8164 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8165 static const MCPhysReg GPRList[] = {
8166 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8167 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8168 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8169 if (MCRegister Reg = State.AllocateReg(GPRList)) {
8170 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8171 return false;
8172 }
8173 }
8174
8175 if (LocVT == MVT::f32) {
8176 // Pass in STG registers: F1, F2, F3, F4
8177 // fs0,fs1,fs2,fs3
8178 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8179 LoongArch::F26, LoongArch::F27};
8180 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
8181 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8182 return false;
8183 }
8184 }
8185
8186 if (LocVT == MVT::f64) {
8187 // Pass in STG registers: D1, D2, D3, D4
8188 // fs4,fs5,fs6,fs7
8189 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8190 LoongArch::F30_64, LoongArch::F31_64};
8191 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
8192 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8193 return false;
8194 }
8195 }
8196
8197 report_fatal_error("No registers left in GHC calling convention");
8198 return true;
8199}
8200
8201// Transform physical registers into virtual registers.
8203 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8204 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
8205 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
8206
8208
8209 switch (CallConv) {
8210 default:
8211 llvm_unreachable("Unsupported calling convention");
8212 case CallingConv::C:
8213 case CallingConv::Fast:
8215 break;
8216 case CallingConv::GHC:
8217 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
8218 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
8220 "GHC calling convention requires the F and D extensions");
8221 }
8222
8223 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8224 MVT GRLenVT = Subtarget.getGRLenVT();
8225 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
 8226 // Used with varargs to accumulate store chains.
8227 std::vector<SDValue> OutChains;
8228
8229 // Assign locations to all of the incoming arguments.
8231 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8232
8233 if (CallConv == CallingConv::GHC)
8235 else
8236 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
8237
8238 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
8239 CCValAssign &VA = ArgLocs[i];
8240 SDValue ArgValue;
8241 // Passing f64 on LA32D with a soft float ABI must be handled as a special
8242 // case.
8243 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8244 assert(VA.needsCustom());
8245 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
8246 } else if (VA.isRegLoc())
8247 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
8248 else
8249 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
8250 if (VA.getLocInfo() == CCValAssign::Indirect) {
8251 // If the original argument was split and passed by reference, we need to
8252 // load all parts of it here (using the same address).
8253 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
8255 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
8256 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
8257 assert(ArgPartOffset == 0);
8258 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
8259 CCValAssign &PartVA = ArgLocs[i + 1];
8260 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
8261 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8262 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
8263 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
8265 ++i;
8266 ++InsIdx;
8267 }
8268 continue;
8269 }
8270 InVals.push_back(ArgValue);
8271 }
8272
8273 if (IsVarArg) {
8275 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
8276 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8277 MachineFrameInfo &MFI = MF.getFrameInfo();
8278 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8279 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8280
8281 // Offset of the first variable argument from stack pointer, and size of
8282 // the vararg save area. For now, the varargs save area is either zero or
8283 // large enough to hold a0-a7.
8284 int VaArgOffset, VarArgsSaveSize;
8285
8286 // If all registers are allocated, then all varargs must be passed on the
8287 // stack and we don't need to save any argregs.
8288 if (ArgRegs.size() == Idx) {
8289 VaArgOffset = CCInfo.getStackSize();
8290 VarArgsSaveSize = 0;
8291 } else {
8292 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8293 VaArgOffset = -VarArgsSaveSize;
8294 }
8295
8296 // Record the frame index of the first variable argument
8297 // which is a value necessary to VASTART.
8298 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8299 LoongArchFI->setVarArgsFrameIndex(FI);
8300
8301 // If saving an odd number of registers then create an extra stack slot to
8302 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
 8303 // offsets to even-numbered registers remain 2*GRLen-aligned.
8304 if (Idx % 2) {
8305 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
8306 true);
8307 VarArgsSaveSize += GRLenInBytes;
8308 }
8309
8310 // Copy the integer registers that may have been used for passing varargs
8311 // to the vararg save area.
8312 for (unsigned I = Idx; I < ArgRegs.size();
8313 ++I, VaArgOffset += GRLenInBytes) {
8314 const Register Reg = RegInfo.createVirtualRegister(RC);
8315 RegInfo.addLiveIn(ArgRegs[I], Reg);
8316 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
8317 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8318 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8319 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
8321 cast<StoreSDNode>(Store.getNode())
8322 ->getMemOperand()
8323 ->setValue((Value *)nullptr);
8324 OutChains.push_back(Store);
8325 }
8326 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8327 }
8328
8329 // All stores are grouped in one node to allow the matching between
8330 // the size of Ins and InVals. This only happens for vararg functions.
8331 if (!OutChains.empty()) {
8332 OutChains.push_back(Chain);
8333 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
8334 }
8335
8336 return Chain;
8337}
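A minimal sketch of the callee the vararg save area above serves (ordinary C++, not part of this file; the function name is illustrative): unnamed arguments that arrived in the remaining argument registers are spilled next to the stack-passed ones, so va_arg can walk all of them with plain loads.

#include <cstdarg>

// Sums `n` ints passed as unnamed arguments. The first few arrive in
// argument registers; LowerFormalArguments spills the still-unallocated
// argument registers into the save area so this loop sees one contiguous
// sequence of GRLen-sized slots.
int sum(int n, ...) {
  va_list ap;
  va_start(ap, n);
  int total = 0;
  for (int i = 0; i < n; ++i)
    total += va_arg(ap, int);
  va_end(ap);
  return total;
}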
8338
 8339 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
 8340 return CI->isTailCall();
8341}
8342
 8343 // Check if the return value is used only as a return value, as otherwise
 8344 // we can't perform a tail call.
8346 SDValue &Chain) const {
8347 if (N->getNumValues() != 1)
8348 return false;
8349 if (!N->hasNUsesOfValue(1, 0))
8350 return false;
8351
8352 SDNode *Copy = *N->user_begin();
8353 if (Copy->getOpcode() != ISD::CopyToReg)
8354 return false;
8355
8356 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8357 // isn't safe to perform a tail call.
8358 if (Copy->getGluedNode())
8359 return false;
8360
8361 // The copy must be used by a LoongArchISD::RET, and nothing else.
8362 bool HasRet = false;
8363 for (SDNode *Node : Copy->users()) {
8364 if (Node->getOpcode() != LoongArchISD::RET)
8365 return false;
8366 HasRet = true;
8367 }
8368
8369 if (!HasRet)
8370 return false;
8371
8372 Chain = Copy->getOperand(0);
8373 return true;
8374}
8375
8376// Check whether the call is eligible for tail call optimization.
8377bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8378 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8379 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8380
8381 auto CalleeCC = CLI.CallConv;
8382 auto &Outs = CLI.Outs;
8383 auto &Caller = MF.getFunction();
8384 auto CallerCC = Caller.getCallingConv();
8385
8386 // Do not tail call opt if the stack is used to pass parameters.
8387 if (CCInfo.getStackSize() != 0)
8388 return false;
8389
8390 // Do not tail call opt if any parameters need to be passed indirectly.
8391 for (auto &VA : ArgLocs)
8392 if (VA.getLocInfo() == CCValAssign::Indirect)
8393 return false;
8394
8395 // Do not tail call opt if either caller or callee uses struct return
8396 // semantics.
8397 auto IsCallerStructRet = Caller.hasStructRetAttr();
8398 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8399 if (IsCallerStructRet || IsCalleeStructRet)
8400 return false;
8401
8402 // Do not tail call opt if either the callee or caller has a byval argument.
8403 for (auto &Arg : Outs)
8404 if (Arg.Flags.isByVal())
8405 return false;
8406
8407 // The callee has to preserve all registers the caller needs to preserve.
8408 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8409 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8410 if (CalleeCC != CallerCC) {
8411 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8412 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8413 return false;
8414 }
8415 return true;
8416}
8417
8419 return DAG.getDataLayout().getPrefTypeAlign(
8420 VT.getTypeForEVT(*DAG.getContext()));
8421}
8422
8423// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8424// and output parameter nodes.
8425SDValue
8427 SmallVectorImpl<SDValue> &InVals) const {
8428 SelectionDAG &DAG = CLI.DAG;
8429 SDLoc &DL = CLI.DL;
8431 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8433 SDValue Chain = CLI.Chain;
8434 SDValue Callee = CLI.Callee;
8435 CallingConv::ID CallConv = CLI.CallConv;
8436 bool IsVarArg = CLI.IsVarArg;
8437 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8438 MVT GRLenVT = Subtarget.getGRLenVT();
8439 bool &IsTailCall = CLI.IsTailCall;
8440
8442
8443 // Analyze the operands of the call, assigning locations to each operand.
8445 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8446
8447 if (CallConv == CallingConv::GHC)
8448 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
8449 else
8450 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
8451
8452 // Check if it's really possible to do a tail call.
8453 if (IsTailCall)
8454 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8455
8456 if (IsTailCall)
8457 ++NumTailCalls;
8458 else if (CLI.CB && CLI.CB->isMustTailCall())
8459 report_fatal_error("failed to perform tail call elimination on a call "
8460 "site marked musttail");
8461
8462 // Get a count of how many bytes are to be pushed on the stack.
8463 unsigned NumBytes = ArgCCInfo.getStackSize();
8464
8465 // Create local copies for byval args.
8466 SmallVector<SDValue> ByValArgs;
8467 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8468 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8469 if (!Flags.isByVal())
8470 continue;
8471
8472 SDValue Arg = OutVals[i];
8473 unsigned Size = Flags.getByValSize();
8474 Align Alignment = Flags.getNonZeroByValAlign();
8475
8476 int FI =
8477 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8478 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8479 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
8480
8481 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
8482 /*IsVolatile=*/false,
8483 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
8485 ByValArgs.push_back(FIPtr);
8486 }
8487
8488 if (!IsTailCall)
8489 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8490
8491 // Copy argument values to their designated locations.
8493 SmallVector<SDValue> MemOpChains;
8494 SDValue StackPtr;
8495 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8496 ++i, ++OutIdx) {
8497 CCValAssign &VA = ArgLocs[i];
8498 SDValue ArgValue = OutVals[OutIdx];
8499 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8500
8501 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8502 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8503 assert(VA.isRegLoc() && "Expected register VA assignment");
8504 assert(VA.needsCustom());
8505 SDValue SplitF64 =
8506 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8507 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8508 SDValue Lo = SplitF64.getValue(0);
8509 SDValue Hi = SplitF64.getValue(1);
8510
8511 Register RegLo = VA.getLocReg();
8512 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8513
8514 // Get the CCValAssign for the Hi part.
8515 CCValAssign &HiVA = ArgLocs[++i];
8516
8517 if (HiVA.isMemLoc()) {
8518 // Second half of f64 is passed on the stack.
8519 if (!StackPtr.getNode())
8520 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8522 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8523 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8524 // Emit the store.
8525 MemOpChains.push_back(DAG.getStore(
8526 Chain, DL, Hi, Address,
8528 } else {
8529 // Second half of f64 is passed in another GPR.
8530 Register RegHigh = HiVA.getLocReg();
8531 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8532 }
8533 continue;
8534 }
8535
8536 // Promote the value if needed.
8537 // For now, only handle fully promoted and indirect arguments.
8538 if (VA.getLocInfo() == CCValAssign::Indirect) {
8539 // Store the argument in a stack slot and pass its address.
8540 Align StackAlign =
8541 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8542 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8543 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8544 // If the original argument was split and passed by reference, we need to
8545 // store the required parts of it here (and pass just one address).
8546 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8547 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8548 assert(ArgPartOffset == 0);
8549 // Calculate the total size to store. We don't have access to what we're
 8550 // actually storing other than by performing the loop and collecting the
8551 // info.
8553 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8554 SDValue PartValue = OutVals[OutIdx + 1];
8555 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8556 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8557 EVT PartVT = PartValue.getValueType();
8558
8559 StoredSize += PartVT.getStoreSize();
8560 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8561 Parts.push_back(std::make_pair(PartValue, Offset));
8562 ++i;
8563 ++OutIdx;
8564 }
8565 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8566 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8567 MemOpChains.push_back(
8568 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8570 for (const auto &Part : Parts) {
8571 SDValue PartValue = Part.first;
8572 SDValue PartOffset = Part.second;
8574 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8575 MemOpChains.push_back(
8576 DAG.getStore(Chain, DL, PartValue, Address,
8578 }
8579 ArgValue = SpillSlot;
8580 } else {
8581 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8582 }
8583
8584 // Use local copy if it is a byval arg.
8585 if (Flags.isByVal())
8586 ArgValue = ByValArgs[j++];
8587
8588 if (VA.isRegLoc()) {
8589 // Queue up the argument copies and emit them at the end.
8590 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8591 } else {
8592 assert(VA.isMemLoc() && "Argument not register or memory");
8593 assert(!IsTailCall && "Tail call not allowed if stack is used "
8594 "for passing parameters");
8595
8596 // Work out the address of the stack slot.
8597 if (!StackPtr.getNode())
8598 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8600 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8602
8603 // Emit the store.
8604 MemOpChains.push_back(
8605 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8606 }
8607 }
8608
8609 // Join the stores, which are independent of one another.
8610 if (!MemOpChains.empty())
8611 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8612
8613 SDValue Glue;
8614
8615 // Build a sequence of copy-to-reg nodes, chained and glued together.
8616 for (auto &Reg : RegsToPass) {
8617 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8618 Glue = Chain.getValue(1);
8619 }
8620
8621 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8622 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
 8623 // split it, and then the direct call can be matched by PseudoCALL.
8625 const GlobalValue *GV = S->getGlobal();
8626 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8629 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8630 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8631 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8634 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8635 }
8636
8637 // The first call operand is the chain and the second is the target address.
8639 Ops.push_back(Chain);
8640 Ops.push_back(Callee);
8641
8642 // Add argument registers to the end of the list so that they are
8643 // known live into the call.
8644 for (auto &Reg : RegsToPass)
8645 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8646
8647 if (!IsTailCall) {
8648 // Add a register mask operand representing the call-preserved registers.
8649 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8650 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8651 assert(Mask && "Missing call preserved mask for calling convention");
8652 Ops.push_back(DAG.getRegisterMask(Mask));
8653 }
8654
8655 // Glue the call to the argument copies, if any.
8656 if (Glue.getNode())
8657 Ops.push_back(Glue);
8658
8659 // Emit the call.
8660 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8661 unsigned Op;
8662 switch (DAG.getTarget().getCodeModel()) {
8663 default:
8664 report_fatal_error("Unsupported code model");
8665 case CodeModel::Small:
8666 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8667 break;
8668 case CodeModel::Medium:
8669 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8670 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8671 break;
8672 case CodeModel::Large:
8673 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8674 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8675 break;
8676 }
8677
8678 if (IsTailCall) {
8680 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8681 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8682 return Ret;
8683 }
8684
8685 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8686 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8687 Glue = Chain.getValue(1);
8688
8689 // Mark the end of the call, which is glued to the call itself.
8690 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8691 Glue = Chain.getValue(1);
8692
8693 // Assign locations to each value returned by this call.
8695 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8696 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8697
8698 // Copy all of the result registers out of their specified physreg.
8699 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8700 auto &VA = RVLocs[i];
8701 // Copy the value out.
8702 SDValue RetValue =
8703 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8704 // Glue the RetValue to the end of the call sequence.
8705 Chain = RetValue.getValue(1);
8706 Glue = RetValue.getValue(2);
8707
8708 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8709 assert(VA.needsCustom());
8710 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8711 MVT::i32, Glue);
8712 Chain = RetValue2.getValue(1);
8713 Glue = RetValue2.getValue(2);
8714 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8715 RetValue, RetValue2);
8716 } else
8717 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8718
8719 InVals.push_back(RetValue);
8720 }
8721
8722 return Chain;
8723}
8724
8726 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8727 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8728 const Type *RetTy) const {
8730 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8731
8732 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8733 LoongArchABI::ABI ABI =
8734 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8735 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8736 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8737 return false;
8738 }
8739 return true;
8740}
8741
8743 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8745 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8746 SelectionDAG &DAG) const {
8747 // Stores the assignment of the return value to a location.
8749
8750 // Info about the registers and stack slot.
8751 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8752 *DAG.getContext());
8753
8754 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8755 nullptr, CC_LoongArch);
8756 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8757 report_fatal_error("GHC functions return void only");
8758 SDValue Glue;
8759 SmallVector<SDValue, 4> RetOps(1, Chain);
8760
8761 // Copy the result values into the output registers.
8762 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8763 SDValue Val = OutVals[OutIdx];
8764 CCValAssign &VA = RVLocs[i];
8765 assert(VA.isRegLoc() && "Can only return in registers!");
8766
8767 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8768 // Handle returning f64 on LA32D with a soft float ABI.
8769 assert(VA.isRegLoc() && "Expected return via registers");
8770 assert(VA.needsCustom());
8771 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8772 DAG.getVTList(MVT::i32, MVT::i32), Val);
8773 SDValue Lo = SplitF64.getValue(0);
8774 SDValue Hi = SplitF64.getValue(1);
8775 Register RegLo = VA.getLocReg();
8776 Register RegHi = RVLocs[++i].getLocReg();
8777
8778 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8779 Glue = Chain.getValue(1);
8780 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8781 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8782 Glue = Chain.getValue(1);
8783 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8784 } else {
8785 // Handle a 'normal' return.
8786 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8787 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8788
8789 // Guarantee that all emitted copies are stuck together.
8790 Glue = Chain.getValue(1);
8791 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8792 }
8793 }
8794
8795 RetOps[0] = Chain; // Update chain.
8796
8797 // Add the glue node if we have it.
8798 if (Glue.getNode())
8799 RetOps.push_back(Glue);
8800
8801 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8802}
8803
8804// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8805// Note: The following prefixes are excluded:
8806// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8807// as they can be represented using [x]vrepli.[whb]
8809 const APInt &SplatValue, const unsigned SplatBitSize) const {
8810 uint64_t RequiredImm = 0;
8811 uint64_t V = SplatValue.getZExtValue();
8812 if (SplatBitSize == 16 && !(V & 0x00FF)) {
8813 // 4'b0101
8814 RequiredImm = (0b10101 << 8) | (V >> 8);
8815 return {true, RequiredImm};
8816 } else if (SplatBitSize == 32) {
8817 // 4'b0001
8818 if (!(V & 0xFFFF00FF)) {
8819 RequiredImm = (0b10001 << 8) | (V >> 8);
8820 return {true, RequiredImm};
8821 }
8822 // 4'b0010
8823 if (!(V & 0xFF00FFFF)) {
8824 RequiredImm = (0b10010 << 8) | (V >> 16);
8825 return {true, RequiredImm};
8826 }
8827 // 4'b0011
8828 if (!(V & 0x00FFFFFF)) {
8829 RequiredImm = (0b10011 << 8) | (V >> 24);
8830 return {true, RequiredImm};
8831 }
8832 // 4'b0110
8833 if ((V & 0xFFFF00FF) == 0xFF) {
8834 RequiredImm = (0b10110 << 8) | (V >> 8);
8835 return {true, RequiredImm};
8836 }
8837 // 4'b0111
8838 if ((V & 0xFF00FFFF) == 0xFFFF) {
8839 RequiredImm = (0b10111 << 8) | (V >> 16);
8840 return {true, RequiredImm};
8841 }
8842 // 4'b1010
8843 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8844 RequiredImm =
8845 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8846 return {true, RequiredImm};
8847 }
8848 } else if (SplatBitSize == 64) {
8849 // 4'b1011
8850 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8851 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8852 RequiredImm =
8853 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8854 return {true, RequiredImm};
8855 }
8856 // 4'b1100
8857 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8858 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8859 RequiredImm =
8860 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8861 return {true, RequiredImm};
8862 }
8863 // 4'b1001
8864 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8865 uint8_t res = 0;
8866 for (int i = 0; i < 8; ++i) {
8867 uint8_t byte = x & 0xFF;
8868 if (byte == 0 || byte == 0xFF)
8869 res |= ((byte & 1) << i);
8870 else
8871 return {false, 0};
8872 x >>= 8;
8873 }
8874 return {true, res};
8875 };
8876 auto [IsSame, Suffix] = sameBitsPreByte(V);
8877 if (IsSame) {
8878 RequiredImm = (0b11001 << 8) | Suffix;
8879 return {true, RequiredImm};
8880 }
8881 }
8882 return {false, RequiredImm};
8883}
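As a worked example of the encoding above (a standalone re-derivation of the code, not a claim about the ISA manual; the helper name is made up): a v4f32 splat of 1.0f, i.e. V = 0x3F800000, falls into the 4'b1010 case and yields imm = 0x1A70.

#include <cassert>
#include <cstdint>

// Re-derives the 32-bit 4'b1010 branch of the function above.
uint64_t vldiImmForF32Splat1010(uint32_t V) {
  assert((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000);
  return (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
}

int main() {
  assert(vldiImmForF32Splat1010(0x3F800000u) == 0x1A70); // splat of 1.0f
  return 0;
}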
8884
8886 EVT VT) const {
8887 if (!Subtarget.hasExtLSX())
8888 return false;
8889
8890 if (VT == MVT::f32) {
8891 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8892 return (masked == 0x3e000000 || masked == 0x40000000);
8893 }
8894
8895 if (VT == MVT::f64) {
8896 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8897 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8898 }
8899
8900 return false;
8901}
8902
8903bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8904 bool ForCodeSize) const {
8905 // TODO: Maybe need more checks here after vector extension is supported.
8906 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8907 return false;
8908 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8909 return false;
8910 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8911}
8912
8914 return true;
8915}
8916
8918 return true;
8919}
8920
8921bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8922 const Instruction *I) const {
8923 if (!Subtarget.is64Bit())
8924 return isa<LoadInst>(I) || isa<StoreInst>(I);
8925
8926 if (isa<LoadInst>(I))
8927 return true;
8928
8929 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
 8930 // require fences because we can use amswap_db.[w/d].
8931 Type *Ty = I->getOperand(0)->getType();
8932 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8933 unsigned Size = Ty->getIntegerBitWidth();
8934 return (Size == 8 || Size == 16);
8935 }
8936
8937 return false;
8938}
8939
8941 LLVMContext &Context,
8942 EVT VT) const {
8943 if (!VT.isVector())
8944 return getPointerTy(DL);
8946}
8947
8949 EVT VT = Y.getValueType();
8950
8951 if (VT.isVector())
8952 return Subtarget.hasExtLSX() && VT.isInteger();
8953
8954 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
8955}
8956
8958 const CallBase &I,
8959 MachineFunction &MF,
8960 unsigned Intrinsic) const {
8961 switch (Intrinsic) {
8962 default:
8963 return false;
8964 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8965 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8966 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8967 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8968 Info.opc = ISD::INTRINSIC_W_CHAIN;
8969 Info.memVT = MVT::i32;
8970 Info.ptrVal = I.getArgOperand(0);
8971 Info.offset = 0;
8972 Info.align = Align(4);
8975 return true;
8976 // TODO: Add more Intrinsics later.
8977 }
8978}
8979
 8980 // When -mlamcas is enabled, MinCmpXchgSizeInBits is set to 8, so atomicrmw
 8981 // and/or/xor operations with operands narrower than 32 bits cannot be
 8982 // expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a
 8983 // regression, we need to implement the expansion manually.
8986
8988 Op == AtomicRMWInst::And) &&
8989 "Unable to expand");
8990 unsigned MinWordSize = 4;
8991
8992 IRBuilder<> Builder(AI);
8993 LLVMContext &Ctx = Builder.getContext();
8994 const DataLayout &DL = AI->getDataLayout();
8995 Type *ValueType = AI->getType();
8996 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8997
8998 Value *Addr = AI->getPointerOperand();
8999 PointerType *PtrTy = cast<PointerType>(Addr->getType());
9000 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
9001
9002 Value *AlignedAddr = Builder.CreateIntrinsic(
9003 Intrinsic::ptrmask, {PtrTy, IntTy},
9004 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
9005 "AlignedAddr");
9006
9007 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
9008 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
9009 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
9010 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
9011 Value *Mask = Builder.CreateShl(
9012 ConstantInt::get(WordType,
9013 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
9014 ShiftAmt, "Mask");
9015 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
9016 Value *ValOperand_Shifted =
9017 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
9018 ShiftAmt, "ValOperand_Shifted");
9019 Value *NewOperand;
9020 if (Op == AtomicRMWInst::And)
9021 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
9022 else
9023 NewOperand = ValOperand_Shifted;
9024
9025 AtomicRMWInst *NewAI =
9026 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
9027 AI->getOrdering(), AI->getSyncScopeID());
9028
9029 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
9030 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
9031 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
9032 AI->replaceAllUsesWith(FinalOldResult);
9033 AI->eraseFromParent();
9034}
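A non-atomic sketch of the address/shift/mask arithmetic built above (plain C++, not part of this file; the real code performs the word-wide operation with an atomicrmw on the aligned word):

#include <cassert>
#include <cstdint>

// An i8 `or` becomes a 32-bit `or` on the containing aligned word, with the
// operand shifted into position; the old byte is extracted from the old word.
uint8_t orByteViaWord(uint32_t *Word, unsigned ByteIndex, uint8_t Val) {
  unsigned ShiftAmt = ByteIndex * 8;            // PtrLSB * 8
  uint32_t Mask = 0xFFu << ShiftAmt;            // selects the addressed byte
  uint32_t Shifted = uint32_t(Val) << ShiftAmt; // ValOperand_Shifted
  uint32_t Old = *Word;
  *Word = Old | Shifted;                        // `or` needs no Inv_Mask fixup
  return uint8_t((Old & Mask) >> ShiftAmt);     // extracted old byte
}

int main() {
  uint32_t W = 0x11223344;
  uint8_t Old = orByteViaWord(&W, 1, 0x0F); // operate on byte 1 (0x33)
  assert(Old == 0x33 && W == 0x11223F44);
  return 0;
}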
9035
9038 // TODO: Add more AtomicRMWInst that needs to be extended.
9039
 9040 // Since a floating-point operation requires a non-trivial set of data
 9041 // operations, use CmpXChg to expand.
9042 if (AI->isFloatingPointOperation() ||
9048
9049 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
9052 AI->getOperation() == AtomicRMWInst::Sub)) {
9054 }
9055
9056 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
9057 if (Subtarget.hasLAMCAS()) {
9058 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
9062 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
9064 }
9065
9066 if (Size == 8 || Size == 16)
9069}
9070
9071static Intrinsic::ID
9073 AtomicRMWInst::BinOp BinOp) {
9074 if (GRLen == 64) {
9075 switch (BinOp) {
9076 default:
9077 llvm_unreachable("Unexpected AtomicRMW BinOp");
9079 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
9080 case AtomicRMWInst::Add:
9081 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
9082 case AtomicRMWInst::Sub:
9083 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
9085 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
9087 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
9089 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
9090 case AtomicRMWInst::Max:
9091 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
9092 case AtomicRMWInst::Min:
9093 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
9094 // TODO: support other AtomicRMWInst.
9095 }
9096 }
9097
9098 if (GRLen == 32) {
9099 switch (BinOp) {
9100 default:
9101 llvm_unreachable("Unexpected AtomicRMW BinOp");
9103 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
9104 case AtomicRMWInst::Add:
9105 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
9106 case AtomicRMWInst::Sub:
9107 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
9109 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
9111 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
9113 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
9114 case AtomicRMWInst::Max:
9115 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
9116 case AtomicRMWInst::Min:
9117 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
9118 // TODO: support other AtomicRMWInst.
9119 }
9120 }
9121
9122 llvm_unreachable("Unexpected GRLen\n");
9123}
9124
9127 AtomicCmpXchgInst *CI) const {
9128
9129 if (Subtarget.hasLAMCAS())
9131
9133 if (Size == 8 || Size == 16)
9136}
9137
9139 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9140 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9141 unsigned GRLen = Subtarget.getGRLen();
9142 AtomicOrdering FailOrd = CI->getFailureOrdering();
9143 Value *FailureOrdering =
9144 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
9145 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9146 if (GRLen == 64) {
9147 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9148 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
9149 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
9150 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9151 }
9152 Type *Tys[] = {AlignedAddr->getType()};
9153 Value *Result = Builder.CreateIntrinsic(
9154 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9155 if (GRLen == 64)
9156 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9157 return Result;
9158}
9159
9161 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
9162 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
9163 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
9164 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
9165 // mask, as this produces better code than the LL/SC loop emitted by
9166 // int_loongarch_masked_atomicrmw_xchg.
9167 if (AI->getOperation() == AtomicRMWInst::Xchg &&
9170 if (CVal->isZero())
9171 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
9172 Builder.CreateNot(Mask, "Inv_Mask"),
9173 AI->getAlign(), Ord);
9174 if (CVal->isMinusOne())
9175 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
9176 AI->getAlign(), Ord);
9177 }
9178
9179 unsigned GRLen = Subtarget.getGRLen();
9180 Value *Ordering =
9181 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
9182 Type *Tys[] = {AlignedAddr->getType()};
9184 AI->getModule(),
9186
9187 if (GRLen == 64) {
9188 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
9189 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9190 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
9191 }
9192
9193 Value *Result;
9194
9195 // Must pass the shift amount needed to sign extend the loaded value prior
9196 // to performing a signed comparison for min/max. ShiftAmt is the number of
9197 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
9198 // is the number of bits to left+right shift the value in order to
9199 // sign-extend.
9200 if (AI->getOperation() == AtomicRMWInst::Min ||
9202 const DataLayout &DL = AI->getDataLayout();
9203 unsigned ValWidth =
9204 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9205 Value *SextShamt =
9206 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
9207 Result = Builder.CreateCall(LlwOpScwLoop,
9208 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9209 } else {
9210 Result =
9211 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9212 }
9213
9214 if (GRLen == 64)
9215 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9216 return Result;
9217}
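A small worked example of the shift passed to the LL/SC loop above, assuming GRLen = 64 and an i16 field located at byte offset 2 of its aligned word:

#include <cassert>

int main() {
  unsigned GRLen = 64, ValWidth = 16, ShiftAmt = 16; // i16 at byte offset 2
  // SextShamt = (GRLen - ValWidth) - ShiftAmt: shifting left and then right
  // by this amount sign-extends the 16-bit field before the signed min/max
  // comparison.
  unsigned SextShamt = (GRLen - ValWidth) - ShiftAmt;
  assert(SextShamt == 32);
  return 0;
}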
9218
9220 const MachineFunction &MF, EVT VT) const {
9221 VT = VT.getScalarType();
9222
9223 if (!VT.isSimple())
9224 return false;
9225
9226 switch (VT.getSimpleVT().SimpleTy) {
9227 case MVT::f32:
9228 case MVT::f64:
9229 return true;
9230 default:
9231 break;
9232 }
9233
9234 return false;
9235}
9236
9238 const Constant *PersonalityFn) const {
9239 return LoongArch::R4;
9240}
9241
9243 const Constant *PersonalityFn) const {
9244 return LoongArch::R5;
9245}
9246
9247//===----------------------------------------------------------------------===//
9248// Target Optimization Hooks
9249//===----------------------------------------------------------------------===//
9250
9252 const LoongArchSubtarget &Subtarget) {
 9253 // The FRECIPE feature's instructions have a relative accuracy of 2^-14.
 9254 // IEEE single precision has 23 fraction bits and double precision has 52.
9255 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9256 return RefinementSteps;
9257}
9258
9260 SelectionDAG &DAG, int Enabled,
9261 int &RefinementSteps,
9262 bool &UseOneConstNR,
9263 bool Reciprocal) const {
9264 if (Subtarget.hasFrecipe()) {
9265 SDLoc DL(Operand);
9266 EVT VT = Operand.getValueType();
9267
9268 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9269 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9270 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9271 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9272 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9273
9274 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9275 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9276
9277 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9278 if (Reciprocal)
9279 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
9280
9281 return Estimate;
9282 }
9283 }
9284
9285 return SDValue();
9286}
9287
9289 SelectionDAG &DAG,
9290 int Enabled,
9291 int &RefinementSteps) const {
9292 if (Subtarget.hasFrecipe()) {
9293 SDLoc DL(Operand);
9294 EVT VT = Operand.getValueType();
9295
9296 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9297 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9298 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9299 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9300 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9301
9302 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9303 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9304
9305 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9306 }
9307 }
9308
9309 return SDValue();
9310}
9311
9312//===----------------------------------------------------------------------===//
9313// LoongArch Inline Assembly Support
9314//===----------------------------------------------------------------------===//
9315
9317LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9318 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9319 //
9320 // 'f': A floating-point register (if available).
9321 // 'k': A memory operand whose address is formed by a base register and
9322 // (optionally scaled) index register.
9323 // 'l': A signed 16-bit constant.
9324 // 'm': A memory operand whose address is formed by a base register and
9325 // offset that is suitable for use in instructions with the same
9326 // addressing mode as st.w and ld.w.
9327 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9328 // instruction)
9329 // 'I': A signed 12-bit constant (for arithmetic instructions).
9330 // 'J': Integer zero.
9331 // 'K': An unsigned 12-bit constant (for logic instructions).
9332 // "ZB": An address that is held in a general-purpose register. The offset is
9333 // zero.
9334 // "ZC": A memory operand whose address is formed by a base register and
9335 // offset that is suitable for use in instructions with the same
9336 // addressing mode as ll.w and sc.w.
9337 if (Constraint.size() == 1) {
9338 switch (Constraint[0]) {
9339 default:
9340 break;
9341 case 'f':
9342 case 'q':
9343 return C_RegisterClass;
9344 case 'l':
9345 case 'I':
9346 case 'J':
9347 case 'K':
9348 return C_Immediate;
9349 case 'k':
9350 return C_Memory;
9351 }
9352 }
9353
9354 if (Constraint == "ZC" || Constraint == "ZB")
9355 return C_Memory;
9356
9357 // 'm' is handled here.
9358 return TargetLowering::getConstraintType(Constraint);
9359}
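A hedged sketch of how these constraints are typically used from GCC/Clang extended inline asm (the mnemonics and values are only illustrative):

int addImm(int x) {
  int r;
  // 'r': general-purpose registers; 'I': signed 12-bit immediate.
  asm("addi.w %0, %1, %2" : "=r"(r) : "r"(x), "I"(42));
  return r;
}

float addFloat(float a, float b) {
  float r;
  // 'f': floating-point registers.
  asm("fadd.s %0, %1, %2" : "=f"(r) : "f"(a), "f"(b));
  return r;
}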
9360
9361InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9362 StringRef ConstraintCode) const {
9363 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9367 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9368}
9369
9370std::pair<unsigned, const TargetRegisterClass *>
9371LoongArchTargetLowering::getRegForInlineAsmConstraint(
9372 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9373 // First, see if this is a constraint that directly corresponds to a LoongArch
9374 // register class.
9375 if (Constraint.size() == 1) {
9376 switch (Constraint[0]) {
9377 case 'r':
9378 // TODO: Support fixed vectors up to GRLen?
9379 if (VT.isVector())
9380 break;
9381 return std::make_pair(0U, &LoongArch::GPRRegClass);
9382 case 'q':
9383 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9384 case 'f':
9385 if (Subtarget.hasBasicF() && VT == MVT::f32)
9386 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9387 if (Subtarget.hasBasicD() && VT == MVT::f64)
9388 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9389 if (Subtarget.hasExtLSX() &&
9390 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9391 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9392 if (Subtarget.hasExtLASX() &&
9393 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9394 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9395 break;
9396 default:
9397 break;
9398 }
9399 }
9400
9401 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9402 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9403 // constraints while the official register name is prefixed with a '$'. So we
9404 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
 9405 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
9406 // case insensitive, so no need to convert the constraint to upper case here.
9407 //
9408 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9409 // decode the usage of register name aliases into their official names. And
9410 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9411 // official register names.
9412 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9413 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9414 bool IsFP = Constraint[2] == 'f';
9415 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9416 std::pair<unsigned, const TargetRegisterClass *> R;
9418 TRI, join_items("", Temp.first, Temp.second), VT);
9419 // Match those names to the widest floating point register type available.
9420 if (IsFP) {
9421 unsigned RegNo = R.first;
9422 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9423 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9424 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9425 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9426 }
9427 }
9428 }
9429 return R;
9430 }
9431
9432 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9433}
9434
9435void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9436 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9437 SelectionDAG &DAG) const {
9438 // Currently only support length 1 constraints.
9439 if (Constraint.size() == 1) {
9440 switch (Constraint[0]) {
9441 case 'l':
9442 // Validate & create a 16-bit signed immediate operand.
9443 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9444 uint64_t CVal = C->getSExtValue();
9445 if (isInt<16>(CVal))
9446 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9447 Subtarget.getGRLenVT()));
9448 }
9449 return;
9450 case 'I':
9451 // Validate & create a 12-bit signed immediate operand.
9452 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9453 uint64_t CVal = C->getSExtValue();
9454 if (isInt<12>(CVal))
9455 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9456 Subtarget.getGRLenVT()));
9457 }
9458 return;
9459 case 'J':
9460 // Validate & create an integer zero operand.
9461 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9462 if (C->getZExtValue() == 0)
9463 Ops.push_back(
9464 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9465 return;
9466 case 'K':
9467 // Validate & create a 12-bit unsigned immediate operand.
9468 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9469 uint64_t CVal = C->getZExtValue();
9470 if (isUInt<12>(CVal))
9471 Ops.push_back(
9472 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9473 }
9474 return;
9475 default:
9476 break;
9477 }
9478 }
9480}
9481
9482#define GET_REGISTER_MATCHER
9483#include "LoongArchGenAsmMatcher.inc"
9484
9487 const MachineFunction &MF) const {
9488 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9489 std::string NewRegName = Name.second.str();
9490 Register Reg = MatchRegisterAltName(NewRegName);
9491 if (!Reg)
9492 Reg = MatchRegisterName(NewRegName);
9493 if (!Reg)
9494 return Reg;
9495 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9496 if (!ReservedRegs.test(Reg))
9497 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9498 StringRef(RegName) + "\"."));
9499 return Reg;
9500}
9501
9503 EVT VT, SDValue C) const {
9504 // TODO: Support vectors.
9505 if (!VT.isScalarInteger())
9506 return false;
9507
9508 // Omit the optimization if the data size exceeds GRLen.
9509 if (VT.getSizeInBits() > Subtarget.getGRLen())
9510 return false;
9511
9512 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9513 const APInt &Imm = ConstNode->getAPIntValue();
9514 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9515 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9516 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9517 return true;
9518 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9519 if (ConstNode->hasOneUse() &&
9520 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9521 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9522 return true;
9523 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
 9524 // in which the immediate has two set bits. Or break (MUL x, imm)
 9525 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
 9526 // equals (1 << s0) - (1 << s1).
9527 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9528 unsigned Shifts = Imm.countr_zero();
9529 // Reject immediates which can be composed via a single LUI.
9530 if (Shifts >= 12)
9531 return false;
 9532 // Reject multiplications that can be optimized to
9533 // (SLLI (ALSL x, x, 1/2/3/4), s).
9534 APInt ImmPop = Imm.ashr(Shifts);
9535 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9536 return false;
9537 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
 9538 // since it needs one more instruction than the other 3 cases.
9539 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9540 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9541 (ImmSmall - Imm).isPowerOf2())
9542 return true;
9543 }
9544 }
9545
9546 return false;
9547}
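The arithmetic identities behind the hook above, spelled out with plain integers (a sketch only; the actual SLLI/ALSL sequences are chosen later during instruction selection):

#include <cassert>
#include <cstdint>

int main() {
  int64_t x = 12345;
  assert(x * 7 == (x << 3) - x);          // Imm + 1 is a power of two
  assert(x * 9 == (x << 3) + x);          // Imm - 1 is a power of two
  assert(x * 6 == (x << 2) + (x << 1));   // immediate with two set bits
  assert(x * 12 == (x << 4) - (x << 2));  // (1 << s0) - (1 << s1)
  return 0;
}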
9548
9550 const AddrMode &AM,
9551 Type *Ty, unsigned AS,
9552 Instruction *I) const {
9553 // LoongArch has four basic addressing modes:
9554 // 1. reg
9555 // 2. reg + 12-bit signed offset
9556 // 3. reg + 14-bit signed offset left-shifted by 2
9557 // 4. reg1 + reg2
9558 // TODO: Add more checks after support vector extension.
9559
9560 // No global is ever allowed as a base.
9561 if (AM.BaseGV)
9562 return false;
9563
9564 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9565 // with `UAL` feature.
9566 if (!isInt<12>(AM.BaseOffs) &&
9567 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9568 return false;
9569
9570 switch (AM.Scale) {
9571 case 0:
9572 // "r+i" or just "i", depending on HasBaseReg.
9573 break;
9574 case 1:
9575 // "r+r+i" is not allowed.
9576 if (AM.HasBaseReg && AM.BaseOffs)
9577 return false;
9578 // Otherwise we have "r+r" or "r+i".
9579 break;
9580 case 2:
9581 // "2*r+r" or "2*r+i" is not allowed.
9582 if (AM.HasBaseReg || AM.BaseOffs)
9583 return false;
9584 // Allow "2*r" as "r+r".
9585 break;
9586 default:
9587 return false;
9588 }
9589
9590 return true;
9591}
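A standalone restatement of the offset rules above (the helper names are made up; the real checks use isInt<12> and isShiftedInt<14, 2> from llvm/Support/MathExtras.h):

#include <cassert>
#include <cstdint>

// A base-relative offset is accepted if it fits in a signed 12-bit field,
// or (with the UAL feature) in a signed 14-bit field scaled by 4.
bool fitsSInt12(int64_t O) { return O >= -2048 && O <= 2047; }
bool fitsShifted14x2(int64_t O) {
  return (O & 3) == 0 && (O >> 2) >= -8192 && (O >> 2) <= 8191;
}

int main() {
  assert(fitsSInt12(2047) && !fitsSInt12(2048));
  assert(fitsShifted14x2(2048));  // 2048 = 512 * 4, needs UAL
  assert(!fitsShifted14x2(2049)); // not a multiple of 4
  return 0;
}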
9592
9594 return isInt<12>(Imm);
9595}
9596
9598 return isInt<12>(Imm);
9599}
9600
9602 // Zexts are free if they can be combined with a load.
9603 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9604 // poorly with type legalization of compares preferring sext.
9605 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9606 EVT MemVT = LD->getMemoryVT();
9607 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9608 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9609 LD->getExtensionType() == ISD::ZEXTLOAD))
9610 return true;
9611 }
9612
9613 return TargetLowering::isZExtFree(Val, VT2);
9614}
9615
9617 EVT DstVT) const {
9618 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9619}
9620
9622 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9623}
9624
9626 // TODO: Support vectors.
9627 if (Y.getValueType().isVector())
9628 return false;
9629
9630 return !isa<ConstantSDNode>(Y);
9631}
9632
9634 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9635 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9636}
9637
9639 Type *Ty, bool IsSigned) const {
9640 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9641 return true;
9642
9643 return IsSigned;
9644}
9645
9647 // Return false to suppress the unnecessary extensions if the LibCall
9648 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9649 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9650 Type.getSizeInBits() < Subtarget.getGRLen()))
9651 return false;
9652 return true;
9653}
9654
 9655 // memcpy and other memory intrinsics typically try to use wider loads/stores
 9656 // if the source/dest is aligned and the copy size is large enough. We therefore
9657// want to align such objects passed to memory intrinsics.
9659 unsigned &MinSize,
9660 Align &PrefAlign) const {
9661 if (!isa<MemIntrinsic>(CI))
9662 return false;
9663
9664 if (Subtarget.is64Bit()) {
9665 MinSize = 8;
9666 PrefAlign = Align(8);
9667 } else {
9668 MinSize = 4;
9669 PrefAlign = Align(4);
9670 }
9671
9672 return true;
9673}
9674
9683
9684bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9685 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9686 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9687 bool IsABIRegCopy = CC.has_value();
9688 EVT ValueVT = Val.getValueType();
9689
9690 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9691 PartVT == MVT::f32) {
 9692 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
 9693 // NaN, and cast to f32.
9694 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9695 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9696 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9697 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9698 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9699 Parts[0] = Val;
9700 return true;
9701 }
9702
9703 return false;
9704}
9705
9706SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9707 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9708 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9709 bool IsABIRegCopy = CC.has_value();
9710
9711 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9712 PartVT == MVT::f32) {
9713 SDValue Val = Parts[0];
9714
9715 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9716 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9717 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9718 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9719 return Val;
9720 }
9721
9722 return SDValue();
9723}
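A bit-level sketch of the [b]f16-in-f32 convention implemented by the two hooks above (plain C++, not part of this file; the helper names are made up):

#include <cassert>
#include <cstdint>
#include <cstring>

// The 16-bit payload sits in the low half of an i32 whose upper half is all
// ones (making the f32 a NaN); the callee recovers it by truncating back.
uint32_t boxF16(uint16_t Half) { return 0xFFFF0000u | Half; }
uint16_t unboxF16(uint32_t Boxed) { return uint16_t(Boxed); }

int main() {
  uint16_t OneF16 = 0x3C00; // 1.0 in IEEE half precision
  uint32_t Boxed = boxF16(OneF16);
  float AsF32;
  std::memcpy(&AsF32, &Boxed, sizeof(AsF32)); // the value passed in an FPR32
  assert(AsF32 != AsF32);                     // it is a NaN, by construction
  assert(unboxF16(Boxed) == OneF16);          // the payload round-trips
  return 0;
}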
9724
9725MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9726 CallingConv::ID CC,
9727 EVT VT) const {
9728 // Use f32 to pass f16.
9729 if (VT == MVT::f16 && Subtarget.hasBasicF())
9730 return MVT::f32;
9731
9732 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
9733}
9734
9735unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9736 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9737 // Use f32 to pass f16.
9738 if (VT == MVT::f16 && Subtarget.hasBasicF())
9739 return 1;
9740
9741 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
9742}
9743
9744bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
9745 SDValue Op, const APInt &OriginalDemandedBits,
9746 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9747 unsigned Depth) const {
9748 EVT VT = Op.getValueType();
9749 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9750 unsigned Opc = Op.getOpcode();
9751 switch (Opc) {
9752 default:
9753 break;
9754 case LoongArchISD::VMSKLTZ:
9755 case LoongArchISD::XVMSKLTZ: {
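// [X]VMSKLTZ packs the sign bit of each source element into the low bits of
// the scalar result (bit i = element i's MSB), so demanded result bits map
// 1:1 onto demanded source elements.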
9756 SDValue Src = Op.getOperand(0);
9757 MVT SrcVT = Src.getSimpleValueType();
9758 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9759 unsigned NumElts = SrcVT.getVectorNumElements();
9760
9761 // If we don't need the sign bits at all just return zero.
9762 if (OriginalDemandedBits.countr_zero() >= NumElts)
9763 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9764
9765 // Only demand the vector elements of the sign bits we need.
9766 APInt KnownUndef, KnownZero;
9767 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9768 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9769 TLO, Depth + 1))
9770 return true;
9771
9772 Known.Zero = KnownZero.zext(BitWidth);
9773 Known.Zero.setHighBits(BitWidth - NumElts);
9774
9775 // [X]VMSKLTZ only uses the MSB from each vector element.
9776 KnownBits KnownSrc;
9777 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9778 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9779 Depth + 1))
9780 return true;
9781
9782 if (KnownSrc.One[SrcBits - 1])
9783 Known.One.setLowBits(NumElts);
9784 else if (KnownSrc.Zero[SrcBits - 1])
9785 Known.Zero.setLowBits(NumElts);
9786
9787 // Attempt to avoid multi-use ops if we don't need anything from it.
9788 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
9789 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9790 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9791 return false;
9792 }
9793 }
9794
9795 return TargetLowering::SimplifyDemandedBitsForTargetNode(
9796 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9797}
9798
9799bool LoongArchTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
9800 unsigned Opc = VecOp.getOpcode();
9801
9802 // Assume target opcodes can't be scalarized.
9803 // TODO - do we have any exceptions?
9804 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
9805 return false;
9806
9807 // If the vector op is not supported, try to convert to scalar.
9808 EVT VecVT = VecOp.getValueType();
9809 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
9810 return true;
9811
9812 // If the vector op is supported, but the scalar op is not, the transform may
9813 // not be worthwhile.
9814 EVT ScalarVT = VecVT.getScalarType();
9815 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
9816}
9817
9818bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
9819 unsigned Index) const {
9821 return false;
9822
9823 // Extracting a 128-bit subvector from index 0 of a 256-bit vector is free.
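// (The low 128 bits of an LASX register alias the corresponding LSX
// register, so no instruction is needed.)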
9824 return Index == 0;
9825}
9826
9827bool LoongArchTargetLowering::isExtractVecEltCheap(EVT VT,
9828 unsigned Index) const {
9829 EVT EltVT = VT.getScalarType();
9830
9831 // Extracting a scalar FP value from index 0 of a vector is free.
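// (Element 0 of an LSX/LASX register aliases the corresponding FPR, so no
// move is needed.)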
9832 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
9833}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static bool getVShiftAmt(SDValue Op, unsigned ElementBits, int64_t &Amt)
getVShiftAmt - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1414
bool isZero() const
Definition APFloat.h:1427
APInt bitcastToAPInt() const
Definition APFloat.h:1335
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1392
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:490
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
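A small sketch of the StringRef operations listed above; the "-key=value" option shape is an assumption made purely for illustration:

#include "llvm/ADT/StringRef.h"
using namespace llvm;

// Sketch: pick apart a "-key=value" style option string.
static std::pair<StringRef, StringRef> parseOption(StringRef Opt) {
  if (!Opt.starts_with("-")) // reject strings without the expected prefix
    return {StringRef(), StringRef()};
  // split() returns the text before and after the first '=' (the whole
  // string and an empty tail if '=' is absent).
  return Opt.drop_front().split('=');
}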
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a type is legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
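The opcodes above are composed with SelectionDAG::getNode. A hedged sketch follows; the particular combination is arbitrary and not taken from this file.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch: add two values at their native width, then widen the result.
static SDValue addThenZeroExtend(SelectionDAG &DAG, const SDLoc &DL,
                                 EVT WideVT, SDValue A, SDValue B) {
  SDValue Sum = DAG.getNode(ISD::ADD, DL, A.getValueType(), A, B);
  return DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Sum);
}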
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
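A tiny sketch combining the two condition-code helpers above: deriving the condition for "!(Y op X)" from the one given for "(X op Y)".

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Sketch: e.g. for SETLT on i32 operands this yields SETGE after swapping.
static ISD::CondCode invertAndSwap(ISD::CondCode CC, EVT OpVT) {
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(CC);
  return ISD::getSetCCInverse(Swapped, OpVT);
}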
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
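A hedged sketch of the lookup above for an intrinsic with no overloaded types; the choice of llvm.loongarch.dbar is only an example.

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Sketch: add the declaration to the module if it is not already present.
static Function *declareDBar(Module &M) {
  return Intrinsic::getOrInsertDeclaration(&M, Intrinsic::loongarch_dbar);
}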
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
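A one-liner sketch of the range-based wrapper above; the bounds check shown is an illustrative use, not this file's logic.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;

// Sketch: every mask index must be below NumElts (negative entries,
// conventionally meaning "undef", pass trivially here).
static bool maskIndicesInRange(ArrayRef<int> Mask, int NumElts) {
  return all_of(Mask, [NumElts](int M) { return M < NumElts; });
}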
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
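A sketch of the casting utilities above applied to DAG nodes: dyn_cast returns null when the node is not of the requested kind, whereas cast<> asserts and isa<> only answers the question.

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <optional>
using namespace llvm;

// Sketch: read an operand's value if it is a constant integer node.
static std::optional<uint64_t> getConstantOperandValue(SDValue Op) {
  if (auto *C = dyn_cast<ConstantSDNode>(Op))
    return C->getZExtValue();
  return std::nullopt;
}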
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or an FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
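A short sketch of the integer-property helpers listed above; the bit widths chosen here are illustrative and are not actual LoongArch immediate encodings.

#include "llvm/Support/MathExtras.h"
using namespace llvm;

// Sketch: signed/unsigned range checks for hypothetical immediate fields.
static bool fitsSimm12(int64_t Imm) { return isInt<12>(Imm); }
static bool fitsUimm5(uint64_t Imm) { return isUInt<5>(Imm); }

// For a power of two, Log2_64 yields the position of its single set bit.
static unsigned log2IfPow2(uint64_t V) {
  return isPowerOf2_64(V) ? Log2_64(V) : 0;
}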
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
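A minimal sketch combining a few of the EVT queries above; classifying a 256-bit integer vector such as v8i32 is an arbitrary example, not a check taken from this file.

#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

static bool isInt256Vector(EVT VT) {
  // is256BitVector already implies a vector type, so the element query is safe.
  return VT.is256BitVector() && VT.getVectorElementType().isInteger();
}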
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...