1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/IntrinsicsLoongArch.h"
32#include "llvm/Support/Debug.h"
37
38using namespace llvm;
39
40#define DEBUG_TYPE "loongarch-isel-lowering"
41
42STATISTIC(NumTailCalls, "Number of tail calls");
43
44static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
45 cl::desc("Trap on integer division by zero."),
46 cl::init(false));
47
48 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
49                                                  const LoongArchSubtarget &STI)
50     : TargetLowering(TM), Subtarget(STI) {
51
52 MVT GRLenVT = Subtarget.getGRLenVT();
53
54 // Set up the register classes.
55
56 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
57 if (Subtarget.hasBasicF())
58 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
59 if (Subtarget.hasBasicD())
60 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
61
62 static const MVT::SimpleValueType LSXVTs[] = {
63 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
64 static const MVT::SimpleValueType LASXVTs[] = {
65 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
66
67 if (Subtarget.hasExtLSX())
68 for (MVT VT : LSXVTs)
69 addRegisterClass(VT, &LoongArch::LSX128RegClass);
70
71 if (Subtarget.hasExtLASX())
72 for (MVT VT : LASXVTs)
73 addRegisterClass(VT, &LoongArch::LASX256RegClass);
74
75 // Set operations for LA32 and LA64.
76
78 MVT::i1, Promote);
79
86
89 GRLenVT, Custom);
90
92
97
100
104
106
107 // BITREV/REVB requires the 32S feature.
108 if (STI.has32S()) {
109 // For now, expand bitreverse.i16 with native-width bitrev and shift, until
110 // we know which of sll and revb.2h is faster.
113
114 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
115 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
116 // and i32 could still be byte-swapped relatively cheaply.
118 } else {
126 }
127
134
137
138 // Set operations for LA64 only.
139
140 if (Subtarget.is64Bit()) {
158
162 Custom);
164 }
165
166 // Set operations for LA32 only.
167
168 if (!Subtarget.is64Bit()) {
174 if (Subtarget.hasBasicD())
176 }
177
179
180 static const ISD::CondCode FPCCToExpand[] = {
183
184 // Set operations for 'F' feature.
185
186 if (Subtarget.hasBasicF()) {
187 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
188 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
190 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
191 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
192
210 Subtarget.isSoftFPABI() ? LibCall : Custom);
212 Subtarget.isSoftFPABI() ? LibCall : Custom);
215 Subtarget.isSoftFPABI() ? LibCall : Custom);
216
217 if (Subtarget.is64Bit())
219
220 if (!Subtarget.hasBasicD()) {
222 if (Subtarget.is64Bit()) {
225 }
226 }
227 }
228
229 // Set operations for 'D' feature.
230
231 if (Subtarget.hasBasicD()) {
232 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
234 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
235 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
236 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
237 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
238 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
239
258 Subtarget.isSoftFPABI() ? LibCall : Custom);
261 Subtarget.isSoftFPABI() ? LibCall : Custom);
262
263 if (Subtarget.is64Bit())
265 }
266
267 // Set operations for 'LSX' feature.
268
269 if (Subtarget.hasExtLSX()) {
271 // Expand all truncating stores and extending loads.
272 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
273 setTruncStoreAction(VT, InnerVT, Expand);
276 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
277 }
278 // By default everything must be expanded. Then we will selectively turn
279 // on ones that can be effectively codegen'd.
280 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
282 }
283
284 for (MVT VT : LSXVTs) {
288
292
297 }
298 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
301 Legal);
303 VT, Legal);
310 Expand);
314 }
315 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
317 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
319 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
322 }
323 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
331 VT, Expand);
333 }
335 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
336 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
337 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
338 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
339
340 for (MVT VT :
341 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
342 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
352 }
353 }
354
355 // Set operations for 'LASX' feature.
356
357 if (Subtarget.hasExtLASX()) {
358 for (MVT VT : LASXVTs) {
362
368
372 }
373 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
376 Legal);
378 VT, Legal);
385 Expand);
390 }
391 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
393 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
395 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
398 }
399 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
407 VT, Expand);
409 }
410 }
411
412 // Set DAG combine for LA32 and LA64.
413
418
419 // Set DAG combine for 'LSX' feature.
420
421 if (Subtarget.hasExtLSX()) {
424 }
425
426 // Compute derived properties from the register classes.
428
430
433
435
437
438 // Function alignments.
440 // Set preferred alignments.
444
445 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
446 if (Subtarget.hasLAMCAS())
448
449 if (Subtarget.hasSCQ()) {
452 }
453}
454
455 bool LoongArchTargetLowering::isOffsetFoldingLegal(
456     const GlobalAddressSDNode *GA) const {
457 // In order to maximise the opportunity for common subexpression elimination,
458 // keep a separate ADD node for the global address offset instead of folding
459 // it in the global address node. Later peephole optimisations may choose to
460 // fold it back in when profitable.
461 return false;
462}
463
464 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
465                                                 SelectionDAG &DAG) const {
466 switch (Op.getOpcode()) {
468 return lowerATOMIC_FENCE(Op, DAG);
470 return lowerEH_DWARF_CFA(Op, DAG);
472 return lowerGlobalAddress(Op, DAG);
474 return lowerGlobalTLSAddress(Op, DAG);
476 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
478 return lowerINTRINSIC_W_CHAIN(Op, DAG);
480 return lowerINTRINSIC_VOID(Op, DAG);
482 return lowerBlockAddress(Op, DAG);
483 case ISD::JumpTable:
484 return lowerJumpTable(Op, DAG);
485 case ISD::SHL_PARTS:
486 return lowerShiftLeftParts(Op, DAG);
487 case ISD::SRA_PARTS:
488 return lowerShiftRightParts(Op, DAG, true);
489 case ISD::SRL_PARTS:
490 return lowerShiftRightParts(Op, DAG, false);
492 return lowerConstantPool(Op, DAG);
493 case ISD::FP_TO_SINT:
494 return lowerFP_TO_SINT(Op, DAG);
495 case ISD::BITCAST:
496 return lowerBITCAST(Op, DAG);
497 case ISD::UINT_TO_FP:
498 return lowerUINT_TO_FP(Op, DAG);
499 case ISD::SINT_TO_FP:
500 return lowerSINT_TO_FP(Op, DAG);
501 case ISD::VASTART:
502 return lowerVASTART(Op, DAG);
503 case ISD::FRAMEADDR:
504 return lowerFRAMEADDR(Op, DAG);
505 case ISD::RETURNADDR:
506 return lowerRETURNADDR(Op, DAG);
508 return lowerWRITE_REGISTER(Op, DAG);
510 return lowerINSERT_VECTOR_ELT(Op, DAG);
512 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
514 return lowerBUILD_VECTOR(Op, DAG);
516 return lowerCONCAT_VECTORS(Op, DAG);
518 return lowerVECTOR_SHUFFLE(Op, DAG);
519 case ISD::BITREVERSE:
520 return lowerBITREVERSE(Op, DAG);
522 return lowerSCALAR_TO_VECTOR(Op, DAG);
523 case ISD::PREFETCH:
524 return lowerPREFETCH(Op, DAG);
525 case ISD::SELECT:
526 return lowerSELECT(Op, DAG);
527 case ISD::BRCOND:
528 return lowerBRCOND(Op, DAG);
529 case ISD::FP_TO_FP16:
530 return lowerFP_TO_FP16(Op, DAG);
531 case ISD::FP16_TO_FP:
532 return lowerFP16_TO_FP(Op, DAG);
533 case ISD::FP_TO_BF16:
534 return lowerFP_TO_BF16(Op, DAG);
535 case ISD::BF16_TO_FP:
536 return lowerBF16_TO_FP(Op, DAG);
538 return lowerVECREDUCE_ADD(Op, DAG);
546 return lowerVECREDUCE(Op, DAG);
547 }
548 return SDValue();
549}
550
551// Lower vecreduce_add using vhaddw instructions.
552// For Example:
553// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
554// can be lowered to:
555// VHADDW_D_W vr0, vr0, vr0
556// VHADDW_Q_D vr0, vr0, vr0
557// VPICKVE2GR_D a0, vr0, 0
558// ADDI_W a0, a0, 0
559SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
560 SelectionDAG &DAG) const {
561
562 SDLoc DL(Op);
563 MVT OpVT = Op.getSimpleValueType();
564 SDValue Val = Op.getOperand(0);
565
566 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
567 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
568
569 unsigned LegalVecSize = 128;
570 bool isLASX256Vector =
571 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
572
573 // Ensure the operand type is legal, widening the vector until it is.
574 while (!isTypeLegal(Val.getSimpleValueType())) {
575 Val = DAG.WidenVector(Val, DL);
576 }
577
578 // NumEles is used as the iteration count; v4i32 for LSX and v8i32 for
579 // LASX should end up with the same count.
580 if (isLASX256Vector) {
581 NumEles /= 2;
582 LegalVecSize = 256;
583 }
584
585 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
586 MVT IntTy = MVT::getIntegerVT(EleBits);
587 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
588 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
589 }
590
591 if (isLASX256Vector) {
592 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
593 DAG.getConstant(2, DL, MVT::i64));
594 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
595 }
596
597 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
598 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
599}
600
601// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
602// For Example:
603// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
604// can be lowered to:
605// VBSRL_V vr1, vr0, 8
606// VMAX_W vr0, vr1, vr0
607// VBSRL_V vr1, vr0, 4
608// VMAX_W vr0, vr1, vr0
609// VPICKVE2GR_W a0, vr0, 0
610 // A 256-bit vector is illegal here and is split into two 128-bit vectors
611 // by default before being processed by this.
612SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
613 SelectionDAG &DAG) const {
614 SDLoc DL(Op);
615
616 MVT OpVT = Op.getSimpleValueType();
617 SDValue Val = Op.getOperand(0);
618
619 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
620 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
621
622 // Ensure the operand type is legal, widening the vector until it is.
623 while (!isTypeLegal(Val.getSimpleValueType())) {
624 Val = DAG.WidenVector(Val, DL);
625 }
626
627 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
628 MVT VecTy = Val.getSimpleValueType();
629
630 for (int i = NumEles; i > 1; i /= 2) {
631 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, MVT::i64);
632 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
633 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
634 }
635
636 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
637 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
638}
639
640SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
641 SelectionDAG &DAG) const {
642 unsigned IsData = Op.getConstantOperandVal(4);
643
644 // We don't support non-data prefetch.
645 // Just preserve the chain.
646 if (!IsData)
647 return Op.getOperand(0);
648
649 return Op;
650}
651
652// Return true if Val is equal to (setcc LHS, RHS, CC).
653// Return false if Val is the inverse of (setcc LHS, RHS, CC).
654// Otherwise, return std::nullopt.
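// Illustrative examples (a and b are arbitrary operands): if Val is
// (setcc a, b, SETLT), then matchSetCC(a, b, SETLT, Val) returns true,
// matchSetCC(a, b, SETGE, Val) returns false (SETGE is the integer inverse
// of SETLT), and a call with unrelated operands returns std::nullopt.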
655static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
656 ISD::CondCode CC, SDValue Val) {
657 assert(Val->getOpcode() == ISD::SETCC);
658 SDValue LHS2 = Val.getOperand(0);
659 SDValue RHS2 = Val.getOperand(1);
660 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
661
662 if (LHS == LHS2 && RHS == RHS2) {
663 if (CC == CC2)
664 return true;
665 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
666 return false;
667 } else if (LHS == RHS2 && RHS == LHS2) {
668 CC2 = ISD::getSetCCSwappedOperands(CC2);
669 if (CC == CC2)
670 return true;
671 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
672 return false;
673 }
674
675 return std::nullopt;
676}
677
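// Fold selects of constants or setcc results into plain bitwise operations,
// e.g. (select c, -1, y) -> (-c | y) for a boolean condition c; the
// individual patterns handled are documented inline below.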
678 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
679                                     const LoongArchSubtarget &Subtarget) {
680 SDValue CondV = N->getOperand(0);
681 SDValue TrueV = N->getOperand(1);
682 SDValue FalseV = N->getOperand(2);
683 MVT VT = N->getSimpleValueType(0);
684 SDLoc DL(N);
685
686 // (select c, -1, y) -> -c | y
687 if (isAllOnesConstant(TrueV)) {
688 SDValue Neg = DAG.getNegative(CondV, DL, VT);
689 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
690 }
691 // (select c, y, -1) -> (c-1) | y
692 if (isAllOnesConstant(FalseV)) {
693 SDValue Neg =
694 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
695 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
696 }
697
698 // (select c, 0, y) -> (c-1) & y
699 if (isNullConstant(TrueV)) {
700 SDValue Neg =
701 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
702 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
703 }
704 // (select c, y, 0) -> -c & y
705 if (isNullConstant(FalseV)) {
706 SDValue Neg = DAG.getNegative(CondV, DL, VT);
707 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
708 }
709
710 // select c, ~x, x --> xor -c, x
711 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
712 const APInt &TrueVal = TrueV->getAsAPIntVal();
713 const APInt &FalseVal = FalseV->getAsAPIntVal();
714 if (~TrueVal == FalseVal) {
715 SDValue Neg = DAG.getNegative(CondV, DL, VT);
716 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
717 }
718 }
719
720 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
721 // when both truev and falsev are also setcc.
722 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
723 FalseV.getOpcode() == ISD::SETCC) {
724 SDValue LHS = CondV.getOperand(0);
725 SDValue RHS = CondV.getOperand(1);
726 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
727
728 // (select x, x, y) -> x | y
729 // (select !x, x, y) -> x & y
730 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
731 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
732 DAG.getFreeze(FalseV));
733 }
734 // (select x, y, x) -> x & y
735 // (select !x, y, x) -> x | y
736 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
737 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
738 DAG.getFreeze(TrueV), FalseV);
739 }
740 }
741
742 return SDValue();
743}
744
745// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
746// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
747// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
748// being `0` or `-1`. In such cases we can replace `select` with `and`.
749// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
750// than `c0`?
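// A worked example of the fold described above (cond and x are placeholders):
//   (xor (select cond, x, -1), -1)
// becomes
//   (select cond, (xor x, -1), 0)
// because xor(-1, -1) == 0; the select against zero can then be turned into
// an AND by combineSelectToBinOp.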
751static SDValue
752 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
753                                 const LoongArchSubtarget &Subtarget) {
754 unsigned SelOpNo = 0;
755 SDValue Sel = BO->getOperand(0);
756 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
757 SelOpNo = 1;
758 Sel = BO->getOperand(1);
759 }
760
761 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
762 return SDValue();
763
764 unsigned ConstSelOpNo = 1;
765 unsigned OtherSelOpNo = 2;
766 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
767 ConstSelOpNo = 2;
768 OtherSelOpNo = 1;
769 }
770 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
771 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
772 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
773 return SDValue();
774
775 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
776 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
777 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
778 return SDValue();
779
780 SDLoc DL(Sel);
781 EVT VT = BO->getValueType(0);
782
783 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
784 if (SelOpNo == 1)
785 std::swap(NewConstOps[0], NewConstOps[1]);
786
787 SDValue NewConstOp =
788 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
789 if (!NewConstOp)
790 return SDValue();
791
792 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
793 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
794 return SDValue();
795
796 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
797 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
798 if (SelOpNo == 1)
799 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
800 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
801
802 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
803 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
804 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
805}
806
807// Changes the condition code and swaps operands if necessary, so the SetCC
808// operation matches one of the comparisons supported directly by branches
809// in the LoongArch ISA. May adjust compares to favor compare with 0 over
810// compare with 1/-1.
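// For example (x is a placeholder GRLenVT value):
//   (setcc x, 1, setlt)  -> (setcc 0, x, setge)   // X < 1  becomes 0 >= X
//   (setcc x, -1, setgt) -> (setcc x, 0, setge)   // X > -1 becomes X >= 0
// so that both comparisons end up being made against zero.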
811static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
812 ISD::CondCode &CC, SelectionDAG &DAG) {
813 // If this is a single bit test that can't be handled by ANDI, shift the
814 // bit to be tested to the MSB and perform a signed compare with 0.
815 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
816 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
817 isa<ConstantSDNode>(LHS.getOperand(1))) {
818 uint64_t Mask = LHS.getConstantOperandVal(1);
819 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
820 unsigned ShAmt = 0;
821 if (isPowerOf2_64(Mask)) {
822 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
823 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
824 } else {
825 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
826 }
827
828 LHS = LHS.getOperand(0);
829 if (ShAmt != 0)
830 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
831 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
832 return;
833 }
834 }
835
836 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
837 int64_t C = RHSC->getSExtValue();
838 switch (CC) {
839 default:
840 break;
841 case ISD::SETGT:
842 // Convert X > -1 to X >= 0.
843 if (C == -1) {
844 RHS = DAG.getConstant(0, DL, RHS.getValueType());
845 CC = ISD::SETGE;
846 return;
847 }
848 break;
849 case ISD::SETLT:
850 // Convert X < 1 to 0 >= X.
851 if (C == 1) {
852 RHS = LHS;
853 LHS = DAG.getConstant(0, DL, RHS.getValueType());
854 CC = ISD::SETGE;
855 return;
856 }
857 break;
858 }
859 }
860
861 switch (CC) {
862 default:
863 break;
864 case ISD::SETGT:
865 case ISD::SETLE:
866 case ISD::SETUGT:
867 case ISD::SETULE:
869 std::swap(LHS, RHS);
870 break;
871 }
872}
873
874SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
875 SelectionDAG &DAG) const {
876 SDValue CondV = Op.getOperand(0);
877 SDValue TrueV = Op.getOperand(1);
878 SDValue FalseV = Op.getOperand(2);
879 SDLoc DL(Op);
880 MVT VT = Op.getSimpleValueType();
881 MVT GRLenVT = Subtarget.getGRLenVT();
882
883 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
884 return V;
885
886 if (Op.hasOneUse()) {
887 unsigned UseOpc = Op->user_begin()->getOpcode();
888 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
889 SDNode *BinOp = *Op->user_begin();
890 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
891 DAG, Subtarget)) {
892 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
893 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
894 // may return a constant node and cause a crash in lowerSELECT.
895 if (NewSel.getOpcode() == ISD::SELECT)
896 return lowerSELECT(NewSel, DAG);
897 return NewSel;
898 }
899 }
900 }
901
902 // If the condition is not an integer SETCC which operates on GRLenVT, we need
903 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
904 // (select condv, truev, falsev)
905 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
906 if (CondV.getOpcode() != ISD::SETCC ||
907 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
908 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
909 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
910
911 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
912
913 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
914 }
915
916 // If the CondV is the output of a SETCC node which operates on GRLenVT
917 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
918 // to take advantage of the integer compare+branch instructions. i.e.: (select
919 // (setcc lhs, rhs, cc), truev, falsev)
920 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
921 SDValue LHS = CondV.getOperand(0);
922 SDValue RHS = CondV.getOperand(1);
923 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
924
925 // Special case for a select of 2 constants that have a difference of 1.
926 // Normally this is done by DAGCombine, but if the select is introduced by
927 // type legalization or op legalization, we miss it. Restricting to SETLT
928 // case for now because that is what signed saturating add/sub need.
929 // FIXME: We don't need the condition to be SETLT or even a SETCC,
930 // but we would probably want to swap the true/false values if the condition
931 // is SETGE/SETLE to avoid an XORI.
932 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
933 CCVal == ISD::SETLT) {
934 const APInt &TrueVal = TrueV->getAsAPIntVal();
935 const APInt &FalseVal = FalseV->getAsAPIntVal();
936 if (TrueVal - 1 == FalseVal)
937 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
938 if (TrueVal + 1 == FalseVal)
939 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
940 }
941
942 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
943 // 1 < x ? x : 1 -> 0 < x ? x : 1
944 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
945 RHS == TrueV && LHS == FalseV) {
946 LHS = DAG.getConstant(0, DL, VT);
947 // 0 <u x is the same as x != 0.
948 if (CCVal == ISD::SETULT) {
949 std::swap(LHS, RHS);
950 CCVal = ISD::SETNE;
951 }
952 }
953
954 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
955 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
956 RHS == FalseV) {
957 RHS = DAG.getConstant(0, DL, VT);
958 }
959
960 SDValue TargetCC = DAG.getCondCode(CCVal);
961
962 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
963 // (select (setcc lhs, rhs, CC), constant, falsev)
964 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
965 std::swap(TrueV, FalseV);
966 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
967 }
968
969 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
970 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
971}
972
973SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
974 SelectionDAG &DAG) const {
975 SDValue CondV = Op.getOperand(1);
976 SDLoc DL(Op);
977 MVT GRLenVT = Subtarget.getGRLenVT();
978
979 if (CondV.getOpcode() == ISD::SETCC) {
980 if (CondV.getOperand(0).getValueType() == GRLenVT) {
981 SDValue LHS = CondV.getOperand(0);
982 SDValue RHS = CondV.getOperand(1);
983 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
984
985 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
986
987 SDValue TargetCC = DAG.getCondCode(CCVal);
988 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
989 Op.getOperand(0), LHS, RHS, TargetCC,
990 Op.getOperand(2));
991 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
992 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
993 Op.getOperand(0), CondV, Op.getOperand(2));
994 }
995 }
996
997 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
998 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
999 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1000}
1001
1002SDValue
1003LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1004 SelectionDAG &DAG) const {
1005 SDLoc DL(Op);
1006 MVT OpVT = Op.getSimpleValueType();
1007
1008 SDValue Vector = DAG.getUNDEF(OpVT);
1009 SDValue Val = Op.getOperand(0);
1010 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1011
1012 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1013}
1014
1015SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1016 SelectionDAG &DAG) const {
1017 EVT ResTy = Op->getValueType(0);
1018 SDValue Src = Op->getOperand(0);
1019 SDLoc DL(Op);
1020
1021 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1022 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1023 unsigned int NewEltNum = NewVT.getVectorNumElements();
1024
1025 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1026
1028 for (unsigned int i = 0; i < NewEltNum; i++) {
1029 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1030 DAG.getConstant(i, DL, MVT::i64));
1031 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1032 ? (unsigned)LoongArchISD::BITREV_8B
1034 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1035 }
1036 SDValue Res =
1037 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1038
1039 switch (ResTy.getSimpleVT().SimpleTy) {
1040 default:
1041 return SDValue();
1042 case MVT::v16i8:
1043 case MVT::v32i8:
1044 return Res;
1045 case MVT::v8i16:
1046 case MVT::v16i16:
1047 case MVT::v4i32:
1048 case MVT::v8i32: {
1050 for (unsigned int i = 0; i < NewEltNum; i++)
1051 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1052 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1053 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1054 }
1055 }
1056}
1057
1058// Widen element type to get a new mask value (if possible).
1059// For example:
1060// shufflevector <4 x i32> %a, <4 x i32> %b,
1061// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1062// is equivalent to:
1063// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1064// can be lowered to:
1065// VPACKOD_D vr0, vr0, vr1
1067 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1068 unsigned EltBits = VT.getScalarSizeInBits();
1069
1070 if (EltBits > 32 || EltBits == 1)
1071 return SDValue();
1072
1073 SmallVector<int, 8> NewMask;
1074 if (widenShuffleMaskElts(Mask, NewMask)) {
1075 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1076 : MVT::getIntegerVT(EltBits * 2);
1077 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1078 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1079 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1080 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1081 return DAG.getBitcast(
1082 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1083 }
1084 }
1085
1086 return SDValue();
1087}
1088
1089 /// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1090 /// instructions.
1091 // The function matches elements from one of the input vectors shuffled to the
1092 // left or right with zeroable elements 'shifted in'. It handles both the
1093 // strictly bit-wise element shifts and the byte shift across an entire 128-bit
1094 // lane.
1095// Mostly copied from X86.
1096static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1097 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1098 int MaskOffset, const APInt &Zeroable) {
1099 int Size = Mask.size();
1100 unsigned SizeInBits = Size * ScalarSizeInBits;
1101
1102 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1103 for (int i = 0; i < Size; i += Scale)
1104 for (int j = 0; j < Shift; ++j)
1105 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1106 return false;
1107
1108 return true;
1109 };
1110
1111 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1112 int Step = 1) {
1113 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1114 if (!(Mask[i] == -1 || Mask[i] == Low))
1115 return false;
1116 return true;
1117 };
1118
1119 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1120 for (int i = 0; i != Size; i += Scale) {
1121 unsigned Pos = Left ? i + Shift : i;
1122 unsigned Low = Left ? i : i + Shift;
1123 unsigned Len = Scale - Shift;
1124 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1125 return -1;
1126 }
1127
1128 int ShiftEltBits = ScalarSizeInBits * Scale;
1129 bool ByteShift = ShiftEltBits > 64;
1130 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1131 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1132 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1133
1134 // Normalize the scale for byte shifts to still produce an i64 element
1135 // type.
1136 Scale = ByteShift ? Scale / 2 : Scale;
1137
1138 // We need to round trip through the appropriate type for the shift.
1139 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1140 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1141 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1142 return (int)ShiftAmt;
1143 };
1144
1145 unsigned MaxWidth = 128;
1146 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1147 for (int Shift = 1; Shift != Scale; ++Shift)
1148 for (bool Left : {true, false})
1149 if (CheckZeros(Shift, Scale, Left)) {
1150 int ShiftAmt = MatchShift(Shift, Scale, Left);
1151 if (0 < ShiftAmt)
1152 return ShiftAmt;
1153 }
1154
1155 // no match
1156 return -1;
1157}
1158
1159/// Lower VECTOR_SHUFFLE as shift (if possible).
1160///
1161/// For example:
1162/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1163/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1164/// is lowered to:
1165/// (VBSLL_V $v0, $v0, 4)
1166///
1167/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1168/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1169/// is lowered to:
1170/// (VSLLI_D $v0, $v0, 32)
1172 MVT VT, SDValue V1, SDValue V2,
1173 SelectionDAG &DAG,
1174 const LoongArchSubtarget &Subtarget,
1175 const APInt &Zeroable) {
1176 int Size = Mask.size();
1177 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1178
1179 MVT ShiftVT;
1180 SDValue V = V1;
1181 unsigned Opcode;
1182
1183 // Try to match shuffle against V1 shift.
1184 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1185 Mask, 0, Zeroable);
1186
1187 // If V1 failed, try to match shuffle against V2 shift.
1188 if (ShiftAmt < 0) {
1189 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1190 Mask, Size, Zeroable);
1191 V = V2;
1192 }
1193
1194 if (ShiftAmt < 0)
1195 return SDValue();
1196
1197 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1198 "Illegal integer vector type");
1199 V = DAG.getBitcast(ShiftVT, V);
1200 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1201 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1202 return DAG.getBitcast(VT, V);
1203}
1204
1205/// Determine whether a range fits a regular pattern of values.
1206/// This function accounts for the possibility of jumping over the End iterator.
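/// For example, fitsRegularPattern<int>(Mask.begin(), 2, Mask.end(), 0, 2)
/// checks that the even-indexed mask entries are <0, 2, 4, ...>, with -1
/// entries treated as "don't care"; this is how lowerVECTOR_SHUFFLE_VPACKEV
/// tests its first operand below.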
1207template <typename ValType>
1208static bool
1210 unsigned CheckStride,
1212 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1213 auto &I = Begin;
1214
1215 while (I != End) {
1216 if (*I != -1 && *I != ExpectedIndex)
1217 return false;
1218 ExpectedIndex += ExpectedIndexStride;
1219
1220 // Incrementing past End is undefined behaviour so we must increment one
1221 // step at a time and check for End at each step.
1222 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1223 ; // Empty loop body.
1224 }
1225 return true;
1226}
1227
1228/// Compute whether each element of a shuffle is zeroable.
1229///
1230/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1232 SDValue V2, APInt &KnownUndef,
1233 APInt &KnownZero) {
1234 int Size = Mask.size();
1235 KnownUndef = KnownZero = APInt::getZero(Size);
1236
1237 V1 = peekThroughBitcasts(V1);
1238 V2 = peekThroughBitcasts(V2);
1239
1240 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1241 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1242
1243 int VectorSizeInBits = V1.getValueSizeInBits();
1244 int ScalarSizeInBits = VectorSizeInBits / Size;
1245 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1246 (void)ScalarSizeInBits;
1247
1248 for (int i = 0; i < Size; ++i) {
1249 int M = Mask[i];
1250 if (M < 0) {
1251 KnownUndef.setBit(i);
1252 continue;
1253 }
1254 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1255 KnownZero.setBit(i);
1256 continue;
1257 }
1258 }
1259}
1260
1261/// Test whether a shuffle mask is equivalent within each sub-lane.
1262///
1263/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1264/// non-trivial to compute in the face of undef lanes. The representation is
1265/// suitable for use with existing 128-bit shuffles as entries from the second
1266/// vector have been remapped to [LaneSize, 2*LaneSize).
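/// For example, with 128-bit lanes and a v8i32 mask <0, 9, 2, 11, 4, 13, 6, 15>
/// (four elements per lane), the repeated mask is <0, 5, 2, 7>, with entries
/// from the second vector remapped into [4, 8).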
1267static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1268 ArrayRef<int> Mask,
1269 SmallVectorImpl<int> &RepeatedMask) {
1270 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1271 RepeatedMask.assign(LaneSize, -1);
1272 int Size = Mask.size();
1273 for (int i = 0; i < Size; ++i) {
1274 assert(Mask[i] == -1 || Mask[i] >= 0);
1275 if (Mask[i] < 0)
1276 continue;
1277 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1278 // This entry crosses lanes, so there is no way to model this shuffle.
1279 return false;
1280
1281 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1282 // Adjust second vector indices to start at LaneSize instead of Size.
1283 int LocalM =
1284 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1285 if (RepeatedMask[i % LaneSize] < 0)
1286 // This is the first non-undef entry in this slot of a 128-bit lane.
1287 RepeatedMask[i % LaneSize] = LocalM;
1288 else if (RepeatedMask[i % LaneSize] != LocalM)
1289 // Found a mismatch with the repeated mask.
1290 return false;
1291 }
1292 return true;
1293}
1294
1295/// Attempts to match vector shuffle as byte rotation.
1297 ArrayRef<int> Mask) {
1298
1299 SDValue Lo, Hi;
1300 SmallVector<int, 16> RepeatedMask;
1301
1302 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1303 return -1;
1304
1305 int NumElts = RepeatedMask.size();
1306 int Rotation = 0;
1307 int Scale = 16 / NumElts;
1308
1309 for (int i = 0; i < NumElts; ++i) {
1310 int M = RepeatedMask[i];
1311 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1312 "Unexpected mask index.");
1313 if (M < 0)
1314 continue;
1315
1316 // Determine where a rotated vector would have started.
1317 int StartIdx = i - (M % NumElts);
1318 if (StartIdx == 0)
1319 return -1;
1320
1321 // If we found the tail of a vector the rotation must be the missing
1322 // front. If we found the head of a vector, it must be how much of the
1323 // head.
1324 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1325
1326 if (Rotation == 0)
1327 Rotation = CandidateRotation;
1328 else if (Rotation != CandidateRotation)
1329 return -1;
1330
1331 // Compute which value this mask is pointing at.
1332 SDValue MaskV = M < NumElts ? V1 : V2;
1333
1334 // Compute which of the two target values this index should be assigned
1335 // to. This reflects whether the high elements are remaining or the low
1336 // elements are remaining.
1337 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1338
1339 // Either set up this value if we've not encountered it before, or check
1340 // that it remains consistent.
1341 if (!TargetV)
1342 TargetV = MaskV;
1343 else if (TargetV != MaskV)
1344 return -1;
1345 }
1346
1347 // Check that we successfully analyzed the mask, and normalize the results.
1348 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1349 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1350 if (!Lo)
1351 Lo = Hi;
1352 else if (!Hi)
1353 Hi = Lo;
1354
1355 V1 = Lo;
1356 V2 = Hi;
1357
1358 return Rotation * Scale;
1359}
1360
1361/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1362///
1363/// For example:
1364/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1365/// <2 x i32> <i32 3, i32 0>
1366/// is lowered to:
1367/// (VBSRL_V $v1, $v1, 8)
1368/// (VBSLL_V $v0, $v0, 8)
1369/// (VOR_V $v0, $V0, $v1)
1370static SDValue
1372 SDValue V1, SDValue V2, SelectionDAG &DAG,
1373 const LoongArchSubtarget &Subtarget) {
1374
1375 SDValue Lo = V1, Hi = V2;
1376 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1377 if (ByteRotation <= 0)
1378 return SDValue();
1379
1380 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1381 Lo = DAG.getBitcast(ByteVT, Lo);
1382 Hi = DAG.getBitcast(ByteVT, Hi);
1383
1384 int LoByteShift = 16 - ByteRotation;
1385 int HiByteShift = ByteRotation;
1386 MVT GRLenVT = Subtarget.getGRLenVT();
1387
1388 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1389 DAG.getConstant(LoByteShift, DL, GRLenVT));
1390 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1391 DAG.getConstant(HiByteShift, DL, GRLenVT));
1392 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1393}
1394
1395/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1396///
1397/// For example:
1398/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1399/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1400/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1401/// is lowered to:
1402/// (VREPLI $v1, 0)
1403/// (VILVL $v0, $v1, $v0)
1405 ArrayRef<int> Mask, MVT VT,
1406 SDValue V1, SDValue V2,
1407 SelectionDAG &DAG,
1408 const APInt &Zeroable) {
1409 int Bits = VT.getSizeInBits();
1410 int EltBits = VT.getScalarSizeInBits();
1411 int NumElements = VT.getVectorNumElements();
1412
1413 if (Zeroable.isAllOnes())
1414 return DAG.getConstant(0, DL, VT);
1415
1416 // Define a helper function to check a particular ext-scale and lower to it if
1417 // valid.
1418 auto Lower = [&](int Scale) -> SDValue {
1419 SDValue InputV;
1420 bool AnyExt = true;
1421 int Offset = 0;
1422 for (int i = 0; i < NumElements; i++) {
1423 int M = Mask[i];
1424 if (M < 0)
1425 continue;
1426 if (i % Scale != 0) {
1427 // Each of the extended elements need to be zeroable.
1428 if (!Zeroable[i])
1429 return SDValue();
1430
1431 AnyExt = false;
1432 continue;
1433 }
1434
1435 // Each of the base elements needs to be consecutive indices into the
1436 // same input vector.
1437 SDValue V = M < NumElements ? V1 : V2;
1438 M = M % NumElements;
1439 if (!InputV) {
1440 InputV = V;
1441 Offset = M - (i / Scale);
1442
1443 // These offsets can't be handled.
1444 if (Offset % (NumElements / Scale))
1445 return SDValue();
1446 } else if (InputV != V)
1447 return SDValue();
1448
1449 if (M != (Offset + (i / Scale)))
1450 return SDValue(); // Non-consecutive strided elements.
1451 }
1452
1453 // If we fail to find an input, we have a zero-shuffle which should always
1454 // have already been handled.
1455 if (!InputV)
1456 return SDValue();
1457
1458 do {
1459 unsigned VilVLoHi = LoongArchISD::VILVL;
1460 if (Offset >= (NumElements / 2)) {
1461 VilVLoHi = LoongArchISD::VILVH;
1462 Offset -= (NumElements / 2);
1463 }
1464
1465 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1466 SDValue Ext =
1467 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1468 InputV = DAG.getBitcast(InputVT, InputV);
1469 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1470 Scale /= 2;
1471 EltBits *= 2;
1472 NumElements /= 2;
1473 } while (Scale > 1);
1474 return DAG.getBitcast(VT, InputV);
1475 };
1476
1477 // Each iteration, try extending the elements half as much, but into twice as
1478 // many elements.
1479 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1480 NumExtElements *= 2) {
1481 if (SDValue V = Lower(NumElements / NumExtElements))
1482 return V;
1483 }
1484 return SDValue();
1485}
1486
1487/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1488///
1489/// VREPLVEI performs vector broadcast based on an element specified by an
1490/// integer immediate, with its mask being similar to:
1491/// <x, x, x, ...>
1492/// where x is any valid index.
1493///
1494/// When undef's appear in the mask they are treated as if they were whatever
1495/// value is necessary in order to fit the above form.
1496static SDValue
1498 SDValue V1, SDValue V2, SelectionDAG &DAG,
1499 const LoongArchSubtarget &Subtarget) {
1500 int SplatIndex = -1;
1501 for (const auto &M : Mask) {
1502 if (M != -1) {
1503 SplatIndex = M;
1504 break;
1505 }
1506 }
1507
1508 if (SplatIndex == -1)
1509 return DAG.getUNDEF(VT);
1510
1511 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1512 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1513 APInt Imm(64, SplatIndex);
1514 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1515 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1516 }
1517
1518 return SDValue();
1519}
1520
1521/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1522///
1523/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1524/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1525///
1526/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1527/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1528/// When undef's appear they are treated as if they were whatever value is
1529/// necessary in order to fit the above forms.
1530///
1531/// For example:
1532/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1533/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1534/// i32 7, i32 6, i32 5, i32 4>
1535/// is lowered to:
1536/// (VSHUF4I_H $v0, $v1, 27)
1537/// where the 27 comes from:
1538/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1539static SDValue
1541 SDValue V1, SDValue V2, SelectionDAG &DAG,
1542 const LoongArchSubtarget &Subtarget) {
1543
1544 unsigned SubVecSize = 4;
1545 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1546 SubVecSize = 2;
1547
1548 int SubMask[4] = {-1, -1, -1, -1};
1549 for (unsigned i = 0; i < SubVecSize; ++i) {
1550 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1551 int M = Mask[j];
1552
1553 // Convert from vector index to 4-element subvector index
1554 // If an index refers to an element outside of the subvector then give up
1555 if (M != -1) {
1556 M -= 4 * (j / SubVecSize);
1557 if (M < 0 || M >= 4)
1558 return SDValue();
1559 }
1560
1561 // If the mask has an undef, replace it with the current index.
1562 // Note that it might still be undef if the current index is also undef
1563 if (SubMask[i] == -1)
1564 SubMask[i] = M;
1565 // Check that non-undef values are the same as in the mask. If they
1566 // aren't then give up
1567 else if (M != -1 && M != SubMask[i])
1568 return SDValue();
1569 }
1570 }
1571
1572 // Calculate the immediate. Replace any remaining undefs with zero
1573 APInt Imm(64, 0);
1574 for (int i = SubVecSize - 1; i >= 0; --i) {
1575 int M = SubMask[i];
1576
1577 if (M == -1)
1578 M = 0;
1579
1580 Imm <<= 2;
1581 Imm |= M & 0x3;
1582 }
1583
1584 MVT GRLenVT = Subtarget.getGRLenVT();
1585
1586 // Return vshuf4i.d
1587 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1588 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1589 DAG.getConstant(Imm, DL, GRLenVT));
1590
1591 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1592 DAG.getConstant(Imm, DL, GRLenVT));
1593}
1594
1595/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1596///
1597/// VPACKEV interleaves the even elements from each vector.
1598///
1599/// It is possible to lower into VPACKEV when the mask consists of two of the
1600/// following forms interleaved:
1601/// <0, 2, 4, ...>
1602/// <n, n+2, n+4, ...>
1603/// where n is the number of elements in the vector.
1604/// For example:
1605/// <0, 0, 2, 2, 4, 4, ...>
1606/// <0, n, 2, n+2, 4, n+4, ...>
1607///
1608/// When undef's appear in the mask they are treated as if they were whatever
1609/// value is necessary in order to fit the above forms.
1611 MVT VT, SDValue V1, SDValue V2,
1612 SelectionDAG &DAG) {
1613
1614 const auto &Begin = Mask.begin();
1615 const auto &End = Mask.end();
1616 SDValue OriV1 = V1, OriV2 = V2;
1617
1618 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1619 V1 = OriV1;
1620 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1621 V1 = OriV2;
1622 else
1623 return SDValue();
1624
1625 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1626 V2 = OriV1;
1627 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1628 V2 = OriV2;
1629 else
1630 return SDValue();
1631
1632 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1633}
1634
1635/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1636///
1637/// VPACKOD interleaves the odd elements from each vector.
1638///
1639/// It is possible to lower into VPACKOD when the mask consists of two of the
1640/// following forms interleaved:
1641/// <1, 3, 5, ...>
1642/// <n+1, n+3, n+5, ...>
1643/// where n is the number of elements in the vector.
1644/// For example:
1645/// <1, 1, 3, 3, 5, 5, ...>
1646/// <1, n+1, 3, n+3, 5, n+5, ...>
1647///
1648/// When undef's appear in the mask they are treated as if they were whatever
1649/// value is necessary in order to fit the above forms.
1651 MVT VT, SDValue V1, SDValue V2,
1652 SelectionDAG &DAG) {
1653
1654 const auto &Begin = Mask.begin();
1655 const auto &End = Mask.end();
1656 SDValue OriV1 = V1, OriV2 = V2;
1657
1658 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1659 V1 = OriV1;
1660 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1661 V1 = OriV2;
1662 else
1663 return SDValue();
1664
1665 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1666 V2 = OriV1;
1667 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1668 V2 = OriV2;
1669 else
1670 return SDValue();
1671
1672 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1673}
1674
1675/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1676///
1677/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1678/// of each vector.
1679///
1680/// It is possible to lower into VILVH when the mask consists of two of the
1681/// following forms interleaved:
1682/// <x, x+1, x+2, ...>
1683/// <n+x, n+x+1, n+x+2, ...>
1684/// where n is the number of elements in the vector and x is half n.
1685/// For example:
1686/// <x, x, x+1, x+1, x+2, x+2, ...>
1687/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1688///
1689/// When undef's appear in the mask they are treated as if they were whatever
1690/// value is necessary in order to fit the above forms.
1692 MVT VT, SDValue V1, SDValue V2,
1693 SelectionDAG &DAG) {
1694
1695 const auto &Begin = Mask.begin();
1696 const auto &End = Mask.end();
1697 unsigned HalfSize = Mask.size() / 2;
1698 SDValue OriV1 = V1, OriV2 = V2;
1699
1700 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1701 V1 = OriV1;
1702 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1703 V1 = OriV2;
1704 else
1705 return SDValue();
1706
1707 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1708 V2 = OriV1;
1709 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1710 1))
1711 V2 = OriV2;
1712 else
1713 return SDValue();
1714
1715 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1716}
1717
1718/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1719///
1720/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1721/// of each vector.
1722///
1723/// It is possible to lower into VILVL when the mask consists of two of the
1724/// following forms interleaved:
1725/// <0, 1, 2, ...>
1726/// <n, n+1, n+2, ...>
1727/// where n is the number of elements in the vector.
1728/// For example:
1729/// <0, 0, 1, 1, 2, 2, ...>
1730/// <0, n, 1, n+1, 2, n+2, ...>
1731///
1732/// When undef's appear in the mask they are treated as if they were whatever
1733/// value is necessary in order to fit the above forms.
1735 MVT VT, SDValue V1, SDValue V2,
1736 SelectionDAG &DAG) {
1737
1738 const auto &Begin = Mask.begin();
1739 const auto &End = Mask.end();
1740 SDValue OriV1 = V1, OriV2 = V2;
1741
1742 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1743 V1 = OriV1;
1744 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1745 V1 = OriV2;
1746 else
1747 return SDValue();
1748
1749 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1750 V2 = OriV1;
1751 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1752 V2 = OriV2;
1753 else
1754 return SDValue();
1755
1756 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1757}
1758
1759/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1760///
1761/// VPICKEV copies the even elements of each vector into the result vector.
1762///
1763/// It is possible to lower into VPICKEV when the mask consists of two of the
1764/// following forms concatenated:
1765/// <0, 2, 4, ...>
1766/// <n, n+2, n+4, ...>
1767/// where n is the number of elements in the vector.
1768/// For example:
1769/// <0, 2, 4, ..., 0, 2, 4, ...>
1770/// <0, 2, 4, ..., n, n+2, n+4, ...>
1771///
1772/// When undef's appear in the mask they are treated as if they were whatever
1773/// value is necessary in order to fit the above forms.
1775 MVT VT, SDValue V1, SDValue V2,
1776 SelectionDAG &DAG) {
1777
1778 const auto &Begin = Mask.begin();
1779 const auto &Mid = Mask.begin() + Mask.size() / 2;
1780 const auto &End = Mask.end();
1781 SDValue OriV1 = V1, OriV2 = V2;
1782
1783 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1784 V1 = OriV1;
1785 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1786 V1 = OriV2;
1787 else
1788 return SDValue();
1789
1790 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1791 V2 = OriV1;
1792 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1793 V2 = OriV2;
1794
1795 else
1796 return SDValue();
1797
1798 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1799}
1800
1801/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1802///
1803/// VPICKOD copies the odd elements of each vector into the result vector.
1804///
1805/// It is possible to lower into VPICKOD when the mask consists of two of the
1806/// following forms concatenated:
1807/// <1, 3, 5, ...>
1808/// <n+1, n+3, n+5, ...>
1809/// where n is the number of elements in the vector.
1810/// For example:
1811/// <1, 3, 5, ..., 1, 3, 5, ...>
1812/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1813///
1814/// When undef's appear in the mask they are treated as if they were whatever
1815/// value is necessary in order to fit the above forms.
1817 MVT VT, SDValue V1, SDValue V2,
1818 SelectionDAG &DAG) {
1819
1820 const auto &Begin = Mask.begin();
1821 const auto &Mid = Mask.begin() + Mask.size() / 2;
1822 const auto &End = Mask.end();
1823 SDValue OriV1 = V1, OriV2 = V2;
1824
1825 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1826 V1 = OriV1;
1827 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1828 V1 = OriV2;
1829 else
1830 return SDValue();
1831
1832 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1833 V2 = OriV1;
1834 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1835 V2 = OriV2;
1836 else
1837 return SDValue();
1838
1839 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1840}
1841
1842/// Lower VECTOR_SHUFFLE into VSHUF.
1843///
1844/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1845/// adding it as an operand to the resulting VSHUF.
1847 MVT VT, SDValue V1, SDValue V2,
1848 SelectionDAG &DAG) {
1849
1851 for (auto M : Mask)
1852 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
1853
1854 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1855 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1856
1857 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1858 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1859 // VSHUF concatenates the vectors in a bitwise fashion:
1860 // <0b00, 0b01> + <0b10, 0b11> ->
1861 // 0b0100 + 0b1110 -> 0b01001110
1862 // <0b10, 0b11, 0b00, 0b01>
1863 // We must therefore swap the operands to get the correct result.
1864 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1865}
1866
1867/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1868///
1869/// This routine breaks down the specific type of 128-bit shuffle and
1870/// dispatches to the lowering routines accordingly.
1872 SDValue V1, SDValue V2, SelectionDAG &DAG,
1873 const LoongArchSubtarget &Subtarget) {
1874 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1875 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1876 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1877 "Vector type is unsupported for lsx!");
1879 "Two operands have different types!");
1880 assert(VT.getVectorNumElements() == Mask.size() &&
1881 "Unexpected mask size for shuffle!");
1882 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1883
1884 APInt KnownUndef, KnownZero;
1885 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1886 APInt Zeroable = KnownUndef | KnownZero;
1887
1888 SDValue Result;
1889 // TODO: Add more comparison patterns.
1890 if (V2.isUndef()) {
1891 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG,
1892 Subtarget)))
1893 return Result;
1894 if ((Result =
1895 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1896 return Result;
1897
1898 // TODO: The commented-out assignment below may be enabled in the future to
1899 // better match the pattern for instruction selection.
1900 /* V2 = V1; */
1901 }
1902
1903 // The pattern comparison order below was chosen for performance; it is
1904 // recommended not to change it.
1905 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
1906 return Result;
1907 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
1908 return Result;
1909 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
1910 return Result;
1911 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
1912 return Result;
1913 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
1914 return Result;
1915 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
1916 return Result;
1917 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
1918 (Result =
1919 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1920 return Result;
1921 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
1922 Zeroable)))
1923 return Result;
1924 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
1925 Zeroable)))
1926 return Result;
1927 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
1928 Subtarget)))
1929 return Result;
1930 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
1931 return NewShuffle;
1932 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
1933 return Result;
1934 return SDValue();
1935}
1936
1937/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
1938///
1939 /// It is an XVREPLVEI when the mask is:
1940 /// <x, x, x, ..., x+n, x+n, x+n, ...>
1941 /// where the number of x's equals n and n is half the vector length.
1942///
1943/// When undef's appear in the mask they are treated as if they were whatever
1944/// value is necessary in order to fit the above form.
1945static SDValue
1947 SDValue V1, SDValue V2, SelectionDAG &DAG,
1948 const LoongArchSubtarget &Subtarget) {
1949 int SplatIndex = -1;
1950 for (const auto &M : Mask) {
1951 if (M != -1) {
1952 SplatIndex = M;
1953 break;
1954 }
1955 }
1956
1957 if (SplatIndex == -1)
1958 return DAG.getUNDEF(VT);
1959
1960 const auto &Begin = Mask.begin();
1961 const auto &End = Mask.end();
1962 unsigned HalfSize = Mask.size() / 2;
1963
1964 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1965 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
1966 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
1967 0)) {
1968 APInt Imm(64, SplatIndex);
1969 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1970 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1971 }
1972
1973 return SDValue();
1974}
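
// [Editor's note: illustrative sketch, not part of LoongArchISelLowering.cpp.]
// A standalone check for the XVREPLVEI mask form documented above, using plain
// ints with -1 standing for an undef element. It simplifies the
// fitsRegularPattern<>() calls: the splat index is assumed to be found in the
// first half of the mask, and an all-undef mask (which the real code folds to
// UNDEF) is simply rejected.
#include <vector>

static bool isXVREPLVEIMask(const std::vector<int> &Mask) {
  int Size = (int)Mask.size();
  int Half = Size / 2;
  int Splat = -1;
  for (int M : Mask)
    if (M != -1) {
      Splat = M;
      break;
    }
  if (Splat < 0 || Splat >= Size)
    return false; // All-undef masks and out-of-range splats are not handled.
  for (int i = 0; i < Half; ++i) // First half: <x, x, ..., x>.
    if (Mask[i] != -1 && Mask[i] != Splat)
      return false;
  for (int i = Half; i < Size; ++i) // Second half: <x+n, x+n, ..., x+n>.
    if (Mask[i] != -1 && Mask[i] != Splat + Half)
      return false;
  return true;
}
// For v8i32, <1, 1, 1, 1, 5, 5, 5, 5> fits the form with x = 1 and n = 4.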
1975
1976/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
1977static SDValue
1978lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1979 SDValue V1, SDValue V2, SelectionDAG &DAG,
1980 const LoongArchSubtarget &Subtarget) {
1981 // When the size is less than or equal to 4, lower cost instructions may be
1982 // used.
1983 if (Mask.size() <= 4)
1984 return SDValue();
1985 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
1986}
1987
1988/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
1989static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1990 MVT VT, SDValue V1, SDValue V2,
1991 SelectionDAG &DAG) {
1992 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
1993}
1994
1995/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
1996static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1997 MVT VT, SDValue V1, SDValue V2,
1998 SelectionDAG &DAG) {
1999 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2000}
2001
2002/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2003static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2004 MVT VT, SDValue V1, SDValue V2,
2005 SelectionDAG &DAG) {
2006
2007 const auto &Begin = Mask.begin();
2008 const auto &End = Mask.end();
2009 unsigned HalfSize = Mask.size() / 2;
2010 unsigned LeftSize = HalfSize / 2;
2011 SDValue OriV1 = V1, OriV2 = V2;
2012
2013 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2014 1) &&
2015 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2016 V1 = OriV1;
2017 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2018 Mask.size() + HalfSize - LeftSize, 1) &&
2019 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2020 Mask.size() + HalfSize + LeftSize, 1))
2021 V1 = OriV2;
2022 else
2023 return SDValue();
2024
2025 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2026 1) &&
2027 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2028 1))
2029 V2 = OriV1;
2030 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2031 Mask.size() + HalfSize - LeftSize, 1) &&
2032 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2033 Mask.size() + HalfSize + LeftSize, 1))
2034 V2 = OriV2;
2035 else
2036 return SDValue();
2037
2038 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2039}
2040
2041/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2042static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2043 MVT VT, SDValue V1, SDValue V2,
2044 SelectionDAG &DAG) {
2045
2046 const auto &Begin = Mask.begin();
2047 const auto &End = Mask.end();
2048 unsigned HalfSize = Mask.size() / 2;
2049 SDValue OriV1 = V1, OriV2 = V2;
2050
2051 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2052 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2053 V1 = OriV1;
2054 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2055 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2056 Mask.size() + HalfSize, 1))
2057 V1 = OriV2;
2058 else
2059 return SDValue();
2060
2061 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2062 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2063 V2 = OriV1;
2064 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2065 1) &&
2066 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2067 Mask.size() + HalfSize, 1))
2068 V2 = OriV2;
2069 else
2070 return SDValue();
2071
2072 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2073}
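
// [Editor's note: illustrative sketch, not part of LoongArchISelLowering.cpp.]
// One mask shape accepted by the XVILVL matcher above: the low half of each
// 128-bit lane of V1 interleaved with the low half of the same lane of V2. The
// matcher also accepts variants where both element streams come from the same
// source operand. For NumElts == 8 (v8i32) this helper returns
// <0, 8, 1, 9, 4, 12, 5, 13>.
#include <vector>

static std::vector<int> makeXVILVLMask(int NumElts) {
  std::vector<int> Mask(NumElts);
  int Half = NumElts / 2; // Elements per 128-bit lane.
  for (int i = 0; i < NumElts; ++i) {
    int Lane = i / Half;              // Which 128-bit lane of the result.
    int Pos = i % Half;               // Position within that lane.
    int Src = (Pos % 2 == 0) ? 0 : 1; // Even slots from V1, odd slots from V2.
    int Elt = Lane * Half + Pos / 2;  // Low half of the corresponding lane.
    Mask[i] = Src * NumElts + Elt;
  }
  return Mask;
}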
2074
2075/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2076static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2077 MVT VT, SDValue V1, SDValue V2,
2078 SelectionDAG &DAG) {
2079
2080 const auto &Begin = Mask.begin();
2081 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2082 const auto &Mid = Mask.begin() + Mask.size() / 2;
2083 const auto &RightMid = Mask.end() - Mask.size() / 4;
2084 const auto &End = Mask.end();
2085 unsigned HalfSize = Mask.size() / 2;
2086 SDValue OriV1 = V1, OriV2 = V2;
2087
2088 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2089 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2090 V1 = OriV1;
2091 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2092 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2093 V1 = OriV2;
2094 else
2095 return SDValue();
2096
2097 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2098 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2099 V2 = OriV1;
2100 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2101 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2102 V2 = OriV2;
2103
2104 else
2105 return SDValue();
2106
2107 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2108}
2109
2110/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2111static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2112 MVT VT, SDValue V1, SDValue V2,
2113 SelectionDAG &DAG) {
2114
2115 const auto &Begin = Mask.begin();
2116 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2117 const auto &Mid = Mask.begin() + Mask.size() / 2;
2118 const auto &RightMid = Mask.end() - Mask.size() / 4;
2119 const auto &End = Mask.end();
2120 unsigned HalfSize = Mask.size() / 2;
2121 SDValue OriV1 = V1, OriV2 = V2;
2122
2123 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2124 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2125 V1 = OriV1;
2126 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2127 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2128 2))
2129 V1 = OriV2;
2130 else
2131 return SDValue();
2132
2133 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2134 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2135 V2 = OriV1;
2136 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2137 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2138 2))
2139 V2 = OriV2;
2140 else
2141 return SDValue();
2142
2143 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2144}
2145
2146/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2147static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2148 MVT VT, SDValue V1, SDValue V2,
2149 SelectionDAG &DAG) {
2150
2151 int MaskSize = Mask.size();
2152 int HalfSize = Mask.size() / 2;
2153 const auto &Begin = Mask.begin();
2154 const auto &Mid = Mask.begin() + HalfSize;
2155 const auto &End = Mask.end();
2156
2157 // VECTOR_SHUFFLE concatenates the vectors:
2158 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2159 // shuffling ->
2160 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2161 //
2162 // XVSHUF concatenates the vectors:
2163 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2164 // shuffling ->
2165 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2166 SmallVector<SDValue, 8> MaskAlloc;
2167 for (auto it = Begin; it < Mid; it++) {
2168 if (*it < 0) // UNDEF
2169 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2170 else if ((*it >= 0 && *it < HalfSize) ||
2171 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2172 int M = *it < HalfSize ? *it : *it - HalfSize;
2173 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2174 } else
2175 return SDValue();
2176 }
2177 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2178
2179 for (auto it = Mid; it < End; it++) {
2180 if (*it < 0) // UNDEF
2181 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2182 else if ((*it >= HalfSize && *it < MaskSize) ||
2183 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2184 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2185 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2186 } else
2187 return SDValue();
2188 }
2189 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2190
2191 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2192 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2193 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2194}
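
// [Editor's note: illustrative sketch, not part of LoongArchISelLowering.cpp.]
// The index remapping performed by the two loops above, on plain ints with -1
// standing for an undef element. The first half of the result may only draw
// from the low halves of the two concatenated sources and the second half only
// from the high halves; any other element makes the XVSHUF lowering fail, just
// as the function above returns SDValue().
#include <optional>
#include <vector>

static std::optional<std::vector<int>>
remapXVSHUFMask(const std::vector<int> &Mask) {
  int Size = (int)Mask.size();
  int Half = Size / 2;
  std::vector<int> Out;
  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    if (M < 0) { // Undef elements become index 0.
      Out.push_back(0);
      continue;
    }
    if (i < Half) { // First half: low halves of V1 and V2 only.
      if (M < Half)
        Out.push_back(M);
      else if (M >= Size && M < Size + Half)
        Out.push_back(M - Half);
      else
        return std::nullopt;
    } else { // Second half: high halves of V1 and V2 only.
      if (M >= Half && M < Size)
        Out.push_back(M - Half);
      else if (M >= Size + Half && M < 2 * Size)
        Out.push_back(M - Size);
      else
        return std::nullopt;
    }
  }
  return Out;
}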
2195
2196/// Shuffle vectors by lane to generate more optimized instructions.
2197/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2198///
2199/// Therefore, except for the following four cases, all other cases are
2200/// regarded as cross-lane shuffles, where optimization is relatively limited.
2201///
2202/// - Shuffle of the high and low lanes of the two input vectors
2203/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2204/// - Shuffle of the low and high lanes of the two input vectors
2205/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2206/// - Shuffle of the low lanes of the two input vectors
2207/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2208/// - Shuffle of the high lanes of the two input vectors
2209/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2210///
2211/// The first case is the closest to the LoongArch instructions, and the other
2212/// cases need to be converted to it before processing.
2213///
2214/// This function may modify V1, V2 and Mask.
2215static void canonicalizeShuffleVectorByLane(
2216 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2217 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2218
2219 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2220
2221 int MaskSize = Mask.size();
2222 int HalfSize = Mask.size() / 2;
2223 MVT GRLenVT = Subtarget.getGRLenVT();
2224
2225 HalfMaskType preMask = None, postMask = None;
2226
2227 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2228 return M < 0 || (M >= 0 && M < HalfSize) ||
2229 (M >= MaskSize && M < MaskSize + HalfSize);
2230 }))
2231 preMask = HighLaneTy;
2232 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2233 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2234 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2235 }))
2236 preMask = LowLaneTy;
2237
2238 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2239 return M < 0 || (M >= 0 && M < HalfSize) ||
2240 (M >= MaskSize && M < MaskSize + HalfSize);
2241 }))
2242 postMask = HighLaneTy;
2243 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2244 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2245 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2246 }))
2247 postMask = LowLaneTy;
2248
2249 // The first half of the mask is high-lane type and the second half is
2250 // low-lane type, which is closest to the LoongArch instructions.
2251 //
2252 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2253 // to the lower 128 bits of the vector register, and the low lane of the mask
2254 // corresponds to the higher 128 bits of the vector register.
2255 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2256 return;
2257 }
2258 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2259 V1 = DAG.getBitcast(MVT::v4i64, V1);
2260 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2261 DAG.getConstant(0b01001110, DL, GRLenVT));
2262 V1 = DAG.getBitcast(VT, V1);
2263
2264 if (!V2.isUndef()) {
2265 V2 = DAG.getBitcast(MVT::v4i64, V2);
2266 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2267 DAG.getConstant(0b01001110, DL, GRLenVT));
2268 V2 = DAG.getBitcast(VT, V2);
2269 }
2270
2271 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2272 *it = *it < 0 ? *it : *it - HalfSize;
2273 }
2274 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2275 *it = *it < 0 ? *it : *it + HalfSize;
2276 }
2277 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2278 V1 = DAG.getBitcast(MVT::v4i64, V1);
2279 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2280 DAG.getConstant(0b11101110, DL, GRLenVT));
2281 V1 = DAG.getBitcast(VT, V1);
2282
2283 if (!V2.isUndef()) {
2284 V2 = DAG.getBitcast(MVT::v4i64, V2);
2285 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2286 DAG.getConstant(0b11101110, DL, GRLenVT));
2287 V2 = DAG.getBitcast(VT, V2);
2288 }
2289
2290 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2291 *it = *it < 0 ? *it : *it - HalfSize;
2292 }
2293 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2294 V1 = DAG.getBitcast(MVT::v4i64, V1);
2295 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2296 DAG.getConstant(0b01000100, DL, GRLenVT));
2297 V1 = DAG.getBitcast(VT, V1);
2298
2299 if (!V2.isUndef()) {
2300 V2 = DAG.getBitcast(MVT::v4i64, V2);
2301 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2302 DAG.getConstant(0b01000100, DL, GRLenVT));
2303 V2 = DAG.getBitcast(VT, V2);
2304 }
2305
2306 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2307 *it = *it < 0 ? *it : *it + HalfSize;
2308 }
2309 } else { // cross-lane
2310 return;
2311 }
2312}
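
// [Editor's note: illustrative sketch, not part of LoongArchISelLowering.cpp.]
// The half-mask classification used above, on plain ints with -1 standing for
// an undef element. Conceptually the function above computes
// classifyHalf(Mask, 0, Half) for preMask and classifyHalf(Mask, Half, Size)
// for postMask; only the (HighLane, LowLane) combination is already in the
// shape the instructions want, the other in-lane combinations are fixed up
// with XVPERMI, and cross-lane halves are left untouched.
#include <vector>

enum class HalfMaskKind { HighLane, LowLane, CrossLane };

static HalfMaskKind classifyHalf(const std::vector<int> &Mask, int Begin,
                                 int End) {
  int Size = (int)Mask.size();
  int Half = Size / 2;
  bool AllHigh = true, AllLow = true;
  for (int i = Begin; i < End; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue; // Undef elements match either lane.
    bool FromHighLane = (M < Half) || (M >= Size && M < Size + Half);
    bool FromLowLane = (M >= Half && M < Size) || (M >= Size + Half);
    AllHigh &= FromHighLane;
    AllLow &= FromLowLane;
  }
  if (AllHigh)
    return HalfMaskKind::HighLane;
  if (AllLow)
    return HalfMaskKind::LowLane;
  return HalfMaskKind::CrossLane;
}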
2313
2314/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2315/// Only for 256-bit vector.
2316///
2317/// For example:
2318/// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2319/// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2320/// is lowered to:
2321/// (XVPERMI $xr2, $xr0, 78)
2322/// (XVSHUF $xr1, $xr2, $xr0)
2323/// (XVORI $xr0, $xr1, 0)
2324static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2325 ArrayRef<int> Mask,
2326 MVT VT, SDValue V1,
2327 SDValue V2,
2328 SelectionDAG &DAG) {
2329 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2330 int Size = Mask.size();
2331 int LaneSize = Size / 2;
2332
2333 bool LaneCrossing[2] = {false, false};
2334 for (int i = 0; i < Size; ++i)
2335 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2336 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2337
2338 // If no element crosses the 128-bit lanes, this lowering does not apply.
2339 if (!LaneCrossing[0] && !LaneCrossing[1])
2340 return SDValue();
2341
2342 SmallVector<int> InLaneMask;
2343 InLaneMask.assign(Mask.begin(), Mask.end());
2344 for (int i = 0; i < Size; ++i) {
2345 int &M = InLaneMask[i];
2346 if (M < 0)
2347 continue;
2348 if (((M % Size) / LaneSize) != (i / LaneSize))
2349 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2350 }
2351
2352 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2353 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2354 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2355 Flipped = DAG.getBitcast(VT, Flipped);
2356 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2357}
2358
2359/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2360///
2361/// This routine breaks down the specific type of 256-bit shuffle and
2362/// dispatches to the lowering routines accordingly.
2363static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2364 SDValue V1, SDValue V2, SelectionDAG &DAG,
2365 const LoongArchSubtarget &Subtarget) {
2366 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2367 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2368 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2369 "Vector type is unsupported for lasx!");
2371 "Two operands have different types!");
2372 assert(VT.getVectorNumElements() == Mask.size() &&
2373 "Unexpected mask size for shuffle!");
2374 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2375 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2376
2377 // Canonicalize non-cross-lane shuffle vectors.
2378 SmallVector<int> NewMask(Mask);
2379 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2380
2381 APInt KnownUndef, KnownZero;
2382 computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
2383 APInt Zeroable = KnownUndef | KnownZero;
2384
2385 SDValue Result;
2386 // TODO: Add more comparison patterns.
2387 if (V2.isUndef()) {
2388 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
2389 Subtarget)))
2390 return Result;
2391 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
2392 Subtarget)))
2393 return Result;
2394 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2395 V1, V2, DAG)))
2396 return Result;
2397
2398 // TODO: The commented-out assignment below may be enabled in the future to
2399 // better match the pattern for instruction selection.
2400 /* V2 = V1; */
2401 }
2402
2403 // For better performance, it is recommended not to change the order in
2404 // which these patterns are compared.
2405 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
2406 return Result;
2407 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
2408 return Result;
2409 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
2410 return Result;
2411 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
2412 return Result;
2413 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
2414 return Result;
2415 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
2416 return Result;
2417 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
2418 Subtarget, Zeroable)))
2419 return Result;
2420 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
2421 Subtarget)))
2422 return Result;
2423 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2424 return NewShuffle;
2425 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2426 return Result;
2427
2428 return SDValue();
2429}
2430
2431SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2432 SelectionDAG &DAG) const {
2433 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2434 ArrayRef<int> OrigMask = SVOp->getMask();
2435 SDValue V1 = Op.getOperand(0);
2436 SDValue V2 = Op.getOperand(1);
2437 MVT VT = Op.getSimpleValueType();
2438 int NumElements = VT.getVectorNumElements();
2439 SDLoc DL(Op);
2440
2441 bool V1IsUndef = V1.isUndef();
2442 bool V2IsUndef = V2.isUndef();
2443 if (V1IsUndef && V2IsUndef)
2444 return DAG.getUNDEF(VT);
2445
2446 // When we create a shuffle node we put the UNDEF node as the second operand,
2447 // but in some cases the first operand may be transformed to UNDEF.
2448 // In this case we should just commute the node.
2449 if (V1IsUndef)
2450 return DAG.getCommutedVectorShuffle(*SVOp);
2451
2452 // Check for non-undef masks pointing at an undef vector and make the masks
2453 // undef as well. This makes it easier to match the shuffle based solely on
2454 // the mask.
2455 if (V2IsUndef &&
2456 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2457 SmallVector<int, 8> NewMask(OrigMask);
2458 for (int &M : NewMask)
2459 if (M >= NumElements)
2460 M = -1;
2461 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2462 }
2463
2464 // Check for illegal shuffle mask element index values.
2465 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2466 (void)MaskUpperLimit;
2467 assert(llvm::all_of(OrigMask,
2468 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2469 "Out of bounds shuffle index");
2470
2471 // For each vector width, delegate to a specialized lowering routine.
2472 if (VT.is128BitVector())
2473 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2474
2475 if (VT.is256BitVector())
2476 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2477
2478 return SDValue();
2479}
2480
2481SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2482 SelectionDAG &DAG) const {
2483 // Custom lower to ensure the libcall return is passed in an FPR on hard
2484 // float ABIs.
2485 SDLoc DL(Op);
2486 MakeLibCallOptions CallOptions;
2487 SDValue Op0 = Op.getOperand(0);
2488 SDValue Chain = SDValue();
2489 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2490 SDValue Res;
2491 std::tie(Res, Chain) =
2492 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2493 if (Subtarget.is64Bit())
2494 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2495 return DAG.getBitcast(MVT::i32, Res);
2496}
2497
2498SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2499 SelectionDAG &DAG) const {
2500 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2501 // float ABIs.
2502 SDLoc DL(Op);
2503 MakeLibCallOptions CallOptions;
2504 SDValue Op0 = Op.getOperand(0);
2505 SDValue Chain = SDValue();
2506 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2507 DL, MVT::f32, Op0)
2508 : DAG.getBitcast(MVT::f32, Op0);
2509 SDValue Res;
2510 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2511 CallOptions, DL, Chain);
2512 return Res;
2513}
2514
2515SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2516 SelectionDAG &DAG) const {
2517 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2518 SDLoc DL(Op);
2519 MakeLibCallOptions CallOptions;
2520 RTLIB::Libcall LC =
2521 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2522 SDValue Res =
2523 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2524 if (Subtarget.is64Bit())
2525 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2526 return DAG.getBitcast(MVT::i32, Res);
2527}
2528
2529SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2530 SelectionDAG &DAG) const {
2531 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2532 MVT VT = Op.getSimpleValueType();
2533 SDLoc DL(Op);
2534 Op = DAG.getNode(
2535 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2536 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2537 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2538 DL, MVT::f32, Op)
2539 : DAG.getBitcast(MVT::f32, Op);
2540 if (VT != MVT::f32)
2541 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2542 return Res;
2543}
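
// [Editor's note: illustrative sketch, not part of LoongArchISelLowering.cpp.]
// Scalar picture of the bf16 -> float lowering above: a bf16 value is the high
// 16 bits of an IEEE-754 binary32, so extending it is a 16-bit left shift into
// a 32-bit integer followed by reinterpreting the bits as f32 (MOVGR2FR_W on
// LA64, a plain bitcast on LA32), with an extra FP_EXTEND if the result type
// is wider than f32.
#include <cstdint>
#include <cstring>

static float bf16BitsToFloat(uint16_t Bits) {
  uint32_t Widened = (uint32_t)Bits << 16; // The ISD::SHL by 16 above.
  float F;
  std::memcpy(&F, &Widened, sizeof(F)); // The move-to-FPR / bitcast step.
  return F;
}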
2544
2545// Lower BUILD_VECTOR as broadcast load (if possible).
2546// For example:
2547// %a = load i8, ptr %ptr
2548// %b = build_vector %a, %a, %a, %a
2549// is lowered to:
2550// (VLDREPL_B $a0, 0)
2551static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2552 const SDLoc &DL,
2553 SelectionDAG &DAG) {
2554 MVT VT = BVOp->getSimpleValueType(0);
2555 int NumOps = BVOp->getNumOperands();
2556
2557 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2558 "Unsupported vector type for broadcast.");
2559
2560 SDValue IdentitySrc;
2561 bool IsIdeneity = true;
2562
2563 for (int i = 0; i != NumOps; i++) {
2564 SDValue Op = BVOp->getOperand(i);
2565 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2566 IsIdeneity = false;
2567 break;
2568 }
2569 IdentitySrc = BVOp->getOperand(0);
2570 }
2571
2572 // make sure that this load is valid and only has one user.
2573 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2574 return SDValue();
2575
2576 auto *LN = cast<LoadSDNode>(IdentitySrc);
2577 auto ExtType = LN->getExtensionType();
2578
2579 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2580 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2581 SDVTList Tys =
2582 LN->isIndexed()
2583 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2584 : DAG.getVTList(VT, MVT::Other);
2585 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2586 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2587 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2588 return BCast;
2589 }
2590 return SDValue();
2591}
2592
2593SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2594 SelectionDAG &DAG) const {
2595 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2596 EVT ResTy = Op->getValueType(0);
2597 unsigned NumElts = ResTy.getVectorNumElements();
2598 SDLoc DL(Op);
2599 APInt SplatValue, SplatUndef;
2600 unsigned SplatBitSize;
2601 bool HasAnyUndefs;
2602 bool IsConstant = false;
2603 bool UseSameConstant = true;
2604 SDValue ConstantValue;
2605 bool Is128Vec = ResTy.is128BitVector();
2606 bool Is256Vec = ResTy.is256BitVector();
2607
2608 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2609 (!Subtarget.hasExtLASX() || !Is256Vec))
2610 return SDValue();
2611
2612 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2613 return Result;
2614
2615 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2616 /*MinSplatBits=*/8) &&
2617 SplatBitSize <= 64) {
2618 // We can only cope with 8, 16, 32, or 64-bit elements.
2619 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2620 SplatBitSize != 64)
2621 return SDValue();
2622
2623 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2624 // We can only handle 64-bit elements that are within
2625 // the signed 32-bit range on 32-bit targets.
2626 if (!SplatValue.isSignedIntN(32))
2627 return SDValue();
2628 if ((Is128Vec && ResTy == MVT::v4i32) ||
2629 (Is256Vec && ResTy == MVT::v8i32))
2630 return Op;
2631 }
2632
2633 EVT ViaVecTy;
2634
2635 switch (SplatBitSize) {
2636 default:
2637 return SDValue();
2638 case 8:
2639 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2640 break;
2641 case 16:
2642 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2643 break;
2644 case 32:
2645 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2646 break;
2647 case 64:
2648 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2649 break;
2650 }
2651
2652 // SelectionDAG::getConstant will promote SplatValue appropriately.
2653 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2654
2655 // Bitcast to the type we originally wanted.
2656 if (ViaVecTy != ResTy)
2657 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2658
2659 return Result;
2660 }
2661
2662 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2663 return Op;
2664
2665 for (unsigned i = 0; i < NumElts; ++i) {
2666 SDValue Opi = Node->getOperand(i);
2667 if (isIntOrFPConstant(Opi)) {
2668 IsConstant = true;
2669 if (!ConstantValue.getNode())
2670 ConstantValue = Opi;
2671 else if (ConstantValue != Opi)
2672 UseSameConstant = false;
2673 }
2674 }
2675
2676 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
2677 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
2678 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
2679 for (unsigned i = 0; i < NumElts; ++i) {
2680 SDValue Opi = Node->getOperand(i);
2681 if (!isIntOrFPConstant(Opi))
2682 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
2683 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2684 }
2685 return Result;
2686 }
2687
2688 if (!IsConstant) {
2689 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2690 // The resulting code is the same length as the expansion, but it doesn't
2691 // use memory operations.
2692 assert(ResTy.isVector());
2693
2694 SDValue Op0 = Node->getOperand(0);
2695 SDValue Vector = DAG.getUNDEF(ResTy);
2696
2697 if (!Op0.isUndef())
2698 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2699 for (unsigned i = 1; i < NumElts; ++i) {
2700 SDValue Opi = Node->getOperand(i);
2701 if (Opi.isUndef())
2702 continue;
2703 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2704 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2705 }
2706 return Vector;
2707 }
2708
2709 return SDValue();
2710}
2711
2712SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
2713 SelectionDAG &DAG) const {
2714 SDLoc DL(Op);
2715 MVT ResVT = Op.getSimpleValueType();
2716 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
2717
2718 unsigned NumOperands = Op.getNumOperands();
2719 unsigned NumFreezeUndef = 0;
2720 unsigned NumZero = 0;
2721 unsigned NumNonZero = 0;
2722 unsigned NonZeros = 0;
2723 SmallSet<SDValue, 4> Undefs;
2724 for (unsigned i = 0; i != NumOperands; ++i) {
2725 SDValue SubVec = Op.getOperand(i);
2726 if (SubVec.isUndef())
2727 continue;
2728 if (ISD::isFreezeUndef(SubVec.getNode())) {
2729 // If the freeze(undef) has multiple uses then we must fold to zero.
2730 if (SubVec.hasOneUse()) {
2731 ++NumFreezeUndef;
2732 } else {
2733 ++NumZero;
2734 Undefs.insert(SubVec);
2735 }
2736 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
2737 ++NumZero;
2738 else {
2739 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
2740 NonZeros |= 1 << i;
2741 ++NumNonZero;
2742 }
2743 }
2744
2745 // If we have more than 2 non-zeros, build each half separately.
2746 if (NumNonZero > 2) {
2747 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
2748 ArrayRef<SDUse> Ops = Op->ops();
2749 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2750 Ops.slice(0, NumOperands / 2));
2751 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2752 Ops.slice(NumOperands / 2));
2753 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
2754 }
2755
2756 // Otherwise, build it up through insert_subvectors.
2757 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
2758 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
2759 : DAG.getUNDEF(ResVT));
2760
2761 // Replace Undef operands with ZeroVector.
2762 for (SDValue U : Undefs)
2763 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
2764
2765 MVT SubVT = Op.getOperand(0).getSimpleValueType();
2766 unsigned NumSubElems = SubVT.getVectorNumElements();
2767 for (unsigned i = 0; i != NumOperands; ++i) {
2768 if ((NonZeros & (1 << i)) == 0)
2769 continue;
2770
2771 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
2772 DAG.getVectorIdxConstant(i * NumSubElems, DL));
2773 }
2774
2775 return Vec;
2776}
2777
2778SDValue
2779LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2780 SelectionDAG &DAG) const {
2781 EVT VecTy = Op->getOperand(0)->getValueType(0);
2782 SDValue Idx = Op->getOperand(1);
2783 unsigned NumElts = VecTy.getVectorNumElements();
2784
2785 if (isa<ConstantSDNode>(Idx) && Idx->getAsZExtVal() < NumElts)
2786 return Op;
2787
2788 return SDValue();
2789}
2790
2791SDValue
2792LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2793 SelectionDAG &DAG) const {
2794 MVT VT = Op.getSimpleValueType();
2795 MVT EltVT = VT.getVectorElementType();
2796 unsigned NumElts = VT.getVectorNumElements();
2797 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
2798 SDLoc DL(Op);
2799 SDValue Op0 = Op.getOperand(0);
2800 SDValue Op1 = Op.getOperand(1);
2801 SDValue Op2 = Op.getOperand(2);
2802
2803 if (isa<ConstantSDNode>(Op2))
2804 return Op;
2805
2806 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
2807 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
2808
2809 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
2810 return SDValue();
2811
2812 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
2813 SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
2814
2815 SmallVector<SDValue, 32> RawIndices;
2816 for (unsigned i = 0; i < NumElts; ++i)
2817 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2818 SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
2819
2820 // insert vec, elt, idx
2821 // =>
2822 // select (splatidx == {0,1,2...}) ? splatelt : vec
2823 SDValue SelectCC =
2824 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
2825 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
2826}
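
// [Editor's note: illustrative sketch, not part of LoongArchISelLowering.cpp.]
// Scalar picture of the variable-index insert above: the element and the index
// are splatted, the splatted index is compared against the constant vector
// {0, 1, 2, ...}, and a per-element select picks either the new element or the
// original vector element.
#include <array>
#include <cstddef>

template <typename T, size_t N>
static std::array<T, N> insertElt(std::array<T, N> Vec, T Elt, size_t Idx) {
  for (size_t i = 0; i < N; ++i)        // Indices vector {0, 1, 2, ...}.
    Vec[i] = (i == Idx) ? Elt : Vec[i]; // VSELECT(SplatIdx == Indices, ...).
  return Vec;
}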
2827
2828SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
2829 SelectionDAG &DAG) const {
2830 SDLoc DL(Op);
2831 SyncScope::ID FenceSSID =
2832 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
2833
2834 // singlethread fences only synchronize with signal handlers on the same
2835 // thread and thus only need to preserve instruction order, not actually
2836 // enforce memory ordering.
2837 if (FenceSSID == SyncScope::SingleThread)
2838 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
2839 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
2840
2841 return Op;
2842}
2843
2844SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
2845 SelectionDAG &DAG) const {
2846
2847 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
2848 DAG.getContext()->emitError(
2849 "On LA64, only 64-bit registers can be written.");
2850 return Op.getOperand(0);
2851 }
2852
2853 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
2854 DAG.getContext()->emitError(
2855 "On LA32, only 32-bit registers can be written.");
2856 return Op.getOperand(0);
2857 }
2858
2859 return Op;
2860}
2861
2862SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
2863 SelectionDAG &DAG) const {
2864 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
2865 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
2866 "be a constant integer");
2867 return SDValue();
2868 }
2869
2870 MachineFunction &MF = DAG.getMachineFunction();
2871 MF.getFrameInfo().setFrameAddressIsTaken(true);
2872 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
2873 EVT VT = Op.getValueType();
2874 SDLoc DL(Op);
2875 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
2876 unsigned Depth = Op.getConstantOperandVal(0);
2877 int GRLenInBytes = Subtarget.getGRLen() / 8;
2878
2879 while (Depth--) {
2880 int Offset = -(GRLenInBytes * 2);
2881 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
2882 DAG.getSignedConstant(Offset, DL, VT));
2883 FrameAddr =
2884 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2885 }
2886 return FrameAddr;
2887}
2888
2889SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
2890 SelectionDAG &DAG) const {
2891 // Currently we only support lowering the return address for the current frame.
2892 if (Op.getConstantOperandVal(0) != 0) {
2893 DAG.getContext()->emitError(
2894 "return address can only be determined for the current frame");
2895 return SDValue();
2896 }
2897
2898 MachineFunction &MF = DAG.getMachineFunction();
2899 MF.getFrameInfo().setReturnAddressIsTaken(true);
2900 MVT GRLenVT = Subtarget.getGRLenVT();
2901
2902 // Return the value of the return address register, marking it an implicit
2903 // live-in.
2904 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
2905 getRegClassFor(GRLenVT));
2906 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
2907}
2908
2909SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
2910 SelectionDAG &DAG) const {
2911 MachineFunction &MF = DAG.getMachineFunction();
2912 auto Size = Subtarget.getGRLen() / 8;
2913 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
2914 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2915}
2916
2917SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
2918 SelectionDAG &DAG) const {
2919 MachineFunction &MF = DAG.getMachineFunction();
2920 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
2921
2922 SDLoc DL(Op);
2923 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2924 getPointerTy(MF.getDataLayout()));
2925
2926 // vastart just stores the address of the VarArgsFrameIndex slot into the
2927 // memory location argument.
2928 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2929 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
2930 MachinePointerInfo(SV));
2931}
2932
2933SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
2934 SelectionDAG &DAG) const {
2935 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
2936 !Subtarget.hasBasicD() && "unexpected target features");
2937
2938 SDLoc DL(Op);
2939 SDValue Op0 = Op.getOperand(0);
2940 if (Op0->getOpcode() == ISD::AND) {
2941 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
2942 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
2943 return Op;
2944 }
2945
2946 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
2947 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
2948 Op0.getConstantOperandVal(2) == UINT64_C(0))
2949 return Op;
2950
2951 if (Op0.getOpcode() == ISD::AssertZext &&
2952 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
2953 return Op;
2954
2955 EVT OpVT = Op0.getValueType();
2956 EVT RetVT = Op.getValueType();
2957 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
2958 MakeLibCallOptions CallOptions;
2959 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
2960 SDValue Chain = SDValue();
2961 SDValue Result;
2962 std::tie(Result, Chain) =
2963 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
2964 return Result;
2965}
2966
2967SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
2968 SelectionDAG &DAG) const {
2969 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
2970 !Subtarget.hasBasicD() && "unexpected target features");
2971
2972 SDLoc DL(Op);
2973 SDValue Op0 = Op.getOperand(0);
2974
2975 if ((Op0.getOpcode() == ISD::AssertSext ||
2976 Op0.getOpcode() == ISD::AssertZext) &&
2977 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
2978 return Op;
2979
2980 EVT OpVT = Op0.getValueType();
2981 EVT RetVT = Op.getValueType();
2982 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
2983 MakeLibCallOptions CallOptions;
2984 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
2985 SDValue Chain = SDValue();
2986 SDValue Result;
2987 std::tie(Result, Chain) =
2988 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
2989 return Result;
2990}
2991
2992SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
2993 SelectionDAG &DAG) const {
2994
2995 SDLoc DL(Op);
2996 EVT VT = Op.getValueType();
2997 SDValue Op0 = Op.getOperand(0);
2998 EVT Op0VT = Op0.getValueType();
2999
3000 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3001 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3002 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3003 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3004 }
3005 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3006 SDValue Lo, Hi;
3007 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3008 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3009 }
3010 return Op;
3011}
3012
3013SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3014 SelectionDAG &DAG) const {
3015
3016 SDLoc DL(Op);
3017 SDValue Op0 = Op.getOperand(0);
3018
3019 if (Op0.getValueType() == MVT::f16)
3020 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3021
3022 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3023 !Subtarget.hasBasicD()) {
3024 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3025 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3026 }
3027
3028 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3029 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3030 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3031}
3032
3033static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
3034 SelectionDAG &DAG, unsigned Flags) {
3035 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3036}
3037
3038static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
3039 SelectionDAG &DAG, unsigned Flags) {
3040 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3041 Flags);
3042}
3043
3044static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
3045 SelectionDAG &DAG, unsigned Flags) {
3046 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3047 N->getOffset(), Flags);
3048}
3049
3050static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
3051 SelectionDAG &DAG, unsigned Flags) {
3052 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3053}
3054
3055template <class NodeTy>
3056SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3057 CodeModel::Model M,
3058 bool IsLocal) const {
3059 SDLoc DL(N);
3060 EVT Ty = getPointerTy(DAG.getDataLayout());
3061 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3062 SDValue Load;
3063
3064 switch (M) {
3065 default:
3066 report_fatal_error("Unsupported code model");
3067
3068 case CodeModel::Large: {
3069 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3070
3071 // This is not actually used, but is necessary for successfully matching
3072 // the PseudoLA_*_LARGE nodes.
3073 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3074 if (IsLocal) {
3075 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3076 // eventually becomes the desired 5-insn code sequence.
3077 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3078 Tmp, Addr),
3079 0);
3080 } else {
3081 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3082 // eventually becomes the desired 5-insn code sequence.
3083 Load = SDValue(
3084 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3085 0);
3086 }
3087 break;
3088 }
3089
3090 case CodeModel::Small:
3091 case CodeModel::Medium:
3092 if (IsLocal) {
3093 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3094 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3095 Load = SDValue(
3096 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3097 } else {
3098 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3099 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3100 Load =
3101 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3102 }
3103 }
3104
3105 if (!IsLocal) {
3106 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3107 MachineFunction &MF = DAG.getMachineFunction();
3108 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3109 MachinePointerInfo::getGOT(MF),
3110 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3111 MachineMemOperand::MOInvariant,
3112 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3113 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3114 }
3115
3116 return Load;
3117}
3118
3119SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3120 SelectionDAG &DAG) const {
3121 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3122 DAG.getTarget().getCodeModel());
3123}
3124
3125SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3126 SelectionDAG &DAG) const {
3127 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3128 DAG.getTarget().getCodeModel());
3129}
3130
3131SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3132 SelectionDAG &DAG) const {
3133 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3134 DAG.getTarget().getCodeModel());
3135}
3136
3137SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3138 SelectionDAG &DAG) const {
3139 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3140 assert(N->getOffset() == 0 && "unexpected offset in global node");
3141 auto CM = DAG.getTarget().getCodeModel();
3142 const GlobalValue *GV = N->getGlobal();
3143
3144 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3145 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3146 CM = *GCM;
3147 }
3148
3149 return getAddr(N, DAG, CM, GV->isDSOLocal());
3150}
3151
3152SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3153 SelectionDAG &DAG,
3154 unsigned Opc, bool UseGOT,
3155 bool Large) const {
3156 SDLoc DL(N);
3157 EVT Ty = getPointerTy(DAG.getDataLayout());
3158 MVT GRLenVT = Subtarget.getGRLenVT();
3159
3160 // This is not actually used, but is necessary for successfully matching the
3161 // PseudoLA_*_LARGE nodes.
3162 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3163 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3164
3165 // Only IE needs an extra argument for large code model.
3166 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3167 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3168 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3169
3170 // If it is LE for normal/medium code model, the add tp operation will occur
3171 // during the pseudo-instruction expansion.
3172 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3173 return Offset;
3174
3175 if (UseGOT) {
3176 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3177 MachineFunction &MF = DAG.getMachineFunction();
3178 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3179 MachinePointerInfo::getGOT(MF),
3180 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3181 MachineMemOperand::MOInvariant,
3182 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3183 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3184 }
3185
3186 // Add the thread pointer.
3187 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3188 DAG.getRegister(LoongArch::R2, GRLenVT));
3189}
3190
3191SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3192 SelectionDAG &DAG,
3193 unsigned Opc,
3194 bool Large) const {
3195 SDLoc DL(N);
3196 EVT Ty = getPointerTy(DAG.getDataLayout());
3197 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3198
3199 // This is not actually used, but is necessary for successfully matching the
3200 // PseudoLA_*_LARGE nodes.
3201 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3202
3203 // Use a PC-relative addressing mode to access the dynamic GOT address.
3204 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3205 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3206 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3207
3208 // Prepare argument list to generate call.
3209 ArgListTy Args;
3210 Args.emplace_back(Load, CallTy);
3211
3212 // Setup call to __tls_get_addr.
3213 TargetLowering::CallLoweringInfo CLI(DAG);
3214 CLI.setDebugLoc(DL)
3215 .setChain(DAG.getEntryNode())
3216 .setLibCallee(CallingConv::C, CallTy,
3217 DAG.getExternalSymbol("__tls_get_addr", Ty),
3218 std::move(Args));
3219
3220 return LowerCallTo(CLI).first;
3221}
3222
3223SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3224 SelectionDAG &DAG, unsigned Opc,
3225 bool Large) const {
3226 SDLoc DL(N);
3227 EVT Ty = getPointerTy(DAG.getDataLayout());
3228 const GlobalValue *GV = N->getGlobal();
3229
3230 // This is not actually used, but is necessary for successfully matching the
3231 // PseudoLA_*_LARGE nodes.
3232 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3233
3234 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3235 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3236 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3237 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3238 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3239}
3240
3241SDValue
3242LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3243 SelectionDAG &DAG) const {
3244 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3245 CallingConv::GHC)
3246 report_fatal_error("In GHC calling convention TLS is not supported");
3247
3248 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3249 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3250
3251 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3252 assert(N->getOffset() == 0 && "unexpected offset in global node");
3253
3254 if (DAG.getTarget().useEmulatedTLS())
3255 reportFatalUsageError("the emulated TLS is prohibited");
3256
3257 bool IsDesc = DAG.getTarget().useTLSDESC();
3258
3259 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3260 case TLSModel::GeneralDynamic:
3261 // In this model, application code calls the dynamic linker function
3262 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3263 // runtime.
3264 if (!IsDesc)
3265 return getDynamicTLSAddr(N, DAG,
3266 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3267 : LoongArch::PseudoLA_TLS_GD,
3268 Large);
3269 break;
3270 case TLSModel::LocalDynamic:
3271 // Same as GeneralDynamic, except for assembly modifiers and relocation
3272 // records.
3273 if (!IsDesc)
3274 return getDynamicTLSAddr(N, DAG,
3275 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3276 : LoongArch::PseudoLA_TLS_LD,
3277 Large);
3278 break;
3279 case TLSModel::InitialExec:
3280 // This model uses the GOT to resolve TLS offsets.
3281 return getStaticTLSAddr(N, DAG,
3282 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3283 : LoongArch::PseudoLA_TLS_IE,
3284 /*UseGOT=*/true, Large);
3286 // This model is used when static linking as the TLS offsets are resolved
3287 // during program linking.
3288 //
3289 // This node doesn't need an extra argument for the large code model.
3290 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3291 /*UseGOT=*/false, Large);
3292 }
3293
3294 return getTLSDescAddr(N, DAG,
3295 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3296 : LoongArch::PseudoLA_TLS_DESC,
3297 Large);
3298}
3299
3300template <unsigned N>
3301static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3302 SelectionDAG &DAG, bool IsSigned = false) {
3303 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3304 // Check the ImmArg.
3305 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3306 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3307 DAG.getContext()->emitError(Op->getOperationName(0) +
3308 ": argument out of range.");
3309 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3310 }
3311 return SDValue();
3312}
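
// [Editor's note: illustrative sketch, not part of LoongArchISelLowering.cpp.]
// The immediate range check above expressed standalone: a signed N-bit
// immediate must lie in [-2^(N-1), 2^(N-1) - 1] (isInt<N>) and an unsigned one
// in [0, 2^N - 1] (isUInt<N>). The intrinsics below use N <= 10, so the shifts
// stay well in range.
#include <cstdint>

template <unsigned N>
static bool fitsIntrinsicImm(int64_t Imm, bool IsSigned) {
  if (IsSigned) // Same range as isInt<N>.
    return Imm >= -(INT64_C(1) << (N - 1)) && Imm < (INT64_C(1) << (N - 1));
  return Imm >= 0 && (uint64_t)Imm < (UINT64_C(1) << N); // Same as isUInt<N>.
}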
3313
3314SDValue
3315LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3316 SelectionDAG &DAG) const {
3317 switch (Op.getConstantOperandVal(0)) {
3318 default:
3319 return SDValue(); // Don't custom lower most intrinsics.
3320 case Intrinsic::thread_pointer: {
3321 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3322 return DAG.getRegister(LoongArch::R2, PtrVT);
3323 }
3324 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3325 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3326 case Intrinsic::loongarch_lsx_vreplvei_d:
3327 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3328 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3329 case Intrinsic::loongarch_lsx_vreplvei_w:
3330 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3331 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3332 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3333 case Intrinsic::loongarch_lasx_xvpickve_d:
3334 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3335 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3336 case Intrinsic::loongarch_lasx_xvinsve0_d:
3337 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3338 case Intrinsic::loongarch_lsx_vsat_b:
3339 case Intrinsic::loongarch_lsx_vsat_bu:
3340 case Intrinsic::loongarch_lsx_vrotri_b:
3341 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3342 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3343 case Intrinsic::loongarch_lsx_vsrlri_b:
3344 case Intrinsic::loongarch_lsx_vsrari_b:
3345 case Intrinsic::loongarch_lsx_vreplvei_h:
3346 case Intrinsic::loongarch_lasx_xvsat_b:
3347 case Intrinsic::loongarch_lasx_xvsat_bu:
3348 case Intrinsic::loongarch_lasx_xvrotri_b:
3349 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3350 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3351 case Intrinsic::loongarch_lasx_xvsrlri_b:
3352 case Intrinsic::loongarch_lasx_xvsrari_b:
3353 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3354 case Intrinsic::loongarch_lasx_xvpickve_w:
3355 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3356 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3357 case Intrinsic::loongarch_lasx_xvinsve0_w:
3358 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3359 case Intrinsic::loongarch_lsx_vsat_h:
3360 case Intrinsic::loongarch_lsx_vsat_hu:
3361 case Intrinsic::loongarch_lsx_vrotri_h:
3362 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3363 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3364 case Intrinsic::loongarch_lsx_vsrlri_h:
3365 case Intrinsic::loongarch_lsx_vsrari_h:
3366 case Intrinsic::loongarch_lsx_vreplvei_b:
3367 case Intrinsic::loongarch_lasx_xvsat_h:
3368 case Intrinsic::loongarch_lasx_xvsat_hu:
3369 case Intrinsic::loongarch_lasx_xvrotri_h:
3370 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3371 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3372 case Intrinsic::loongarch_lasx_xvsrlri_h:
3373 case Intrinsic::loongarch_lasx_xvsrari_h:
3374 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3375 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3376 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3377 case Intrinsic::loongarch_lsx_vsrani_b_h:
3378 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3379 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3380 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3381 case Intrinsic::loongarch_lsx_vssrani_b_h:
3382 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3383 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3384 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3385 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3386 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3387 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3388 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3389 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3390 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3391 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3392 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3393 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3394 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3395 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3396 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3397 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3398 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3399 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3400 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3401 case Intrinsic::loongarch_lsx_vsat_w:
3402 case Intrinsic::loongarch_lsx_vsat_wu:
3403 case Intrinsic::loongarch_lsx_vrotri_w:
3404 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3405 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3406 case Intrinsic::loongarch_lsx_vsrlri_w:
3407 case Intrinsic::loongarch_lsx_vsrari_w:
3408 case Intrinsic::loongarch_lsx_vslei_bu:
3409 case Intrinsic::loongarch_lsx_vslei_hu:
3410 case Intrinsic::loongarch_lsx_vslei_wu:
3411 case Intrinsic::loongarch_lsx_vslei_du:
3412 case Intrinsic::loongarch_lsx_vslti_bu:
3413 case Intrinsic::loongarch_lsx_vslti_hu:
3414 case Intrinsic::loongarch_lsx_vslti_wu:
3415 case Intrinsic::loongarch_lsx_vslti_du:
3416 case Intrinsic::loongarch_lsx_vbsll_v:
3417 case Intrinsic::loongarch_lsx_vbsrl_v:
3418 case Intrinsic::loongarch_lasx_xvsat_w:
3419 case Intrinsic::loongarch_lasx_xvsat_wu:
3420 case Intrinsic::loongarch_lasx_xvrotri_w:
3421 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3422 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3423 case Intrinsic::loongarch_lasx_xvsrlri_w:
3424 case Intrinsic::loongarch_lasx_xvsrari_w:
3425 case Intrinsic::loongarch_lasx_xvslei_bu:
3426 case Intrinsic::loongarch_lasx_xvslei_hu:
3427 case Intrinsic::loongarch_lasx_xvslei_wu:
3428 case Intrinsic::loongarch_lasx_xvslei_du:
3429 case Intrinsic::loongarch_lasx_xvslti_bu:
3430 case Intrinsic::loongarch_lasx_xvslti_hu:
3431 case Intrinsic::loongarch_lasx_xvslti_wu:
3432 case Intrinsic::loongarch_lasx_xvslti_du:
3433 case Intrinsic::loongarch_lasx_xvbsll_v:
3434 case Intrinsic::loongarch_lasx_xvbsrl_v:
3435 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3436 case Intrinsic::loongarch_lsx_vseqi_b:
3437 case Intrinsic::loongarch_lsx_vseqi_h:
3438 case Intrinsic::loongarch_lsx_vseqi_w:
3439 case Intrinsic::loongarch_lsx_vseqi_d:
3440 case Intrinsic::loongarch_lsx_vslei_b:
3441 case Intrinsic::loongarch_lsx_vslei_h:
3442 case Intrinsic::loongarch_lsx_vslei_w:
3443 case Intrinsic::loongarch_lsx_vslei_d:
3444 case Intrinsic::loongarch_lsx_vslti_b:
3445 case Intrinsic::loongarch_lsx_vslti_h:
3446 case Intrinsic::loongarch_lsx_vslti_w:
3447 case Intrinsic::loongarch_lsx_vslti_d:
3448 case Intrinsic::loongarch_lasx_xvseqi_b:
3449 case Intrinsic::loongarch_lasx_xvseqi_h:
3450 case Intrinsic::loongarch_lasx_xvseqi_w:
3451 case Intrinsic::loongarch_lasx_xvseqi_d:
3452 case Intrinsic::loongarch_lasx_xvslei_b:
3453 case Intrinsic::loongarch_lasx_xvslei_h:
3454 case Intrinsic::loongarch_lasx_xvslei_w:
3455 case Intrinsic::loongarch_lasx_xvslei_d:
3456 case Intrinsic::loongarch_lasx_xvslti_b:
3457 case Intrinsic::loongarch_lasx_xvslti_h:
3458 case Intrinsic::loongarch_lasx_xvslti_w:
3459 case Intrinsic::loongarch_lasx_xvslti_d:
3460 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3461 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3462 case Intrinsic::loongarch_lsx_vsrani_h_w:
3463 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3464 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3465 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3466 case Intrinsic::loongarch_lsx_vssrani_h_w:
3467 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3468 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3469 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3470 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3471 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3472 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3473 case Intrinsic::loongarch_lsx_vfrstpi_b:
3474 case Intrinsic::loongarch_lsx_vfrstpi_h:
3475 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3476 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3477 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3478 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3479 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3480 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3481 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3482 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3483 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3484 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3485 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3486 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3487 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3488 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3489 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3490 case Intrinsic::loongarch_lsx_vsat_d:
3491 case Intrinsic::loongarch_lsx_vsat_du:
3492 case Intrinsic::loongarch_lsx_vrotri_d:
3493 case Intrinsic::loongarch_lsx_vsrlri_d:
3494 case Intrinsic::loongarch_lsx_vsrari_d:
3495 case Intrinsic::loongarch_lasx_xvsat_d:
3496 case Intrinsic::loongarch_lasx_xvsat_du:
3497 case Intrinsic::loongarch_lasx_xvrotri_d:
3498 case Intrinsic::loongarch_lasx_xvsrlri_d:
3499 case Intrinsic::loongarch_lasx_xvsrari_d:
3500 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3501 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3502 case Intrinsic::loongarch_lsx_vsrani_w_d:
3503 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3504 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3505 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3506 case Intrinsic::loongarch_lsx_vssrani_w_d:
3507 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3508 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3509 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3510 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3511 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3512 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3513 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3514 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3515 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3516 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3517 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3518 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3519 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3520 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3521 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3522 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3523 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3524 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3525 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3526 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3527 case Intrinsic::loongarch_lsx_vsrani_d_q:
3528 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3529 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3530 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3531 case Intrinsic::loongarch_lsx_vssrani_d_q:
3532 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3533 case Intrinsic::loongarch_lsx_vssrani_du_q:
3534 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3535 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3536 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3537 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3538 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3539 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3540 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3541 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3542 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3543 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3544 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3545 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3546 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3547 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3548 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3549 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3550 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3551 case Intrinsic::loongarch_lsx_vnori_b:
3552 case Intrinsic::loongarch_lsx_vshuf4i_b:
3553 case Intrinsic::loongarch_lsx_vshuf4i_h:
3554 case Intrinsic::loongarch_lsx_vshuf4i_w:
3555 case Intrinsic::loongarch_lasx_xvnori_b:
3556 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3557 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3558 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3559 case Intrinsic::loongarch_lasx_xvpermi_d:
3560 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3561 case Intrinsic::loongarch_lsx_vshuf4i_d:
3562 case Intrinsic::loongarch_lsx_vpermi_w:
3563 case Intrinsic::loongarch_lsx_vbitseli_b:
3564 case Intrinsic::loongarch_lsx_vextrins_b:
3565 case Intrinsic::loongarch_lsx_vextrins_h:
3566 case Intrinsic::loongarch_lsx_vextrins_w:
3567 case Intrinsic::loongarch_lsx_vextrins_d:
3568 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3569 case Intrinsic::loongarch_lasx_xvpermi_w:
3570 case Intrinsic::loongarch_lasx_xvpermi_q:
3571 case Intrinsic::loongarch_lasx_xvbitseli_b:
3572 case Intrinsic::loongarch_lasx_xvextrins_b:
3573 case Intrinsic::loongarch_lasx_xvextrins_h:
3574 case Intrinsic::loongarch_lasx_xvextrins_w:
3575 case Intrinsic::loongarch_lasx_xvextrins_d:
3576 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3577 case Intrinsic::loongarch_lsx_vrepli_b:
3578 case Intrinsic::loongarch_lsx_vrepli_h:
3579 case Intrinsic::loongarch_lsx_vrepli_w:
3580 case Intrinsic::loongarch_lsx_vrepli_d:
3581 case Intrinsic::loongarch_lasx_xvrepli_b:
3582 case Intrinsic::loongarch_lasx_xvrepli_h:
3583 case Intrinsic::loongarch_lasx_xvrepli_w:
3584 case Intrinsic::loongarch_lasx_xvrepli_d:
3585 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3586 case Intrinsic::loongarch_lsx_vldi:
3587 case Intrinsic::loongarch_lasx_xvldi:
3588 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
3589 }
3590}
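
The checkIntrinsicImmArg<N> calls above only verify that the constant operand fits an N-bit immediate field, with the signed variants accepting negative values. Below is a small standalone sketch of that range rule, not part of this file; fitsImm is an illustrative stand-in for the isInt/isUInt helpers from llvm/Support/MathExtras.h.

#include <cassert>
#include <cstdint>

// For an unsigned N-bit field the value must be in [0, 2^N - 1];
// for a signed N-bit field it must be in [-2^(N-1), 2^(N-1) - 1].
template <unsigned N> bool fitsImm(int64_t V, bool IsSigned) {
  if (IsSigned)
    return V >= -(int64_t(1) << (N - 1)) && V < (int64_t(1) << (N - 1));
  return V >= 0 && V < (int64_t(1) << N);
}

int main() {
  assert(fitsImm<5>(31, /*IsSigned=*/false));  // ui5-style operand
  assert(!fitsImm<5>(32, /*IsSigned=*/false));
  assert(fitsImm<5>(-16, /*IsSigned=*/true));  // si5-style operand (vseqi etc.)
  assert(!fitsImm<5>(16, /*IsSigned=*/true));
}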
3591
3592 // Helper function that emits an error message for intrinsics with a chain and
3593 // returns the merge values of an UNDEF and the chain.
3594 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3595 StringRef ErrorMsg,
3596 SelectionDAG &DAG) {
3597 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3598 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
3599 SDLoc(Op));
3600}
3601
3602SDValue
3603LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3604 SelectionDAG &DAG) const {
3605 SDLoc DL(Op);
3606 MVT GRLenVT = Subtarget.getGRLenVT();
3607 EVT VT = Op.getValueType();
3608 SDValue Chain = Op.getOperand(0);
3609 const StringRef ErrorMsgOOR = "argument out of range";
3610 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3611 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3612
3613 switch (Op.getConstantOperandVal(1)) {
3614 default:
3615 return Op;
3616 case Intrinsic::loongarch_crc_w_b_w:
3617 case Intrinsic::loongarch_crc_w_h_w:
3618 case Intrinsic::loongarch_crc_w_w_w:
3619 case Intrinsic::loongarch_crc_w_d_w:
3620 case Intrinsic::loongarch_crcc_w_b_w:
3621 case Intrinsic::loongarch_crcc_w_h_w:
3622 case Intrinsic::loongarch_crcc_w_w_w:
3623 case Intrinsic::loongarch_crcc_w_d_w:
3624 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
3625 case Intrinsic::loongarch_csrrd_w:
3626 case Intrinsic::loongarch_csrrd_d: {
3627 unsigned Imm = Op.getConstantOperandVal(2);
3628 return !isUInt<14>(Imm)
3629 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3630 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3631 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3632 }
3633 case Intrinsic::loongarch_csrwr_w:
3634 case Intrinsic::loongarch_csrwr_d: {
3635 unsigned Imm = Op.getConstantOperandVal(3);
3636 return !isUInt<14>(Imm)
3637 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3638 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3639 {Chain, Op.getOperand(2),
3640 DAG.getConstant(Imm, DL, GRLenVT)});
3641 }
3642 case Intrinsic::loongarch_csrxchg_w:
3643 case Intrinsic::loongarch_csrxchg_d: {
3644 unsigned Imm = Op.getConstantOperandVal(4);
3645 return !isUInt<14>(Imm)
3646 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3647 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3648 {Chain, Op.getOperand(2), Op.getOperand(3),
3649 DAG.getConstant(Imm, DL, GRLenVT)});
3650 }
3651 case Intrinsic::loongarch_iocsrrd_d: {
3652 return DAG.getNode(
3653 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
3654 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
3655 }
3656#define IOCSRRD_CASE(NAME, NODE) \
3657 case Intrinsic::loongarch_##NAME: { \
3658 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
3659 {Chain, Op.getOperand(2)}); \
3660 }
3661 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3662 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3663 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3664#undef IOCSRRD_CASE
3665 case Intrinsic::loongarch_cpucfg: {
3666 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3667 {Chain, Op.getOperand(2)});
3668 }
3669 case Intrinsic::loongarch_lddir_d: {
3670 unsigned Imm = Op.getConstantOperandVal(3);
3671 return !isUInt<8>(Imm)
3672 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3673 : Op;
3674 }
3675 case Intrinsic::loongarch_movfcsr2gr: {
3676 if (!Subtarget.hasBasicF())
3677 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
3678 unsigned Imm = Op.getConstantOperandVal(2);
3679 return !isUInt<2>(Imm)
3680 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3681 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
3682 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3683 }
3684 case Intrinsic::loongarch_lsx_vld:
3685 case Intrinsic::loongarch_lsx_vldrepl_b:
3686 case Intrinsic::loongarch_lasx_xvld:
3687 case Intrinsic::loongarch_lasx_xvldrepl_b:
3688 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3689 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3690 : SDValue();
3691 case Intrinsic::loongarch_lsx_vldrepl_h:
3692 case Intrinsic::loongarch_lasx_xvldrepl_h:
3693 return !isShiftedInt<11, 1>(
3694 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3695 ? emitIntrinsicWithChainErrorMessage(
3696 Op, "argument out of range or not a multiple of 2", DAG)
3697 : SDValue();
3698 case Intrinsic::loongarch_lsx_vldrepl_w:
3699 case Intrinsic::loongarch_lasx_xvldrepl_w:
3700 return !isShiftedInt<10, 2>(
3701 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3702 ? emitIntrinsicWithChainErrorMessage(
3703 Op, "argument out of range or not a multiple of 4", DAG)
3704 : SDValue();
3705 case Intrinsic::loongarch_lsx_vldrepl_d:
3706 case Intrinsic::loongarch_lasx_xvldrepl_d:
3707 return !isShiftedInt<9, 3>(
3708 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3709 ? emitIntrinsicWithChainErrorMessage(
3710 Op, "argument out of range or not a multiple of 8", DAG)
3711 : SDValue();
3712 }
3713}
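
The isShiftedInt<W, S> checks above require a vldrepl offset to be a multiple of the element size (2^S bytes) and to fit a signed W-bit field after scaling. A standalone illustration of that constraint, not part of this file; isShiftedSigned is a hypothetical helper with the assumed semantics.

#include <cassert>
#include <cstdint>

// Signed W-bit immediate shifted left by S: a multiple of 2^S lying in
// [-2^(W+S-1), 2^(W+S-1) - 2^S].
bool isShiftedSigned(int64_t V, unsigned W, unsigned S) {
  int64_t Lo = -(int64_t(1) << (W + S - 1));
  int64_t Hi = (int64_t(1) << (W + S - 1)) - (int64_t(1) << S);
  return (V % (int64_t(1) << S)) == 0 && V >= Lo && V <= Hi;
}

int main() {
  assert(isShiftedSigned(-2048, 11, 1)); // vldrepl.h: even offset in [-2048, 2046]
  assert(!isShiftedSigned(3, 11, 1));    // odd offset rejected
  assert(isShiftedSigned(-2048, 10, 2)); // vldrepl.w: multiple of 4
  assert(!isShiftedSigned(2047, 9, 3));  // vldrepl.d: not a multiple of 8
}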
3714
3715 // Helper function that emits an error message for intrinsics with a void
3716 // return value and returns the chain.
3717 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
3718 SelectionDAG &DAG) {
3719
3720 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3721 return Op.getOperand(0);
3722}
3723
3724SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
3725 SelectionDAG &DAG) const {
3726 SDLoc DL(Op);
3727 MVT GRLenVT = Subtarget.getGRLenVT();
3728 SDValue Chain = Op.getOperand(0);
3729 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
3730 SDValue Op2 = Op.getOperand(2);
3731 const StringRef ErrorMsgOOR = "argument out of range";
3732 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3733 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
3734 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3735
3736 switch (IntrinsicEnum) {
3737 default:
3738 // TODO: Add more Intrinsics.
3739 return SDValue();
3740 case Intrinsic::loongarch_cacop_d:
3741 case Intrinsic::loongarch_cacop_w: {
3742 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
3743 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
3744 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
3745 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
3746 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
3747 unsigned Imm1 = Op2->getAsZExtVal();
3748 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
3749 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
3750 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
3751 return Op;
3752 }
3753 case Intrinsic::loongarch_dbar: {
3754 unsigned Imm = Op2->getAsZExtVal();
3755 return !isUInt<15>(Imm)
3756 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3757 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
3758 DAG.getConstant(Imm, DL, GRLenVT));
3759 }
3760 case Intrinsic::loongarch_ibar: {
3761 unsigned Imm = Op2->getAsZExtVal();
3762 return !isUInt<15>(Imm)
3763 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3764 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
3765 DAG.getConstant(Imm, DL, GRLenVT));
3766 }
3767 case Intrinsic::loongarch_break: {
3768 unsigned Imm = Op2->getAsZExtVal();
3769 return !isUInt<15>(Imm)
3770 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3771 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
3772 DAG.getConstant(Imm, DL, GRLenVT));
3773 }
3774 case Intrinsic::loongarch_movgr2fcsr: {
3775 if (!Subtarget.hasBasicF())
3776 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
3777 unsigned Imm = Op2->getAsZExtVal();
3778 return !isUInt<2>(Imm)
3779 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3780 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
3781 DAG.getConstant(Imm, DL, GRLenVT),
3782 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
3783 Op.getOperand(3)));
3784 }
3785 case Intrinsic::loongarch_syscall: {
3786 unsigned Imm = Op2->getAsZExtVal();
3787 return !isUInt<15>(Imm)
3788 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3789 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
3790 DAG.getConstant(Imm, DL, GRLenVT));
3791 }
3792#define IOCSRWR_CASE(NAME, NODE) \
3793 case Intrinsic::loongarch_##NAME: { \
3794 SDValue Op3 = Op.getOperand(3); \
3795 return Subtarget.is64Bit() \
3796 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
3797 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3798 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
3799 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
3800 Op3); \
3801 }
3802 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
3803 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
3804 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
3805#undef IOCSRWR_CASE
3806 case Intrinsic::loongarch_iocsrwr_d: {
3807 return !Subtarget.is64Bit()
3808 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3809 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
3810 Op2,
3811 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
3812 Op.getOperand(3)));
3813 }
3814#define ASRT_LE_GT_CASE(NAME) \
3815 case Intrinsic::loongarch_##NAME: { \
3816 return !Subtarget.is64Bit() \
3817 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
3818 : Op; \
3819 }
3820 ASRT_LE_GT_CASE(asrtle_d)
3821 ASRT_LE_GT_CASE(asrtgt_d)
3822#undef ASRT_LE_GT_CASE
3823 case Intrinsic::loongarch_ldpte_d: {
3824 unsigned Imm = Op.getConstantOperandVal(3);
3825 return !Subtarget.is64Bit()
3826 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3827 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3828 : Op;
3829 }
3830 case Intrinsic::loongarch_lsx_vst:
3831 case Intrinsic::loongarch_lasx_xvst:
3832 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
3833 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3834 : SDValue();
3835 case Intrinsic::loongarch_lasx_xvstelm_b:
3836 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3837 !isUInt<5>(Op.getConstantOperandVal(5)))
3838 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3839 : SDValue();
3840 case Intrinsic::loongarch_lsx_vstelm_b:
3841 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3842 !isUInt<4>(Op.getConstantOperandVal(5)))
3843 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3844 : SDValue();
3845 case Intrinsic::loongarch_lasx_xvstelm_h:
3846 return (!isShiftedInt<8, 1>(
3847 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3848 !isUInt<4>(Op.getConstantOperandVal(5)))
3849 ? emitIntrinsicErrorMessage(
3850 Op, "argument out of range or not a multiple of 2", DAG)
3851 : SDValue();
3852 case Intrinsic::loongarch_lsx_vstelm_h:
3853 return (!isShiftedInt<8, 1>(
3854 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3855 !isUInt<3>(Op.getConstantOperandVal(5)))
3856 ? emitIntrinsicErrorMessage(
3857 Op, "argument out of range or not a multiple of 2", DAG)
3858 : SDValue();
3859 case Intrinsic::loongarch_lasx_xvstelm_w:
3860 return (!isShiftedInt<8, 2>(
3861 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3862 !isUInt<3>(Op.getConstantOperandVal(5)))
3863 ? emitIntrinsicErrorMessage(
3864 Op, "argument out of range or not a multiple of 4", DAG)
3865 : SDValue();
3866 case Intrinsic::loongarch_lsx_vstelm_w:
3867 return (!isShiftedInt<8, 2>(
3868 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3869 !isUInt<2>(Op.getConstantOperandVal(5)))
3870 ? emitIntrinsicErrorMessage(
3871 Op, "argument out of range or not a multiple of 4", DAG)
3872 : SDValue();
3873 case Intrinsic::loongarch_lasx_xvstelm_d:
3874 return (!isShiftedInt<8, 3>(
3875 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3876 !isUInt<2>(Op.getConstantOperandVal(5)))
3877 ? emitIntrinsicErrorMessage(
3878 Op, "argument out of range or not a multiple of 8", DAG)
3879 : SDValue();
3880 case Intrinsic::loongarch_lsx_vstelm_d:
3881 return (!isShiftedInt<8, 3>(
3882 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3883 !isUInt<1>(Op.getConstantOperandVal(5)))
3884 ? emitIntrinsicErrorMessage(
3885 Op, "argument out of range or not a multiple of 8", DAG)
3886 : SDValue();
3887 }
3888}
3889
3890SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
3891 SelectionDAG &DAG) const {
3892 SDLoc DL(Op);
3893 SDValue Lo = Op.getOperand(0);
3894 SDValue Hi = Op.getOperand(1);
3895 SDValue Shamt = Op.getOperand(2);
3896 EVT VT = Lo.getValueType();
3897
3898 // if Shamt-GRLen < 0: // Shamt < GRLen
3899 // Lo = Lo << Shamt
3900 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
3901 // else:
3902 // Lo = 0
3903 // Hi = Lo << (Shamt-GRLen)
3904
3905 SDValue Zero = DAG.getConstant(0, DL, VT);
3906 SDValue One = DAG.getConstant(1, DL, VT);
3907 SDValue MinusGRLen =
3908 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
3909 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
3910 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
3911 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
3912
3913 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3914 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3915 SDValue ShiftRightLo =
3916 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
3917 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3918 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3919 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
3920
3921 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
3922
3923 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
3924 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3925
3926 SDValue Parts[2] = {Lo, Hi};
3927 return DAG.getMergeValues(Parts, DL);
3928}
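
A scalar sketch of the expansion described in the comment above, not part of this file and assuming GRLen = 32 (the LA32 case). It also shows why the (Lo >>u 1) >>u (GRLen-1 ^ Shamt) term equals Lo >>u (GRLen - Shamt) without ever shifting by the full word width.

#include <cassert>
#include <cstdint>

// Mirrors the DAG expansion: two 32-bit halves, shift amount in [0, 63].
void shl64Parts(uint32_t Lo, uint32_t Hi, unsigned Shamt, uint32_t &OutLo,
                uint32_t &OutHi) {
  if (Shamt < 32) {
    OutLo = Lo << Shamt;
    // (Lo >> 1) >> (31 ^ Shamt) == Lo >> (32 - Shamt), but never shifts by 32.
    OutHi = (Hi << Shamt) | ((Lo >> 1) >> (31 ^ Shamt));
  } else {
    OutLo = 0;
    OutHi = Lo << (Shamt - 32);
  }
}

int main() {
  uint64_t X = 0x123456789abcdef0ULL;
  for (unsigned S = 0; S < 64; ++S) {
    uint32_t Lo, Hi;
    shl64Parts(uint32_t(X), uint32_t(X >> 32), S, Lo, Hi);
    assert(((uint64_t(Hi) << 32) | Lo) == (X << S));
  }
}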
3929
3930SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
3931 SelectionDAG &DAG,
3932 bool IsSRA) const {
3933 SDLoc DL(Op);
3934 SDValue Lo = Op.getOperand(0);
3935 SDValue Hi = Op.getOperand(1);
3936 SDValue Shamt = Op.getOperand(2);
3937 EVT VT = Lo.getValueType();
3938
3939 // SRA expansion:
3940 // if Shamt-GRLen < 0: // Shamt < GRLen
3941 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
3942 // Hi = Hi >>s Shamt
3943 // else:
3944 // Lo = Hi >>s (Shamt-GRLen);
3945 // Hi = Hi >>s (GRLen-1)
3946 //
3947 // SRL expansion:
3948 // if Shamt-GRLen < 0: // Shamt < GRLen
3949 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
3950 // Hi = Hi >>u Shamt
3951 // else:
3952 // Lo = Hi >>u (Shamt-GRLen);
3953 // Hi = 0;
3954
3955 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
3956
3957 SDValue Zero = DAG.getConstant(0, DL, VT);
3958 SDValue One = DAG.getConstant(1, DL, VT);
3959 SDValue MinusGRLen =
3960 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
3961 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
3962 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
3963 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
3964
3965 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
3966 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
3967 SDValue ShiftLeftHi =
3968 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
3969 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
3970 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
3971 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
3972 SDValue HiFalse =
3973 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
3974
3975 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
3976
3977 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
3978 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3979
3980 SDValue Parts[2] = {Lo, Hi};
3981 return DAG.getMergeValues(Parts, DL);
3982}
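
The matching scalar sketch for the right-shift expansion, again not part of this file and assuming GRLen = 32; IsSRA selects the arithmetic (sign-filling) variant. It assumes >> on a negative signed value is an arithmetic shift, which holds on mainstream compilers.

#include <cassert>
#include <cstdint>

void shr64Parts(uint32_t Lo, uint32_t Hi, unsigned Shamt, bool IsSRA,
                uint32_t &OutLo, uint32_t &OutHi) {
  if (Shamt < 32) {
    // (Hi << 1) << (31 ^ Shamt) == Hi << (32 - Shamt) without shifting by 32.
    OutLo = (Lo >> Shamt) | ((Hi << 1) << (31 ^ Shamt));
    OutHi = IsSRA ? uint32_t(int32_t(Hi) >> Shamt) : Hi >> Shamt;
  } else {
    OutLo = IsSRA ? uint32_t(int32_t(Hi) >> (Shamt - 32)) : Hi >> (Shamt - 32);
    OutHi = IsSRA ? uint32_t(int32_t(Hi) >> 31) : 0;
  }
}

int main() {
  int64_t X = -0x123456789abcdef0LL;
  for (unsigned S = 0; S < 64; ++S) {
    uint32_t Lo, Hi;
    shr64Parts(uint32_t(uint64_t(X)), uint32_t(uint64_t(X) >> 32), S,
               /*IsSRA=*/true, Lo, Hi);
    assert(int64_t((uint64_t(Hi) << 32) | Lo) == (X >> S));
  }
}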
3983
3984// Returns the opcode of the target-specific SDNode that implements the 32-bit
3985// form of the given Opcode.
3986 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
3987 switch (Opcode) {
3988 default:
3989 llvm_unreachable("Unexpected opcode");
3990 case ISD::SDIV:
3991 return LoongArchISD::DIV_W;
3992 case ISD::UDIV:
3993 return LoongArchISD::DIV_WU;
3994 case ISD::SREM:
3995 return LoongArchISD::MOD_W;
3996 case ISD::UREM:
3997 return LoongArchISD::MOD_WU;
3998 case ISD::SHL:
3999 return LoongArchISD::SLL_W;
4000 case ISD::SRA:
4001 return LoongArchISD::SRA_W;
4002 case ISD::SRL:
4003 return LoongArchISD::SRL_W;
4004 case ISD::ROTL:
4005 case ISD::ROTR:
4006 return LoongArchISD::ROTR_W;
4007 case ISD::CTTZ:
4008 return LoongArchISD::CTZ_W;
4009 case ISD::CTLZ:
4010 return LoongArchISD::CLZ_W;
4011 }
4012}
4013
4014// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4015// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4016// otherwise be promoted to i64, making it difficult to select the
4017 // SLL_W/.../*W later on, because the fact that the operation was originally of
4018// type i8/i16/i32 is lost.
4019 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4020 unsigned ExtOpc = ISD::ANY_EXTEND) {
4021 SDLoc DL(N);
4022 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4023 SDValue NewOp0, NewRes;
4024
4025 switch (NumOp) {
4026 default:
4027 llvm_unreachable("Unexpected NumOp");
4028 case 1: {
4029 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4030 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4031 break;
4032 }
4033 case 2: {
4034 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4035 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4036 if (N->getOpcode() == ISD::ROTL) {
4037 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4038 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4039 }
4040 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4041 break;
4042 }
4043 // TODO: Handle more NumOp.
4044 }
4045
4046 // ReplaceNodeResults requires we maintain the same type for the return
4047 // value.
4048 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4049}
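
For ISD::ROTL the code above feeds 32 - amount into ROTR_W, relying on the identity rotl(x, n) == rotr(x, 32 - n) for 32-bit values. A quick standalone check of that identity, not part of this file:

#include <cassert>
#include <cstdint>

uint32_t rotr32(uint32_t X, unsigned N) {
  N &= 31;
  return N ? (X >> N) | (X << (32 - N)) : X;
}

uint32_t rotl32(uint32_t X, unsigned N) {
  N &= 31;
  return N ? (X << N) | (X >> (32 - N)) : X;
}

int main() {
  uint32_t X = 0x12345678;
  for (unsigned N = 0; N < 32; ++N)
    assert(rotl32(X, N) == rotr32(X, 32 - N));
}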
4050
4051 // Converts the given 32-bit operation to an i64 operation with sign-extension
4052 // semantics in order to reduce the number of sign-extension instructions.
4053 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4054 SDLoc DL(N);
4055 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4056 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4057 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4058 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4059 DAG.getValueType(MVT::i32));
4060 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4061}
4062
4063 // Helper function that emits an error message for intrinsics with or without a
4064 // chain, and returns an UNDEF and (optionally) the chain as the results.
4065 static void emitErrorAndReplaceIntrinsicResults(
4066 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4067 StringRef ErrorMsg, bool WithChain = true) {
4068 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4069 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4070 if (!WithChain)
4071 return;
4072 Results.push_back(N->getOperand(0));
4073}
4074
4075template <unsigned N>
4076static void
4077 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4078 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4079 unsigned ResOp) {
4080 const StringRef ErrorMsgOOR = "argument out of range";
4081 unsigned Imm = Node->getConstantOperandVal(2);
4082 if (!isUInt<N>(Imm)) {
4083 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4084 /*WithChain=*/false);
4085 return;
4086 }
4087 SDLoc DL(Node);
4088 SDValue Vec = Node->getOperand(1);
4089
4090 SDValue PickElt =
4091 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4092 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4093 DAG.getValueType(Vec.getValueType().getVectorElementType()));
4094 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4095 PickElt.getValue(0)));
4096}
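
A scalar model of the vpickve2gr semantics assumed above, not part of this file: pick vector element Imm, then sign- or zero-extend it to the GR width, which is what the element-pick nodes plus the final truncate express.

#include <cassert>
#include <cstdint>

// Model of vpickve2gr.b / vpickve2gr.bu on a 16 x i8 vector.
int64_t pickSExt(const int8_t V[16], unsigned Imm) { return V[Imm & 15]; }
uint64_t pickZExt(const uint8_t V[16], unsigned Imm) { return V[Imm & 15]; }

int main() {
  int8_t S[16] = {0};
  uint8_t U[16] = {0};
  S[3] = -1;
  U[3] = 0xff;
  assert(pickSExt(S, 3) == -1);   // sign-extended pick
  assert(pickZExt(U, 3) == 0xff); // zero-extended pick
}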
4097
4098 static void replaceVecCondBranchResults(SDNode *N,
4099 SmallVectorImpl<SDValue> &Results,
4100 SelectionDAG &DAG,
4101 const LoongArchSubtarget &Subtarget,
4102 unsigned ResOp) {
4103 SDLoc DL(N);
4104 SDValue Vec = N->getOperand(1);
4105
4106 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4107 Results.push_back(
4108 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4109}
4110
4111static void
4112 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4113 SelectionDAG &DAG,
4114 const LoongArchSubtarget &Subtarget) {
4115 switch (N->getConstantOperandVal(0)) {
4116 default:
4117 llvm_unreachable("Unexpected Intrinsic.");
4118 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4119 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4120 LoongArchISD::VPICK_SEXT_ELT);
4121 break;
4122 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4123 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4124 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4125 LoongArchISD::VPICK_SEXT_ELT);
4126 break;
4127 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4128 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4129 LoongArchISD::VPICK_SEXT_ELT);
4130 break;
4131 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4132 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4133 LoongArchISD::VPICK_ZEXT_ELT);
4134 break;
4135 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4136 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4137 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4138 LoongArchISD::VPICK_ZEXT_ELT);
4139 break;
4140 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4141 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4142 LoongArchISD::VPICK_ZEXT_ELT);
4143 break;
4144 case Intrinsic::loongarch_lsx_bz_b:
4145 case Intrinsic::loongarch_lsx_bz_h:
4146 case Intrinsic::loongarch_lsx_bz_w:
4147 case Intrinsic::loongarch_lsx_bz_d:
4148 case Intrinsic::loongarch_lasx_xbz_b:
4149 case Intrinsic::loongarch_lasx_xbz_h:
4150 case Intrinsic::loongarch_lasx_xbz_w:
4151 case Intrinsic::loongarch_lasx_xbz_d:
4152 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4153 LoongArchISD::VALL_ZERO);
4154 break;
4155 case Intrinsic::loongarch_lsx_bz_v:
4156 case Intrinsic::loongarch_lasx_xbz_v:
4157 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4158 LoongArchISD::VANY_ZERO);
4159 break;
4160 case Intrinsic::loongarch_lsx_bnz_b:
4161 case Intrinsic::loongarch_lsx_bnz_h:
4162 case Intrinsic::loongarch_lsx_bnz_w:
4163 case Intrinsic::loongarch_lsx_bnz_d:
4164 case Intrinsic::loongarch_lasx_xbnz_b:
4165 case Intrinsic::loongarch_lasx_xbnz_h:
4166 case Intrinsic::loongarch_lasx_xbnz_w:
4167 case Intrinsic::loongarch_lasx_xbnz_d:
4168 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4169 LoongArchISD::VALL_NONZERO);
4170 break;
4171 case Intrinsic::loongarch_lsx_bnz_v:
4172 case Intrinsic::loongarch_lasx_xbnz_v:
4173 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4174 LoongArchISD::VANY_NONZERO);
4175 break;
4176 }
4177}
4178
4179 static void replaceCMP_XCHG_128Results(SDNode *N,
4180 SmallVectorImpl<SDValue> &Results,
4181 SelectionDAG &DAG) {
4182 assert(N->getValueType(0) == MVT::i128 &&
4183 "AtomicCmpSwap on types less than 128 should be legal");
4184 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4185
4186 unsigned Opcode;
4187 switch (MemOp->getMergedOrdering()) {
4188 case AtomicOrdering::Acquire:
4189 case AtomicOrdering::AcquireRelease:
4190 case AtomicOrdering::SequentiallyConsistent:
4191 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4192 break;
4193 case AtomicOrdering::Monotonic:
4194 case AtomicOrdering::Release:
4195 Opcode = LoongArch::PseudoCmpXchg128;
4196 break;
4197 default:
4198 llvm_unreachable("Unexpected ordering!");
4199 }
4200
4201 SDLoc DL(N);
4202 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4203 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4204 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4205 NewVal.first, NewVal.second, N->getOperand(0)};
4206
4207 SDNode *CmpSwap = DAG.getMachineNode(
4208 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4209 Ops);
4210 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4211 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4212 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4213 Results.push_back(SDValue(CmpSwap, 3));
4214}
4215 void LoongArchTargetLowering::ReplaceNodeResults(
4216 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4218 SDLoc DL(N);
4219 EVT VT = N->getValueType(0);
4220 switch (N->getOpcode()) {
4221 default:
4222 llvm_unreachable("Don't know how to legalize this operation");
4223 case ISD::ADD:
4224 case ISD::SUB:
4225 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4226 "Unexpected custom legalisation");
4227 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4228 break;
4229 case ISD::SDIV:
4230 case ISD::UDIV:
4231 case ISD::SREM:
4232 case ISD::UREM:
4233 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4234 "Unexpected custom legalisation");
4235 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4236 Subtarget.hasDiv32() && VT == MVT::i32
4237 ? ISD::ANY_EXTEND
4238 : ISD::SIGN_EXTEND));
4239 break;
4240 case ISD::SHL:
4241 case ISD::SRA:
4242 case ISD::SRL:
4243 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4244 "Unexpected custom legalisation");
4245 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4246 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4247 break;
4248 }
4249 break;
4250 case ISD::ROTL:
4251 case ISD::ROTR:
4252 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4253 "Unexpected custom legalisation");
4254 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4255 break;
4256 case ISD::FP_TO_SINT: {
4257 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4258 "Unexpected custom legalisation");
4259 SDValue Src = N->getOperand(0);
4260 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4261 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4262 TargetLowering::TypeSoftenFloat) {
4263 if (!isTypeLegal(Src.getValueType()))
4264 return;
4265 if (Src.getValueType() == MVT::f16)
4266 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4267 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4268 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4269 return;
4270 }
4271 // If the FP type needs to be softened, emit a library call using the 'si'
4272 // version. If we left it to default legalization we'd end up with 'di'.
4273 RTLIB::Libcall LC;
4274 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4275 MakeLibCallOptions CallOptions;
4276 EVT OpVT = Src.getValueType();
4277 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4278 SDValue Chain = SDValue();
4279 SDValue Result;
4280 std::tie(Result, Chain) =
4281 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4282 Results.push_back(Result);
4283 break;
4284 }
4285 case ISD::BITCAST: {
4286 SDValue Src = N->getOperand(0);
4287 EVT SrcVT = Src.getValueType();
4288 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4289 Subtarget.hasBasicF()) {
4290 SDValue Dst =
4291 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4292 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4293 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4294 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4295 DAG.getVTList(MVT::i32, MVT::i32), Src);
4296 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4297 NewReg.getValue(0), NewReg.getValue(1));
4298 Results.push_back(RetReg);
4299 }
4300 break;
4301 }
4302 case ISD::FP_TO_UINT: {
4303 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4304 "Unexpected custom legalisation");
4305 auto &TLI = DAG.getTargetLoweringInfo();
4306 SDValue Tmp1, Tmp2;
4307 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4308 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4309 break;
4310 }
4311 case ISD::BSWAP: {
4312 SDValue Src = N->getOperand(0);
4313 assert((VT == MVT::i16 || VT == MVT::i32) &&
4314 "Unexpected custom legalization");
4315 MVT GRLenVT = Subtarget.getGRLenVT();
4316 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4317 SDValue Tmp;
4318 switch (VT.getSizeInBits()) {
4319 default:
4320 llvm_unreachable("Unexpected operand width");
4321 case 16:
4322 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4323 break;
4324 case 32:
4325 // Only LA64 will get to here due to the size mismatch between VT and
4326 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
4327 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4328 break;
4329 }
4330 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4331 break;
4332 }
4333 case ISD::BITREVERSE: {
4334 SDValue Src = N->getOperand(0);
4335 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4336 "Unexpected custom legalization");
4337 MVT GRLenVT = Subtarget.getGRLenVT();
4338 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4339 SDValue Tmp;
4340 switch (VT.getSizeInBits()) {
4341 default:
4342 llvm_unreachable("Unexpected operand width");
4343 case 8:
4344 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4345 break;
4346 case 32:
4347 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4348 break;
4349 }
4350 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4351 break;
4352 }
4353 case ISD::CTLZ:
4354 case ISD::CTTZ: {
4355 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4356 "Unexpected custom legalisation");
4357 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4358 break;
4359 }
4360 case ISD::INTRINSIC_W_CHAIN: {
4361 SDValue Chain = N->getOperand(0);
4362 SDValue Op2 = N->getOperand(2);
4363 MVT GRLenVT = Subtarget.getGRLenVT();
4364 const StringRef ErrorMsgOOR = "argument out of range";
4365 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4366 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4367
4368 switch (N->getConstantOperandVal(1)) {
4369 default:
4370 llvm_unreachable("Unexpected Intrinsic.");
4371 case Intrinsic::loongarch_movfcsr2gr: {
4372 if (!Subtarget.hasBasicF()) {
4373 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4374 return;
4375 }
4376 unsigned Imm = Op2->getAsZExtVal();
4377 if (!isUInt<2>(Imm)) {
4378 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4379 return;
4380 }
4381 SDValue MOVFCSR2GRResults = DAG.getNode(
4382 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4383 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4384 Results.push_back(
4385 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4386 Results.push_back(MOVFCSR2GRResults.getValue(1));
4387 break;
4388 }
4389#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4390 case Intrinsic::loongarch_##NAME: { \
4391 SDValue NODE = DAG.getNode( \
4392 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4393 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4394 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4395 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4396 Results.push_back(NODE.getValue(1)); \
4397 break; \
4398 }
4399 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4400 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4401 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4402 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4403 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4404 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4405#undef CRC_CASE_EXT_BINARYOP
4406
4407#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4408 case Intrinsic::loongarch_##NAME: { \
4409 SDValue NODE = DAG.getNode( \
4410 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4411 {Chain, Op2, \
4412 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4413 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4414 Results.push_back(NODE.getValue(1)); \
4415 break; \
4416 }
4417 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4418 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4419#undef CRC_CASE_EXT_UNARYOP
4420#define CSR_CASE(ID) \
4421 case Intrinsic::loongarch_##ID: { \
4422 if (!Subtarget.is64Bit()) \
4423 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4424 break; \
4425 }
4426 CSR_CASE(csrrd_d);
4427 CSR_CASE(csrwr_d);
4428 CSR_CASE(csrxchg_d);
4429 CSR_CASE(iocsrrd_d);
4430#undef CSR_CASE
4431 case Intrinsic::loongarch_csrrd_w: {
4432 unsigned Imm = Op2->getAsZExtVal();
4433 if (!isUInt<14>(Imm)) {
4434 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4435 return;
4436 }
4437 SDValue CSRRDResults =
4438 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4439 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4440 Results.push_back(
4441 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4442 Results.push_back(CSRRDResults.getValue(1));
4443 break;
4444 }
4445 case Intrinsic::loongarch_csrwr_w: {
4446 unsigned Imm = N->getConstantOperandVal(3);
4447 if (!isUInt<14>(Imm)) {
4448 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4449 return;
4450 }
4451 SDValue CSRWRResults =
4452 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4453 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4454 DAG.getConstant(Imm, DL, GRLenVT)});
4455 Results.push_back(
4456 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4457 Results.push_back(CSRWRResults.getValue(1));
4458 break;
4459 }
4460 case Intrinsic::loongarch_csrxchg_w: {
4461 unsigned Imm = N->getConstantOperandVal(4);
4462 if (!isUInt<14>(Imm)) {
4463 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4464 return;
4465 }
4466 SDValue CSRXCHGResults = DAG.getNode(
4467 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4468 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4469 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4470 DAG.getConstant(Imm, DL, GRLenVT)});
4471 Results.push_back(
4472 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4473 Results.push_back(CSRXCHGResults.getValue(1));
4474 break;
4475 }
4476#define IOCSRRD_CASE(NAME, NODE) \
4477 case Intrinsic::loongarch_##NAME: { \
4478 SDValue IOCSRRDResults = \
4479 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4480 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4481 Results.push_back( \
4482 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4483 Results.push_back(IOCSRRDResults.getValue(1)); \
4484 break; \
4485 }
4486 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4487 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4488 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4489#undef IOCSRRD_CASE
4490 case Intrinsic::loongarch_cpucfg: {
4491 SDValue CPUCFGResults =
4492 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4493 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4494 Results.push_back(
4495 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4496 Results.push_back(CPUCFGResults.getValue(1));
4497 break;
4498 }
4499 case Intrinsic::loongarch_lddir_d: {
4500 if (!Subtarget.is64Bit()) {
4501 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4502 return;
4503 }
4504 break;
4505 }
4506 }
4507 break;
4508 }
4509 case ISD::READ_REGISTER: {
4510 if (Subtarget.is64Bit())
4511 DAG.getContext()->emitError(
4512 "On LA64, only 64-bit registers can be read.");
4513 else
4514 DAG.getContext()->emitError(
4515 "On LA32, only 32-bit registers can be read.");
4516 Results.push_back(DAG.getUNDEF(VT));
4517 Results.push_back(N->getOperand(0));
4518 break;
4519 }
4520 case ISD::INTRINSIC_WO_CHAIN: {
4521 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4522 break;
4523 }
4524 case ISD::LROUND: {
4525 SDValue Op0 = N->getOperand(0);
4526 EVT OpVT = Op0.getValueType();
4527 RTLIB::Libcall LC =
4528 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4529 MakeLibCallOptions CallOptions;
4530 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
4531 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4532 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4533 Results.push_back(Result);
4534 break;
4535 }
4536 case ISD::ATOMIC_CMP_SWAP: {
4537 replaceCMP_XCHG_128Results(N, Results, DAG);
4538 break;
4539 }
4540 case ISD::TRUNCATE: {
4541 MVT VT = N->getSimpleValueType(0);
4542 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4543 return;
4544
4545 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4546 SDValue In = N->getOperand(0);
4547 EVT InVT = In.getValueType();
4548 EVT InEltVT = InVT.getVectorElementType();
4549 EVT EltVT = VT.getVectorElementType();
4550 unsigned MinElts = VT.getVectorNumElements();
4551 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4552 unsigned InBits = InVT.getSizeInBits();
4553
4554 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4555 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4556 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4557 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4558 for (unsigned I = 0; I < MinElts; ++I)
4559 TruncMask[I] = Scale * I;
4560
4561 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4562 MVT SVT = In.getSimpleValueType().getScalarType();
4563 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4564 SDValue WidenIn =
4565 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4566 DAG.getVectorIdxConstant(0, DL));
4567 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4568 "Illegal vector type in truncation");
4569 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4570 Results.push_back(
4571 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4572 return;
4573 }
4574 }
4575
4576 break;
4577 }
4578 }
4579}
4580
4581 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4582 TargetLowering::DAGCombinerInfo &DCI,
4583 const LoongArchSubtarget &Subtarget) {
4584 if (DCI.isBeforeLegalizeOps())
4585 return SDValue();
4586
4587 SDValue FirstOperand = N->getOperand(0);
4588 SDValue SecondOperand = N->getOperand(1);
4589 unsigned FirstOperandOpc = FirstOperand.getOpcode();
4590 EVT ValTy = N->getValueType(0);
4591 SDLoc DL(N);
4592 uint64_t lsb, msb;
4593 unsigned SMIdx, SMLen;
4594 ConstantSDNode *CN;
4595 SDValue NewOperand;
4596 MVT GRLenVT = Subtarget.getGRLenVT();
4597
4598 // BSTRPICK requires the 32S feature.
4599 if (!Subtarget.has32S())
4600 return SDValue();
4601
4602 // Op's second operand must be a shifted mask.
4603 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
4604 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
4605 return SDValue();
4606
4607 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4608 // Pattern match BSTRPICK.
4609 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
4610 // => BSTRPICK $dst, $src, msb, lsb
4611 // where msb = lsb + len - 1
4612
4613 // The second operand of the shift must be an immediate.
4614 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
4615 return SDValue();
4616
4617 lsb = CN->getZExtValue();
4618
4619 // Return if the shifted mask does not start at bit 0 or the sum of its
4620 // length and lsb exceeds the word's size.
4621 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4622 return SDValue();
4623
4624 NewOperand = FirstOperand.getOperand(0);
4625 } else {
4626 // Pattern match BSTRPICK.
4627 // $dst = and $src, (2**len - 1), if len > 12
4628 // => BSTRPICK $dst, $src, msb, lsb
4629 // where lsb = 0 and msb = len - 1
4630
4631 // If the mask is <= 0xfff, andi can be used instead.
4632 if (CN->getZExtValue() <= 0xfff)
4633 return SDValue();
4634
4635 // Return if the mask would extend past the value's bit width.
4636 if (SMIdx + SMLen > ValTy.getSizeInBits())
4637 return SDValue();
4638
4639 if (SMIdx > 0) {
4640 // Omit if the constant has more than 2 uses. This is a conservative
4641 // decision. Whether it is a win depends on the HW microarchitecture.
4642 // However it should always be better for 1 and 2 uses.
4643 if (CN->use_size() > 2)
4644 return SDValue();
4645 // Return if the constant can be composed by a single LU12I.W.
4646 if ((CN->getZExtValue() & 0xfff) == 0)
4647 return SDValue();
4648 // Return if the constant can be composed by a single ADDI with
4649 // the zero register.
4650 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
4651 return SDValue();
4652 }
4653
4654 lsb = SMIdx;
4655 NewOperand = FirstOperand;
4656 }
4657
4658 msb = lsb + SMLen - 1;
4659 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
4660 DAG.getConstant(msb, DL, GRLenVT),
4661 DAG.getConstant(lsb, DL, GRLenVT));
4662 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
4663 return NR0;
4664 // Try to optimize to
4665 // bstrpick $Rd, $Rs, msb, lsb
4666 // slli $Rd, $Rd, lsb
4667 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
4668 DAG.getConstant(lsb, DL, GRLenVT));
4669}
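
A scalar model of the BSTRPICK semantics this combine targets, not part of this file: extracting bits [msb:lsb], which is exactly what and ((srl/sra) x, lsb), (2**len - 1) or a plain shifted-mask and computes.

#include <cassert>
#include <cstdint>

// bstrpick.d rd, rj, msb, lsb: rd = (rj >> lsb) & ((1 << (msb - lsb + 1)) - 1)
uint64_t bstrpick(uint64_t X, unsigned Msb, unsigned Lsb) {
  unsigned Len = Msb - Lsb + 1;
  uint64_t Mask = Len == 64 ? ~0ULL : ((1ULL << Len) - 1);
  return (X >> Lsb) & Mask;
}

int main() {
  uint64_t X = 0xdeadbeefcafef00dULL;
  // and (srl x, 8), 0xffff  ==  bstrpick x, 23, 8
  assert(((X >> 8) & 0xffff) == bstrpick(X, 23, 8));
  // and x, 0xfff00  ==  (bstrpick x, 19, 8) << 8   (the shifted-mask case)
  assert((X & 0xfff00) == (bstrpick(X, 19, 8) << 8));
}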
4670
4671 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
4672 TargetLowering::DAGCombinerInfo &DCI,
4673 const LoongArchSubtarget &Subtarget) {
4674 // BSTRPICK requires the 32S feature.
4675 if (!Subtarget.has32S())
4676 return SDValue();
4677
4678 if (DCI.isBeforeLegalizeOps())
4679 return SDValue();
4680
4681 // $dst = srl (and $src, Mask), Shamt
4682 // =>
4683 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
4684 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
4685 //
4686
4687 SDValue FirstOperand = N->getOperand(0);
4688 ConstantSDNode *CN;
4689 EVT ValTy = N->getValueType(0);
4690 SDLoc DL(N);
4691 MVT GRLenVT = Subtarget.getGRLenVT();
4692 unsigned MaskIdx, MaskLen;
4693 uint64_t Shamt;
4694
4695 // The first operand must be an AND and the second operand of the AND must be
4696 // a shifted mask.
4697 if (FirstOperand.getOpcode() != ISD::AND ||
4698 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
4699 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
4700 return SDValue();
4701
4702 // The second operand (shift amount) must be an immediate.
4703 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
4704 return SDValue();
4705
4706 Shamt = CN->getZExtValue();
4707 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
4708 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
4709 FirstOperand->getOperand(0),
4710 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4711 DAG.getConstant(Shamt, DL, GRLenVT));
4712
4713 return SDValue();
4714}
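
A quick standalone check of the equivalence the SRL combine above relies on (Mask is a shifted mask and MaskIdx <= Shamt <= MaskIdx + MaskLen - 1), not part of this file:

#include <cassert>
#include <cstdint>

// Same bstrpick model as in the previous sketch; assumes msb - lsb + 1 < 64.
uint64_t bstrpick(uint64_t X, unsigned Msb, unsigned Lsb) {
  return (X >> Lsb) & ((1ULL << (Msb - Lsb + 1)) - 1);
}

int main() {
  uint64_t X = 0x123456789abcdef0ULL;
  // Mask = 0x0ffff000: MaskIdx = 12, MaskLen = 16.
  // srl (and x, 0x0ffff000), 14  ==  bstrpick x, 27, 14   (12 <= 14 <= 27)
  assert(((X & 0x0ffff000ULL) >> 14) == bstrpick(X, 27, 14));
}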
4715
4716// Helper to peek through bitops/trunc/setcc to determine size of source vector.
4717// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
4718static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
4719 unsigned Depth) {
4720 // Limit recursion.
4721 if (Depth >= SelectionDAG::MaxRecursionDepth)
4722 return false;
4723 switch (Src.getOpcode()) {
4724 case ISD::SETCC:
4725 case ISD::TRUNCATE:
4726 return Src.getOperand(0).getValueSizeInBits() == Size;
4727 case ISD::FREEZE:
4728 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
4729 case ISD::AND:
4730 case ISD::XOR:
4731 case ISD::OR:
4732 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
4733 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
4734 case ISD::SELECT:
4735 case ISD::VSELECT:
4736 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
4737 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
4738 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
4739 case ISD::BUILD_VECTOR:
4740 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
4741 ISD::isBuildVectorAllOnes(Src.getNode());
4742 }
4743 return false;
4744}
4745
4746// Helper to push sign extension of vXi1 SETCC result through bitops.
4747 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
4748 SDValue Src, const SDLoc &DL) {
4749 switch (Src.getOpcode()) {
4750 case ISD::SETCC:
4751 case ISD::FREEZE:
4752 case ISD::TRUNCATE:
4753 case ISD::BUILD_VECTOR:
4754 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4755 case ISD::AND:
4756 case ISD::XOR:
4757 case ISD::OR:
4758 return DAG.getNode(
4759 Src.getOpcode(), DL, SExtVT,
4760 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
4761 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
4762 case ISD::SELECT:
4763 case ISD::VSELECT:
4764 return DAG.getSelect(
4765 DL, SExtVT, Src.getOperand(0),
4766 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
4767 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
4768 }
4769 llvm_unreachable("Unexpected node type for vXi1 sign extension");
4770}
4771
4772static SDValue
4773 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
4774 TargetLowering::DAGCombinerInfo &DCI,
4775 const LoongArchSubtarget &Subtarget) {
4776 SDLoc DL(N);
4777 EVT VT = N->getValueType(0);
4778 SDValue Src = N->getOperand(0);
4779 EVT SrcVT = Src.getValueType();
4780
4781 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
4782 return SDValue();
4783
4784 bool UseLASX;
4785 unsigned Opc = ISD::DELETED_NODE;
4786 EVT CmpVT = Src.getOperand(0).getValueType();
4787 EVT EltVT = CmpVT.getVectorElementType();
4788
4789 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
4790 UseLASX = false;
4791 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
4792 CmpVT.getSizeInBits() == 256)
4793 UseLASX = true;
4794 else
4795 return SDValue();
4796
4797 SDValue SrcN1 = Src.getOperand(1);
4798 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
4799 default:
4800 break;
4801 case ISD::SETEQ:
4802 // x == 0 => not (vmsknez.b x)
4803 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4804 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
4805 break;
4806 case ISD::SETGT:
4807 // x > -1 => vmskgez.b x
4808 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
4809 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4810 break;
4811 case ISD::SETGE:
4812 // x >= 0 => vmskgez.b x
4813 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4814 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4815 break;
4816 case ISD::SETLT:
4817 // x < 0 => vmskltz.{b,h,w,d} x
4818 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
4819 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4820 EltVT == MVT::i64))
4821 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4822 break;
4823 case ISD::SETLE:
4824 // x <= -1 => vmskltz.{b,h,w,d} x
4825 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
4826 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4827 EltVT == MVT::i64))
4828 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4829 break;
4830 case ISD::SETNE:
4831 // x != 0 => vmsknez.b x
4832 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4833 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
4834 break;
4835 }
4836
4837 if (Opc == ISD::DELETED_NODE)
4838 return SDValue();
4839
4840 SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
4841 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
4842 V = DAG.getZExtOrTrunc(V, DL, T);
4843 return DAG.getBitcast(VT, V);
4844}
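
A scalar model of the [X]VMSK* result this combine produces, not part of this file and assuming vmskltz.b-style semantics (bit i of the result is the sign bit of lane i), which is how a <16 x i1> setlt result becomes an i16 mask:

#include <cassert>
#include <cstdint>

// vmskltz.b-style: bit i of the result is the sign bit of lane i.
uint32_t vmskltz_b(const int8_t V[16]) {
  uint32_t M = 0;
  for (unsigned I = 0; I < 16; ++I)
    M |= uint32_t(V[I] < 0) << I;
  return M;
}

int main() {
  int8_t V[16] = {0};
  V[0] = -1;
  V[5] = -128;
  V[7] = 42;
  // Equivalent to bitcasting the <16 x i1> result of (setlt V, 0) to i16.
  assert(vmskltz_b(V) == 0b100001u);
}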
4845
4846 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
4847 TargetLowering::DAGCombinerInfo &DCI,
4848 const LoongArchSubtarget &Subtarget) {
4849 SDLoc DL(N);
4850 EVT VT = N->getValueType(0);
4851 SDValue Src = N->getOperand(0);
4852 EVT SrcVT = Src.getValueType();
4853
4854 if (!DCI.isBeforeLegalizeOps())
4855 return SDValue();
4856
4857 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
4858 return SDValue();
4859
4860 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
4861 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
4862 if (Res)
4863 return Res;
4864
4865 // Generate vXi1 using [X]VMSKLTZ
4866 MVT SExtVT;
4867 unsigned Opc;
4868 bool UseLASX = false;
4869 bool PropagateSExt = false;
4870
4871 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
4872 EVT CmpVT = Src.getOperand(0).getValueType();
4873 if (CmpVT.getSizeInBits() > 256)
4874 return SDValue();
4875 }
4876
4877 switch (SrcVT.getSimpleVT().SimpleTy) {
4878 default:
4879 return SDValue();
4880 case MVT::v2i1:
4881 SExtVT = MVT::v2i64;
4882 break;
4883 case MVT::v4i1:
4884 SExtVT = MVT::v4i32;
4885 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4886 SExtVT = MVT::v4i64;
4887 UseLASX = true;
4888 PropagateSExt = true;
4889 }
4890 break;
4891 case MVT::v8i1:
4892 SExtVT = MVT::v8i16;
4893 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4894 SExtVT = MVT::v8i32;
4895 UseLASX = true;
4896 PropagateSExt = true;
4897 }
4898 break;
4899 case MVT::v16i1:
4900 SExtVT = MVT::v16i8;
4901 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4902 SExtVT = MVT::v16i16;
4903 UseLASX = true;
4904 PropagateSExt = true;
4905 }
4906 break;
4907 case MVT::v32i1:
4908 SExtVT = MVT::v32i8;
4909 UseLASX = true;
4910 break;
4911 };
4912 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
4913 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4914
4915 SDValue V;
4916 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
4917 if (Src.getSimpleValueType() == MVT::v32i8) {
4918 SDValue Lo, Hi;
4919 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
4920 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
4921 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
4922 Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
4923 DAG.getConstant(16, DL, MVT::i8));
4924 V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
4925 } else if (UseLASX) {
4926 return SDValue();
4927 }
4928 }
4929
4930 if (!V) {
4931 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4932 V = DAG.getNode(Opc, DL, MVT::i64, Src);
4933 }
4934
4935 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
4936 V = DAG.getZExtOrTrunc(V, DL, T);
4937 return DAG.getBitcast(VT, V);
4938}
4939
4940 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
4941 TargetLowering::DAGCombinerInfo &DCI,
4942 const LoongArchSubtarget &Subtarget) {
4943 MVT GRLenVT = Subtarget.getGRLenVT();
4944 EVT ValTy = N->getValueType(0);
4945 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4946 ConstantSDNode *CN0, *CN1;
4947 SDLoc DL(N);
4948 unsigned ValBits = ValTy.getSizeInBits();
4949 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
4950 unsigned Shamt;
4951 bool SwapAndRetried = false;
4952
4953 // BSTRPICK requires the 32S feature.
4954 if (!Subtarget.has32S())
4955 return SDValue();
4956
4957 if (DCI.isBeforeLegalizeOps())
4958 return SDValue();
4959
4960 if (ValBits != 32 && ValBits != 64)
4961 return SDValue();
4962
4963Retry:
4964 // 1st pattern to match BSTRINS:
4965 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
4966 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
4967 // =>
4968 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
4969 if (N0.getOpcode() == ISD::AND &&
4970 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4971 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4972 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
4973 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4974 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
4975 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
4976 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
4977 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
4978 (MaskIdx0 + MaskLen0 <= ValBits)) {
4979 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
4980 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4981 N1.getOperand(0).getOperand(0),
4982 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
4983 DAG.getConstant(MaskIdx0, DL, GRLenVT));
4984 }
4985
4986 // 2nd pattern to match BSTRINS:
4987 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
4988 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
4989 // =>
4990 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
4991 if (N0.getOpcode() == ISD::AND &&
4992 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4993 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4994 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
4995 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4996 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
4997 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
4998 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
4999 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5000 (MaskIdx0 + MaskLen0 <= ValBits)) {
5001 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5002 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5003 N1.getOperand(0).getOperand(0),
5004 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5005 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5006 }
5007
5008 // 3rd pattern to match BSTRINS:
5009 // R = or (and X, mask0), (and Y, mask1)
5010 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5011 // =>
5012 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5013 // where msb = lsb + size - 1
5014 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5015 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5016 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5017 (MaskIdx0 + MaskLen0 <= 64) &&
5018 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5019 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5020 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5021 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5022 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5023 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5024 DAG.getConstant(ValBits == 32
5025 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5026 : (MaskIdx0 + MaskLen0 - 1),
5027 DL, GRLenVT),
5028 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5029 }
5030
5031 // 4th pattern to match BSTRINS:
5032 // R = or (and X, mask), (shl Y, shamt)
5033 // where mask = (2**shamt - 1)
5034 // =>
5035 // R = BSTRINS X, Y, ValBits - 1, shamt
5036 // where ValBits = 32 or 64
5037 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5038 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5039 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5040 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5041 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5042 (MaskIdx0 + MaskLen0 <= ValBits)) {
5043 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5044 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5045 N1.getOperand(0),
5046 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5047 DAG.getConstant(Shamt, DL, GRLenVT));
5048 }
5049
5050 // 5th pattern to match BSTRINS:
5051 // R = or (and X, mask), const
5052 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5053 // =>
5054 // R = BSTRINS X, (const >> lsb), msb, lsb
5055 // where msb = lsb + size - 1
5056 if (N0.getOpcode() == ISD::AND &&
5057 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5058 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5059 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5060 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5061 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5062 return DAG.getNode(
5063 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5064 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5065 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5066 : (MaskIdx0 + MaskLen0 - 1),
5067 DL, GRLenVT),
5068 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5069 }
5070
5071 // 6th pattern.
5072 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5073 // by the incoming bits are known to be zero.
5074 // =>
5075 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5076 //
5077 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5078 // pattern is more common than the 1st. So we put the 1st before the 6th in
5079 // order to match as many nodes as possible.
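// For example (illustrative values): with mask = 0xff (MaskLen = 8),
// shamt = 16, and bits 23:16 of b known to be zero,
//   a = b | ((c & 0xff) << 16)
// becomes BSTRINS b, c, 23, 16.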
5080 ConstantSDNode *CNMask, *CNShamt;
5081 unsigned MaskIdx, MaskLen;
5082 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5083 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5084 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5085 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5086 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5087 Shamt = CNShamt->getZExtValue();
5088 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5089 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5090 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5091 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5092 N1.getOperand(0).getOperand(0),
5093 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5094 DAG.getConstant(Shamt, DL, GRLenVT));
5095 }
5096 }
5097
5098 // 7th pattern.
5099 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5100 // overwritten by the incoming bits are known to be zero.
5101 // =>
5102 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5103 //
5104 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5105 // before the 7th in order to match as many nodes as possible.
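// For example (illustrative values): with shifted_mask = 0xff0000
// (MaskIdx = 16, MaskLen = 8), shamt = 16, and bits 23:16 of b known to be
// zero, a = b | ((c << 16) & 0xff0000) becomes BSTRINS b, c, 23, 16.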
5106 if (N1.getOpcode() == ISD::AND &&
5107 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5108 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5109 N1.getOperand(0).getOpcode() == ISD::SHL &&
5110 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5111 CNShamt->getZExtValue() == MaskIdx) {
5112 APInt ShMask(ValBits, CNMask->getZExtValue());
5113 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5114 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5115 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5116 N1.getOperand(0).getOperand(0),
5117 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5118 DAG.getConstant(MaskIdx, DL, GRLenVT));
5119 }
5120 }
5121
5122 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5123 if (!SwapAndRetried) {
5124 std::swap(N0, N1);
5125 SwapAndRetried = true;
5126 goto Retry;
5127 }
5128
5129 SwapAndRetried = false;
5130Retry2:
5131 // 8th pattern.
5132 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5133 // the incoming bits are known to be zero.
5134 // =>
5135 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5136 //
5137 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5138 // we put it here in order to match as many nodes as possible or generate fewer
5139 // instructions.
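// For example (illustrative values): with shifted_mask = 0xf00
// (MaskIdx = 8, MaskLen = 4) and bits 11:8 of b known to be zero,
//   a = b | (c & 0xf00)
// becomes BSTRINS b, (c >> 8), 11, 8.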
5140 if (N1.getOpcode() == ISD::AND &&
5141 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5142 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5143 APInt ShMask(ValBits, CNMask->getZExtValue());
5144 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5145 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5146 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5147 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5148 N1->getOperand(0),
5149 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5150 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5151 DAG.getConstant(MaskIdx, DL, GRLenVT));
5152 }
5153 }
5154 // Swap N0/N1 and retry.
5155 if (!SwapAndRetried) {
5156 std::swap(N0, N1);
5157 SwapAndRetried = true;
5158 goto Retry2;
5159 }
5160
5161 return SDValue();
5162}
5163
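// Return true if V is known to be an i8- or i16-sized value (a narrow load,
// or a value asserted to be sign-/zero-extended from i8/i16), and report the
// corresponding extension kind in ExtType.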
5164static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5165 ExtType = ISD::NON_EXTLOAD;
5166
5167 switch (V.getNode()->getOpcode()) {
5168 case ISD::LOAD: {
5169 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5170 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5171 (LoadNode->getMemoryVT() == MVT::i16)) {
5172 ExtType = LoadNode->getExtensionType();
5173 return true;
5174 }
5175 return false;
5176 }
5177 case ISD::AssertSext: {
5178 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5179 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5180 ExtType = ISD::SEXTLOAD;
5181 return true;
5182 }
5183 return false;
5184 }
5185 case ISD::AssertZext: {
5186 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5187 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5188 ExtType = ISD::ZEXTLOAD;
5189 return true;
5190 }
5191 return false;
5192 }
5193 default:
5194 return false;
5195 }
5196
5197 return false;
5198}
5199
5200// Eliminate redundant truncation and zero-extension nodes.
5201// * Case 1:
5202// +------------+ +------------+ +------------+
5203// | Input1 | | Input2 | | CC |
5204// +------------+ +------------+ +------------+
5205// | | |
5206// V V +----+
5207// +------------+ +------------+ |
5208// | TRUNCATE | | TRUNCATE | |
5209// +------------+ +------------+ |
5210// | | |
5211// V V |
5212// +------------+ +------------+ |
5213// | ZERO_EXT | | ZERO_EXT | |
5214// +------------+ +------------+ |
5215// | | |
5216// | +-------------+ |
5217// V V | |
5218// +----------------+ | |
5219// | AND | | |
5220// +----------------+ | |
5221// | | |
5222// +---------------+ | |
5223// | | |
5224// V V V
5225// +-------------+
5226// | CMP |
5227// +-------------+
5228// * Case 2:
5229// +------------+ +------------+ +-------------+ +------------+ +------------+
5230// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5231// +------------+ +------------+ +-------------+ +------------+ +------------+
5232// | | | | |
5233// V | | | |
5234// +------------+ | | | |
5235// | XOR |<---------------------+ | |
5236// +------------+ | | |
5237// | | | |
5238// V V +---------------+ |
5239// +------------+ +------------+ | |
5240// | TRUNCATE | | TRUNCATE | | +-------------------------+
5241// +------------+ +------------+ | |
5242// | | | |
5243// V V | |
5244// +------------+ +------------+ | |
5245// | ZERO_EXT | | ZERO_EXT | | |
5246// +------------+ +------------+ | |
5247// | | | |
5248// V V | |
5249// +----------------+ | |
5250// | AND | | |
5251// +----------------+ | |
5252// | | |
5253// +---------------+ | |
5254// | | |
5255// V V V
5256// +-------------+
5257// | CMP |
5258// +-------------+
5259static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5260 TargetLowering::DAGCombinerInfo &DCI,
5261 const LoongArchSubtarget &Subtarget) {
5262 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5263
5264 SDNode *AndNode = N->getOperand(0).getNode();
5265 if (AndNode->getOpcode() != ISD::AND)
5266 return SDValue();
5267
5268 SDValue AndInputValue2 = AndNode->getOperand(1);
5269 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5270 return SDValue();
5271
5272 SDValue CmpInputValue = N->getOperand(1);
5273 SDValue AndInputValue1 = AndNode->getOperand(0);
5274 if (AndInputValue1.getOpcode() == ISD::XOR) {
5275 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5276 return SDValue();
5277 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5278 if (!CN || CN->getSExtValue() != -1)
5279 return SDValue();
5280 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5281 if (!CN || CN->getSExtValue() != 0)
5282 return SDValue();
5283 AndInputValue1 = AndInputValue1.getOperand(0);
5284 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5285 return SDValue();
5286 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5287 if (AndInputValue2 != CmpInputValue)
5288 return SDValue();
5289 } else {
5290 return SDValue();
5291 }
5292
5293 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5294 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5295 return SDValue();
5296
5297 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5298 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5299 return SDValue();
5300
5301 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5302 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5303 ISD::LoadExtType ExtType1;
5304 ISD::LoadExtType ExtType2;
5305
5306 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5307 !checkValueWidth(TruncInputValue2, ExtType2))
5308 return SDValue();
5309
5310 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5311 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5312 return SDValue();
5313
5314 if ((ExtType2 != ISD::ZEXTLOAD) &&
5315 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5316 return SDValue();
5317
5318 // These truncation and zero-extension nodes are not necessary, remove them.
5319 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5320 TruncInputValue1, TruncInputValue2);
5321 SDValue NewSetCC =
5322 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5323 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5324 return SDValue(N, 0);
5325}
5326
5327// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5328static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5329 TargetLowering::DAGCombinerInfo &DCI,
5330 const LoongArchSubtarget &Subtarget) {
5331 if (DCI.isBeforeLegalizeOps())
5332 return SDValue();
5333
5334 SDValue Src = N->getOperand(0);
5335 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5336 return SDValue();
5337
5338 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5339 Src.getOperand(0));
5340}
5341
5342// Perform common combines for BR_CC and SELECT_CC conditions.
5343static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5344 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5345 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5346
5347 // Since an arithmetic right shift always preserves the sign bit, the
5348 // shift can be omitted.
5349 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5350 // setge (sra X, N), 0 -> setge X, 0
5351 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5352 LHS.getOpcode() == ISD::SRA) {
5353 LHS = LHS.getOperand(0);
5354 return true;
5355 }
5356
5357 if (!ISD::isIntEqualitySetCC(CCVal))
5358 return false;
5359
5360 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5361 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5362 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5363 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5364 // If we're looking for eq 0 instead of ne 0, we need to invert the
5365 // condition.
5366 bool Invert = CCVal == ISD::SETEQ;
5367 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5368 if (Invert)
5369 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5370
5371 RHS = LHS.getOperand(1);
5372 LHS = LHS.getOperand(0);
5373 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5374
5375 CC = DAG.getCondCode(CCVal);
5376 return true;
5377 }
5378
5379 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
5380 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5381 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5382 SDValue LHS0 = LHS.getOperand(0);
5383 if (LHS0.getOpcode() == ISD::AND &&
5384 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5385 uint64_t Mask = LHS0.getConstantOperandVal(1);
5386 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5387 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5388 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5389 CC = DAG.getCondCode(CCVal);
5390
5391 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5392 LHS = LHS0.getOperand(0);
5393 if (ShAmt != 0)
5394 LHS =
5395 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5396 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5397 return true;
5398 }
5399 }
5400 }
5401
5402 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5403 // This can occur when legalizing some floating point comparisons.
5404 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5405 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5406 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5407 CC = DAG.getCondCode(CCVal);
5408 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5409 return true;
5410 }
5411
5412 return false;
5413}
5414
5415static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
5416 TargetLowering::DAGCombinerInfo &DCI,
5417 const LoongArchSubtarget &Subtarget) {
5418 SDValue LHS = N->getOperand(1);
5419 SDValue RHS = N->getOperand(2);
5420 SDValue CC = N->getOperand(3);
5421 SDLoc DL(N);
5422
5423 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5424 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5425 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5426
5427 return SDValue();
5428}
5429
5430static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
5431 TargetLowering::DAGCombinerInfo &DCI,
5432 const LoongArchSubtarget &Subtarget) {
5433 // Transform
5434 SDValue LHS = N->getOperand(0);
5435 SDValue RHS = N->getOperand(1);
5436 SDValue CC = N->getOperand(2);
5437 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5438 SDValue TrueV = N->getOperand(3);
5439 SDValue FalseV = N->getOperand(4);
5440 SDLoc DL(N);
5441 EVT VT = N->getValueType(0);
5442
5443 // If the True and False values are the same, we don't need a select_cc.
5444 if (TrueV == FalseV)
5445 return TrueV;
5446
5447 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5448 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
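// This works because x >> (GRLEN - 1) is all-ones exactly when x < 0 and
// all-zeros otherwise, so the AND selects either (y - z) or 0 before the
// final add of z (and symmetrically for the SETGE form).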
5449 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
5450 isNullConstant(RHS) &&
5451 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5452 if (CCVal == ISD::CondCode::SETGE)
5453 std::swap(TrueV, FalseV);
5454
5455 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5456 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
5457 // Only handle immediates that fit in simm12; values outside this range are
5458 // better materialized in a register.
5459 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5460 isInt<12>(TrueSImm - FalseSImm)) {
5461 SDValue SRA =
5462 DAG.getNode(ISD::SRA, DL, VT, LHS,
5463 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5464 SDValue AND =
5465 DAG.getNode(ISD::AND, DL, VT, SRA,
5466 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5467 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5468 }
5469
5470 if (CCVal == ISD::CondCode::SETGE)
5471 std::swap(TrueV, FalseV);
5472 }
5473
5474 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5475 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5476 {LHS, RHS, CC, TrueV, FalseV});
5477
5478 return SDValue();
5479}
5480
5481template <unsigned N>
5482static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5483 SelectionDAG &DAG,
5484 const LoongArchSubtarget &Subtarget,
5485 bool IsSigned = false) {
5486 SDLoc DL(Node);
5487 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5488 // Check the ImmArg.
5489 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5490 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5491 DAG.getContext()->emitError(Node->getOperationName(0) +
5492 ": argument out of range.");
5493 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5494 }
5495 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5496}
5497
5498template <unsigned N>
5499static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5500 SelectionDAG &DAG, bool IsSigned = false) {
5501 SDLoc DL(Node);
5502 EVT ResTy = Node->getValueType(0);
5503 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5504
5505 // Check the ImmArg.
5506 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5507 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5508 DAG.getContext()->emitError(Node->getOperationName(0) +
5509 ": argument out of range.");
5510 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5511 }
5512 return DAG.getConstant(
5513 APInt(ResTy.getScalarSizeInBits(),
5514 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5515 DL, ResTy);
5516}
5517
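// Helper used when lowering per-element shift/bit intrinsics: mask each
// element of the shift-amount/bit-index operand (operand 2) down to the
// element width, i.e. compute Vec & (EltSizeInBits - 1).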
5518static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5519 SDLoc DL(Node);
5520 EVT ResTy = Node->getValueType(0);
5521 SDValue Vec = Node->getOperand(2);
5522 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5523 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5524}
5525
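// Lower a vbitclr/xvbitclr intrinsic: clear, in each element of operand 1,
// the bit selected by the corresponding element of operand 2 (modulo the
// element width), i.e. compute Vec & ~(1 << (Elt & (EltBits - 1))).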
5526static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5527 SDLoc DL(Node);
5528 EVT ResTy = Node->getValueType(0);
5529 SDValue One = DAG.getConstant(1, DL, ResTy);
5530 SDValue Bit =
5531 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5532
5533 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5534 DAG.getNOT(DL, Bit, ResTy));
5535}
5536
5537template <unsigned N>
5538static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5539 SDLoc DL(Node);
5540 EVT ResTy = Node->getValueType(0);
5541 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5542 // Check the unsigned ImmArg.
5543 if (!isUInt<N>(CImm->getZExtValue())) {
5544 DAG.getContext()->emitError(Node->getOperationName(0) +
5545 ": argument out of range.");
5546 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5547 }
5548
5549 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5550 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5551
5552 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5553}
5554
5555template <unsigned N>
5556static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5557 SDLoc DL(Node);
5558 EVT ResTy = Node->getValueType(0);
5559 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5560 // Check the unsigned ImmArg.
5561 if (!isUInt<N>(CImm->getZExtValue())) {
5562 DAG.getContext()->emitError(Node->getOperationName(0) +
5563 ": argument out of range.");
5564 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5565 }
5566
5567 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5568 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5569 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5570}
5571
5572template <unsigned N>
5573static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5574 SDLoc DL(Node);
5575 EVT ResTy = Node->getValueType(0);
5576 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5577 // Check the unsigned ImmArg.
5578 if (!isUInt<N>(CImm->getZExtValue())) {
5579 DAG.getContext()->emitError(Node->getOperationName(0) +
5580 ": argument out of range.");
5581 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5582 }
5583
5584 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5585 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5586 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
5587}
5588
5589static SDValue
5590performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
5591 TargetLowering::DAGCombinerInfo &DCI,
5592 const LoongArchSubtarget &Subtarget) {
5593 SDLoc DL(N);
5594 switch (N->getConstantOperandVal(0)) {
5595 default:
5596 break;
5597 case Intrinsic::loongarch_lsx_vadd_b:
5598 case Intrinsic::loongarch_lsx_vadd_h:
5599 case Intrinsic::loongarch_lsx_vadd_w:
5600 case Intrinsic::loongarch_lsx_vadd_d:
5601 case Intrinsic::loongarch_lasx_xvadd_b:
5602 case Intrinsic::loongarch_lasx_xvadd_h:
5603 case Intrinsic::loongarch_lasx_xvadd_w:
5604 case Intrinsic::loongarch_lasx_xvadd_d:
5605 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5606 N->getOperand(2));
5607 case Intrinsic::loongarch_lsx_vaddi_bu:
5608 case Intrinsic::loongarch_lsx_vaddi_hu:
5609 case Intrinsic::loongarch_lsx_vaddi_wu:
5610 case Intrinsic::loongarch_lsx_vaddi_du:
5611 case Intrinsic::loongarch_lasx_xvaddi_bu:
5612 case Intrinsic::loongarch_lasx_xvaddi_hu:
5613 case Intrinsic::loongarch_lasx_xvaddi_wu:
5614 case Intrinsic::loongarch_lasx_xvaddi_du:
5615 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5616 lowerVectorSplatImm<5>(N, 2, DAG));
5617 case Intrinsic::loongarch_lsx_vsub_b:
5618 case Intrinsic::loongarch_lsx_vsub_h:
5619 case Intrinsic::loongarch_lsx_vsub_w:
5620 case Intrinsic::loongarch_lsx_vsub_d:
5621 case Intrinsic::loongarch_lasx_xvsub_b:
5622 case Intrinsic::loongarch_lasx_xvsub_h:
5623 case Intrinsic::loongarch_lasx_xvsub_w:
5624 case Intrinsic::loongarch_lasx_xvsub_d:
5625 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5626 N->getOperand(2));
5627 case Intrinsic::loongarch_lsx_vsubi_bu:
5628 case Intrinsic::loongarch_lsx_vsubi_hu:
5629 case Intrinsic::loongarch_lsx_vsubi_wu:
5630 case Intrinsic::loongarch_lsx_vsubi_du:
5631 case Intrinsic::loongarch_lasx_xvsubi_bu:
5632 case Intrinsic::loongarch_lasx_xvsubi_hu:
5633 case Intrinsic::loongarch_lasx_xvsubi_wu:
5634 case Intrinsic::loongarch_lasx_xvsubi_du:
5635 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5636 lowerVectorSplatImm<5>(N, 2, DAG));
5637 case Intrinsic::loongarch_lsx_vneg_b:
5638 case Intrinsic::loongarch_lsx_vneg_h:
5639 case Intrinsic::loongarch_lsx_vneg_w:
5640 case Intrinsic::loongarch_lsx_vneg_d:
5641 case Intrinsic::loongarch_lasx_xvneg_b:
5642 case Intrinsic::loongarch_lasx_xvneg_h:
5643 case Intrinsic::loongarch_lasx_xvneg_w:
5644 case Intrinsic::loongarch_lasx_xvneg_d:
5645 return DAG.getNode(
5646 ISD::SUB, DL, N->getValueType(0),
5647 DAG.getConstant(
5648 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
5649 /*isSigned=*/true),
5650 SDLoc(N), N->getValueType(0)),
5651 N->getOperand(1));
5652 case Intrinsic::loongarch_lsx_vmax_b:
5653 case Intrinsic::loongarch_lsx_vmax_h:
5654 case Intrinsic::loongarch_lsx_vmax_w:
5655 case Intrinsic::loongarch_lsx_vmax_d:
5656 case Intrinsic::loongarch_lasx_xvmax_b:
5657 case Intrinsic::loongarch_lasx_xvmax_h:
5658 case Intrinsic::loongarch_lasx_xvmax_w:
5659 case Intrinsic::loongarch_lasx_xvmax_d:
5660 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5661 N->getOperand(2));
5662 case Intrinsic::loongarch_lsx_vmax_bu:
5663 case Intrinsic::loongarch_lsx_vmax_hu:
5664 case Intrinsic::loongarch_lsx_vmax_wu:
5665 case Intrinsic::loongarch_lsx_vmax_du:
5666 case Intrinsic::loongarch_lasx_xvmax_bu:
5667 case Intrinsic::loongarch_lasx_xvmax_hu:
5668 case Intrinsic::loongarch_lasx_xvmax_wu:
5669 case Intrinsic::loongarch_lasx_xvmax_du:
5670 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5671 N->getOperand(2));
5672 case Intrinsic::loongarch_lsx_vmaxi_b:
5673 case Intrinsic::loongarch_lsx_vmaxi_h:
5674 case Intrinsic::loongarch_lsx_vmaxi_w:
5675 case Intrinsic::loongarch_lsx_vmaxi_d:
5676 case Intrinsic::loongarch_lasx_xvmaxi_b:
5677 case Intrinsic::loongarch_lasx_xvmaxi_h:
5678 case Intrinsic::loongarch_lasx_xvmaxi_w:
5679 case Intrinsic::loongarch_lasx_xvmaxi_d:
5680 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5681 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5682 case Intrinsic::loongarch_lsx_vmaxi_bu:
5683 case Intrinsic::loongarch_lsx_vmaxi_hu:
5684 case Intrinsic::loongarch_lsx_vmaxi_wu:
5685 case Intrinsic::loongarch_lsx_vmaxi_du:
5686 case Intrinsic::loongarch_lasx_xvmaxi_bu:
5687 case Intrinsic::loongarch_lasx_xvmaxi_hu:
5688 case Intrinsic::loongarch_lasx_xvmaxi_wu:
5689 case Intrinsic::loongarch_lasx_xvmaxi_du:
5690 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5691 lowerVectorSplatImm<5>(N, 2, DAG));
5692 case Intrinsic::loongarch_lsx_vmin_b:
5693 case Intrinsic::loongarch_lsx_vmin_h:
5694 case Intrinsic::loongarch_lsx_vmin_w:
5695 case Intrinsic::loongarch_lsx_vmin_d:
5696 case Intrinsic::loongarch_lasx_xvmin_b:
5697 case Intrinsic::loongarch_lasx_xvmin_h:
5698 case Intrinsic::loongarch_lasx_xvmin_w:
5699 case Intrinsic::loongarch_lasx_xvmin_d:
5700 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5701 N->getOperand(2));
5702 case Intrinsic::loongarch_lsx_vmin_bu:
5703 case Intrinsic::loongarch_lsx_vmin_hu:
5704 case Intrinsic::loongarch_lsx_vmin_wu:
5705 case Intrinsic::loongarch_lsx_vmin_du:
5706 case Intrinsic::loongarch_lasx_xvmin_bu:
5707 case Intrinsic::loongarch_lasx_xvmin_hu:
5708 case Intrinsic::loongarch_lasx_xvmin_wu:
5709 case Intrinsic::loongarch_lasx_xvmin_du:
5710 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5711 N->getOperand(2));
5712 case Intrinsic::loongarch_lsx_vmini_b:
5713 case Intrinsic::loongarch_lsx_vmini_h:
5714 case Intrinsic::loongarch_lsx_vmini_w:
5715 case Intrinsic::loongarch_lsx_vmini_d:
5716 case Intrinsic::loongarch_lasx_xvmini_b:
5717 case Intrinsic::loongarch_lasx_xvmini_h:
5718 case Intrinsic::loongarch_lasx_xvmini_w:
5719 case Intrinsic::loongarch_lasx_xvmini_d:
5720 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5721 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5722 case Intrinsic::loongarch_lsx_vmini_bu:
5723 case Intrinsic::loongarch_lsx_vmini_hu:
5724 case Intrinsic::loongarch_lsx_vmini_wu:
5725 case Intrinsic::loongarch_lsx_vmini_du:
5726 case Intrinsic::loongarch_lasx_xvmini_bu:
5727 case Intrinsic::loongarch_lasx_xvmini_hu:
5728 case Intrinsic::loongarch_lasx_xvmini_wu:
5729 case Intrinsic::loongarch_lasx_xvmini_du:
5730 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5731 lowerVectorSplatImm<5>(N, 2, DAG));
5732 case Intrinsic::loongarch_lsx_vmul_b:
5733 case Intrinsic::loongarch_lsx_vmul_h:
5734 case Intrinsic::loongarch_lsx_vmul_w:
5735 case Intrinsic::loongarch_lsx_vmul_d:
5736 case Intrinsic::loongarch_lasx_xvmul_b:
5737 case Intrinsic::loongarch_lasx_xvmul_h:
5738 case Intrinsic::loongarch_lasx_xvmul_w:
5739 case Intrinsic::loongarch_lasx_xvmul_d:
5740 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
5741 N->getOperand(2));
5742 case Intrinsic::loongarch_lsx_vmadd_b:
5743 case Intrinsic::loongarch_lsx_vmadd_h:
5744 case Intrinsic::loongarch_lsx_vmadd_w:
5745 case Intrinsic::loongarch_lsx_vmadd_d:
5746 case Intrinsic::loongarch_lasx_xvmadd_b:
5747 case Intrinsic::loongarch_lasx_xvmadd_h:
5748 case Intrinsic::loongarch_lasx_xvmadd_w:
5749 case Intrinsic::loongarch_lasx_xvmadd_d: {
5750 EVT ResTy = N->getValueType(0);
5751 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
5752 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5753 N->getOperand(3)));
5754 }
5755 case Intrinsic::loongarch_lsx_vmsub_b:
5756 case Intrinsic::loongarch_lsx_vmsub_h:
5757 case Intrinsic::loongarch_lsx_vmsub_w:
5758 case Intrinsic::loongarch_lsx_vmsub_d:
5759 case Intrinsic::loongarch_lasx_xvmsub_b:
5760 case Intrinsic::loongarch_lasx_xvmsub_h:
5761 case Intrinsic::loongarch_lasx_xvmsub_w:
5762 case Intrinsic::loongarch_lasx_xvmsub_d: {
5763 EVT ResTy = N->getValueType(0);
5764 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
5765 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5766 N->getOperand(3)));
5767 }
5768 case Intrinsic::loongarch_lsx_vdiv_b:
5769 case Intrinsic::loongarch_lsx_vdiv_h:
5770 case Intrinsic::loongarch_lsx_vdiv_w:
5771 case Intrinsic::loongarch_lsx_vdiv_d:
5772 case Intrinsic::loongarch_lasx_xvdiv_b:
5773 case Intrinsic::loongarch_lasx_xvdiv_h:
5774 case Intrinsic::loongarch_lasx_xvdiv_w:
5775 case Intrinsic::loongarch_lasx_xvdiv_d:
5776 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
5777 N->getOperand(2));
5778 case Intrinsic::loongarch_lsx_vdiv_bu:
5779 case Intrinsic::loongarch_lsx_vdiv_hu:
5780 case Intrinsic::loongarch_lsx_vdiv_wu:
5781 case Intrinsic::loongarch_lsx_vdiv_du:
5782 case Intrinsic::loongarch_lasx_xvdiv_bu:
5783 case Intrinsic::loongarch_lasx_xvdiv_hu:
5784 case Intrinsic::loongarch_lasx_xvdiv_wu:
5785 case Intrinsic::loongarch_lasx_xvdiv_du:
5786 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
5787 N->getOperand(2));
5788 case Intrinsic::loongarch_lsx_vmod_b:
5789 case Intrinsic::loongarch_lsx_vmod_h:
5790 case Intrinsic::loongarch_lsx_vmod_w:
5791 case Intrinsic::loongarch_lsx_vmod_d:
5792 case Intrinsic::loongarch_lasx_xvmod_b:
5793 case Intrinsic::loongarch_lasx_xvmod_h:
5794 case Intrinsic::loongarch_lasx_xvmod_w:
5795 case Intrinsic::loongarch_lasx_xvmod_d:
5796 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
5797 N->getOperand(2));
5798 case Intrinsic::loongarch_lsx_vmod_bu:
5799 case Intrinsic::loongarch_lsx_vmod_hu:
5800 case Intrinsic::loongarch_lsx_vmod_wu:
5801 case Intrinsic::loongarch_lsx_vmod_du:
5802 case Intrinsic::loongarch_lasx_xvmod_bu:
5803 case Intrinsic::loongarch_lasx_xvmod_hu:
5804 case Intrinsic::loongarch_lasx_xvmod_wu:
5805 case Intrinsic::loongarch_lasx_xvmod_du:
5806 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
5807 N->getOperand(2));
5808 case Intrinsic::loongarch_lsx_vand_v:
5809 case Intrinsic::loongarch_lasx_xvand_v:
5810 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5811 N->getOperand(2));
5812 case Intrinsic::loongarch_lsx_vor_v:
5813 case Intrinsic::loongarch_lasx_xvor_v:
5814 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5815 N->getOperand(2));
5816 case Intrinsic::loongarch_lsx_vxor_v:
5817 case Intrinsic::loongarch_lasx_xvxor_v:
5818 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5819 N->getOperand(2));
5820 case Intrinsic::loongarch_lsx_vnor_v:
5821 case Intrinsic::loongarch_lasx_xvnor_v: {
5822 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5823 N->getOperand(2));
5824 return DAG.getNOT(DL, Res, Res->getValueType(0));
5825 }
5826 case Intrinsic::loongarch_lsx_vandi_b:
5827 case Intrinsic::loongarch_lasx_xvandi_b:
5828 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5829 lowerVectorSplatImm<8>(N, 2, DAG));
5830 case Intrinsic::loongarch_lsx_vori_b:
5831 case Intrinsic::loongarch_lasx_xvori_b:
5832 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5833 lowerVectorSplatImm<8>(N, 2, DAG));
5834 case Intrinsic::loongarch_lsx_vxori_b:
5835 case Intrinsic::loongarch_lasx_xvxori_b:
5836 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5837 lowerVectorSplatImm<8>(N, 2, DAG));
5838 case Intrinsic::loongarch_lsx_vsll_b:
5839 case Intrinsic::loongarch_lsx_vsll_h:
5840 case Intrinsic::loongarch_lsx_vsll_w:
5841 case Intrinsic::loongarch_lsx_vsll_d:
5842 case Intrinsic::loongarch_lasx_xvsll_b:
5843 case Intrinsic::loongarch_lasx_xvsll_h:
5844 case Intrinsic::loongarch_lasx_xvsll_w:
5845 case Intrinsic::loongarch_lasx_xvsll_d:
5846 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5847 truncateVecElts(N, DAG));
5848 case Intrinsic::loongarch_lsx_vslli_b:
5849 case Intrinsic::loongarch_lasx_xvslli_b:
5850 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5851 lowerVectorSplatImm<3>(N, 2, DAG));
5852 case Intrinsic::loongarch_lsx_vslli_h:
5853 case Intrinsic::loongarch_lasx_xvslli_h:
5854 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5855 lowerVectorSplatImm<4>(N, 2, DAG));
5856 case Intrinsic::loongarch_lsx_vslli_w:
5857 case Intrinsic::loongarch_lasx_xvslli_w:
5858 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5859 lowerVectorSplatImm<5>(N, 2, DAG));
5860 case Intrinsic::loongarch_lsx_vslli_d:
5861 case Intrinsic::loongarch_lasx_xvslli_d:
5862 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5863 lowerVectorSplatImm<6>(N, 2, DAG));
5864 case Intrinsic::loongarch_lsx_vsrl_b:
5865 case Intrinsic::loongarch_lsx_vsrl_h:
5866 case Intrinsic::loongarch_lsx_vsrl_w:
5867 case Intrinsic::loongarch_lsx_vsrl_d:
5868 case Intrinsic::loongarch_lasx_xvsrl_b:
5869 case Intrinsic::loongarch_lasx_xvsrl_h:
5870 case Intrinsic::loongarch_lasx_xvsrl_w:
5871 case Intrinsic::loongarch_lasx_xvsrl_d:
5872 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5873 truncateVecElts(N, DAG));
5874 case Intrinsic::loongarch_lsx_vsrli_b:
5875 case Intrinsic::loongarch_lasx_xvsrli_b:
5876 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5877 lowerVectorSplatImm<3>(N, 2, DAG));
5878 case Intrinsic::loongarch_lsx_vsrli_h:
5879 case Intrinsic::loongarch_lasx_xvsrli_h:
5880 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5881 lowerVectorSplatImm<4>(N, 2, DAG));
5882 case Intrinsic::loongarch_lsx_vsrli_w:
5883 case Intrinsic::loongarch_lasx_xvsrli_w:
5884 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5885 lowerVectorSplatImm<5>(N, 2, DAG));
5886 case Intrinsic::loongarch_lsx_vsrli_d:
5887 case Intrinsic::loongarch_lasx_xvsrli_d:
5888 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5889 lowerVectorSplatImm<6>(N, 2, DAG));
5890 case Intrinsic::loongarch_lsx_vsra_b:
5891 case Intrinsic::loongarch_lsx_vsra_h:
5892 case Intrinsic::loongarch_lsx_vsra_w:
5893 case Intrinsic::loongarch_lsx_vsra_d:
5894 case Intrinsic::loongarch_lasx_xvsra_b:
5895 case Intrinsic::loongarch_lasx_xvsra_h:
5896 case Intrinsic::loongarch_lasx_xvsra_w:
5897 case Intrinsic::loongarch_lasx_xvsra_d:
5898 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5899 truncateVecElts(N, DAG));
5900 case Intrinsic::loongarch_lsx_vsrai_b:
5901 case Intrinsic::loongarch_lasx_xvsrai_b:
5902 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5903 lowerVectorSplatImm<3>(N, 2, DAG));
5904 case Intrinsic::loongarch_lsx_vsrai_h:
5905 case Intrinsic::loongarch_lasx_xvsrai_h:
5906 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5907 lowerVectorSplatImm<4>(N, 2, DAG));
5908 case Intrinsic::loongarch_lsx_vsrai_w:
5909 case Intrinsic::loongarch_lasx_xvsrai_w:
5910 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5911 lowerVectorSplatImm<5>(N, 2, DAG));
5912 case Intrinsic::loongarch_lsx_vsrai_d:
5913 case Intrinsic::loongarch_lasx_xvsrai_d:
5914 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5915 lowerVectorSplatImm<6>(N, 2, DAG));
5916 case Intrinsic::loongarch_lsx_vclz_b:
5917 case Intrinsic::loongarch_lsx_vclz_h:
5918 case Intrinsic::loongarch_lsx_vclz_w:
5919 case Intrinsic::loongarch_lsx_vclz_d:
5920 case Intrinsic::loongarch_lasx_xvclz_b:
5921 case Intrinsic::loongarch_lasx_xvclz_h:
5922 case Intrinsic::loongarch_lasx_xvclz_w:
5923 case Intrinsic::loongarch_lasx_xvclz_d:
5924 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
5925 case Intrinsic::loongarch_lsx_vpcnt_b:
5926 case Intrinsic::loongarch_lsx_vpcnt_h:
5927 case Intrinsic::loongarch_lsx_vpcnt_w:
5928 case Intrinsic::loongarch_lsx_vpcnt_d:
5929 case Intrinsic::loongarch_lasx_xvpcnt_b:
5930 case Intrinsic::loongarch_lasx_xvpcnt_h:
5931 case Intrinsic::loongarch_lasx_xvpcnt_w:
5932 case Intrinsic::loongarch_lasx_xvpcnt_d:
5933 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
5934 case Intrinsic::loongarch_lsx_vbitclr_b:
5935 case Intrinsic::loongarch_lsx_vbitclr_h:
5936 case Intrinsic::loongarch_lsx_vbitclr_w:
5937 case Intrinsic::loongarch_lsx_vbitclr_d:
5938 case Intrinsic::loongarch_lasx_xvbitclr_b:
5939 case Intrinsic::loongarch_lasx_xvbitclr_h:
5940 case Intrinsic::loongarch_lasx_xvbitclr_w:
5941 case Intrinsic::loongarch_lasx_xvbitclr_d:
5942 return lowerVectorBitClear(N, DAG);
5943 case Intrinsic::loongarch_lsx_vbitclri_b:
5944 case Intrinsic::loongarch_lasx_xvbitclri_b:
5945 return lowerVectorBitClearImm<3>(N, DAG);
5946 case Intrinsic::loongarch_lsx_vbitclri_h:
5947 case Intrinsic::loongarch_lasx_xvbitclri_h:
5948 return lowerVectorBitClearImm<4>(N, DAG);
5949 case Intrinsic::loongarch_lsx_vbitclri_w:
5950 case Intrinsic::loongarch_lasx_xvbitclri_w:
5951 return lowerVectorBitClearImm<5>(N, DAG);
5952 case Intrinsic::loongarch_lsx_vbitclri_d:
5953 case Intrinsic::loongarch_lasx_xvbitclri_d:
5954 return lowerVectorBitClearImm<6>(N, DAG);
5955 case Intrinsic::loongarch_lsx_vbitset_b:
5956 case Intrinsic::loongarch_lsx_vbitset_h:
5957 case Intrinsic::loongarch_lsx_vbitset_w:
5958 case Intrinsic::loongarch_lsx_vbitset_d:
5959 case Intrinsic::loongarch_lasx_xvbitset_b:
5960 case Intrinsic::loongarch_lasx_xvbitset_h:
5961 case Intrinsic::loongarch_lasx_xvbitset_w:
5962 case Intrinsic::loongarch_lasx_xvbitset_d: {
5963 EVT VecTy = N->getValueType(0);
5964 SDValue One = DAG.getConstant(1, DL, VecTy);
5965 return DAG.getNode(
5966 ISD::OR, DL, VecTy, N->getOperand(1),
5967 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
5968 }
5969 case Intrinsic::loongarch_lsx_vbitseti_b:
5970 case Intrinsic::loongarch_lasx_xvbitseti_b:
5971 return lowerVectorBitSetImm<3>(N, DAG);
5972 case Intrinsic::loongarch_lsx_vbitseti_h:
5973 case Intrinsic::loongarch_lasx_xvbitseti_h:
5974 return lowerVectorBitSetImm<4>(N, DAG);
5975 case Intrinsic::loongarch_lsx_vbitseti_w:
5976 case Intrinsic::loongarch_lasx_xvbitseti_w:
5977 return lowerVectorBitSetImm<5>(N, DAG);
5978 case Intrinsic::loongarch_lsx_vbitseti_d:
5979 case Intrinsic::loongarch_lasx_xvbitseti_d:
5980 return lowerVectorBitSetImm<6>(N, DAG);
5981 case Intrinsic::loongarch_lsx_vbitrev_b:
5982 case Intrinsic::loongarch_lsx_vbitrev_h:
5983 case Intrinsic::loongarch_lsx_vbitrev_w:
5984 case Intrinsic::loongarch_lsx_vbitrev_d:
5985 case Intrinsic::loongarch_lasx_xvbitrev_b:
5986 case Intrinsic::loongarch_lasx_xvbitrev_h:
5987 case Intrinsic::loongarch_lasx_xvbitrev_w:
5988 case Intrinsic::loongarch_lasx_xvbitrev_d: {
5989 EVT VecTy = N->getValueType(0);
5990 SDValue One = DAG.getConstant(1, DL, VecTy);
5991 return DAG.getNode(
5992 ISD::XOR, DL, VecTy, N->getOperand(1),
5993 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
5994 }
5995 case Intrinsic::loongarch_lsx_vbitrevi_b:
5996 case Intrinsic::loongarch_lasx_xvbitrevi_b:
5997 return lowerVectorBitRevImm<3>(N, DAG);
5998 case Intrinsic::loongarch_lsx_vbitrevi_h:
5999 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6000 return lowerVectorBitRevImm<4>(N, DAG);
6001 case Intrinsic::loongarch_lsx_vbitrevi_w:
6002 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6003 return lowerVectorBitRevImm<5>(N, DAG);
6004 case Intrinsic::loongarch_lsx_vbitrevi_d:
6005 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6006 return lowerVectorBitRevImm<6>(N, DAG);
6007 case Intrinsic::loongarch_lsx_vfadd_s:
6008 case Intrinsic::loongarch_lsx_vfadd_d:
6009 case Intrinsic::loongarch_lasx_xvfadd_s:
6010 case Intrinsic::loongarch_lasx_xvfadd_d:
6011 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6012 N->getOperand(2));
6013 case Intrinsic::loongarch_lsx_vfsub_s:
6014 case Intrinsic::loongarch_lsx_vfsub_d:
6015 case Intrinsic::loongarch_lasx_xvfsub_s:
6016 case Intrinsic::loongarch_lasx_xvfsub_d:
6017 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6018 N->getOperand(2));
6019 case Intrinsic::loongarch_lsx_vfmul_s:
6020 case Intrinsic::loongarch_lsx_vfmul_d:
6021 case Intrinsic::loongarch_lasx_xvfmul_s:
6022 case Intrinsic::loongarch_lasx_xvfmul_d:
6023 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6024 N->getOperand(2));
6025 case Intrinsic::loongarch_lsx_vfdiv_s:
6026 case Intrinsic::loongarch_lsx_vfdiv_d:
6027 case Intrinsic::loongarch_lasx_xvfdiv_s:
6028 case Intrinsic::loongarch_lasx_xvfdiv_d:
6029 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6030 N->getOperand(2));
6031 case Intrinsic::loongarch_lsx_vfmadd_s:
6032 case Intrinsic::loongarch_lsx_vfmadd_d:
6033 case Intrinsic::loongarch_lasx_xvfmadd_s:
6034 case Intrinsic::loongarch_lasx_xvfmadd_d:
6035 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6036 N->getOperand(2), N->getOperand(3));
6037 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6038 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6039 N->getOperand(1), N->getOperand(2),
6040 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6041 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6042 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6043 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6044 N->getOperand(1), N->getOperand(2),
6045 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6046 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6047 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6048 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6049 N->getOperand(1), N->getOperand(2),
6050 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6051 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6052 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6053 N->getOperand(1), N->getOperand(2),
6054 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6055 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6056 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6057 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6058 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6059 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6060 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6061 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6062 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6063 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6064 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6065 N->getOperand(1)));
6066 case Intrinsic::loongarch_lsx_vreplve_b:
6067 case Intrinsic::loongarch_lsx_vreplve_h:
6068 case Intrinsic::loongarch_lsx_vreplve_w:
6069 case Intrinsic::loongarch_lsx_vreplve_d:
6070 case Intrinsic::loongarch_lasx_xvreplve_b:
6071 case Intrinsic::loongarch_lasx_xvreplve_h:
6072 case Intrinsic::loongarch_lasx_xvreplve_w:
6073 case Intrinsic::loongarch_lasx_xvreplve_d:
6074 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6075 N->getOperand(1),
6076 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6077 N->getOperand(2)));
6078 }
6079 return SDValue();
6080}
6081
6082static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6083 TargetLowering::DAGCombinerInfo &DCI,
6084 const LoongArchSubtarget &Subtarget) {
6085 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6086 // conversion is unnecessary and can be replaced with the
6087 // MOVFR2GR_S_LA64 operand.
6088 SDValue Op0 = N->getOperand(0);
6089 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6090 return Op0.getOperand(0);
6091 return SDValue();
6092}
6093
6094static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6095 TargetLowering::DAGCombinerInfo &DCI,
6096 const LoongArchSubtarget &Subtarget) {
6097 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6098 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6099 // operand.
6100 SDValue Op0 = N->getOperand(0);
6101 if (Op0.getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6102 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6103 "Unexpected value type!");
6104 return Op0.getOperand(0);
6105 }
6106 return SDValue();
6107}
6108
6109static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6110 TargetLowering::DAGCombinerInfo &DCI,
6111 const LoongArchSubtarget &Subtarget) {
6112 MVT VT = N->getSimpleValueType(0);
6113 unsigned NumBits = VT.getScalarSizeInBits();
6114
6115 // Simplify the inputs.
6116 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6117 APInt DemandedMask(APInt::getAllOnes(NumBits));
6118 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6119 return SDValue(N, 0);
6120
6121 return SDValue();
6122}
6123
6124static SDValue
6125performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6126 TargetLowering::DAGCombinerInfo &DCI,
6127 const LoongArchSubtarget &Subtarget) {
6128 SDValue Op0 = N->getOperand(0);
6129 SDLoc DL(N);
6130
6131 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6132 // redundant. Instead, use BuildPairF64's operands directly.
6133 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6134 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6135
6136 if (Op0->isUndef()) {
6137 SDValue Lo = DAG.getUNDEF(MVT::i32);
6138 SDValue Hi = DAG.getUNDEF(MVT::i32);
6139 return DCI.CombineTo(N, Lo, Hi);
6140 }
6141
6142 // It's cheaper to materialise two 32-bit integers than to load a double
6143 // from the constant pool and transfer it to integer registers through the
6144 // stack.
6145 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6146 APInt V = C->getValueAPF().bitcastToAPInt();
6147 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6148 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6149 return DCI.CombineTo(N, Lo, Hi);
6150 }
6151
6152 return SDValue();
6153}
6154
6155SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
6156 DAGCombinerInfo &DCI) const {
6157 SelectionDAG &DAG = DCI.DAG;
6158 switch (N->getOpcode()) {
6159 default:
6160 break;
6161 case ISD::AND:
6162 return performANDCombine(N, DAG, DCI, Subtarget);
6163 case ISD::OR:
6164 return performORCombine(N, DAG, DCI, Subtarget);
6165 case ISD::SETCC:
6166 return performSETCCCombine(N, DAG, DCI, Subtarget);
6167 case ISD::SRL:
6168 return performSRLCombine(N, DAG, DCI, Subtarget);
6169 case ISD::BITCAST:
6170 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6171 case LoongArchISD::BITREV_W:
6172 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6173 case LoongArchISD::BR_CC:
6174 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6175 case LoongArchISD::SELECT_CC:
6176 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6177 case ISD::INTRINSIC_WO_CHAIN:
6178 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6179 case LoongArchISD::MOVGR2FR_W_LA64:
6180 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6181 case LoongArchISD::MOVFR2GR_S_LA64:
6182 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6183 case LoongArchISD::VMSKLTZ:
6184 case LoongArchISD::XVMSKLTZ:
6185 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6186 case LoongArchISD::SPLIT_PAIR_F64:
6187 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6188 }
6189 return SDValue();
6190}
6191
6192static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
6193 MachineBasicBlock *MBB) {
6194 if (!ZeroDivCheck)
6195 return MBB;
6196
6197 // Build instructions:
6198 // MBB:
6199 // div(or mod) $dst, $dividend, $divisor
6200 // bne $divisor, $zero, SinkMBB
6201 // BreakMBB:
6202 // break 7 // BRK_DIVZERO
6203 // SinkMBB:
6204 // fallthrough
6205 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6206 MachineFunction::iterator It = ++MBB->getIterator();
6207 MachineFunction *MF = MBB->getParent();
6208 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6209 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6210 MF->insert(It, BreakMBB);
6211 MF->insert(It, SinkMBB);
6212
6213 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6214 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6215 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6216
6217 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6218 DebugLoc DL = MI.getDebugLoc();
6219 MachineOperand &Divisor = MI.getOperand(2);
6220 Register DivisorReg = Divisor.getReg();
6221
6222 // MBB:
6223 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6224 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6225 .addReg(LoongArch::R0)
6226 .addMBB(SinkMBB);
6227 MBB->addSuccessor(BreakMBB);
6228 MBB->addSuccessor(SinkMBB);
6229
6230 // BreakMBB:
6231 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
6232 // definition of BRK_DIVZERO.
6233 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6234 BreakMBB->addSuccessor(SinkMBB);
6235
6236 // Clear Divisor's kill flag.
6237 Divisor.setIsKill(false);
6238
6239 return SinkMBB;
6240}
6241
6242static MachineBasicBlock *
6244 const LoongArchSubtarget &Subtarget) {
6245 unsigned CondOpc;
6246 switch (MI.getOpcode()) {
6247 default:
6248 llvm_unreachable("Unexpected opcode");
6249 case LoongArch::PseudoVBZ:
6250 CondOpc = LoongArch::VSETEQZ_V;
6251 break;
6252 case LoongArch::PseudoVBZ_B:
6253 CondOpc = LoongArch::VSETANYEQZ_B;
6254 break;
6255 case LoongArch::PseudoVBZ_H:
6256 CondOpc = LoongArch::VSETANYEQZ_H;
6257 break;
6258 case LoongArch::PseudoVBZ_W:
6259 CondOpc = LoongArch::VSETANYEQZ_W;
6260 break;
6261 case LoongArch::PseudoVBZ_D:
6262 CondOpc = LoongArch::VSETANYEQZ_D;
6263 break;
6264 case LoongArch::PseudoVBNZ:
6265 CondOpc = LoongArch::VSETNEZ_V;
6266 break;
6267 case LoongArch::PseudoVBNZ_B:
6268 CondOpc = LoongArch::VSETALLNEZ_B;
6269 break;
6270 case LoongArch::PseudoVBNZ_H:
6271 CondOpc = LoongArch::VSETALLNEZ_H;
6272 break;
6273 case LoongArch::PseudoVBNZ_W:
6274 CondOpc = LoongArch::VSETALLNEZ_W;
6275 break;
6276 case LoongArch::PseudoVBNZ_D:
6277 CondOpc = LoongArch::VSETALLNEZ_D;
6278 break;
6279 case LoongArch::PseudoXVBZ:
6280 CondOpc = LoongArch::XVSETEQZ_V;
6281 break;
6282 case LoongArch::PseudoXVBZ_B:
6283 CondOpc = LoongArch::XVSETANYEQZ_B;
6284 break;
6285 case LoongArch::PseudoXVBZ_H:
6286 CondOpc = LoongArch::XVSETANYEQZ_H;
6287 break;
6288 case LoongArch::PseudoXVBZ_W:
6289 CondOpc = LoongArch::XVSETANYEQZ_W;
6290 break;
6291 case LoongArch::PseudoXVBZ_D:
6292 CondOpc = LoongArch::XVSETANYEQZ_D;
6293 break;
6294 case LoongArch::PseudoXVBNZ:
6295 CondOpc = LoongArch::XVSETNEZ_V;
6296 break;
6297 case LoongArch::PseudoXVBNZ_B:
6298 CondOpc = LoongArch::XVSETALLNEZ_B;
6299 break;
6300 case LoongArch::PseudoXVBNZ_H:
6301 CondOpc = LoongArch::XVSETALLNEZ_H;
6302 break;
6303 case LoongArch::PseudoXVBNZ_W:
6304 CondOpc = LoongArch::XVSETALLNEZ_W;
6305 break;
6306 case LoongArch::PseudoXVBNZ_D:
6307 CondOpc = LoongArch::XVSETALLNEZ_D;
6308 break;
6309 }
6310
6311 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6312 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6313 DebugLoc DL = MI.getDebugLoc();
6314 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6315 MachineFunction::iterator It = ++BB->getIterator();
6316
6317 MachineFunction *F = BB->getParent();
6318 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6319 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6320 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6321
6322 F->insert(It, FalseBB);
6323 F->insert(It, TrueBB);
6324 F->insert(It, SinkBB);
6325
6326 // Transfer the remainder of MBB and its successor edges to Sink.
6327 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6328 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
6329
6330 // Insert the real instruction to BB.
6331 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6332 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6333
6334 // Insert branch.
6335 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6336 BB->addSuccessor(FalseBB);
6337 BB->addSuccessor(TrueBB);
6338
6339 // FalseBB.
6340 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6341 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6342 .addReg(LoongArch::R0)
6343 .addImm(0);
6344 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6345 FalseBB->addSuccessor(SinkBB);
6346
6347 // TrueBB.
6348 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6349 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6350 .addReg(LoongArch::R0)
6351 .addImm(1);
6352 TrueBB->addSuccessor(SinkBB);
6353
6354 // SinkBB: merge the results.
6355 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6356 MI.getOperand(0).getReg())
6357 .addReg(RD1)
6358 .addMBB(FalseBB)
6359 .addReg(RD2)
6360 .addMBB(TrueBB);
6361
6362 // The pseudo instruction is gone now.
6363 MI.eraseFromParent();
6364 return SinkBB;
6365}
6366
6367static MachineBasicBlock *
6369 const LoongArchSubtarget &Subtarget) {
6370 unsigned InsOp;
6371 unsigned BroadcastOp;
6372 unsigned HalfSize;
6373 switch (MI.getOpcode()) {
6374 default:
6375 llvm_unreachable("Unexpected opcode");
6376 case LoongArch::PseudoXVINSGR2VR_B:
6377 HalfSize = 16;
6378 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6379 InsOp = LoongArch::XVEXTRINS_B;
6380 break;
6381 case LoongArch::PseudoXVINSGR2VR_H:
6382 HalfSize = 8;
6383 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6384 InsOp = LoongArch::XVEXTRINS_H;
6385 break;
6386 }
6387 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6388 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6389 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6390 DebugLoc DL = MI.getDebugLoc();
6391 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6392 // XDst = vector_insert XSrc, Elt, Idx
6393 Register XDst = MI.getOperand(0).getReg();
6394 Register XSrc = MI.getOperand(1).getReg();
6395 Register Elt = MI.getOperand(2).getReg();
6396 unsigned Idx = MI.getOperand(3).getImm();
6397
6398 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6399 Idx < HalfSize) {
6400 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6401 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6402
6403 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6404 .addReg(XSrc, 0, LoongArch::sub_128);
6405 BuildMI(*BB, MI, DL,
6406 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
6407 : LoongArch::VINSGR2VR_B),
6408 ScratchSubReg2)
6409 .addReg(ScratchSubReg1)
6410 .addReg(Elt)
6411 .addImm(Idx);
6412
6413 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
6414 .addImm(0)
6415 .addReg(ScratchSubReg2)
6416 .addImm(LoongArch::sub_128);
6417 } else {
6418 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6419 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6420
6421 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
6422
6423 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
6424 .addReg(ScratchReg1)
6425 .addReg(XSrc)
6426 .addImm(Idx >= HalfSize ? 48 : 18);
6427
6428 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
6429 .addReg(XSrc)
6430 .addReg(ScratchReg2)
6431 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
6432 }
6433
6434 MI.eraseFromParent();
6435 return BB;
6436}
6437
6440 const LoongArchSubtarget &Subtarget) {
6441 assert(Subtarget.hasExtLSX());
6442 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6443 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6444 DebugLoc DL = MI.getDebugLoc();
6445 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6446 Register Dst = MI.getOperand(0).getReg();
6447 Register Src = MI.getOperand(1).getReg();
6448 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6449 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6450 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6451
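// Expand scalar CTPOP through LSX: zero a vector register, insert the GPR
// operand into element 0, run VPCNT on that element, then move the result
// back to a GPR.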
6452 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6453 BuildMI(*BB, MI, DL,
6454 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6455 : LoongArch::VINSGR2VR_W),
6456 ScratchReg2)
6457 .addReg(ScratchReg1)
6458 .addReg(Src)
6459 .addImm(0);
6460 BuildMI(
6461 *BB, MI, DL,
6462 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6463 ScratchReg3)
6464 .addReg(ScratchReg2);
6465 BuildMI(*BB, MI, DL,
6466 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6467 : LoongArch::VPICKVE2GR_W),
6468 Dst)
6469 .addReg(ScratchReg3)
6470 .addImm(0);
6471
6472 MI.eraseFromParent();
6473 return BB;
6474}
6475
6476static MachineBasicBlock *
6478 const LoongArchSubtarget &Subtarget) {
6479 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6480 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6481 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
6482 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6483 Register Dst = MI.getOperand(0).getReg();
6484 Register Src = MI.getOperand(1).getReg();
6485 DebugLoc DL = MI.getDebugLoc();
6486 unsigned EleBits = 8;
6487 unsigned NotOpc = 0;
6488 unsigned MskOpc;
6489
6490 switch (MI.getOpcode()) {
6491 default:
6492 llvm_unreachable("Unexpected opcode");
6493 case LoongArch::PseudoVMSKLTZ_B:
6494 MskOpc = LoongArch::VMSKLTZ_B;
6495 break;
6496 case LoongArch::PseudoVMSKLTZ_H:
6497 MskOpc = LoongArch::VMSKLTZ_H;
6498 EleBits = 16;
6499 break;
6500 case LoongArch::PseudoVMSKLTZ_W:
6501 MskOpc = LoongArch::VMSKLTZ_W;
6502 EleBits = 32;
6503 break;
6504 case LoongArch::PseudoVMSKLTZ_D:
6505 MskOpc = LoongArch::VMSKLTZ_D;
6506 EleBits = 64;
6507 break;
6508 case LoongArch::PseudoVMSKGEZ_B:
6509 MskOpc = LoongArch::VMSKGEZ_B;
6510 break;
6511 case LoongArch::PseudoVMSKEQZ_B:
6512 MskOpc = LoongArch::VMSKNZ_B;
6513 NotOpc = LoongArch::VNOR_V;
6514 break;
6515 case LoongArch::PseudoVMSKNEZ_B:
6516 MskOpc = LoongArch::VMSKNZ_B;
6517 break;
6518 case LoongArch::PseudoXVMSKLTZ_B:
6519 MskOpc = LoongArch::XVMSKLTZ_B;
6520 RC = &LoongArch::LASX256RegClass;
6521 break;
6522 case LoongArch::PseudoXVMSKLTZ_H:
6523 MskOpc = LoongArch::XVMSKLTZ_H;
6524 RC = &LoongArch::LASX256RegClass;
6525 EleBits = 16;
6526 break;
6527 case LoongArch::PseudoXVMSKLTZ_W:
6528 MskOpc = LoongArch::XVMSKLTZ_W;
6529 RC = &LoongArch::LASX256RegClass;
6530 EleBits = 32;
6531 break;
6532 case LoongArch::PseudoXVMSKLTZ_D:
6533 MskOpc = LoongArch::XVMSKLTZ_D;
6534 RC = &LoongArch::LASX256RegClass;
6535 EleBits = 64;
6536 break;
6537 case LoongArch::PseudoXVMSKGEZ_B:
6538 MskOpc = LoongArch::XVMSKGEZ_B;
6539 RC = &LoongArch::LASX256RegClass;
6540 break;
6541 case LoongArch::PseudoXVMSKEQZ_B:
6542 MskOpc = LoongArch::XVMSKNZ_B;
6543 NotOpc = LoongArch::XVNOR_V;
6544 RC = &LoongArch::LASX256RegClass;
6545 break;
6546 case LoongArch::PseudoXVMSKNEZ_B:
6547 MskOpc = LoongArch::XVMSKNZ_B;
6548 RC = &LoongArch::LASX256RegClass;
6549 break;
6550 }
6551
6552 Register Msk = MRI.createVirtualRegister(RC);
6553 if (NotOpc) {
6554 Register Tmp = MRI.createVirtualRegister(RC);
6555 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
6556 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
6557 .addReg(Tmp, RegState::Kill)
6558 .addReg(Tmp, RegState::Kill);
6559 } else {
6560 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
6561 }
6562
6563 if (TRI->getRegSizeInBits(*RC) > 128) {
6564 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6565 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6566 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
6567 .addReg(Msk)
6568 .addImm(0);
6569 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
6570 .addReg(Msk, RegState::Kill)
6571 .addImm(4);
6572 BuildMI(*BB, MI, DL,
6573 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
6574 : LoongArch::BSTRINS_W),
6575 Dst)
6576 .addReg(Lo, RegState::Kill)
6577 .addReg(Hi, RegState::Kill)
6578 .addImm(256 / EleBits - 1)
6579 .addImm(128 / EleBits);
6580 } else {
6581 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
6582 .addReg(Msk, RegState::Kill)
6583 .addImm(0);
6584 }
6585
6586 MI.eraseFromParent();
6587 return BB;
6588}
6589
6590static MachineBasicBlock *
6592 const LoongArchSubtarget &Subtarget) {
6593 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
6594 "Unexpected instruction");
6595
6596 MachineFunction &MF = *BB->getParent();
6597 DebugLoc DL = MI.getDebugLoc();
6598 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6599 Register LoReg = MI.getOperand(0).getReg();
6600 Register HiReg = MI.getOperand(1).getReg();
6601 Register SrcReg = MI.getOperand(2).getReg();
6602
6603 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
6604 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
6605 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
6606 MI.eraseFromParent(); // The pseudo instruction is gone now.
6607 return BB;
6608}
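// A minimal sketch of the expansion above (invented virtual register names):
// the f64 source is split into two 32-bit GPR halves, one move per half:
//   %lo:gpr = MOVFR2GR_S_64 %src:fpr64
//   %hi:gpr = MOVFRH2GR_S   %src:fpr64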
6609
6610static MachineBasicBlock *
6611 emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6612 const LoongArchSubtarget &Subtarget) {
6613 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
6614 "Unexpected instruction");
6615
6616 MachineFunction &MF = *BB->getParent();
6617 DebugLoc DL = MI.getDebugLoc();
6620 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
6621 Register DstReg = MI.getOperand(0).getReg();
6622 Register LoReg = MI.getOperand(1).getReg();
6623 Register HiReg = MI.getOperand(2).getReg();
6624
6625 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
6626 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
6627 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
6628 .addReg(TmpReg, RegState::Kill)
6629 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
6630 MI.eraseFromParent(); // The pseudo instruction is gone now.
6631 return BB;
6632}
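// A minimal sketch of the expansion above (invented virtual register names):
// the two GPR halves are inserted into an FPR64 in two steps:
//   %tmp:fpr64 = MOVGR2FR_W_64 %lo:gpr
//   %dst:fpr64 = MOVGR2FRH_W   %tmp:fpr64, %hi:gpr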
6633
6634 static bool isSelectPseudo(MachineInstr &MI) {
6635 switch (MI.getOpcode()) {
6636 default:
6637 return false;
6638 case LoongArch::Select_GPR_Using_CC_GPR:
6639 return true;
6640 }
6641}
6642
6643static MachineBasicBlock *
6644 emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6645 const LoongArchSubtarget &Subtarget) {
6646 // To "insert" Select_* instructions, we actually have to insert the triangle
6647 // control-flow pattern. The incoming instructions know the destination vreg
6648 // to set, the condition code register to branch on, the true/false values to
6649 // select between, and the condcode to use to select the appropriate branch.
6650 //
6651 // We produce the following control flow:
6652 // HeadMBB
6653 // | \
6654 // | IfFalseMBB
6655 // | /
6656 // TailMBB
6657 //
6658 // When we find a sequence of selects we attempt to optimize their emission
6659 // by sharing the control flow. Currently we only handle cases where we have
6660 // multiple selects with the exact same condition (same LHS, RHS and CC).
6661 // The selects may be interleaved with other instructions if the other
6662 // instructions meet some requirements we deem safe:
6663 // - They are not pseudo instructions.
6664 // - They are debug instructions, or otherwise:
6665 // - They do not have side-effects, do not access memory and their inputs do
6666 // not depend on the results of the select pseudo-instructions.
6667 // The TrueV/FalseV operands of the selects cannot depend on the result of
6668 // previous selects in the sequence.
6669 // These conditions could be further relaxed. See the X86 target for a
6670 // related approach and more information.
6671
6672 Register LHS = MI.getOperand(1).getReg();
6673 Register RHS;
6674 if (MI.getOperand(2).isReg())
6675 RHS = MI.getOperand(2).getReg();
6676 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
6677
6678 SmallVector<MachineInstr *, 4> SelectDebugValues;
6679 SmallSet<Register, 4> SelectDests;
6680 SelectDests.insert(MI.getOperand(0).getReg());
6681
6682 MachineInstr *LastSelectPseudo = &MI;
6683 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
6684 SequenceMBBI != E; ++SequenceMBBI) {
6685 if (SequenceMBBI->isDebugInstr())
6686 continue;
6687 if (isSelectPseudo(*SequenceMBBI)) {
6688 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
6689 !SequenceMBBI->getOperand(2).isReg() ||
6690 SequenceMBBI->getOperand(2).getReg() != RHS ||
6691 SequenceMBBI->getOperand(3).getImm() != CC ||
6692 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
6693 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
6694 break;
6695 LastSelectPseudo = &*SequenceMBBI;
6696 SequenceMBBI->collectDebugValues(SelectDebugValues);
6697 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
6698 continue;
6699 }
6700 if (SequenceMBBI->hasUnmodeledSideEffects() ||
6701 SequenceMBBI->mayLoadOrStore() ||
6702 SequenceMBBI->usesCustomInsertionHook())
6703 break;
6704 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
6705 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
6706 }))
6707 break;
6708 }
6709
6710 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
6711 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6712 DebugLoc DL = MI.getDebugLoc();
6714
6715 MachineBasicBlock *HeadMBB = BB;
6716 MachineFunction *F = BB->getParent();
6717 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
6718 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
6719
6720 F->insert(I, IfFalseMBB);
6721 F->insert(I, TailMBB);
6722
6723 // Set the call frame size on entry to the new basic blocks.
6724 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
6725 IfFalseMBB->setCallFrameSize(CallFrameSize);
6726 TailMBB->setCallFrameSize(CallFrameSize);
6727
6728 // Transfer debug instructions associated with the selects to TailMBB.
6729 for (MachineInstr *DebugInstr : SelectDebugValues) {
6730 TailMBB->push_back(DebugInstr->removeFromParent());
6731 }
6732
6733 // Move all instructions after the sequence to TailMBB.
6734 TailMBB->splice(TailMBB->end(), HeadMBB,
6735 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
6736 // Update machine-CFG edges by transferring all successors of the current
6737 // block to the new block which will contain the Phi nodes for the selects.
6738 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
6739 // Set the successors for HeadMBB.
6740 HeadMBB->addSuccessor(IfFalseMBB);
6741 HeadMBB->addSuccessor(TailMBB);
6742
6743 // Insert appropriate branch.
6744 if (MI.getOperand(2).isImm())
6745 BuildMI(HeadMBB, DL, TII.get(CC))
6746 .addReg(LHS)
6747 .addImm(MI.getOperand(2).getImm())
6748 .addMBB(TailMBB);
6749 else
6750 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
6751
6752 // IfFalseMBB just falls through to TailMBB.
6753 IfFalseMBB->addSuccessor(TailMBB);
6754
6755 // Create PHIs for all of the select pseudo-instructions.
6756 auto SelectMBBI = MI.getIterator();
6757 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
6758 auto InsertionPoint = TailMBB->begin();
6759 while (SelectMBBI != SelectEnd) {
6760 auto Next = std::next(SelectMBBI);
6761 if (isSelectPseudo(*SelectMBBI)) {
6762 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
6763 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
6764 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
6765 .addReg(SelectMBBI->getOperand(4).getReg())
6766 .addMBB(HeadMBB)
6767 .addReg(SelectMBBI->getOperand(5).getReg())
6768 .addMBB(IfFalseMBB);
6769 SelectMBBI->eraseFromParent();
6770 }
6771 SelectMBBI = Next;
6772 }
6773
6774 F->getProperties().resetNoPHIs();
6775 return TailMBB;
6776}
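// Rough shape of the code emitted above for one select pseudo (the branch
// opcode is taken from the CC operand; block and register names are
// illustrative):
//   HeadMBB:
//     <CC-branch> %lhs, %rhs, %TailMBB
//   IfFalseMBB:                        ; falls through
//   TailMBB:
//     %dst = PHI [ %trueval, %HeadMBB ], [ %falseval, %IfFalseMBB ]
// Further selects sharing the same condition only add PHIs to TailMBB; they do
// not create additional branches.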
6777
6778MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
6779 MachineInstr &MI, MachineBasicBlock *BB) const {
6780 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6781 DebugLoc DL = MI.getDebugLoc();
6782
6783 switch (MI.getOpcode()) {
6784 default:
6785 llvm_unreachable("Unexpected instr type to insert");
6786 case LoongArch::DIV_W:
6787 case LoongArch::DIV_WU:
6788 case LoongArch::MOD_W:
6789 case LoongArch::MOD_WU:
6790 case LoongArch::DIV_D:
6791 case LoongArch::DIV_DU:
6792 case LoongArch::MOD_D:
6793 case LoongArch::MOD_DU:
6794 return insertDivByZeroTrap(MI, BB);
6795 break;
6796 case LoongArch::WRFCSR: {
6797 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
6798 LoongArch::FCSR0 + MI.getOperand(0).getImm())
6799 .addReg(MI.getOperand(1).getReg());
6800 MI.eraseFromParent();
6801 return BB;
6802 }
6803 case LoongArch::RDFCSR: {
6804 MachineInstr *ReadFCSR =
6805 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
6806 MI.getOperand(0).getReg())
6807 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
6808 ReadFCSR->getOperand(1).setIsUndef();
6809 MI.eraseFromParent();
6810 return BB;
6811 }
6812 case LoongArch::Select_GPR_Using_CC_GPR:
6813 return emitSelectPseudo(MI, BB, Subtarget);
6814 case LoongArch::BuildPairF64Pseudo:
6815 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
6816 case LoongArch::SplitPairF64Pseudo:
6817 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
6818 case LoongArch::PseudoVBZ:
6819 case LoongArch::PseudoVBZ_B:
6820 case LoongArch::PseudoVBZ_H:
6821 case LoongArch::PseudoVBZ_W:
6822 case LoongArch::PseudoVBZ_D:
6823 case LoongArch::PseudoVBNZ:
6824 case LoongArch::PseudoVBNZ_B:
6825 case LoongArch::PseudoVBNZ_H:
6826 case LoongArch::PseudoVBNZ_W:
6827 case LoongArch::PseudoVBNZ_D:
6828 case LoongArch::PseudoXVBZ:
6829 case LoongArch::PseudoXVBZ_B:
6830 case LoongArch::PseudoXVBZ_H:
6831 case LoongArch::PseudoXVBZ_W:
6832 case LoongArch::PseudoXVBZ_D:
6833 case LoongArch::PseudoXVBNZ:
6834 case LoongArch::PseudoXVBNZ_B:
6835 case LoongArch::PseudoXVBNZ_H:
6836 case LoongArch::PseudoXVBNZ_W:
6837 case LoongArch::PseudoXVBNZ_D:
6838 return emitVecCondBranchPseudo(MI, BB, Subtarget);
6839 case LoongArch::PseudoXVINSGR2VR_B:
6840 case LoongArch::PseudoXVINSGR2VR_H:
6841 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
6842 case LoongArch::PseudoCTPOP:
6843 return emitPseudoCTPOP(MI, BB, Subtarget);
6844 case LoongArch::PseudoVMSKLTZ_B:
6845 case LoongArch::PseudoVMSKLTZ_H:
6846 case LoongArch::PseudoVMSKLTZ_W:
6847 case LoongArch::PseudoVMSKLTZ_D:
6848 case LoongArch::PseudoVMSKGEZ_B:
6849 case LoongArch::PseudoVMSKEQZ_B:
6850 case LoongArch::PseudoVMSKNEZ_B:
6851 case LoongArch::PseudoXVMSKLTZ_B:
6852 case LoongArch::PseudoXVMSKLTZ_H:
6853 case LoongArch::PseudoXVMSKLTZ_W:
6854 case LoongArch::PseudoXVMSKLTZ_D:
6855 case LoongArch::PseudoXVMSKGEZ_B:
6856 case LoongArch::PseudoXVMSKEQZ_B:
6857 case LoongArch::PseudoXVMSKNEZ_B:
6858 return emitPseudoVMSKCOND(MI, BB, Subtarget);
6859 case TargetOpcode::STATEPOINT:
6860 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
6861 // while the bl call instruction (to which the statepoint is eventually
6862 // lowered) has an implicit def. This def is early-clobber as it is set at
6863 // the moment of the call, before any use is read.
6864 // Add this implicit dead def here as a workaround.
6865 MI.addOperand(*MI.getMF(),
6866 MachineOperand::CreateReg(
6867 LoongArch::R1, /*isDef*/ true,
6868 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
6869 /*isUndef*/ false, /*isEarlyClobber*/ true));
6870 if (!Subtarget.is64Bit())
6871 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
6872 return emitPatchPoint(MI, BB);
6873 }
6874}
6875
6877 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
6878 unsigned *Fast) const {
6879 if (!Subtarget.hasUAL())
6880 return false;
6881
6882 // TODO: set reasonable speed number.
6883 if (Fast)
6884 *Fast = 1;
6885 return true;
6886}
6887
6888const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
6889 switch ((LoongArchISD::NodeType)Opcode) {
6890 case LoongArchISD::FIRST_NUMBER:
6891 break;
6892
6893#define NODE_NAME_CASE(node) \
6894 case LoongArchISD::node: \
6895 return "LoongArchISD::" #node;
6896
6897 // TODO: Add more target-dependent nodes later.
6898 NODE_NAME_CASE(CALL)
6899 NODE_NAME_CASE(CALL_MEDIUM)
6900 NODE_NAME_CASE(CALL_LARGE)
6901 NODE_NAME_CASE(RET)
6902 NODE_NAME_CASE(TAIL)
6903 NODE_NAME_CASE(TAIL_MEDIUM)
6904 NODE_NAME_CASE(TAIL_LARGE)
6905 NODE_NAME_CASE(SELECT_CC)
6906 NODE_NAME_CASE(BR_CC)
6907 NODE_NAME_CASE(BRCOND)
6908 NODE_NAME_CASE(SLL_W)
6909 NODE_NAME_CASE(SRA_W)
6910 NODE_NAME_CASE(SRL_W)
6911 NODE_NAME_CASE(BSTRINS)
6912 NODE_NAME_CASE(BSTRPICK)
6913 NODE_NAME_CASE(MOVGR2FR_W_LA64)
6914 NODE_NAME_CASE(MOVFR2GR_S_LA64)
6915 NODE_NAME_CASE(FTINT)
6916 NODE_NAME_CASE(BUILD_PAIR_F64)
6917 NODE_NAME_CASE(SPLIT_PAIR_F64)
6918 NODE_NAME_CASE(REVB_2H)
6919 NODE_NAME_CASE(REVB_2W)
6920 NODE_NAME_CASE(BITREV_4B)
6921 NODE_NAME_CASE(BITREV_8B)
6922 NODE_NAME_CASE(BITREV_W)
6923 NODE_NAME_CASE(ROTR_W)
6924 NODE_NAME_CASE(ROTL_W)
6925 NODE_NAME_CASE(DIV_W)
6926 NODE_NAME_CASE(DIV_WU)
6927 NODE_NAME_CASE(MOD_W)
6928 NODE_NAME_CASE(MOD_WU)
6929 NODE_NAME_CASE(CLZ_W)
6930 NODE_NAME_CASE(CTZ_W)
6931 NODE_NAME_CASE(DBAR)
6932 NODE_NAME_CASE(IBAR)
6933 NODE_NAME_CASE(BREAK)
6934 NODE_NAME_CASE(SYSCALL)
6935 NODE_NAME_CASE(CRC_W_B_W)
6936 NODE_NAME_CASE(CRC_W_H_W)
6937 NODE_NAME_CASE(CRC_W_W_W)
6938 NODE_NAME_CASE(CRC_W_D_W)
6939 NODE_NAME_CASE(CRCC_W_B_W)
6940 NODE_NAME_CASE(CRCC_W_H_W)
6941 NODE_NAME_CASE(CRCC_W_W_W)
6942 NODE_NAME_CASE(CRCC_W_D_W)
6943 NODE_NAME_CASE(CSRRD)
6944 NODE_NAME_CASE(CSRWR)
6945 NODE_NAME_CASE(CSRXCHG)
6946 NODE_NAME_CASE(IOCSRRD_B)
6947 NODE_NAME_CASE(IOCSRRD_H)
6948 NODE_NAME_CASE(IOCSRRD_W)
6949 NODE_NAME_CASE(IOCSRRD_D)
6950 NODE_NAME_CASE(IOCSRWR_B)
6951 NODE_NAME_CASE(IOCSRWR_H)
6952 NODE_NAME_CASE(IOCSRWR_W)
6953 NODE_NAME_CASE(IOCSRWR_D)
6954 NODE_NAME_CASE(CPUCFG)
6955 NODE_NAME_CASE(MOVGR2FCSR)
6956 NODE_NAME_CASE(MOVFCSR2GR)
6957 NODE_NAME_CASE(CACOP_D)
6958 NODE_NAME_CASE(CACOP_W)
6959 NODE_NAME_CASE(VSHUF)
6960 NODE_NAME_CASE(VPICKEV)
6961 NODE_NAME_CASE(VPICKOD)
6962 NODE_NAME_CASE(VPACKEV)
6963 NODE_NAME_CASE(VPACKOD)
6964 NODE_NAME_CASE(VILVL)
6965 NODE_NAME_CASE(VILVH)
6966 NODE_NAME_CASE(VSHUF4I)
6967 NODE_NAME_CASE(VREPLVEI)
6968 NODE_NAME_CASE(VREPLGR2VR)
6969 NODE_NAME_CASE(XVPERMI)
6970 NODE_NAME_CASE(VPICK_SEXT_ELT)
6971 NODE_NAME_CASE(VPICK_ZEXT_ELT)
6972 NODE_NAME_CASE(VREPLVE)
6973 NODE_NAME_CASE(VALL_ZERO)
6974 NODE_NAME_CASE(VANY_ZERO)
6975 NODE_NAME_CASE(VALL_NONZERO)
6976 NODE_NAME_CASE(VANY_NONZERO)
6977 NODE_NAME_CASE(FRECIPE)
6978 NODE_NAME_CASE(FRSQRTE)
6979 NODE_NAME_CASE(VSLLI)
6980 NODE_NAME_CASE(VSRLI)
6981 NODE_NAME_CASE(VBSLL)
6982 NODE_NAME_CASE(VBSRL)
6983 NODE_NAME_CASE(VLDREPL)
6984 NODE_NAME_CASE(VMSKLTZ)
6985 NODE_NAME_CASE(VMSKGEZ)
6986 NODE_NAME_CASE(VMSKEQZ)
6987 NODE_NAME_CASE(VMSKNEZ)
6988 NODE_NAME_CASE(XVMSKLTZ)
6989 NODE_NAME_CASE(XVMSKGEZ)
6990 NODE_NAME_CASE(XVMSKEQZ)
6991 NODE_NAME_CASE(XVMSKNEZ)
6992 NODE_NAME_CASE(VHADDW)
6993 }
6994#undef NODE_NAME_CASE
6995 return nullptr;
6996}
6997
6998//===----------------------------------------------------------------------===//
6999// Calling Convention Implementation
7000//===----------------------------------------------------------------------===//
7001
7002 // Eight general-purpose registers a0-a7 are used for passing integer
7003 // arguments, with a0-a1 reused to return values. Generally, the GPRs are used
7004 // to pass fixed-point arguments, and floating-point arguments when no FPR is
7005 // available or with the soft float ABI.
7006const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7007 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7008 LoongArch::R10, LoongArch::R11};
7009 // Eight floating-point registers fa0-fa7 are used for passing floating-point
7010// arguments, and fa0-fa1 are also used to return values.
7011const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7012 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7013 LoongArch::F6, LoongArch::F7};
7014// FPR32 and FPR64 alias each other.
7015 const MCPhysReg ArgFPR64s[] = {
7016 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7017 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7018
7019const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7020 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7021 LoongArch::VR6, LoongArch::VR7};
7022
7023const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7024 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7025 LoongArch::XR6, LoongArch::XR7};
7026
7027// Pass a 2*GRLen argument that has been split into two GRLen values through
7028// registers or the stack as necessary.
7029static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7030 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7031 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7032 ISD::ArgFlagsTy ArgFlags2) {
7033 unsigned GRLenInBytes = GRLen / 8;
7034 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7035 // At least one half can be passed via register.
7036 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7037 VA1.getLocVT(), CCValAssign::Full));
7038 } else {
7039 // Both halves must be passed on the stack, with proper alignment.
7040 Align StackAlign =
7041 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7042 State.addLoc(
7044 State.AllocateStack(GRLenInBytes, StackAlign),
7045 VA1.getLocVT(), CCValAssign::Full));
7047 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7048 LocVT2, CCValAssign::Full));
7049 return false;
7050 }
7051 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7052 // The second half can also be passed via register.
7053 State.addLoc(
7054 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7055 } else {
7056 // The second half is passed via the stack, without additional alignment.
7058 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7059 LocVT2, CCValAssign::Full));
7060 }
7061 return false;
7062}
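// Worked example (LA64, GRLen == 64, illustrative registers): an i128 argument
// split into two i64 halves may be assigned
//   - two GPRs such as $a0/$a1 when at least two remain,
//   - one GPR plus an 8-byte stack slot when only one remains, or
//   - a 16-byte aligned stack area when no argument GPRs remain.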
7063
7064// Implements the LoongArch calling convention. Returns true upon failure.
7066 unsigned ValNo, MVT ValVT,
7067 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7068 CCState &State, bool IsRet, Type *OrigTy) {
7069 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7070 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7071 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7072 MVT LocVT = ValVT;
7073
7074 // Any return value split into more than two values can't be returned
7075 // directly.
7076 if (IsRet && ValNo > 1)
7077 return true;
7078
7079 // Floats go in GPRs when passed as variadic arguments or when no FPR is available.
7080 bool UseGPRForFloat = true;
7081
7082 switch (ABI) {
7083 default:
7084 llvm_unreachable("Unexpected ABI");
7085 break;
7090 UseGPRForFloat = ArgFlags.isVarArg();
7091 break;
7094 break;
7095 }
7096
7097 // If this is a variadic argument, the LoongArch calling convention requires
7098 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7099 // byte alignment. An aligned register should be used regardless of whether
7100 // the original argument was split during legalisation or not. The argument
7101 // will not be passed by registers if the original type is larger than
7102 // 2*GRLen, so the register alignment rule does not apply.
7103 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7104 if (ArgFlags.isVarArg() &&
7105 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7106 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7107 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7108 // Skip 'odd' register if necessary.
7109 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7110 State.AllocateReg(ArgGPRs);
7111 }
7112
7113 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7114 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7115 State.getPendingArgFlags();
7116
7117 assert(PendingLocs.size() == PendingArgFlags.size() &&
7118 "PendingLocs and PendingArgFlags out of sync");
7119
7120 // FPR32 and FPR64 alias each other.
7121 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7122 UseGPRForFloat = true;
7123
7124 if (UseGPRForFloat && ValVT == MVT::f32) {
7125 LocVT = GRLenVT;
7126 LocInfo = CCValAssign::BCvt;
7127 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7128 LocVT = MVT::i64;
7129 LocInfo = CCValAssign::BCvt;
7130 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7131 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7132 // registers are exhausted.
7133 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7134 // Depending on available argument GPRs, f64 may be passed in a pair of
7135 // GPRs, split between a GPR and the stack, or passed completely on the
7136 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7137 // cases.
7138 MCRegister Reg = State.AllocateReg(ArgGPRs);
7139 if (!Reg) {
7140 int64_t StackOffset = State.AllocateStack(8, Align(8));
7141 State.addLoc(
7142 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7143 return false;
7144 }
7145 LocVT = MVT::i32;
7146 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7147 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7148 if (HiReg) {
7149 State.addLoc(
7150 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7151 } else {
7152 int64_t StackOffset = State.AllocateStack(4, Align(4));
7153 State.addLoc(
7154 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7155 }
7156 return false;
7157 }
7158
7159 // Split arguments might be passed indirectly, so keep track of the pending
7160 // values.
7161 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7162 LocVT = GRLenVT;
7163 LocInfo = CCValAssign::Indirect;
7164 PendingLocs.push_back(
7165 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7166 PendingArgFlags.push_back(ArgFlags);
7167 if (!ArgFlags.isSplitEnd()) {
7168 return false;
7169 }
7170 }
7171
7172 // If the split argument only had two elements, it should be passed directly
7173 // in registers or on the stack.
7174 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7175 PendingLocs.size() <= 2) {
7176 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7177 // Apply the normal calling convention rules to the first half of the
7178 // split argument.
7179 CCValAssign VA = PendingLocs[0];
7180 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7181 PendingLocs.clear();
7182 PendingArgFlags.clear();
7183 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7184 ArgFlags);
7185 }
7186
7187 // Allocate to a register if possible, or else a stack slot.
7188 Register Reg;
7189 unsigned StoreSizeBytes = GRLen / 8;
7190 Align StackAlign = Align(GRLen / 8);
7191
7192 if (ValVT == MVT::f32 && !UseGPRForFloat)
7193 Reg = State.AllocateReg(ArgFPR32s);
7194 else if (ValVT == MVT::f64 && !UseGPRForFloat)
7195 Reg = State.AllocateReg(ArgFPR64s);
7196 else if (ValVT.is128BitVector())
7197 Reg = State.AllocateReg(ArgVRs);
7198 else if (ValVT.is256BitVector())
7199 Reg = State.AllocateReg(ArgXRs);
7200 else
7201 Reg = State.AllocateReg(ArgGPRs);
7202
7203 unsigned StackOffset =
7204 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7205
7206 // If we reach this point and PendingLocs is non-empty, we must be at the
7207 // end of a split argument that must be passed indirectly.
7208 if (!PendingLocs.empty()) {
7209 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7210 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7211 for (auto &It : PendingLocs) {
7212 if (Reg)
7213 It.convertToReg(Reg);
7214 else
7215 It.convertToMem(StackOffset);
7216 State.addLoc(It);
7217 }
7218 PendingLocs.clear();
7219 PendingArgFlags.clear();
7220 return false;
7221 }
7222 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7223 "Expected an GRLenVT at this stage");
7224
7225 if (Reg) {
7226 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7227 return false;
7228 }
7229
7230 // When a floating-point value is passed on the stack, no bit-cast is needed.
7231 if (ValVT.isFloatingPoint()) {
7232 LocVT = ValVT;
7233 LocInfo = CCValAssign::Full;
7234 }
7235
7236 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7237 return false;
7238}
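// Worked example of the variadic register-alignment rule above (LA32,
// GRLen == 32, illustrative): a variadic double has 2*GRLen/8 = 8-byte
// alignment, so if the next free GPR is an odd-indexed one such as $a1 it is
// skipped and the value occupies an aligned pair such as $a2/$a3 rather than
// straddling $a1/$a2.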
7239
7240void LoongArchTargetLowering::analyzeInputArgs(
7241 MachineFunction &MF, CCState &CCInfo,
7242 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7243 LoongArchCCAssignFn Fn) const {
7245 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7246 MVT ArgVT = Ins[i].VT;
7247 Type *ArgTy = nullptr;
7248 if (IsRet)
7249 ArgTy = FType->getReturnType();
7250 else if (Ins[i].isOrigArg())
7251 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7254 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7255 CCInfo, IsRet, ArgTy)) {
7256 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7257 << '\n');
7258 llvm_unreachable("");
7259 }
7260 }
7261}
7262
7263void LoongArchTargetLowering::analyzeOutputArgs(
7264 MachineFunction &MF, CCState &CCInfo,
7265 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7266 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7267 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7268 MVT ArgVT = Outs[i].VT;
7269 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7272 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7273 CCInfo, IsRet, OrigTy)) {
7274 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7275 << "\n");
7276 llvm_unreachable("");
7277 }
7278 }
7279}
7280
7281// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7282// values.
7284 const CCValAssign &VA, const SDLoc &DL) {
7285 switch (VA.getLocInfo()) {
7286 default:
7287 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7288 case CCValAssign::Full:
7290 break;
7291 case CCValAssign::BCvt:
7292 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7293 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7294 else
7295 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7296 break;
7297 }
7298 return Val;
7299}
7300
7302 const CCValAssign &VA, const SDLoc &DL,
7303 const ISD::InputArg &In,
7304 const LoongArchTargetLowering &TLI) {
7307 EVT LocVT = VA.getLocVT();
7308 SDValue Val;
7309 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7310 Register VReg = RegInfo.createVirtualRegister(RC);
7311 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7312 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7313
7314 // If input is sign extended from 32 bits, note it for the OptW pass.
7315 if (In.isOrigArg()) {
7316 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7317 if (OrigArg->getType()->isIntegerTy()) {
7318 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7319 // An input zero extended from i31 can also be considered sign extended.
7320 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7321 (BitWidth < 32 && In.Flags.isZExt())) {
7324 LAFI->addSExt32Register(VReg);
7325 }
7326 }
7327 }
7328
7329 return convertLocVTToValVT(DAG, Val, VA, DL);
7330}
7331
7332// The caller is responsible for loading the full value if the argument is
7333// passed with CCValAssign::Indirect.
7335 const CCValAssign &VA, const SDLoc &DL) {
7337 MachineFrameInfo &MFI = MF.getFrameInfo();
7338 EVT ValVT = VA.getValVT();
7339 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7340 /*IsImmutable=*/true);
7341 SDValue FIN = DAG.getFrameIndex(
7343
7344 ISD::LoadExtType ExtType;
7345 switch (VA.getLocInfo()) {
7346 default:
7347 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7348 case CCValAssign::Full:
7350 case CCValAssign::BCvt:
7351 ExtType = ISD::NON_EXTLOAD;
7352 break;
7353 }
7354 return DAG.getExtLoad(
7355 ExtType, DL, VA.getLocVT(), Chain, FIN,
7357}
7358
7360 const CCValAssign &VA,
7361 const CCValAssign &HiVA,
7362 const SDLoc &DL) {
7363 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7364 "Unexpected VA");
7366 MachineFrameInfo &MFI = MF.getFrameInfo();
7368
7369 assert(VA.isRegLoc() && "Expected register VA assignment");
7370
7371 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7372 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7373 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7374 SDValue Hi;
7375 if (HiVA.isMemLoc()) {
7376 // Second half of f64 is passed on the stack.
7377 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7378 /*IsImmutable=*/true);
7379 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7380 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7382 } else {
7383 // Second half of f64 is passed in another GPR.
7384 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7385 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7386 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7387 }
7388 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7389}
7390
7392 const CCValAssign &VA, const SDLoc &DL) {
7393 EVT LocVT = VA.getLocVT();
7394
7395 switch (VA.getLocInfo()) {
7396 default:
7397 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7398 case CCValAssign::Full:
7399 break;
7400 case CCValAssign::BCvt:
7401 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7402 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7403 else
7404 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7405 break;
7406 }
7407 return Val;
7408}
7409
7410static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7411 CCValAssign::LocInfo LocInfo,
7412 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
7413 CCState &State) {
7414 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7415 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7416 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7417 static const MCPhysReg GPRList[] = {
7418 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7419 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7420 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7421 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7422 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7423 return false;
7424 }
7425 }
7426
7427 if (LocVT == MVT::f32) {
7428 // Pass in STG registers: F1, F2, F3, F4
7429 // fs0,fs1,fs2,fs3
7430 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7431 LoongArch::F26, LoongArch::F27};
7432 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7433 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7434 return false;
7435 }
7436 }
7437
7438 if (LocVT == MVT::f64) {
7439 // Pass in STG registers: D1, D2, D3, D4
7440 // fs4,fs5,fs6,fs7
7441 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7442 LoongArch::F30_64, LoongArch::F31_64};
7443 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7444 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7445 return false;
7446 }
7447 }
7448
7449 report_fatal_error("No registers left in GHC calling convention");
7450 return true;
7451}
7452
7453// Transform physical registers into virtual registers.
7455 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7456 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
7457 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7458
7460
7461 switch (CallConv) {
7462 default:
7463 llvm_unreachable("Unsupported calling convention");
7464 case CallingConv::C:
7465 case CallingConv::Fast:
7467 break;
7468 case CallingConv::GHC:
7469 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
7470 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
7472 "GHC calling convention requires the F and D extensions");
7473 }
7474
7475 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7476 MVT GRLenVT = Subtarget.getGRLenVT();
7477 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
7478 // Used with varargs to accumulate store chains.
7479 std::vector<SDValue> OutChains;
7480
7481 // Assign locations to all of the incoming arguments.
7483 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7484
7485 if (CallConv == CallingConv::GHC)
7487 else
7488 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
7489
7490 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
7491 CCValAssign &VA = ArgLocs[i];
7492 SDValue ArgValue;
7493 // Passing f64 on LA32D with a soft float ABI must be handled as a special
7494 // case.
7495 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7496 assert(VA.needsCustom());
7497 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
7498 } else if (VA.isRegLoc())
7499 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
7500 else
7501 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
7502 if (VA.getLocInfo() == CCValAssign::Indirect) {
7503 // If the original argument was split and passed by reference, we need to
7504 // load all parts of it here (using the same address).
7505 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
7507 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
7508 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
7509 assert(ArgPartOffset == 0);
7510 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
7511 CCValAssign &PartVA = ArgLocs[i + 1];
7512 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
7513 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7514 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
7515 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
7517 ++i;
7518 ++InsIdx;
7519 }
7520 continue;
7521 }
7522 InVals.push_back(ArgValue);
7523 }
7524
7525 if (IsVarArg) {
7527 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
7528 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
7529 MachineFrameInfo &MFI = MF.getFrameInfo();
7530 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7531 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
7532
7533 // Offset of the first variable argument from stack pointer, and size of
7534 // the vararg save area. For now, the varargs save area is either zero or
7535 // large enough to hold a0-a7.
7536 int VaArgOffset, VarArgsSaveSize;
7537
7538 // If all registers are allocated, then all varargs must be passed on the
7539 // stack and we don't need to save any argregs.
7540 if (ArgRegs.size() == Idx) {
7541 VaArgOffset = CCInfo.getStackSize();
7542 VarArgsSaveSize = 0;
7543 } else {
7544 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
7545 VaArgOffset = -VarArgsSaveSize;
7546 }
7547
7548 // Record the frame index of the first variable argument
7549 // which is a value necessary to VASTART.
7550 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7551 LoongArchFI->setVarArgsFrameIndex(FI);
7552
7553 // If saving an odd number of registers, create an extra stack slot to
7554 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
7555 // offsets to even-numbered registers remain 2*GRLen-aligned.
7556 if (Idx % 2) {
7557 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
7558 true);
7559 VarArgsSaveSize += GRLenInBytes;
7560 }
7561
7562 // Copy the integer registers that may have been used for passing varargs
7563 // to the vararg save area.
7564 for (unsigned I = Idx; I < ArgRegs.size();
7565 ++I, VaArgOffset += GRLenInBytes) {
7566 const Register Reg = RegInfo.createVirtualRegister(RC);
7567 RegInfo.addLiveIn(ArgRegs[I], Reg);
7568 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
7569 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7570 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7571 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
7573 cast<StoreSDNode>(Store.getNode())
7574 ->getMemOperand()
7575 ->setValue((Value *)nullptr);
7576 OutChains.push_back(Store);
7577 }
7578 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
7579 }
7580
7581 // All stores are grouped in one node to allow the matching between
7582 // the size of Ins and InVals. This only happens for vararg functions.
7583 if (!OutChains.empty()) {
7584 OutChains.push_back(Chain);
7585 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
7586 }
7587
7588 return Chain;
7589}
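// Illustrative varargs layout (LA64): if only $a0 is consumed by fixed
// arguments, $a1-$a7 are spilled to the save area (7 * 8 = 56 bytes); since an
// odd number of registers is saved, an extra 8-byte slot keeps the area
// 2*GRLen aligned, giving VarArgsSaveSize == 64 at negative fixed-object
// offsets from the incoming stack pointer.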
7590
7592 return CI->isTailCall();
7593}
7594
7595 // Check that the return value is used only as a return value, as otherwise
7596 // we can't perform a tail call.
7598 SDValue &Chain) const {
7599 if (N->getNumValues() != 1)
7600 return false;
7601 if (!N->hasNUsesOfValue(1, 0))
7602 return false;
7603
7604 SDNode *Copy = *N->user_begin();
7605 if (Copy->getOpcode() != ISD::CopyToReg)
7606 return false;
7607
7608 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
7609 // isn't safe to perform a tail call.
7610 if (Copy->getGluedNode())
7611 return false;
7612
7613 // The copy must be used by a LoongArchISD::RET, and nothing else.
7614 bool HasRet = false;
7615 for (SDNode *Node : Copy->users()) {
7616 if (Node->getOpcode() != LoongArchISD::RET)
7617 return false;
7618 HasRet = true;
7619 }
7620
7621 if (!HasRet)
7622 return false;
7623
7624 Chain = Copy->getOperand(0);
7625 return true;
7626}
7627
7628// Check whether the call is eligible for tail call optimization.
7629bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
7630 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7631 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
7632
7633 auto CalleeCC = CLI.CallConv;
7634 auto &Outs = CLI.Outs;
7635 auto &Caller = MF.getFunction();
7636 auto CallerCC = Caller.getCallingConv();
7637
7638 // Do not tail call opt if the stack is used to pass parameters.
7639 if (CCInfo.getStackSize() != 0)
7640 return false;
7641
7642 // Do not tail call opt if any parameters need to be passed indirectly.
7643 for (auto &VA : ArgLocs)
7644 if (VA.getLocInfo() == CCValAssign::Indirect)
7645 return false;
7646
7647 // Do not tail call opt if either caller or callee uses struct return
7648 // semantics.
7649 auto IsCallerStructRet = Caller.hasStructRetAttr();
7650 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7651 if (IsCallerStructRet || IsCalleeStructRet)
7652 return false;
7653
7654 // Do not tail call opt if either the callee or caller has a byval argument.
7655 for (auto &Arg : Outs)
7656 if (Arg.Flags.isByVal())
7657 return false;
7658
7659 // The callee has to preserve all registers the caller needs to preserve.
7660 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7661 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
7662 if (CalleeCC != CallerCC) {
7663 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
7664 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
7665 return false;
7666 }
7667 return true;
7668}
7669
7671 return DAG.getDataLayout().getPrefTypeAlign(
7672 VT.getTypeForEVT(*DAG.getContext()));
7673}
7674
7675// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
7676// and output parameter nodes.
7677SDValue
7679 SmallVectorImpl<SDValue> &InVals) const {
7680 SelectionDAG &DAG = CLI.DAG;
7681 SDLoc &DL = CLI.DL;
7683 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
7685 SDValue Chain = CLI.Chain;
7686 SDValue Callee = CLI.Callee;
7687 CallingConv::ID CallConv = CLI.CallConv;
7688 bool IsVarArg = CLI.IsVarArg;
7689 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7690 MVT GRLenVT = Subtarget.getGRLenVT();
7691 bool &IsTailCall = CLI.IsTailCall;
7692
7694
7695 // Analyze the operands of the call, assigning locations to each operand.
7697 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7698
7699 if (CallConv == CallingConv::GHC)
7700 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
7701 else
7702 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
7703
7704 // Check if it's really possible to do a tail call.
7705 if (IsTailCall)
7706 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
7707
7708 if (IsTailCall)
7709 ++NumTailCalls;
7710 else if (CLI.CB && CLI.CB->isMustTailCall())
7711 report_fatal_error("failed to perform tail call elimination on a call "
7712 "site marked musttail");
7713
7714 // Get a count of how many bytes are to be pushed on the stack.
7715 unsigned NumBytes = ArgCCInfo.getStackSize();
7716
7717 // Create local copies for byval args.
7718 SmallVector<SDValue> ByValArgs;
7719 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7720 ISD::ArgFlagsTy Flags = Outs[i].Flags;
7721 if (!Flags.isByVal())
7722 continue;
7723
7724 SDValue Arg = OutVals[i];
7725 unsigned Size = Flags.getByValSize();
7726 Align Alignment = Flags.getNonZeroByValAlign();
7727
7728 int FI =
7729 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
7730 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7731 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
7732
7733 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
7734 /*IsVolatile=*/false,
7735 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
7737 ByValArgs.push_back(FIPtr);
7738 }
7739
7740 if (!IsTailCall)
7741 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
7742
7743 // Copy argument values to their designated locations.
7745 SmallVector<SDValue> MemOpChains;
7746 SDValue StackPtr;
7747 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
7748 ++i, ++OutIdx) {
7749 CCValAssign &VA = ArgLocs[i];
7750 SDValue ArgValue = OutVals[OutIdx];
7751 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
7752
7753 // Handle passing f64 on LA32D with a soft float ABI as a special case.
7754 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7755 assert(VA.isRegLoc() && "Expected register VA assignment");
7756 assert(VA.needsCustom());
7757 SDValue SplitF64 =
7759 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
7760 SDValue Lo = SplitF64.getValue(0);
7761 SDValue Hi = SplitF64.getValue(1);
7762
7763 Register RegLo = VA.getLocReg();
7764 RegsToPass.push_back(std::make_pair(RegLo, Lo));
7765
7766 // Get the CCValAssign for the Hi part.
7767 CCValAssign &HiVA = ArgLocs[++i];
7768
7769 if (HiVA.isMemLoc()) {
7770 // Second half of f64 is passed on the stack.
7771 if (!StackPtr.getNode())
7772 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7774 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7775 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
7776 // Emit the store.
7777 MemOpChains.push_back(DAG.getStore(
7778 Chain, DL, Hi, Address,
7780 } else {
7781 // Second half of f64 is passed in another GPR.
7782 Register RegHigh = HiVA.getLocReg();
7783 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
7784 }
7785 continue;
7786 }
7787
7788 // Promote the value if needed.
7789 // For now, only handle fully promoted and indirect arguments.
7790 if (VA.getLocInfo() == CCValAssign::Indirect) {
7791 // Store the argument in a stack slot and pass its address.
7792 Align StackAlign =
7793 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
7794 getPrefTypeAlign(ArgValue.getValueType(), DAG));
7795 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
7796 // If the original argument was split and passed by reference, we need to
7797 // store the required parts of it here (and pass just one address).
7798 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
7799 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
7800 assert(ArgPartOffset == 0);
7801 // Calculate the total size to store. We don't have access to what we're
7802 // actually storing other than performing the loop and collecting the
7803 // info.
7805 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
7806 SDValue PartValue = OutVals[OutIdx + 1];
7807 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
7808 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7809 EVT PartVT = PartValue.getValueType();
7810
7811 StoredSize += PartVT.getStoreSize();
7812 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
7813 Parts.push_back(std::make_pair(PartValue, Offset));
7814 ++i;
7815 ++OutIdx;
7816 }
7817 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
7818 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
7819 MemOpChains.push_back(
7820 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
7822 for (const auto &Part : Parts) {
7823 SDValue PartValue = Part.first;
7824 SDValue PartOffset = Part.second;
7826 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
7827 MemOpChains.push_back(
7828 DAG.getStore(Chain, DL, PartValue, Address,
7830 }
7831 ArgValue = SpillSlot;
7832 } else {
7833 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
7834 }
7835
7836 // Use local copy if it is a byval arg.
7837 if (Flags.isByVal())
7838 ArgValue = ByValArgs[j++];
7839
7840 if (VA.isRegLoc()) {
7841 // Queue up the argument copies and emit them at the end.
7842 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
7843 } else {
7844 assert(VA.isMemLoc() && "Argument not register or memory");
7845 assert(!IsTailCall && "Tail call not allowed if stack is used "
7846 "for passing parameters");
7847
7848 // Work out the address of the stack slot.
7849 if (!StackPtr.getNode())
7850 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7852 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7854
7855 // Emit the store.
7856 MemOpChains.push_back(
7857 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
7858 }
7859 }
7860
7861 // Join the stores, which are independent of one another.
7862 if (!MemOpChains.empty())
7863 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
7864
7865 SDValue Glue;
7866
7867 // Build a sequence of copy-to-reg nodes, chained and glued together.
7868 for (auto &Reg : RegsToPass) {
7869 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
7870 Glue = Chain.getValue(1);
7871 }
7872
7873 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
7874 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
7875 // split it, and so the direct call can be matched by PseudoCALL.
7876 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
7877 const GlobalValue *GV = S->getGlobal();
7878 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
7881 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
7882 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
7883 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
7886 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
7887 }
7888
7889 // The first call operand is the chain and the second is the target address.
7891 Ops.push_back(Chain);
7892 Ops.push_back(Callee);
7893
7894 // Add argument registers to the end of the list so that they are
7895 // known live into the call.
7896 for (auto &Reg : RegsToPass)
7897 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
7898
7899 if (!IsTailCall) {
7900 // Add a register mask operand representing the call-preserved registers.
7901 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
7902 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
7903 assert(Mask && "Missing call preserved mask for calling convention");
7904 Ops.push_back(DAG.getRegisterMask(Mask));
7905 }
7906
7907 // Glue the call to the argument copies, if any.
7908 if (Glue.getNode())
7909 Ops.push_back(Glue);
7910
7911 // Emit the call.
7912 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
7913 unsigned Op;
7914 switch (DAG.getTarget().getCodeModel()) {
7915 default:
7916 report_fatal_error("Unsupported code model");
7917 case CodeModel::Small:
7918 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
7919 break;
7920 case CodeModel::Medium:
7921 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
7923 break;
7924 case CodeModel::Large:
7925 assert(Subtarget.is64Bit() && "Large code model requires LA64");
7927 break;
7928 }
7929
7930 if (IsTailCall) {
7932 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
7933 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
7934 return Ret;
7935 }
7936
7937 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
7938 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
7939 Glue = Chain.getValue(1);
7940
7941 // Mark the end of the call, which is glued to the call itself.
7942 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
7943 Glue = Chain.getValue(1);
7944
7945 // Assign locations to each value returned by this call.
7947 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
7948 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
7949
7950 // Copy all of the result registers out of their specified physreg.
7951 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
7952 auto &VA = RVLocs[i];
7953 // Copy the value out.
7954 SDValue RetValue =
7955 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
7956 // Glue the RetValue to the end of the call sequence.
7957 Chain = RetValue.getValue(1);
7958 Glue = RetValue.getValue(2);
7959
7960 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7961 assert(VA.needsCustom());
7962 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
7963 MVT::i32, Glue);
7964 Chain = RetValue2.getValue(1);
7965 Glue = RetValue2.getValue(2);
7966 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
7967 RetValue, RetValue2);
7968 } else
7969 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
7970
7971 InVals.push_back(RetValue);
7972 }
7973
7974 return Chain;
7975}
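// Rough sketch of the DAG built above for a simple non-tail call under the
// small code model (operands abbreviated, registers illustrative):
//   ch       = callseq_start ch, 0, 0
//   ch, glue = CopyToReg ch, $a0, %arg0
//   ch, glue = LoongArchISD::CALL ch, TargetGlobalAddress:@callee,
//                                 Register:$a0, RegisterMask, glue
//   ch, glue = callseq_end ch, 0, 0, glue
//   %ret, ch, glue = CopyFromReg ch, $a0, glue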
7976
7978 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
7979 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
7980 const Type *RetTy) const {
7982 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
7983
7984 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7985 LoongArchABI::ABI ABI =
7986 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7987 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
7988 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
7989 return false;
7990 }
7991 return true;
7992}
7993
7995 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7997 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
7998 SelectionDAG &DAG) const {
7999 // Stores the assignment of the return value to a location.
8001
8002 // Info about the registers and stack slot.
8003 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8004 *DAG.getContext());
8005
8006 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8007 nullptr, CC_LoongArch);
8008 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8009 report_fatal_error("GHC functions return void only");
8010 SDValue Glue;
8011 SmallVector<SDValue, 4> RetOps(1, Chain);
8012
8013 // Copy the result values into the output registers.
8014 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8015 SDValue Val = OutVals[OutIdx];
8016 CCValAssign &VA = RVLocs[i];
8017 assert(VA.isRegLoc() && "Can only return in registers!");
8018
8019 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8020 // Handle returning f64 on LA32D with a soft float ABI.
8021 assert(VA.isRegLoc() && "Expected return via registers");
8022 assert(VA.needsCustom());
8024 DAG.getVTList(MVT::i32, MVT::i32), Val);
8025 SDValue Lo = SplitF64.getValue(0);
8026 SDValue Hi = SplitF64.getValue(1);
8027 Register RegLo = VA.getLocReg();
8028 Register RegHi = RVLocs[++i].getLocReg();
8029
8030 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8031 Glue = Chain.getValue(1);
8032 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8033 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8034 Glue = Chain.getValue(1);
8035 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8036 } else {
8037 // Handle a 'normal' return.
8038 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8039 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8040
8041 // Guarantee that all emitted copies are stuck together.
8042 Glue = Chain.getValue(1);
8043 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8044 }
8045 }
8046
8047 RetOps[0] = Chain; // Update chain.
8048
8049 // Add the glue node if we have it.
8050 if (Glue.getNode())
8051 RetOps.push_back(Glue);
8052
8053 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8054}
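// For example (illustrative), returning an f64 from an LA32 function under a
// soft-float ABI takes the custom path above:
//   lo, hi = SPLIT_PAIR_F64 val
//   $a0 <- lo, $a1 <- hi        ; two glued CopyToReg nodes
// where $a0/$a1 are the consecutive return GPRs recorded in RVLocs.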
8055
8057 EVT VT) const {
8058 if (!Subtarget.hasExtLSX())
8059 return false;
8060
8061 if (VT == MVT::f32) {
8062 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8063 return (masked == 0x3e000000 || masked == 0x40000000);
8064 }
8065
8066 if (VT == MVT::f64) {
8067 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8068 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8069 }
8070
8071 return false;
8072}
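// Worked example of the masks above: 0.5f is 0x3f000000 and
// 0x3f000000 & 0x7e07ffff == 0x3e000000, so it is accepted; 2.0f is
// 0x40000000, which masks to 0x40000000 and is also accepted. The sign bit is
// masked out, so -0.5f and -2.0f qualify as well. The f64 check accepts the
// analogous patterns, e.g. 0.5 (0x3fe0000000000000 -> 0x3fc0000000000000) and
// 2.0 (0x4000000000000000).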
8073
8074bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8075 bool ForCodeSize) const {
8076 // TODO: Maybe need more checks here after vector extension is supported.
8077 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8078 return false;
8079 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8080 return false;
8081 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8082}
8083
8085 return true;
8086}
8087
8089 return true;
8090}
8091
8092bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8093 const Instruction *I) const {
8094 if (!Subtarget.is64Bit())
8095 return isa<LoadInst>(I) || isa<StoreInst>(I);
8096
8097 if (isa<LoadInst>(I))
8098 return true;
8099
8100 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8101 // require fences because we can use amswap_db.[w/d].
8102 Type *Ty = I->getOperand(0)->getType();
8103 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8104 unsigned Size = Ty->getIntegerBitWidth();
8105 return (Size == 8 || Size == 16);
8106 }
8107
8108 return false;
8109}
8110
8112 LLVMContext &Context,
8113 EVT VT) const {
8114 if (!VT.isVector())
8115 return getPointerTy(DL);
8117}
8118
8120 // TODO: Support vectors.
8121 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
8122}
8123
8125 const CallInst &I,
8126 MachineFunction &MF,
8127 unsigned Intrinsic) const {
8128 switch (Intrinsic) {
8129 default:
8130 return false;
8131 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8132 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8133 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8134 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8136 Info.memVT = MVT::i32;
8137 Info.ptrVal = I.getArgOperand(0);
8138 Info.offset = 0;
8139 Info.align = Align(4);
8142 return true;
8143 // TODO: Add more Intrinsics later.
8144 }
8145}
8146
8147 // When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8, so
8148 // atomicrmw and/or/xor operations with operands narrower than 32 bits cannot
8149 // be expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a
8150 // regression, we need to implement the expansion manually.
8153
8155 Op == AtomicRMWInst::And) &&
8156 "Unable to expand");
8157 unsigned MinWordSize = 4;
8158
8159 IRBuilder<> Builder(AI);
8160 LLVMContext &Ctx = Builder.getContext();
8161 const DataLayout &DL = AI->getDataLayout();
8162 Type *ValueType = AI->getType();
8163 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8164
8165 Value *Addr = AI->getPointerOperand();
8166 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8167 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8168
8169 Value *AlignedAddr = Builder.CreateIntrinsic(
8170 Intrinsic::ptrmask, {PtrTy, IntTy},
8171 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8172 "AlignedAddr");
8173
8174 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8175 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8176 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8177 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8178 Value *Mask = Builder.CreateShl(
8179 ConstantInt::get(WordType,
8180 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8181 ShiftAmt, "Mask");
8182 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8183 Value *ValOperand_Shifted =
8184 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8185 ShiftAmt, "ValOperand_Shifted");
8186 Value *NewOperand;
8187 if (Op == AtomicRMWInst::And)
8188 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8189 else
8190 NewOperand = ValOperand_Shifted;
8191
8192 AtomicRMWInst *NewAI =
8193 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8194 AI->getOrdering(), AI->getSyncScopeID());
8195
8196 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8197 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8198 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8199 AI->replaceAllUsesWith(FinalOldResult);
8200 AI->eraseFromParent();
8201}
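// A minimal sketch of what the expansion above produces for
// 'atomicrmw or ptr %p, i8 %v seq_cst' on LA64 (names invented, IR condensed):
//   %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -4)
//   %lsb     = and i64 (ptrtoint ptr %p to i64), 3
//   %shamt   = trunc i64 (shl i64 %lsb, 3) to i32
//   %wide    = shl i32 (zext i8 %v to i32), %shamt
//   %old     = atomicrmw or ptr %aligned, i32 %wide seq_cst
//   %res     = trunc i32 (lshr i32 %old, %shamt) to i8
// For 'and', the shifted operand is first OR'ed with the inverted mask so the
// untouched bytes of the 32-bit word are preserved.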
8202
8205 // TODO: Add more AtomicRMWInst operations that need to be extended.
8206
8207 // Since floating-point operations require a non-trivial set of data
8208 // operations, use CmpXChg to expand.
8209 if (AI->isFloatingPointOperation() ||
8215
8216 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8219 AI->getOperation() == AtomicRMWInst::Sub)) {
8221 }
8222
8223 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8224 if (Subtarget.hasLAMCAS()) {
8225 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8226 AI->getOperation() == AtomicRMWInst::Or ||
8227 AI->getOperation() == AtomicRMWInst::Xor))
8228 return AtomicExpansionKind::Expand;
8229 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8230 return AtomicExpansionKind::CmpXChg;
8231 }
8232
8233 if (Size == 8 || Size == 16)
8234 return AtomicExpansionKind::MaskedIntrinsic;
8235 return AtomicExpansionKind::None;
8236}
8237
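// Map an atomicrmw binary operation onto the corresponding LoongArch masked
// atomic intrinsic for the given GRLen (32 or 64).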
8238static Intrinsic::ID
8239getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
8240 AtomicRMWInst::BinOp BinOp) {
8241 if (GRLen == 64) {
8242 switch (BinOp) {
8243 default:
8244 llvm_unreachable("Unexpected AtomicRMW BinOp");
8245 case AtomicRMWInst::Xchg:
8246 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8247 case AtomicRMWInst::Add:
8248 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8249 case AtomicRMWInst::Sub:
8250 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8251 case AtomicRMWInst::Nand:
8252 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8253 case AtomicRMWInst::UMax:
8254 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8255 case AtomicRMWInst::UMin:
8256 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8257 case AtomicRMWInst::Max:
8258 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8259 case AtomicRMWInst::Min:
8260 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8261 // TODO: support other AtomicRMWInst.
8262 }
8263 }
8264
8265 if (GRLen == 32) {
8266 switch (BinOp) {
8267 default:
8268 llvm_unreachable("Unexpected AtomicRMW BinOp");
8269 case AtomicRMWInst::Xchg:
8270 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8271 case AtomicRMWInst::Add:
8272 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8273 case AtomicRMWInst::Sub:
8274 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8275 case AtomicRMWInst::Nand:
8276 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8277 case AtomicRMWInst::UMax:
8278 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8279 case AtomicRMWInst::UMin:
8280 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8281 case AtomicRMWInst::Max:
8282 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8283 case AtomicRMWInst::Min:
8284 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8285 // TODO: support other AtomicRMWInst.
8286 }
8287 }
8288
8289 llvm_unreachable("Unexpected GRLen\n");
8290}
8291
8292TargetLowering::AtomicExpansionKind
8293LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
8294 AtomicCmpXchgInst *CI) const {
8295
8296 if (Subtarget.hasLAMCAS())
8297 return AtomicExpansionKind::None;
8298
8299 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
8300 if (Size == 8 || Size == 16)
8301 return AtomicExpansionKind::MaskedIntrinsic;
8302 return AtomicExpansionKind::None;
8303}
8304
8305Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
8306 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8307 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8308 unsigned GRLen = Subtarget.getGRLen();
8309 AtomicOrdering FailOrd = CI->getFailureOrdering();
8310 Value *FailureOrdering =
8311 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
8312 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
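 // On LA64 the masked cmpxchg intrinsic operates on i64 values, so sign-extend
 // the i32 inputs here and truncate the result back below.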
8313 if (GRLen == 64) {
8314 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
8315 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8316 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8317 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8318 }
8319 Type *Tys[] = {AlignedAddr->getType()};
8320 Value *Result = Builder.CreateIntrinsic(
8321 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
8322 if (GRLen == 64)
8323 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8324 return Result;
8325}
8326
8327Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
8328 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
8329 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
8330 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
8331 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
8332 // mask, as this produces better code than the LL/SC loop emitted by
8333 // int_loongarch_masked_atomicrmw_xchg.
8334 if (AI->getOperation() == AtomicRMWInst::Xchg &&
8335 isa<ConstantInt>(AI->getValOperand())) {
8336 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
8337 if (CVal->isZero())
8338 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
8339 Builder.CreateNot(Mask, "Inv_Mask"),
8340 AI->getAlign(), Ord);
8341 if (CVal->isMinusOne())
8342 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
8343 AI->getAlign(), Ord);
8344 }
8345
8346 unsigned GRLen = Subtarget.getGRLen();
8347 Value *Ordering =
8348 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
8349 Type *Tys[] = {AlignedAddr->getType()};
8350 Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
8351 AI->getModule(),
8352 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
8353
8354 if (GRLen == 64) {
8355 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
8356 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8357 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
8358 }
8359
8360 Value *Result;
8361
8362 // Must pass the shift amount needed to sign extend the loaded value prior
8363 // to performing a signed comparison for min/max. ShiftAmt is the number of
8364 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
8365 // is the number of bits to left+right shift the value in order to
8366 // sign-extend.
8367 if (AI->getOperation() == AtomicRMWInst::Min ||
8369 const DataLayout &DL = AI->getDataLayout();
8370 unsigned ValWidth =
8371 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
8372 Value *SextShamt =
8373 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
8374 Result = Builder.CreateCall(LlwOpScwLoop,
8375 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
8376 } else {
8377 Result =
8378 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
8379 }
8380
8381 if (GRLen == 64)
8382 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8383 return Result;
8384}
8385
8386bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
8387 const MachineFunction &MF, EVT VT) const {
8388 VT = VT.getScalarType();
8389
8390 if (!VT.isSimple())
8391 return false;
8392
8393 switch (VT.getSimpleVT().SimpleTy) {
8394 case MVT::f32:
8395 case MVT::f64:
8396 return true;
8397 default:
8398 break;
8399 }
8400
8401 return false;
8402}
8403
8404Register LoongArchTargetLowering::getExceptionPointerRegister(
8405 const Constant *PersonalityFn) const {
8406 return LoongArch::R4;
8407}
8408
8409Register LoongArchTargetLowering::getExceptionSelectorRegister(
8410 const Constant *PersonalityFn) const {
8411 return LoongArch::R5;
8412}
8413
8414//===----------------------------------------------------------------------===//
8415// Target Optimization Hooks
8416//===----------------------------------------------------------------------===//
8417
8418static int getEstimateRefinementSteps(EVT VT,
8419 const LoongArchSubtarget &Subtarget) {
8420 // The FRECIPE family of instructions has a relative accuracy of 2^-14.
8421 // IEEE single precision has 23 fraction bits and double precision has 52.
8422 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
8423 return RefinementSteps;
8424}
8425
8426SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
8427 SelectionDAG &DAG, int Enabled,
8428 int &RefinementSteps,
8429 bool &UseOneConstNR,
8430 bool Reciprocal) const {
8431 if (Subtarget.hasFrecipe()) {
8432 SDLoc DL(Operand);
8433 EVT VT = Operand.getValueType();
8434
8435 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8436 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8437 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8438 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8439 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8440
8441 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8442 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8443
8444 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
8445 if (Reciprocal)
8446 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
8447
8448 return Estimate;
8449 }
8450 }
8451
8452 return SDValue();
8453}
8454
8455SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
8456 SelectionDAG &DAG,
8457 int Enabled,
8458 int &RefinementSteps) const {
8459 if (Subtarget.hasFrecipe()) {
8460 SDLoc DL(Operand);
8461 EVT VT = Operand.getValueType();
8462
8463 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8464 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8465 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8466 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8467 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8468
8469 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8470 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8471
8472 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
8473 }
8474 }
8475
8476 return SDValue();
8477}
8478
8479//===----------------------------------------------------------------------===//
8480// LoongArch Inline Assembly Support
8481//===----------------------------------------------------------------------===//
8482
8483LoongArchTargetLowering::ConstraintType
8484LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
8485 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
8486 //
8487 // 'f': A floating-point register (if available).
8488 // 'k': A memory operand whose address is formed by a base register and
8489 // (optionally scaled) index register.
8490 // 'l': A signed 16-bit constant.
8491 // 'm': A memory operand whose address is formed by a base register and
8492 // offset that is suitable for use in instructions with the same
8493 // addressing mode as st.w and ld.w.
8494 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
8495 // instruction)
8496 // 'I': A signed 12-bit constant (for arithmetic instructions).
8497 // 'J': Integer zero.
8498 // 'K': An unsigned 12-bit constant (for logic instructions).
8499 // "ZB": An address that is held in a general-purpose register. The offset is
8500 // zero.
8501 // "ZC": A memory operand whose address is formed by a base register and
8502 // offset that is suitable for use in instructions with the same
8503 // addressing mode as ll.w and sc.w.
8504 if (Constraint.size() == 1) {
8505 switch (Constraint[0]) {
8506 default:
8507 break;
8508 case 'f':
8509 case 'q':
8510 return C_RegisterClass;
8511 case 'l':
8512 case 'I':
8513 case 'J':
8514 case 'K':
8515 return C_Immediate;
8516 case 'k':
8517 return C_Memory;
8518 }
8519 }
8520
8521 if (Constraint == "ZC" || Constraint == "ZB")
8522 return C_Memory;
8523
8524 // 'm' is handled here.
8525 return TargetLowering::getConstraintType(Constraint);
8526}
8527
8528InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
8529 StringRef ConstraintCode) const {
8530 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
8531 .Case("k", InlineAsm::ConstraintCode::k)
8532 .Case("ZB", InlineAsm::ConstraintCode::ZB)
8533 .Case("ZC", InlineAsm::ConstraintCode::ZC)
8534 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
8535}
8536
8537std::pair<unsigned, const TargetRegisterClass *>
8538LoongArchTargetLowering::getRegForInlineAsmConstraint(
8539 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8540 // First, see if this is a constraint that directly corresponds to a LoongArch
8541 // register class.
8542 if (Constraint.size() == 1) {
8543 switch (Constraint[0]) {
8544 case 'r':
8545 // TODO: Support fixed vectors up to GRLen?
8546 if (VT.isVector())
8547 break;
8548 return std::make_pair(0U, &LoongArch::GPRRegClass);
8549 case 'q':
8550 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
8551 case 'f':
8552 if (Subtarget.hasBasicF() && VT == MVT::f32)
8553 return std::make_pair(0U, &LoongArch::FPR32RegClass);
8554 if (Subtarget.hasBasicD() && VT == MVT::f64)
8555 return std::make_pair(0U, &LoongArch::FPR64RegClass);
8556 if (Subtarget.hasExtLSX() &&
8557 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
8558 return std::make_pair(0U, &LoongArch::LSX128RegClass);
8559 if (Subtarget.hasExtLASX() &&
8560 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
8561 return std::make_pair(0U, &LoongArch::LASX256RegClass);
8562 break;
8563 default:
8564 break;
8565 }
8566 }
8567
8568 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
8569 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
8570 // constraints while the official register name is prefixed with a '$'. So we
8571 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
8572 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
8573 // case insensitive, so no need to convert the constraint to upper case here.
8574 //
8575 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
8576 // decode the usage of register name aliases into their official names. And
8577 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
8578 // official register names.
8579 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
8580 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
8581 bool IsFP = Constraint[2] == 'f';
8582 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
8583 std::pair<unsigned, const TargetRegisterClass *> R;
8584 R = TargetLowering::getRegForInlineAsmConstraint(
8585 TRI, join_items("", Temp.first, Temp.second), VT);
8586 // Match those names to the widest floating point register type available.
8587 if (IsFP) {
8588 unsigned RegNo = R.first;
8589 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
8590 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
8591 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
8592 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
8593 }
8594 }
8595 }
8596 return R;
8597 }
8598
8599 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8600}
8601
8602void LoongArchTargetLowering::LowerAsmOperandForConstraint(
8603 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
8604 SelectionDAG &DAG) const {
8605 // Currently only support length 1 constraints.
8606 if (Constraint.size() == 1) {
8607 switch (Constraint[0]) {
8608 case 'l':
8609 // Validate & create a 16-bit signed immediate operand.
8610 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8611 uint64_t CVal = C->getSExtValue();
8612 if (isInt<16>(CVal))
8613 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8614 Subtarget.getGRLenVT()));
8615 }
8616 return;
8617 case 'I':
8618 // Validate & create a 12-bit signed immediate operand.
8619 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8620 uint64_t CVal = C->getSExtValue();
8621 if (isInt<12>(CVal))
8622 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8623 Subtarget.getGRLenVT()));
8624 }
8625 return;
8626 case 'J':
8627 // Validate & create an integer zero operand.
8628 if (auto *C = dyn_cast<ConstantSDNode>(Op))
8629 if (C->getZExtValue() == 0)
8630 Ops.push_back(
8631 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
8632 return;
8633 case 'K':
8634 // Validate & create a 12-bit unsigned immediate operand.
8635 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8636 uint64_t CVal = C->getZExtValue();
8637 if (isUInt<12>(CVal))
8638 Ops.push_back(
8639 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
8640 }
8641 return;
8642 default:
8643 break;
8644 }
8645 }
8646 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8647}
8648
8649#define GET_REGISTER_MATCHER
8650#include "LoongArchGenAsmMatcher.inc"
8651
8652Register
8653LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
8654 const MachineFunction &MF) const {
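 // Register names arrive with a leading '$' (e.g. "$r4"); drop it before
 // matching against the TableGen register names.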
8655 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
8656 std::string NewRegName = Name.second.str();
8657 Register Reg = MatchRegisterAltName(NewRegName);
8658 if (!Reg)
8659 Reg = MatchRegisterName(NewRegName);
8660 if (!Reg)
8661 return Reg;
8662 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
8663 if (!ReservedRegs.test(Reg))
8664 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
8665 StringRef(RegName) + "\"."));
8666 return Reg;
8667}
8668
8669bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
8670 EVT VT, SDValue C) const {
8671 // TODO: Support vectors.
8672 if (!VT.isScalarInteger())
8673 return false;
8674
8675 // Omit the optimization if the data size exceeds GRLen.
8676 if (VT.getSizeInBits() > Subtarget.getGRLen())
8677 return false;
8678
8679 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
8680 const APInt &Imm = ConstNode->getAPIntValue();
8681 // Break MUL into (SLLI + ADD/SUB) or ALSL.
8682 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
8683 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
8684 return true;
8685 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
8686 if (ConstNode->hasOneUse() &&
8687 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
8688 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
8689 return true;
8690 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
8691 // in which the immediate has two set bits. Or break (MUL x, imm)
8692 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
8693 // equals (1 << s0) - (1 << s1).
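 // For example, 4160 = (1 << 12) + (1 << 6) can become
 // (ADD (SLLI x, 12), (SLLI x, 6)).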
8694 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
8695 unsigned Shifts = Imm.countr_zero();
8696 // Reject immediates which can be composed via a single LUI.
8697 if (Shifts >= 12)
8698 return false;
8699 // Reject multiplications that can be optimized to
8700 // (SLLI (ALSL x, x, 1/2/3/4), s).
8701 APInt ImmPop = Imm.ashr(Shifts);
8702 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
8703 return false;
8704 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
8705 // since it needs one more instruction than the other 3 cases.
8706 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
8707 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
8708 (ImmSmall - Imm).isPowerOf2())
8709 return true;
8710 }
8711 }
8712
8713 return false;
8714}
8715
8716bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
8717 const AddrMode &AM,
8718 Type *Ty, unsigned AS,
8719 Instruction *I) const {
8720 // LoongArch has four basic addressing modes:
8721 // 1. reg
8722 // 2. reg + 12-bit signed offset
8723 // 3. reg + 14-bit signed offset left-shifted by 2
8724 // 4. reg1 + reg2
8725 // TODO: Add more checks after supporting the vector extension.
8726
8727 // No global is ever allowed as a base.
8728 if (AM.BaseGV)
8729 return false;
8730
8731 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
8732 // with `UAL` feature.
8733 if (!isInt<12>(AM.BaseOffs) &&
8734 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
8735 return false;
8736
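 // A scale of 1 corresponds to the register-indexed forms (e.g. ldx.w/stx.w),
 // which do not take an additional immediate offset.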
8737 switch (AM.Scale) {
8738 case 0:
8739 // "r+i" or just "i", depending on HasBaseReg.
8740 break;
8741 case 1:
8742 // "r+r+i" is not allowed.
8743 if (AM.HasBaseReg && AM.BaseOffs)
8744 return false;
8745 // Otherwise we have "r+r" or "r+i".
8746 break;
8747 case 2:
8748 // "2*r+r" or "2*r+i" is not allowed.
8749 if (AM.HasBaseReg || AM.BaseOffs)
8750 return false;
8751 // Allow "2*r" as "r+r".
8752 break;
8753 default:
8754 return false;
8755 }
8756
8757 return true;
8758}
8759
8760bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
8761 return isInt<12>(Imm);
8762}
8763
8764bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
8765 return isInt<12>(Imm);
8766}
8767
8768bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
8769 // Zexts are free if they can be combined with a load.
8770 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
8771 // poorly with type legalization of compares, which prefers sext.
8772 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
8773 EVT MemVT = LD->getMemoryVT();
8774 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
8775 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
8776 LD->getExtensionType() == ISD::ZEXTLOAD))
8777 return true;
8778 }
8779
8780 return TargetLowering::isZExtFree(Val, VT2);
8781}
8782
8783bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
8784 EVT DstVT) const {
8785 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
8786}
8787
8788bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
8789 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
8790}
8791
8792bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
8793 // TODO: Support vectors.
8794 if (Y.getValueType().isVector())
8795 return false;
8796
8797 return !isa<ConstantSDNode>(Y);
8798}
8799
8800ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
8801 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
8802 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
8803}
8804
8805bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
8806 Type *Ty, bool IsSigned) const {
8807 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
8808 return true;
8809
8810 return IsSigned;
8811}
8812
8813bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
8814 // Return false to suppress the unnecessary extensions if the LibCall
8815 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
8816 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
8817 Type.getSizeInBits() < Subtarget.getGRLen()))
8818 return false;
8819 return true;
8820}
8821
8822// memcpy and other memory intrinsics typically try to use a wider load/store
8823// if the source/dest is aligned and the copy size is large enough. We therefore
8824// want to align such objects passed to memory intrinsics.
8825bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
8826 unsigned &MinSize,
8827 Align &PrefAlign) const {
8828 if (!isa<MemIntrinsic>(CI))
8829 return false;
8830
8831 if (Subtarget.is64Bit()) {
8832 MinSize = 8;
8833 PrefAlign = Align(8);
8834 } else {
8835 MinSize = 4;
8836 PrefAlign = Align(4);
8837 }
8838
8839 return true;
8840}
8841
8842TargetLowering::LegalizeTypeAction
8843LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
8844 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
8845 VT.getVectorElementType() != MVT::i1)
8846 return TypeWidenVector;
8847
8848 return TargetLoweringBase::getPreferredVectorAction(VT);
8849}
8850
8851bool LoongArchTargetLowering::splitValueIntoRegisterParts(
8852 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
8853 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
8854 bool IsABIRegCopy = CC.has_value();
8855 EVT ValueVT = Val.getValueType();
8856
8857 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
8858 PartVT == MVT::f32) {
8859 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
8860 // NaN, and cast to f32.
8861 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
8862 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
8863 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
8864 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
8865 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
8866 Parts[0] = Val;
8867 return true;
8868 }
8869
8870 return false;
8871}
8872
8873SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
8874 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
8875 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
8876 bool IsABIRegCopy = CC.has_value();
8877
8878 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
8879 PartVT == MVT::f32) {
8880 SDValue Val = Parts[0];
8881
8882 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
8883 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
8884 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
8885 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
8886 return Val;
8887 }
8888
8889 return SDValue();
8890}
8891
8892MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
8893 CallingConv::ID CC,
8894 EVT VT) const {
8895 // Use f32 to pass f16.
8896 if (VT == MVT::f16 && Subtarget.hasBasicF())
8897 return MVT::f32;
8898
8899 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
8900}
8901
8902unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
8903 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
8904 // Use f32 to pass f16.
8905 if (VT == MVT::f16 && Subtarget.hasBasicF())
8906 return 1;
8907
8908 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
8909}
8910
8911bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
8912 SDValue Op, const APInt &OriginalDemandedBits,
8913 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
8914 unsigned Depth) const {
8915 EVT VT = Op.getValueType();
8916 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
8917 unsigned Opc = Op.getOpcode();
8918 switch (Opc) {
8919 default:
8920 break;
8921 case LoongArchISD::VMSKLTZ:
8922 case LoongArchISD::XVMSKLTZ: {
8923 SDValue Src = Op.getOperand(0);
8924 MVT SrcVT = Src.getSimpleValueType();
8925 unsigned SrcBits = SrcVT.getScalarSizeInBits();
8926 unsigned NumElts = SrcVT.getVectorNumElements();
8927
8928 // If we don't need the sign bits at all, just return zero.
8929 if (OriginalDemandedBits.countr_zero() >= NumElts)
8930 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
8931
8932 // Only demand the vector elements of the sign bits we need.
8933 APInt KnownUndef, KnownZero;
8934 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
8935 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
8936 TLO, Depth + 1))
8937 return true;
8938
8939 Known.Zero = KnownZero.zext(BitWidth);
8940 Known.Zero.setHighBits(BitWidth - NumElts);
8941
8942 // [X]VMSKLTZ only uses the MSB from each vector element.
8943 KnownBits KnownSrc;
8944 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
8945 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
8946 Depth + 1))
8947 return true;
8948
8949 if (KnownSrc.One[SrcBits - 1])
8950 Known.One.setLowBits(NumElts);
8951 else if (KnownSrc.Zero[SrcBits - 1])
8952 Known.Zero.setLowBits(NumElts);
8953
8954 // Attempt to avoid multi-use ops if we don't need anything from it.
8955 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
8956 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
8957 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
8958 return false;
8959 }
8960 }
8961
8962 return TargetLowering::SimplifyDemandedBitsForTargetNode(
8963 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
8964}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Register const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1391
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1033
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1388
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:191
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:506
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Definition: Instructions.h:599
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:709
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:843
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:721
@ Add
*p = old + v
Definition: Instructions.h:725
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:777
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:739
@ Or
*p = old | v
Definition: Instructions.h:733
@ Sub
*p = old - v
Definition: Instructions.h:727
@ And
*p = old & v
Definition: Instructions.h:729
@ Xor
*p = old ^ v
Definition: Instructions.h:735
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:781
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:769
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:737
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:743
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:741
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:773
@ Nand
*p = ~(old & v)
Definition: Instructions.h:731
Value * getPointerOperand()
Definition: Instructions.h:886
bool isFloatingPointOperation() const
Definition: Instructions.h:898
BinOp getOperation() const
Definition: Instructions.h:819
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
Definition: Instructions.h:877
Value * getValOperand()
Definition: Instructions.h:890
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:863
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
bool test(unsigned Idx) const
Definition: BitVector.h:461
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition: DataLayout.h:390
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:846
A debug info location.
Definition: DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:270
Argument * getArg(unsigned i) const
Definition: Function.h:884
bool isDSOLocal() const
Definition: GlobalValue.h:307
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2094
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1513
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:562
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:567
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:834
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1805
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1420
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2204
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:533
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1492
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2082
LLVMContext & getContext() const
Definition: IRBuilder.h:203
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1551
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2194
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2508
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1911
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2068
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition: IRBuilder.h:1573
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:78
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:86
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
unsigned getMaxBytesForAlignment() const
Align getPrefFunctionAlignment() const
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
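These MachineBasicBlock operations usually appear together in the custom-inserter pattern that splits a block around a pseudo instruction. A hedged sketch of that pattern follows (the helper name is hypothetical; real code also rewires the conditional branch between the two halves):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include <iterator>
using namespace llvm;

// Split BB after MI: move the trailing instructions into a new fall-through
// block and transfer successor/PHI bookkeeping. Sketch only.
static MachineBasicBlock *splitBlockAfter(MachineInstr &MI,
                                          MachineBasicBlock *BB) {
  MachineFunction *MF = BB->getParent();
  MachineBasicBlock *Tail = MF->CreateMachineBasicBlock(BB->getBasicBlock());
  MachineFunction::iterator It = ++BB->getIterator();
  MF->insert(It, Tail);

  Tail->splice(Tail->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Tail->transferSuccessorsAndUpdatePHIs(BB);
  BB->addSuccessor(Tail);
  return Tail;
}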
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
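A small hedged sketch of how addLiveIn pairs with getCopyFromReg when lowering an incoming argument; LoongArch::R4 (assumed here to be the first GPR argument register, requiring the backend's register headers) and the helper name are used for illustration only:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Mark an incoming physical argument register live-in, then read it in the
// DAG through the created virtual register. Sketch only.
static SDValue readIncomingGPR(SelectionDAG &DAG, SDValue Chain,
                               const SDLoc &DL, MVT GRLenVT) {
  MachineFunction &MF = DAG.getMachineFunction();
  Register VReg = MF.addLiveIn(LoongArch::R4, &LoongArch::GPRRegClass);
  return DAG.getCopyFromReg(Chain, DL, VReg, GRLenVT);
}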
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:72
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
Class to represent pointers.
Definition: DerivedTypes.h:700
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:740
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:229
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:813
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:504
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:459
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:868
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:839
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
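To show how a couple of these convenience builders compose, here is a hedged sketch that lowers a signed integer minimum as a setcc feeding a select; the i1 condition type and the helper name are assumptions:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// min(A, B) built from the getSetCC/getSelect helpers. Sketch only.
static SDValue emitIntegerMin(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                              SDValue A, SDValue B) {
  SDValue Cond = DAG.getSetCC(DL, MVT::i1, A, B, ISD::SETLT);
  return DAG.getSelect(DL, VT, Cond, A, B);
}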
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:498
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:719
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:499
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:707
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:808
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:493
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:885
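A hedged usage sketch of getSplatBuildVector; the element type, vector type, and constant are arbitrary choices for illustration:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Materialize a v4i32 BUILD_VECTOR with every lane equal to 42. Sketch only.
static SDValue buildSplat42(SelectionDAG &DAG, const SDLoc &DL) {
  SDValue Imm = DAG.getConstant(42, DL, MVT::i32);
  return DAG.getSplatBuildVector(MVT::v4i32, DL, Imm);
}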
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
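A hedged sketch of the usual MaskedValueIsZero pattern: prove that the bits a mask or extension would clear are already zero, so the operation can be dropped; the 64/32 widths and the helper name are assumptions:

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Return Val unchanged if its top 32 bits are provably zero, otherwise return
// an empty SDValue to signal "no simplification". Sketch only.
static SDValue skipRedundantHighMask(SelectionDAG &DAG, SDValue Val) {
  APInt HighBits = APInt::getHighBitsSet(64, 32);
  if (DAG.MaskedValueIsZero(Val, HighBits))
    return Val;
  return SDValue();
}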
LLVMContext * getContext() const
Definition: SelectionDAG.h:511
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:777
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:581
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
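A hedged usage sketch of SmallSet's insert()/count() idiom; the inline capacity of 8 and the helper name are arbitrary:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"

// Count distinct register numbers, relying on insert().second to detect the
// first occurrence of each value. Sketch only.
static unsigned countDistinctRegs(llvm::ArrayRef<unsigned> Regs) {
  llvm::SmallSet<unsigned, 8> Seen;
  unsigned N = 0;
  for (unsigned R : Regs)
    if (Seen.insert(R).second)
      ++N;
  return N;
}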
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:705
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:579
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:34
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:710
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:154
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:68
R Default(T Value)
Definition: StringSwitch.h:177
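A hedged sketch of the StringSwitch pattern as it tends to be used for getRegisterByName-style lookups; the register names and numbers below are illustrative, not a real register table:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// Map an ABI register name to a number, with ~0u as a "not found" sentinel.
// Sketch only; this table is not the backend's actual mapping.
static unsigned lookupRegNumber(llvm::StringRef Name) {
  return llvm::StringSwitch<unsigned>(Name)
      .Case("zero", 0)
      .Case("ra", 1)
      .Case("sp", 3)
      .Default(~0u);
}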
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
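A hedged sketch of softening an unsupported floating-point operation into a runtime-library call with makeLibCall; RTLIB::SIN_F32 is only an example libcall and the helper name is hypothetical:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Replace a single-operand f32 node with a libcall, discarding the chain for
// brevity. Sketch only.
static SDValue softenToLibcall(const TargetLowering &TLI, SelectionDAG &DAG,
                               SDValue Op, const SDLoc &DL) {
  TargetLowering::MakeLibCallOptions CallOptions;
  SDValue Ops[] = {Op.getOperand(0)};
  auto [Result, Chain] =
      TLI.makeLibCall(DAG, RTLIB::SIN_F32, MVT::f32, Ops, CallOptions, DL);
  (void)Chain;
  return Result;
}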
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
LLVM_ABI unsigned getIntegerBitWidth() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
self_iterator getIterator()
Definition: ilist_node.h:134
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:801
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1236
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1232
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:45
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1491
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:765
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1265
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1141
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:215
@ GlobalAddress
Definition: ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:410
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1338
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1343
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1490
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:505
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:985
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:975
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1574
@ GlobalTLSAddress
Definition: ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:656
@ WRITE_REGISTER
Definition: ISDOpcodes.h:135
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1331
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1090
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:1002
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1187
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1166
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:535
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:228
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1492
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1261
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1485
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:601
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1075
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:832
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1321
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:793
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1358
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
Definition: ISDOpcodes.h:1059
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1151
@ ConstantPool
Definition: ISDOpcodes.h:92
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:718
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:960
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:787
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1493
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:145
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:994
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:730
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1318
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:838
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1256
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1180
@ BlockAddress
Definition: ISDOpcodes.h:94
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:62
@ AssertZext
Definition: ISDOpcodes.h:63
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:543
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
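A hedged sketch showing getSetCCSwappedOperands in the common canonicalization of swapping a comparison's operands; the helper name is hypothetical:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Rebuild (setcc LHS, RHS, CC) as (setcc RHS, LHS, swapped CC). Sketch only;
// assumes SetCC really is an ISD::SETCC node.
static SDValue swapSetCCOperands(SelectionDAG &DAG, SDValue SetCC) {
  SDLoc DL(SetCC);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
  return DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(1),
                      SetCC.getOperand(0), ISD::getSetCCSwappedOperands(CC));
}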
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1691
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1671
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1736
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:751
ABI getTargetABI(StringRef ABIName)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:55
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
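A hedged sketch of the BuildMI builder chain as it appears in custom inserters; LoongArch::ADDI_D is assumed to be the LA64 add-immediate opcode, and the helper is illustrative only:

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

// Emit "addi.d Dst, Src, Imm" immediately before MI. Sketch only.
static void emitAddImm(MachineBasicBlock &MBB, MachineInstr &MI,
                       const DebugLoc &DL, const TargetInstrInfo *TII,
                       Register Dst, Register Src, int64_t Imm) {
  BuildMI(MBB, MI, DL, TII->get(LoongArch::ADDI_D), Dst)
      .addReg(Src)
      .addImm(Imm);
}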
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:270
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:282
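These MathExtras predicates are the building blocks of mul-by-constant style decompositions. A hedged sketch of one such check (the helper name and the specific pattern are assumptions, not this file's exact combine):

#include "llvm/Support/MathExtras.h"

// Recognize C == 2^k + 1 so that x * C can become (x << k) + x. Sketch only.
static bool isShiftAddMul(uint64_t C, unsigned &ShAmt) {
  if (C < 2 || !llvm::isPowerOf2_64(C - 1))
    return false;
  ShAmt = llvm::Log2_64(C - 1);
  return true;
}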
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
@ None
Definition: CodeGenData.h:107
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:198
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
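A hedged sketch using these constant predicates for the classic (select Cond, 1, 0) -> zext(Cond) fold; the operand layout and helper name are assumptions:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Fold a boolean select of 1/0 into a zero-extension of the condition.
// Sketch only; returns an empty SDValue when the pattern does not match.
static SDValue foldSelectOfBools(SelectionDAG &DAG, SDValue Cond,
                                 SDValue TrueV, SDValue FalseV,
                                 const SDLoc &DL, EVT VT) {
  if (isOneConstant(TrueV) && isNullConstant(FalseV))
    return DAG.getZExtOrTrunc(Cond, DL, VT);
  return SDValue();
}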
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition: Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:216
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Register getFrameRegister(const MachineFunction &MF) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)