1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/IntrinsicsLoongArch.h"
32#include "llvm/Support/Debug.h"
37
38using namespace llvm;
39
40#define DEBUG_TYPE "loongarch-isel-lowering"
41
42STATISTIC(NumTailCalls, "Number of tail calls");
43
44static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
45 cl::desc("Trap on integer division by zero."),
46 cl::init(false));
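// Illustrative usage (not part of the source): since this is a hidden llc
// option, the zero-divisor trap can be enabled with, for example:
//   llc -mtriple=loongarch64 -loongarch-check-zero-division input.ll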
47
48LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
49 const LoongArchSubtarget &STI)
50 : TargetLowering(TM), Subtarget(STI) {
51
52 MVT GRLenVT = Subtarget.getGRLenVT();
53
54 // Set up the register classes.
55
56 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
57 if (Subtarget.hasBasicF())
58 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
59 if (Subtarget.hasBasicD())
60 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
61
62 static const MVT::SimpleValueType LSXVTs[] = {
63 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
64 static const MVT::SimpleValueType LASXVTs[] = {
65 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
66
67 if (Subtarget.hasExtLSX())
68 for (MVT VT : LSXVTs)
69 addRegisterClass(VT, &LoongArch::LSX128RegClass);
70
71 if (Subtarget.hasExtLASX())
72 for (MVT VT : LASXVTs)
73 addRegisterClass(VT, &LoongArch::LASX256RegClass);
74
75 // Set operations for LA32 and LA64.
76
78 MVT::i1, Promote);
79
86
89 GRLenVT, Custom);
90
92
97
100
104
106
107 // BITREV/REVB requires the 32S feature.
108 if (STI.has32S()) {
109 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
110 // we get to know which of sll and revb.2h is faster.
113
114 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
115 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
116 // and i32 could still be byte-swapped relatively cheaply.
118 } else {
126 }
127
134
137
138 // Set operations for LA64 only.
139
140 if (Subtarget.is64Bit()) {
158
162 Custom);
164 }
165
166 // Set operations for LA32 only.
167
168 if (!Subtarget.is64Bit()) {
174 if (Subtarget.hasBasicD())
176 }
177
179
180 static const ISD::CondCode FPCCToExpand[] = {
183
184 // Set operations for 'F' feature.
185
186 if (Subtarget.hasBasicF()) {
187 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
188 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
190 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
191 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
192
210 Subtarget.isSoftFPABI() ? LibCall : Custom);
212 Subtarget.isSoftFPABI() ? LibCall : Custom);
215 Subtarget.isSoftFPABI() ? LibCall : Custom);
216
217 if (Subtarget.is64Bit())
219
220 if (!Subtarget.hasBasicD()) {
222 if (Subtarget.is64Bit()) {
225 }
226 }
227 }
228
229 // Set operations for 'D' feature.
230
231 if (Subtarget.hasBasicD()) {
232 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
234 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
235 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
236 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
237 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
238 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
239
258 Subtarget.isSoftFPABI() ? LibCall : Custom);
261 Subtarget.isSoftFPABI() ? LibCall : Custom);
262
263 if (Subtarget.is64Bit())
265 }
266
267 // Set operations for 'LSX' feature.
268
269 if (Subtarget.hasExtLSX()) {
271 // Expand all truncating stores and extending loads.
272 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
273 setTruncStoreAction(VT, InnerVT, Expand);
276 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
277 }
278 // By default everything must be expanded. Then we will selectively turn
279 // on ones that can be effectively codegen'd.
280 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
282 }
283
284 for (MVT VT : LSXVTs) {
288
292
297 }
298 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
301 Legal);
303 VT, Legal);
310 Expand);
314 }
315 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
317 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
319 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
322 }
323 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
331 VT, Expand);
333 }
335 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
336 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
337 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
338 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
339
340 for (MVT VT :
341 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
342 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
345 }
346 }
347
348 // Set operations for 'LASX' feature.
349
350 if (Subtarget.hasExtLASX()) {
351 for (MVT VT : LASXVTs) {
355
361
365 }
366 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
369 Legal);
371 VT, Legal);
378 Expand);
383 }
384 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
386 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
388 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
391 }
392 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
400 VT, Expand);
402 }
403 }
404
405 // Set DAG combine for LA32 and LA64.
406
411
412 // Set DAG combine for 'LSX' feature.
413
414 if (Subtarget.hasExtLSX()) {
417 }
418
419 // Compute derived properties from the register classes.
421
423
426
428
430
431 // Function alignments.
433 // Set preferred alignments.
437
438 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
439 if (Subtarget.hasLAMCAS())
440 setMinCmpXchgSizeInBits(8);
441
442 if (Subtarget.hasSCQ()) {
445 }
446}
447
448bool LoongArchTargetLowering::isOffsetFoldingLegal(
449 const GlobalAddressSDNode *GA) const {
450 // In order to maximise the opportunity for common subexpression elimination,
451 // keep a separate ADD node for the global address offset instead of folding
452 // it in the global address node. Later peephole optimisations may choose to
453 // fold it back in when profitable.
454 return false;
455}
456
457SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
458 SelectionDAG &DAG) const {
459 switch (Op.getOpcode()) {
460 case ISD::ATOMIC_FENCE:
461 return lowerATOMIC_FENCE(Op, DAG);
462 case ISD::EH_DWARF_CFA:
463 return lowerEH_DWARF_CFA(Op, DAG);
464 case ISD::GlobalAddress:
465 return lowerGlobalAddress(Op, DAG);
466 case ISD::GlobalTLSAddress:
467 return lowerGlobalTLSAddress(Op, DAG);
468 case ISD::INTRINSIC_WO_CHAIN:
469 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
470 case ISD::INTRINSIC_W_CHAIN:
471 return lowerINTRINSIC_W_CHAIN(Op, DAG);
472 case ISD::INTRINSIC_VOID:
473 return lowerINTRINSIC_VOID(Op, DAG);
474 case ISD::BlockAddress:
475 return lowerBlockAddress(Op, DAG);
476 case ISD::JumpTable:
477 return lowerJumpTable(Op, DAG);
478 case ISD::SHL_PARTS:
479 return lowerShiftLeftParts(Op, DAG);
480 case ISD::SRA_PARTS:
481 return lowerShiftRightParts(Op, DAG, true);
482 case ISD::SRL_PARTS:
483 return lowerShiftRightParts(Op, DAG, false);
484 case ISD::ConstantPool:
485 return lowerConstantPool(Op, DAG);
486 case ISD::FP_TO_SINT:
487 return lowerFP_TO_SINT(Op, DAG);
488 case ISD::BITCAST:
489 return lowerBITCAST(Op, DAG);
490 case ISD::UINT_TO_FP:
491 return lowerUINT_TO_FP(Op, DAG);
492 case ISD::SINT_TO_FP:
493 return lowerSINT_TO_FP(Op, DAG);
494 case ISD::VASTART:
495 return lowerVASTART(Op, DAG);
496 case ISD::FRAMEADDR:
497 return lowerFRAMEADDR(Op, DAG);
498 case ISD::RETURNADDR:
499 return lowerRETURNADDR(Op, DAG);
500 case ISD::WRITE_REGISTER:
501 return lowerWRITE_REGISTER(Op, DAG);
502 case ISD::INSERT_VECTOR_ELT:
503 return lowerINSERT_VECTOR_ELT(Op, DAG);
504 case ISD::EXTRACT_VECTOR_ELT:
505 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
506 case ISD::BUILD_VECTOR:
507 return lowerBUILD_VECTOR(Op, DAG);
508 case ISD::CONCAT_VECTORS:
509 return lowerCONCAT_VECTORS(Op, DAG);
510 case ISD::VECTOR_SHUFFLE:
511 return lowerVECTOR_SHUFFLE(Op, DAG);
512 case ISD::BITREVERSE:
513 return lowerBITREVERSE(Op, DAG);
514 case ISD::SCALAR_TO_VECTOR:
515 return lowerSCALAR_TO_VECTOR(Op, DAG);
516 case ISD::PREFETCH:
517 return lowerPREFETCH(Op, DAG);
518 case ISD::SELECT:
519 return lowerSELECT(Op, DAG);
520 case ISD::BRCOND:
521 return lowerBRCOND(Op, DAG);
522 case ISD::FP_TO_FP16:
523 return lowerFP_TO_FP16(Op, DAG);
524 case ISD::FP16_TO_FP:
525 return lowerFP16_TO_FP(Op, DAG);
526 case ISD::FP_TO_BF16:
527 return lowerFP_TO_BF16(Op, DAG);
528 case ISD::BF16_TO_FP:
529 return lowerBF16_TO_FP(Op, DAG);
530 case ISD::VECREDUCE_ADD:
531 return lowerVECREDUCE_ADD(Op, DAG);
532 }
533 return SDValue();
534}
535
536// Lower vecreduce_add using vhaddw instructions.
537// For example:
538// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
539// can be lowered to:
540// VHADDW_D_W vr0, vr0, vr0
541// VHADDW_Q_D vr0, vr0, vr0
542// VPICKVE2GR_D a0, vr0, 0
543// ADDI_W a0, a0, 0
544SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
545 SelectionDAG &DAG) const {
546
547 SDLoc DL(Op);
548 MVT OpVT = Op.getSimpleValueType();
549 SDValue Val = Op.getOperand(0);
550
551 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
552 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
553
554 unsigned LegalVecSize = 128;
555 bool isLASX256Vector =
556 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
557
558 // Make sure the operand type is legal, widening the vector until it is.
559 while (!isTypeLegal(Val.getSimpleValueType())) {
560 Val = DAG.WidenVector(Val, DL);
561 }
562
563 // NumEles determines the iteration count; v4i32 for LSX and v8i32 for
564 // LASX should end up with the same count.
565 if (isLASX256Vector) {
566 NumEles /= 2;
567 LegalVecSize = 256;
568 }
569
570 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
571 MVT IntTy = MVT::getIntegerVT(EleBits);
572 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
573 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
574 }
575
576 if (isLASX256Vector) {
577 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
578 DAG.getConstant(2, DL, MVT::i64));
579 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
580 }
581
582 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
583 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
584}
585
586SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
587 SelectionDAG &DAG) const {
588 unsigned IsData = Op.getConstantOperandVal(4);
589
590 // We don't support non-data prefetch.
591 // Just preserve the chain.
592 if (!IsData)
593 return Op.getOperand(0);
594
595 return Op;
596}
597
598// Return true if Val is equal to (setcc LHS, RHS, CC).
599// Return false if Val is the inverse of (setcc LHS, RHS, CC).
600// Otherwise, return std::nullopt.
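// Illustrative example (not part of the source): with Val = (setcc a, b, setlt),
//   matchSetCC(a, b, setlt, Val) returns true, while
//   matchSetCC(a, b, setge, Val) returns false (the inverse condition).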
601static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
602 ISD::CondCode CC, SDValue Val) {
603 assert(Val->getOpcode() == ISD::SETCC);
604 SDValue LHS2 = Val.getOperand(0);
605 SDValue RHS2 = Val.getOperand(1);
606 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
607
608 if (LHS == LHS2 && RHS == RHS2) {
609 if (CC == CC2)
610 return true;
611 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
612 return false;
613 } else if (LHS == RHS2 && RHS == LHS2) {
614 CC2 = ISD::getSetCCSwappedOperands(CC2);
615 if (CC == CC2)
616 return true;
617 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
618 return false;
619 }
620
621 return std::nullopt;
622}
623
624static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
625 const LoongArchSubtarget &Subtarget) {
626 SDValue CondV = N->getOperand(0);
627 SDValue TrueV = N->getOperand(1);
628 SDValue FalseV = N->getOperand(2);
629 MVT VT = N->getSimpleValueType(0);
630 SDLoc DL(N);
631
632 // (select c, -1, y) -> -c | y
633 if (isAllOnesConstant(TrueV)) {
634 SDValue Neg = DAG.getNegative(CondV, DL, VT);
635 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
636 }
637 // (select c, y, -1) -> (c-1) | y
638 if (isAllOnesConstant(FalseV)) {
639 SDValue Neg =
640 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
641 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
642 }
643
644 // (select c, 0, y) -> (c-1) & y
645 if (isNullConstant(TrueV)) {
646 SDValue Neg =
647 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
648 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
649 }
650 // (select c, y, 0) -> -c & y
651 if (isNullConstant(FalseV)) {
652 SDValue Neg = DAG.getNegative(CondV, DL, VT);
653 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
654 }
655
656 // select c, ~x, x --> xor -c, x
657 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
658 const APInt &TrueVal = TrueV->getAsAPIntVal();
659 const APInt &FalseVal = FalseV->getAsAPIntVal();
660 if (~TrueVal == FalseVal) {
661 SDValue Neg = DAG.getNegative(CondV, DL, VT);
662 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
663 }
664 }
665
666 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
667 // when both truev and falsev are also setcc.
668 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
669 FalseV.getOpcode() == ISD::SETCC) {
670 SDValue LHS = CondV.getOperand(0);
671 SDValue RHS = CondV.getOperand(1);
672 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
673
674 // (select x, x, y) -> x | y
675 // (select !x, x, y) -> x & y
676 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
677 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
678 DAG.getFreeze(FalseV));
679 }
680 // (select x, y, x) -> x & y
681 // (select !x, y, x) -> x | y
682 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
683 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
684 DAG.getFreeze(TrueV), FalseV);
685 }
686 }
687
688 return SDValue();
689}
690
691// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
692// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
693// For now we only consider the transformation profitable if `binOp(c0, c1)`
694// ends up being `0` or `-1`. In such cases we can replace `select` with `and`.
695// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
696// than `c0`?
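// Illustrative example (not part of the source):
//   (and (select cond, x, 0), c1) --> (select cond, (and x, c1), 0)
// since binOp(c0, c1) = (and 0, c1) folds to 0, which keeps the transformation
// profitable.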
697static SDValue
698foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
699 const LoongArchSubtarget &Subtarget) {
700 unsigned SelOpNo = 0;
701 SDValue Sel = BO->getOperand(0);
702 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
703 SelOpNo = 1;
704 Sel = BO->getOperand(1);
705 }
706
707 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
708 return SDValue();
709
710 unsigned ConstSelOpNo = 1;
711 unsigned OtherSelOpNo = 2;
712 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
713 ConstSelOpNo = 2;
714 OtherSelOpNo = 1;
715 }
716 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
717 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
718 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
719 return SDValue();
720
721 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
722 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
723 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
724 return SDValue();
725
726 SDLoc DL(Sel);
727 EVT VT = BO->getValueType(0);
728
729 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
730 if (SelOpNo == 1)
731 std::swap(NewConstOps[0], NewConstOps[1]);
732
733 SDValue NewConstOp =
734 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
735 if (!NewConstOp)
736 return SDValue();
737
738 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
739 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
740 return SDValue();
741
742 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
743 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
744 if (SelOpNo == 1)
745 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
746 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
747
748 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
749 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
750 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
751}
752
753// Changes the condition code and swaps operands if necessary, so the SetCC
754// operation matches one of the comparisons supported directly by branches
755// in the LoongArch ISA. May adjust compares to favor compare with 0 over
756// compare with 1/-1.
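// Illustrative examples (not part of the source):
//   (setgt X, -1) becomes (setge X, 0)
//   (setlt X, 1)  becomes (setge 0, X)
//   a single-bit test such as (seteq (and X, 0x800), 0) is handled by shifting
//   bit 11 to the MSB and comparing the shifted value with 0.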
757static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
758 ISD::CondCode &CC, SelectionDAG &DAG) {
759 // If this is a single bit test that can't be handled by ANDI, shift the
760 // bit to be tested to the MSB and perform a signed compare with 0.
761 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
762 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
763 isa<ConstantSDNode>(LHS.getOperand(1))) {
764 uint64_t Mask = LHS.getConstantOperandVal(1);
765 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
766 unsigned ShAmt = 0;
767 if (isPowerOf2_64(Mask)) {
768 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
769 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
770 } else {
771 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
772 }
773
774 LHS = LHS.getOperand(0);
775 if (ShAmt != 0)
776 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
777 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
778 return;
779 }
780 }
781
782 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
783 int64_t C = RHSC->getSExtValue();
784 switch (CC) {
785 default:
786 break;
787 case ISD::SETGT:
788 // Convert X > -1 to X >= 0.
789 if (C == -1) {
790 RHS = DAG.getConstant(0, DL, RHS.getValueType());
791 CC = ISD::SETGE;
792 return;
793 }
794 break;
795 case ISD::SETLT:
796 // Convert X < 1 to 0 >= X.
797 if (C == 1) {
798 RHS = LHS;
799 LHS = DAG.getConstant(0, DL, RHS.getValueType());
800 CC = ISD::SETGE;
801 return;
802 }
803 break;
804 }
805 }
806
807 switch (CC) {
808 default:
809 break;
810 case ISD::SETGT:
811 case ISD::SETLE:
812 case ISD::SETUGT:
813 case ISD::SETULE:
815 std::swap(LHS, RHS);
816 break;
817 }
818}
819
820SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
821 SelectionDAG &DAG) const {
822 SDValue CondV = Op.getOperand(0);
823 SDValue TrueV = Op.getOperand(1);
824 SDValue FalseV = Op.getOperand(2);
825 SDLoc DL(Op);
826 MVT VT = Op.getSimpleValueType();
827 MVT GRLenVT = Subtarget.getGRLenVT();
828
829 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
830 return V;
831
832 if (Op.hasOneUse()) {
833 unsigned UseOpc = Op->user_begin()->getOpcode();
834 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
835 SDNode *BinOp = *Op->user_begin();
836 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
837 DAG, Subtarget)) {
838 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
839 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
840 // may return a constant node and cause a crash in lowerSELECT.
841 if (NewSel.getOpcode() == ISD::SELECT)
842 return lowerSELECT(NewSel, DAG);
843 return NewSel;
844 }
845 }
846 }
847
848 // If the condition is not an integer SETCC which operates on GRLenVT, we need
849 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
850 // (select condv, truev, falsev)
851 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
852 if (CondV.getOpcode() != ISD::SETCC ||
853 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
854 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
855 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
856
857 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
858
859 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
860 }
861
862 // If the CondV is the output of a SETCC node which operates on GRLenVT
863 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
864 // to take advantage of the integer compare+branch instructions. i.e.: (select
865 // (setcc lhs, rhs, cc), truev, falsev)
866 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
867 SDValue LHS = CondV.getOperand(0);
868 SDValue RHS = CondV.getOperand(1);
869 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
870
871 // Special case for a select of 2 constants that have a difference of 1.
872 // Normally this is done by DAGCombine, but if the select is introduced by
873 // type legalization or op legalization, we miss it. Restricting to SETLT
874 // case for now because that is what signed saturating add/sub need.
875 // FIXME: We don't need the condition to be SETLT or even a SETCC,
876 // but we would probably want to swap the true/false values if the condition
877 // is SETGE/SETLE to avoid an XORI.
878 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
879 CCVal == ISD::SETLT) {
880 const APInt &TrueVal = TrueV->getAsAPIntVal();
881 const APInt &FalseVal = FalseV->getAsAPIntVal();
882 if (TrueVal - 1 == FalseVal)
883 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
884 if (TrueVal + 1 == FalseVal)
885 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
886 }
887
888 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
889 // 1 < x ? x : 1 -> 0 < x ? x : 1
890 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
891 RHS == TrueV && LHS == FalseV) {
892 LHS = DAG.getConstant(0, DL, VT);
893 // 0 <u x is the same as x != 0.
894 if (CCVal == ISD::SETULT) {
895 std::swap(LHS, RHS);
896 CCVal = ISD::SETNE;
897 }
898 }
899
900 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
901 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
902 RHS == FalseV) {
903 RHS = DAG.getConstant(0, DL, VT);
904 }
905
906 SDValue TargetCC = DAG.getCondCode(CCVal);
907
908 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
909 // (select (setcc lhs, rhs, CC), constant, falsev)
910 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
911 std::swap(TrueV, FalseV);
912 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
913 }
914
915 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
916 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
917}
918
919SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
920 SelectionDAG &DAG) const {
921 SDValue CondV = Op.getOperand(1);
922 SDLoc DL(Op);
923 MVT GRLenVT = Subtarget.getGRLenVT();
924
925 if (CondV.getOpcode() == ISD::SETCC) {
926 if (CondV.getOperand(0).getValueType() == GRLenVT) {
927 SDValue LHS = CondV.getOperand(0);
928 SDValue RHS = CondV.getOperand(1);
929 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
930
931 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
932
933 SDValue TargetCC = DAG.getCondCode(CCVal);
934 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
935 Op.getOperand(0), LHS, RHS, TargetCC,
936 Op.getOperand(2));
937 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
938 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
939 Op.getOperand(0), CondV, Op.getOperand(2));
940 }
941 }
942
943 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
944 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
945 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
946}
947
948SDValue
949LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
950 SelectionDAG &DAG) const {
951 SDLoc DL(Op);
952 MVT OpVT = Op.getSimpleValueType();
953
954 SDValue Vector = DAG.getUNDEF(OpVT);
955 SDValue Val = Op.getOperand(0);
956 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
957
958 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
959}
960
961SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
962 SelectionDAG &DAG) const {
963 EVT ResTy = Op->getValueType(0);
964 SDValue Src = Op->getOperand(0);
965 SDLoc DL(Op);
966
967 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
968 unsigned int OrigEltNum = ResTy.getVectorNumElements();
969 unsigned int NewEltNum = NewVT.getVectorNumElements();
970
971 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
972
974 for (unsigned int i = 0; i < NewEltNum; i++) {
975 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
976 DAG.getConstant(i, DL, MVT::i64));
977 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
978 ? (unsigned)LoongArchISD::BITREV_8B
980 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
981 }
982 SDValue Res =
983 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
984
985 switch (ResTy.getSimpleVT().SimpleTy) {
986 default:
987 return SDValue();
988 case MVT::v16i8:
989 case MVT::v32i8:
990 return Res;
991 case MVT::v8i16:
992 case MVT::v16i16:
993 case MVT::v4i32:
994 case MVT::v8i32: {
996 for (unsigned int i = 0; i < NewEltNum; i++)
997 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
998 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
999 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1000 }
1001 }
1002}
1003
1004// Widen element type to get a new mask value (if possible).
1005// For example:
1006// shufflevector <4 x i32> %a, <4 x i32> %b,
1007// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1008// is equivalent to:
1009// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1010// can be lowered to:
1011// VPACKOD_D vr0, vr0, vr1
1012static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1013 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1014 unsigned EltBits = VT.getScalarSizeInBits();
1015
1016 if (EltBits > 32 || EltBits == 1)
1017 return SDValue();
1018
1019 SmallVector<int, 8> NewMask;
1020 if (widenShuffleMaskElts(Mask, NewMask)) {
1021 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1022 : MVT::getIntegerVT(EltBits * 2);
1023 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1024 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1025 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1026 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1027 return DAG.getBitcast(
1028 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1029 }
1030 }
1031
1032 return SDValue();
1033}
1034
1035/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1036/// instructions.
1037// The function matches elements from one of the input vectors shuffled to the
1038// left or right with zeroable elements 'shifted in'. It handles both the
1039// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1040// lane.
1041// Mostly copied from X86.
1042static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1043 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1044 int MaskOffset, const APInt &Zeroable) {
1045 int Size = Mask.size();
1046 unsigned SizeInBits = Size * ScalarSizeInBits;
1047
1048 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1049 for (int i = 0; i < Size; i += Scale)
1050 for (int j = 0; j < Shift; ++j)
1051 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1052 return false;
1053
1054 return true;
1055 };
1056
1057 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1058 int Step = 1) {
1059 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1060 if (!(Mask[i] == -1 || Mask[i] == Low))
1061 return false;
1062 return true;
1063 };
1064
1065 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1066 for (int i = 0; i != Size; i += Scale) {
1067 unsigned Pos = Left ? i + Shift : i;
1068 unsigned Low = Left ? i : i + Shift;
1069 unsigned Len = Scale - Shift;
1070 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1071 return -1;
1072 }
1073
1074 int ShiftEltBits = ScalarSizeInBits * Scale;
1075 bool ByteShift = ShiftEltBits > 64;
1076 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1077 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1078 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1079
1080 // Normalize the scale for byte shifts to still produce an i64 element
1081 // type.
1082 Scale = ByteShift ? Scale / 2 : Scale;
1083
1084 // We need to round trip through the appropriate type for the shift.
1085 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1086 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1087 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1088 return (int)ShiftAmt;
1089 };
1090
1091 unsigned MaxWidth = 128;
1092 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1093 for (int Shift = 1; Shift != Scale; ++Shift)
1094 for (bool Left : {true, false})
1095 if (CheckZeros(Shift, Scale, Left)) {
1096 int ShiftAmt = MatchShift(Shift, Scale, Left);
1097 if (0 < ShiftAmt)
1098 return ShiftAmt;
1099 }
1100
1101 // no match
1102 return -1;
1103}
1104
1105/// Lower VECTOR_SHUFFLE as shift (if possible).
1106///
1107/// For example:
1108/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1109/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1110/// is lowered to:
1111/// (VBSLL_V $v0, $v0, 4)
1112///
1113/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1114/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1115/// is lowered to:
1116/// (VSLLI_D $v0, $v0, 32)
1117static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
1118 MVT VT, SDValue V1, SDValue V2,
1119 SelectionDAG &DAG,
1120 const APInt &Zeroable) {
1121 int Size = Mask.size();
1122 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1123
1124 MVT ShiftVT;
1125 SDValue V = V1;
1126 unsigned Opcode;
1127
1128 // Try to match shuffle against V1 shift.
1129 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1130 Mask, 0, Zeroable);
1131
1132 // If V1 failed, try to match shuffle against V2 shift.
1133 if (ShiftAmt < 0) {
1134 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1135 Mask, Size, Zeroable);
1136 V = V2;
1137 }
1138
1139 if (ShiftAmt < 0)
1140 return SDValue();
1141
1142 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1143 "Illegal integer vector type");
1144 V = DAG.getBitcast(ShiftVT, V);
1145 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1146 DAG.getConstant(ShiftAmt, DL, MVT::i64));
1147 return DAG.getBitcast(VT, V);
1148}
1149
1150/// Determine whether a range fits a regular pattern of values.
1151/// This function accounts for the possibility of jumping over the End iterator.
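// Illustrative example (not part of the source):
//   fitsRegularPattern<int>(Mask.begin(), 2, Mask.end(), 0, 2)
// accepts masks of the form <0, *, 2, *, 4, *, ...>, where '*' may be anything
// and -1 (undef) is also accepted in the checked positions.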
1152template <typename ValType>
1153static bool
1154fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
1155 unsigned CheckStride,
1156 typename SmallVectorImpl<ValType>::const_iterator End,
1157 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1158 auto &I = Begin;
1159
1160 while (I != End) {
1161 if (*I != -1 && *I != ExpectedIndex)
1162 return false;
1163 ExpectedIndex += ExpectedIndexStride;
1164
1165 // Incrementing past End is undefined behaviour so we must increment one
1166 // step at a time and check for End at each step.
1167 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1168 ; // Empty loop body.
1169 }
1170 return true;
1171}
1172
1173/// Compute whether each element of a shuffle is zeroable.
1174///
1175/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1176static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
1177 SDValue V2, APInt &KnownUndef,
1178 APInt &KnownZero) {
1179 int Size = Mask.size();
1180 KnownUndef = KnownZero = APInt::getZero(Size);
1181
1182 V1 = peekThroughBitcasts(V1);
1183 V2 = peekThroughBitcasts(V2);
1184
1185 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1186 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1187
1188 int VectorSizeInBits = V1.getValueSizeInBits();
1189 int ScalarSizeInBits = VectorSizeInBits / Size;
1190 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1191 (void)ScalarSizeInBits;
1192
1193 for (int i = 0; i < Size; ++i) {
1194 int M = Mask[i];
1195 if (M < 0) {
1196 KnownUndef.setBit(i);
1197 continue;
1198 }
1199 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1200 KnownZero.setBit(i);
1201 continue;
1202 }
1203 }
1204}
1205
1206/// Test whether a shuffle mask is equivalent within each sub-lane.
1207///
1208/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1209/// non-trivial to compute in the face of undef lanes. The representation is
1210/// suitable for use with existing 128-bit shuffles as entries from the second
1211/// vector have been remapped to [LaneSize, 2*LaneSize).
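// Illustrative example (not part of the source): for v8i32 (two 128-bit
// lanes), the mask <1, 0, 3, 2, 5, 4, 7, 6> repeats <1, 0, 3, 2> in each lane,
// so RepeatedMask is set to <1, 0, 3, 2> and the function returns true.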
1212static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1213 ArrayRef<int> Mask,
1214 SmallVectorImpl<int> &RepeatedMask) {
1215 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1216 RepeatedMask.assign(LaneSize, -1);
1217 int Size = Mask.size();
1218 for (int i = 0; i < Size; ++i) {
1219 assert(Mask[i] == -1 || Mask[i] >= 0);
1220 if (Mask[i] < 0)
1221 continue;
1222 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1223 // This entry crosses lanes, so there is no way to model this shuffle.
1224 return false;
1225
1226 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1227 // Adjust second vector indices to start at LaneSize instead of Size.
1228 int LocalM =
1229 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1230 if (RepeatedMask[i % LaneSize] < 0)
1231 // This is the first non-undef entry in this slot of a 128-bit lane.
1232 RepeatedMask[i % LaneSize] = LocalM;
1233 else if (RepeatedMask[i % LaneSize] != LocalM)
1234 // Found a mismatch with the repeated mask.
1235 return false;
1236 }
1237 return true;
1238}
1239
1240/// Attempts to match vector shuffle as byte rotation.
1241static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
1242 ArrayRef<int> Mask) {
1243
1244 SDValue Lo, Hi;
1245 SmallVector<int, 16> RepeatedMask;
1246
1247 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1248 return -1;
1249
1250 int NumElts = RepeatedMask.size();
1251 int Rotation = 0;
1252 int Scale = 16 / NumElts;
1253
1254 for (int i = 0; i < NumElts; ++i) {
1255 int M = RepeatedMask[i];
1256 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1257 "Unexpected mask index.");
1258 if (M < 0)
1259 continue;
1260
1261 // Determine where a rotated vector would have started.
1262 int StartIdx = i - (M % NumElts);
1263 if (StartIdx == 0)
1264 return -1;
1265
1266 // If we found the tail of a vector the rotation must be the missing
1267 // front. If we found the head of a vector, it must be how much of the
1268 // head.
1269 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1270
1271 if (Rotation == 0)
1272 Rotation = CandidateRotation;
1273 else if (Rotation != CandidateRotation)
1274 return -1;
1275
1276 // Compute which value this mask is pointing at.
1277 SDValue MaskV = M < NumElts ? V1 : V2;
1278
1279 // Compute which of the two target values this index should be assigned
1280 // to. This reflects whether the high elements are remaining or the low
1281 // elements are remaining.
1282 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1283
1284 // Either set up this value if we've not encountered it before, or check
1285 // that it remains consistent.
1286 if (!TargetV)
1287 TargetV = MaskV;
1288 else if (TargetV != MaskV)
1289 return -1;
1290 }
1291
1292 // Check that we successfully analyzed the mask, and normalize the results.
1293 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1294 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1295 if (!Lo)
1296 Lo = Hi;
1297 else if (!Hi)
1298 Hi = Lo;
1299
1300 V1 = Lo;
1301 V2 = Hi;
1302
1303 return Rotation * Scale;
1304}
1305
1306/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1307///
1308/// For example:
1309/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1310/// <2 x i32> <i32 3, i32 0>
1311/// is lowered to:
1312/// (VBSRL_V $v1, $v1, 8)
1313/// (VBSLL_V $v0, $v0, 8)
1314/// (VOR_V $v0, $V0, $v1)
1315static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL,
1316 ArrayRef<int> Mask, MVT VT,
1317 SDValue V1, SDValue V2,
1318 SelectionDAG &DAG) {
1319
1320 SDValue Lo = V1, Hi = V2;
1321 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1322 if (ByteRotation <= 0)
1323 return SDValue();
1324
1325 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1326 Lo = DAG.getBitcast(ByteVT, Lo);
1327 Hi = DAG.getBitcast(ByteVT, Hi);
1328
1329 int LoByteShift = 16 - ByteRotation;
1330 int HiByteShift = ByteRotation;
1331
1332 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1333 DAG.getConstant(LoByteShift, DL, MVT::i64));
1334 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1335 DAG.getConstant(HiByteShift, DL, MVT::i64));
1336 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1337}
1338
1339/// Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
1340///
1341/// For example:
1342/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1343/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1344/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1345/// is lowered to:
1346/// (VREPLI $v1, 0)
1347/// (VILVL $v0, $v1, $v0)
1348static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
1349 ArrayRef<int> Mask, MVT VT,
1350 SDValue V1, SDValue V2,
1351 SelectionDAG &DAG,
1352 const APInt &Zeroable) {
1353 int Bits = VT.getSizeInBits();
1354 int EltBits = VT.getScalarSizeInBits();
1355 int NumElements = VT.getVectorNumElements();
1356
1357 if (Zeroable.isAllOnes())
1358 return DAG.getConstant(0, DL, VT);
1359
1360 // Define a helper function to check a particular ext-scale and lower to it if
1361 // valid.
1362 auto Lower = [&](int Scale) -> SDValue {
1363 SDValue InputV;
1364 bool AnyExt = true;
1365 int Offset = 0;
1366 for (int i = 0; i < NumElements; i++) {
1367 int M = Mask[i];
1368 if (M < 0)
1369 continue;
1370 if (i % Scale != 0) {
1371 // Each of the extended elements need to be zeroable.
1372 if (!Zeroable[i])
1373 return SDValue();
1374
1375 AnyExt = false;
1376 continue;
1377 }
1378
1379 // Each of the base elements needs to be consecutive indices into the
1380 // same input vector.
1381 SDValue V = M < NumElements ? V1 : V2;
1382 M = M % NumElements;
1383 if (!InputV) {
1384 InputV = V;
1385 Offset = M - (i / Scale);
1386
1387 // These offsets can't be handled.
1388 if (Offset % (NumElements / Scale))
1389 return SDValue();
1390 } else if (InputV != V)
1391 return SDValue();
1392
1393 if (M != (Offset + (i / Scale)))
1394 return SDValue(); // Non-consecutive strided elements.
1395 }
1396
1397 // If we fail to find an input, we have a zero-shuffle which should always
1398 // have already been handled.
1399 if (!InputV)
1400 return SDValue();
1401
1402 do {
1403 unsigned VilVLoHi = LoongArchISD::VILVL;
1404 if (Offset >= (NumElements / 2)) {
1405 VilVLoHi = LoongArchISD::VILVH;
1406 Offset -= (NumElements / 2);
1407 }
1408
1409 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1410 SDValue Ext =
1411 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1412 InputV = DAG.getBitcast(InputVT, InputV);
1413 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1414 Scale /= 2;
1415 EltBits *= 2;
1416 NumElements /= 2;
1417 } while (Scale > 1);
1418 return DAG.getBitcast(VT, InputV);
1419 };
1420
1421 // Each iteration, try extending the elements half as much, but into twice as
1422 // many elements.
1423 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1424 NumExtElements *= 2) {
1425 if (SDValue V = Lower(NumElements / NumExtElements))
1426 return V;
1427 }
1428 return SDValue();
1429}
1430
1431/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1432///
1433/// VREPLVEI performs vector broadcast based on an element specified by an
1434/// integer immediate, with its mask being similar to:
1435/// <x, x, x, ...>
1436/// where x is any valid index.
1437///
1438/// When undef's appear in the mask they are treated as if they were whatever
1439/// value is necessary in order to fit the above form.
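// Illustrative example (not part of the source): for v4i32, the mask
// <2, 2, 2, 2> (or <2, -1, 2, -1> with undefs) broadcasts element 2 of the
// first operand and is matched here with immediate 2 (vreplvei.w).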
1440static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
1441 MVT VT, SDValue V1, SDValue V2,
1442 SelectionDAG &DAG) {
1443 int SplatIndex = -1;
1444 for (const auto &M : Mask) {
1445 if (M != -1) {
1446 SplatIndex = M;
1447 break;
1448 }
1449 }
1450
1451 if (SplatIndex == -1)
1452 return DAG.getUNDEF(VT);
1453
1454 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1455 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1456 APInt Imm(64, SplatIndex);
1457 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1458 DAG.getConstant(Imm, DL, MVT::i64));
1459 }
1460
1461 return SDValue();
1462}
1463
1464/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1465///
1466/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1467/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1468///
1469/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1470/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1471/// When undef's appear they are treated as if they were whatever value is
1472/// necessary in order to fit the above forms.
1473///
1474/// For example:
1475/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1476/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1477/// i32 7, i32 6, i32 5, i32 4>
1478/// is lowered to:
1479/// (VSHUF4I_H $v0, $v1, 27)
1480/// where the 27 comes from:
1481/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1482static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
1483 MVT VT, SDValue V1, SDValue V2,
1484 SelectionDAG &DAG) {
1485
1486 unsigned SubVecSize = 4;
1487 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1488 SubVecSize = 2;
1489
1490 int SubMask[4] = {-1, -1, -1, -1};
1491 for (unsigned i = 0; i < SubVecSize; ++i) {
1492 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1493 int M = Mask[j];
1494
1495 // Convert from vector index to 4-element subvector index
1496 // If an index refers to an element outside of the subvector then give up
1497 if (M != -1) {
1498 M -= 4 * (j / SubVecSize);
1499 if (M < 0 || M >= 4)
1500 return SDValue();
1501 }
1502
1503 // If the mask has an undef, replace it with the current index.
1504 // Note that it might still be undef if the current index is also undef
1505 if (SubMask[i] == -1)
1506 SubMask[i] = M;
1507 // Check that non-undef values are the same as in the mask. If they
1508 // aren't then give up
1509 else if (M != -1 && M != SubMask[i])
1510 return SDValue();
1511 }
1512 }
1513
1514 // Calculate the immediate. Replace any remaining undefs with zero
1515 APInt Imm(64, 0);
1516 for (int i = SubVecSize - 1; i >= 0; --i) {
1517 int M = SubMask[i];
1518
1519 if (M == -1)
1520 M = 0;
1521
1522 Imm <<= 2;
1523 Imm |= M & 0x3;
1524 }
1525
1526 // Return vshuf4i.d
1527 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1528 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1529 DAG.getConstant(Imm, DL, MVT::i64));
1530
1531 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1532 DAG.getConstant(Imm, DL, MVT::i64));
1533}
1534
1535/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1536///
1537/// VPACKEV interleaves the even elements from each vector.
1538///
1539/// It is possible to lower into VPACKEV when the mask consists of two of the
1540/// following forms interleaved:
1541/// <0, 2, 4, ...>
1542/// <n, n+2, n+4, ...>
1543/// where n is the number of elements in the vector.
1544/// For example:
1545/// <0, 0, 2, 2, 4, 4, ...>
1546/// <0, n, 2, n+2, 4, n+4, ...>
1547///
1548/// When undef's appear in the mask they are treated as if they were whatever
1549/// value is necessary in order to fit the above forms.
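// Illustrative example (not part of the source): for v4i32, the mask
// <0, 4, 2, 6> takes the even elements of both sources and is matched here,
// producing a single vpackev.w.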
1550static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1551 MVT VT, SDValue V1, SDValue V2,
1552 SelectionDAG &DAG) {
1553
1554 const auto &Begin = Mask.begin();
1555 const auto &End = Mask.end();
1556 SDValue OriV1 = V1, OriV2 = V2;
1557
1558 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1559 V1 = OriV1;
1560 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1561 V1 = OriV2;
1562 else
1563 return SDValue();
1564
1565 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1566 V2 = OriV1;
1567 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1568 V2 = OriV2;
1569 else
1570 return SDValue();
1571
1572 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1573}
1574
1575/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1576///
1577/// VPACKOD interleaves the odd elements from each vector.
1578///
1579/// It is possible to lower into VPACKOD when the mask consists of two of the
1580/// following forms interleaved:
1581/// <1, 3, 5, ...>
1582/// <n+1, n+3, n+5, ...>
1583/// where n is the number of elements in the vector.
1584/// For example:
1585/// <1, 1, 3, 3, 5, 5, ...>
1586/// <1, n+1, 3, n+3, 5, n+5, ...>
1587///
1588/// When undef's appear in the mask they are treated as if they were whatever
1589/// value is necessary in order to fit the above forms.
1590static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1591 MVT VT, SDValue V1, SDValue V2,
1592 SelectionDAG &DAG) {
1593
1594 const auto &Begin = Mask.begin();
1595 const auto &End = Mask.end();
1596 SDValue OriV1 = V1, OriV2 = V2;
1597
1598 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1599 V1 = OriV1;
1600 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1601 V1 = OriV2;
1602 else
1603 return SDValue();
1604
1605 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1606 V2 = OriV1;
1607 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1608 V2 = OriV2;
1609 else
1610 return SDValue();
1611
1612 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1613}
1614
1615/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1616///
1617/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1618/// of each vector.
1619///
1620/// It is possible to lower into VILVH when the mask consists of two of the
1621/// following forms interleaved:
1622/// <x, x+1, x+2, ...>
1623/// <n+x, n+x+1, n+x+2, ...>
1624/// where n is the number of elements in the vector and x is half n.
1625/// For example:
1626/// <x, x, x+1, x+1, x+2, x+2, ...>
1627/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1628///
1629/// When undef's appear in the mask they are treated as if they were whatever
1630/// value is necessary in order to fit the above forms.
1631static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
1632 MVT VT, SDValue V1, SDValue V2,
1633 SelectionDAG &DAG) {
1634
1635 const auto &Begin = Mask.begin();
1636 const auto &End = Mask.end();
1637 unsigned HalfSize = Mask.size() / 2;
1638 SDValue OriV1 = V1, OriV2 = V2;
1639
1640 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1641 V1 = OriV1;
1642 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1643 V1 = OriV2;
1644 else
1645 return SDValue();
1646
1647 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1648 V2 = OriV1;
1649 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1650 1))
1651 V2 = OriV2;
1652 else
1653 return SDValue();
1654
1655 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1656}
1657
1658/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1659///
1660/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1661/// of each vector.
1662///
1663/// It is possible to lower into VILVL when the mask consists of two of the
1664/// following forms interleaved:
1665/// <0, 1, 2, ...>
1666/// <n, n+1, n+2, ...>
1667/// where n is the number of elements in the vector.
1668/// For example:
1669/// <0, 0, 1, 1, 2, 2, ...>
1670/// <0, n, 1, n+1, 2, n+2, ...>
1671///
1672/// When undef's appear in the mask they are treated as if they were whatever
1673/// value is necessary in order to fit the above forms.
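// Illustrative example (not part of the source): for v4i32, the mask
// <0, 4, 1, 5> interleaves the low halves of both sources and is matched here,
// producing a single vilvl.w.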
1674static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
1675 MVT VT, SDValue V1, SDValue V2,
1676 SelectionDAG &DAG) {
1677
1678 const auto &Begin = Mask.begin();
1679 const auto &End = Mask.end();
1680 SDValue OriV1 = V1, OriV2 = V2;
1681
1682 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1683 V1 = OriV1;
1684 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1685 V1 = OriV2;
1686 else
1687 return SDValue();
1688
1689 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1690 V2 = OriV1;
1691 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1692 V2 = OriV2;
1693 else
1694 return SDValue();
1695
1696 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1697}
1698
1699/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1700///
1701/// VPICKEV copies the even elements of each vector into the result vector.
1702///
1703/// It is possible to lower into VPICKEV when the mask consists of two of the
1704/// following forms concatenated:
1705/// <0, 2, 4, ...>
1706/// <n, n+2, n+4, ...>
1707/// where n is the number of elements in the vector.
1708/// For example:
1709/// <0, 2, 4, ..., 0, 2, 4, ...>
1710/// <0, 2, 4, ..., n, n+2, n+4, ...>
1711///
1712/// When undef's appear in the mask they are treated as if they were whatever
1713/// value is necessary in order to fit the above forms.
1714static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1715 MVT VT, SDValue V1, SDValue V2,
1716 SelectionDAG &DAG) {
1717
1718 const auto &Begin = Mask.begin();
1719 const auto &Mid = Mask.begin() + Mask.size() / 2;
1720 const auto &End = Mask.end();
1721 SDValue OriV1 = V1, OriV2 = V2;
1722
1723 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1724 V1 = OriV1;
1725 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1726 V1 = OriV2;
1727 else
1728 return SDValue();
1729
1730 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1731 V2 = OriV1;
1732 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1733 V2 = OriV2;
1734
1735 else
1736 return SDValue();
1737
1738 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1739}
1740
1741/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1742///
1743/// VPICKOD copies the odd elements of each vector into the result vector.
1744///
1745/// It is possible to lower into VPICKOD when the mask consists of two of the
1746/// following forms concatenated:
1747/// <1, 3, 5, ...>
1748/// <n+1, n+3, n+5, ...>
1749/// where n is the number of elements in the vector.
1750/// For example:
1751/// <1, 3, 5, ..., 1, 3, 5, ...>
1752/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1753///
1754/// When undef's appear in the mask they are treated as if they were whatever
1755/// value is necessary in order to fit the above forms.
1756static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1757 MVT VT, SDValue V1, SDValue V2,
1758 SelectionDAG &DAG) {
1759
1760 const auto &Begin = Mask.begin();
1761 const auto &Mid = Mask.begin() + Mask.size() / 2;
1762 const auto &End = Mask.end();
1763 SDValue OriV1 = V1, OriV2 = V2;
1764
1765 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1766 V1 = OriV1;
1767 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1768 V1 = OriV2;
1769 else
1770 return SDValue();
1771
1772 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1773 V2 = OriV1;
1774 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1775 V2 = OriV2;
1776 else
1777 return SDValue();
1778
1779 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1780}
1781
1782/// Lower VECTOR_SHUFFLE into VSHUF.
1783///
1784/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1785/// adding it as an operand to the resulting VSHUF.
1786static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1787 MVT VT, SDValue V1, SDValue V2,
1788 SelectionDAG &DAG) {
1789
1791 for (auto M : Mask)
1792 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
1793
1794 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1795 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1796
1797 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1798 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1799 // VSHF concatenates the vectors in a bitwise fashion:
1800 // <0b00, 0b01> + <0b10, 0b11> ->
1801 // 0b0100 + 0b1110 -> 0b01001110
1802 // <0b10, 0b11, 0b00, 0b01>
1803 // We must therefore swap the operands to get the correct result.
1804 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1805}
1806
1807/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1808///
1809/// This routine breaks down the specific type of 128-bit shuffle and
1810/// dispatches to the lowering routines accordingly.
1811static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1812 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1813 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1814 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1815 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1816 "Vector type is unsupported for lsx!");
1818 "Two operands have different types!");
1819 assert(VT.getVectorNumElements() == Mask.size() &&
1820 "Unexpected mask size for shuffle!");
1821 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1822
1823 APInt KnownUndef, KnownZero;
1824 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1825 APInt Zeroable = KnownUndef | KnownZero;
1826
1827 SDValue Result;
1828 // TODO: Add more comparison patterns.
1829 if (V2.isUndef()) {
1830 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
1831 return Result;
1832 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
1833 return Result;
1834
1835 // TODO: The commented-out assignment below may be enabled in the future
1836 // to better match the pattern for instruction selection.
1837 /* V2 = V1; */
1838 }
1839
1840 // It is recommended not to change the pattern comparison order for better
1841 // performance.
1842 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
1843 return Result;
1844 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
1845 return Result;
1846 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
1847 return Result;
1848 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
1849 return Result;
1850 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
1851 return Result;
1852 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
1853 return Result;
1854 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
1855 (Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
1856 return Result;
1857 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
1858 Zeroable)))
1859 return Result;
1860 if ((Result =
1861 lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Zeroable)))
1862 return Result;
1863 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG)))
1864 return Result;
1865 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
1866 return NewShuffle;
1867 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
1868 return Result;
1869 return SDValue();
1870}
1871
1872/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
1873///
1874/// It is an XVREPLVEI when the mask is:
1875/// <x, x, x, ..., x+n, x+n, x+n, ...>
1876/// where the number of x is equal to n and n is half the length of the vector.
1877///
1878/// When undef's appear in the mask they are treated as if they were whatever
1879/// value is necessary in order to fit the above form.
1880static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
1881 ArrayRef<int> Mask, MVT VT,
1882 SDValue V1, SDValue V2,
1883 SelectionDAG &DAG) {
1884 int SplatIndex = -1;
1885 for (const auto &M : Mask) {
1886 if (M != -1) {
1887 SplatIndex = M;
1888 break;
1889 }
1890 }
1891
1892 if (SplatIndex == -1)
1893 return DAG.getUNDEF(VT);
1894
1895 const auto &Begin = Mask.begin();
1896 const auto &End = Mask.end();
1897 unsigned HalfSize = Mask.size() / 2;
1898
1899 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1900 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
1901 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
1902 0)) {
1903 APInt Imm(64, SplatIndex);
1904 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1905 DAG.getConstant(Imm, DL, MVT::i64));
1906 }
1907
1908 return SDValue();
1909}
1910
1911/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
1912static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
1913 MVT VT, SDValue V1, SDValue V2,
1914 SelectionDAG &DAG) {
1915 // When the size is less than or equal to 4, lower-cost instructions may be
1916 // used.
1917 if (Mask.size() <= 4)
1918 return SDValue();
1919 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
1920}
1921
1922/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
1923static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1924 MVT VT, SDValue V1, SDValue V2,
1925 SelectionDAG &DAG) {
1926 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
1927}
1928
1929/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
1930static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1931 MVT VT, SDValue V1, SDValue V2,
1932 SelectionDAG &DAG) {
1933 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
1934}
1935
1936/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
1937static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
1938 MVT VT, SDValue V1, SDValue V2,
1939 SelectionDAG &DAG) {
1940
1941 const auto &Begin = Mask.begin();
1942 const auto &End = Mask.end();
1943 unsigned HalfSize = Mask.size() / 2;
1944 unsigned LeftSize = HalfSize / 2;
1945 SDValue OriV1 = V1, OriV2 = V2;
1946
1947 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
1948 1) &&
1949 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
1950 V1 = OriV1;
1951 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
1952 Mask.size() + HalfSize - LeftSize, 1) &&
1953 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1954 Mask.size() + HalfSize + LeftSize, 1))
1955 V1 = OriV2;
1956 else
1957 return SDValue();
1958
1959 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
1960 1) &&
1961 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
1962 1))
1963 V2 = OriV1;
1964 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
1965 Mask.size() + HalfSize - LeftSize, 1) &&
1966 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1967 Mask.size() + HalfSize + LeftSize, 1))
1968 V2 = OriV2;
1969 else
1970 return SDValue();
1971
1972 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1973}
1974
1975/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
1976static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
1977                                          MVT VT, SDValue V1, SDValue V2,
1978 SelectionDAG &DAG) {
1979
1980 const auto &Begin = Mask.begin();
1981 const auto &End = Mask.end();
1982 unsigned HalfSize = Mask.size() / 2;
1983 SDValue OriV1 = V1, OriV2 = V2;
1984
1985 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
1986 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
1987 V1 = OriV1;
1988 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
1989 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1990 Mask.size() + HalfSize, 1))
1991 V1 = OriV2;
1992 else
1993 return SDValue();
1994
1995 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
1996 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1997 V2 = OriV1;
1998 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1999 1) &&
2000 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2001 Mask.size() + HalfSize, 1))
2002 V2 = OriV2;
2003 else
2004 return SDValue();
2005
2006 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2007}
2008
2009/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2010static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2011                                            MVT VT, SDValue V1, SDValue V2,
2012 SelectionDAG &DAG) {
2013
2014 const auto &Begin = Mask.begin();
2015 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2016 const auto &Mid = Mask.begin() + Mask.size() / 2;
2017 const auto &RightMid = Mask.end() - Mask.size() / 4;
2018 const auto &End = Mask.end();
2019 unsigned HalfSize = Mask.size() / 2;
2020 SDValue OriV1 = V1, OriV2 = V2;
2021
2022 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2023 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2024 V1 = OriV1;
2025 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2026 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2027 V1 = OriV2;
2028 else
2029 return SDValue();
2030
2031 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2032 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2033 V2 = OriV1;
2034 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2035 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2036 V2 = OriV2;
2037
2038 else
2039 return SDValue();
2040
2041 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2042}
2043
2044/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2045static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2046                                            MVT VT, SDValue V1, SDValue V2,
2047 SelectionDAG &DAG) {
2048
2049 const auto &Begin = Mask.begin();
2050 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2051 const auto &Mid = Mask.begin() + Mask.size() / 2;
2052 const auto &RightMid = Mask.end() - Mask.size() / 4;
2053 const auto &End = Mask.end();
2054 unsigned HalfSize = Mask.size() / 2;
2055 SDValue OriV1 = V1, OriV2 = V2;
2056
2057 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2058 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2059 V1 = OriV1;
2060 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2061 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2062 2))
2063 V1 = OriV2;
2064 else
2065 return SDValue();
2066
2067 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2068 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2069 V2 = OriV1;
2070 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2071 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2072 2))
2073 V2 = OriV2;
2074 else
2075 return SDValue();
2076
2077 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2078}
2079
2080/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2081static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2082                                          MVT VT, SDValue V1, SDValue V2,
2083 SelectionDAG &DAG) {
2084
2085 int MaskSize = Mask.size();
2086 int HalfSize = Mask.size() / 2;
2087 const auto &Begin = Mask.begin();
2088 const auto &Mid = Mask.begin() + HalfSize;
2089 const auto &End = Mask.end();
2090
2091 // VECTOR_SHUFFLE concatenates the vectors:
2092 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2093 // shuffling ->
2094 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2095 //
2096 // XVSHUF concatenates the vectors:
2097 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2098 // shuffling ->
2099 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
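  // Note added for clarity: because of this operand layout, the first half of
  // the result mask may only use elements from the first half of either source
  // (indices in [0, HalfSize) or [MaskSize, MaskSize + HalfSize)), and the
  // second half may only use elements from the second halves. Any other index
  // cannot be expressed by XVSHUF, so the loops below bail out with SDValue().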
2100 SmallVector<SDValue, 8> MaskAlloc;
2101 for (auto it = Begin; it < Mid; it++) {
2102 if (*it < 0) // UNDEF
2103 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2104 else if ((*it >= 0 && *it < HalfSize) ||
2105 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2106 int M = *it < HalfSize ? *it : *it - HalfSize;
2107 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2108 } else
2109 return SDValue();
2110 }
2111 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2112
2113 for (auto it = Mid; it < End; it++) {
2114 if (*it < 0) // UNDEF
2115 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2116 else if ((*it >= HalfSize && *it < MaskSize) ||
2117 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2118 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2119 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2120 } else
2121 return SDValue();
2122 }
2123 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2124
2125 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2126 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2127 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2128}
2129
2130/// Shuffle vectors by lane to generate more optimized instructions.
2131/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2132///
2133/// Therefore, except for the following four cases, other cases are regarded
2134/// as cross-lane shuffles, where optimization is relatively limited.
2135///
2136/// - Shuffle high, low lanes of two input vectors
2137///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2138/// - Shuffle low, high lanes of two input vectors
2139///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2140/// - Shuffle low, low lanes of two input vectors
2141///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2142/// - Shuffle high, high lanes of two input vectors
2143///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2144///
2145/// The first case is the closest to LoongArch instructions and the other
2146/// cases need to be converted to it for processing.
2147///
2148/// This function may modify V1, V2 and Mask
2149static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
2150                                            MutableArrayRef<int> Mask, MVT VT,
2151 SDValue &V1, SDValue &V2,
2152 SelectionDAG &DAG) {
2153
2154 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2155
2156 int MaskSize = Mask.size();
2157 int HalfSize = Mask.size() / 2;
2158
2159 HalfMaskType preMask = None, postMask = None;
2160
2161 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2162 return M < 0 || (M >= 0 && M < HalfSize) ||
2163 (M >= MaskSize && M < MaskSize + HalfSize);
2164 }))
2165 preMask = HighLaneTy;
2166 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2167 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2168 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2169 }))
2170 preMask = LowLaneTy;
2171
2172 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2173 return M < 0 || (M >= 0 && M < HalfSize) ||
2174 (M >= MaskSize && M < MaskSize + HalfSize);
2175 }))
2176 postMask = HighLaneTy;
2177 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2178 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2179 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2180 }))
2181 postMask = LowLaneTy;
2182
2183  // The first half of the mask is high-lane type and the second half is
2184  // low-lane type, which is the form closest to the LoongArch instructions.
2185  //
2186  // Note: In the LoongArch architecture, the high lane of the mask
2187  // corresponds to the lower 128 bits of the vector register, and the low
2188  // lane of the mask corresponds to the higher 128 bits.
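  // Note added for clarity: the XVPERMI immediates below select 64-bit
  // elements of the v4i64 view, two bits per result element. 0b01001110
  // selects <2, 3, 0, 1> (swap the two 128-bit halves), 0b11101110 selects
  // <2, 3, 2, 3> (duplicate the high half), and 0b01000100 selects
  // <0, 1, 0, 1> (duplicate the low half).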
2189 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2190 return;
2191 }
2192 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2193 V1 = DAG.getBitcast(MVT::v4i64, V1);
2194 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2195 DAG.getConstant(0b01001110, DL, MVT::i64));
2196 V1 = DAG.getBitcast(VT, V1);
2197
2198 if (!V2.isUndef()) {
2199 V2 = DAG.getBitcast(MVT::v4i64, V2);
2200 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2201 DAG.getConstant(0b01001110, DL, MVT::i64));
2202 V2 = DAG.getBitcast(VT, V2);
2203 }
2204
2205 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2206 *it = *it < 0 ? *it : *it - HalfSize;
2207 }
2208 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2209 *it = *it < 0 ? *it : *it + HalfSize;
2210 }
2211 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2212 V1 = DAG.getBitcast(MVT::v4i64, V1);
2213 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2214 DAG.getConstant(0b11101110, DL, MVT::i64));
2215 V1 = DAG.getBitcast(VT, V1);
2216
2217 if (!V2.isUndef()) {
2218 V2 = DAG.getBitcast(MVT::v4i64, V2);
2219 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2220 DAG.getConstant(0b11101110, DL, MVT::i64));
2221 V2 = DAG.getBitcast(VT, V2);
2222 }
2223
2224 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2225 *it = *it < 0 ? *it : *it - HalfSize;
2226 }
2227 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2228 V1 = DAG.getBitcast(MVT::v4i64, V1);
2229 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2230 DAG.getConstant(0b01000100, DL, MVT::i64));
2231 V1 = DAG.getBitcast(VT, V1);
2232
2233 if (!V2.isUndef()) {
2234 V2 = DAG.getBitcast(MVT::v4i64, V2);
2235 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2236 DAG.getConstant(0b01000100, DL, MVT::i64));
2237 V2 = DAG.getBitcast(VT, V2);
2238 }
2239
2240 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2241 *it = *it < 0 ? *it : *it + HalfSize;
2242 }
2243 } else { // cross-lane
2244 return;
2245 }
2246}
2247
2248/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2249/// Only for 256-bit vector.
2250///
2251/// For example:
2252///   %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2253///                      <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2254/// is lowered to:
2255/// (XVPERMI $xr2, $xr0, 78)
2256/// (XVSHUF $xr1, $xr2, $xr0)
2257/// (XVORI $xr0, $xr1, 0)
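/// In short (clarifying note): elements that cross lanes are rewritten to read
/// from a copy of V1 whose 128-bit halves have been swapped, so the final
/// shuffle only needs in-lane moves plus that single lane-swapping permute.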
2258static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2259                                                          ArrayRef<int> Mask,
2260 MVT VT, SDValue V1,
2261 SDValue V2,
2262 SelectionDAG &DAG) {
2263 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2264 int Size = Mask.size();
2265 int LaneSize = Size / 2;
2266
2267 bool LaneCrossing[2] = {false, false};
2268 for (int i = 0; i < Size; ++i)
2269 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2270 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2271
2272  // Bail out if no element actually crosses the 128-bit lanes.
2273 if (!LaneCrossing[0] && !LaneCrossing[1])
2274 return SDValue();
2275
2276 SmallVector<int> InLaneMask;
2277 InLaneMask.assign(Mask.begin(), Mask.end());
2278 for (int i = 0; i < Size; ++i) {
2279 int &M = InLaneMask[i];
2280 if (M < 0)
2281 continue;
2282 if (((M % Size) / LaneSize) != (i / LaneSize))
2283 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2284 }
2285
2286 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2287 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2288 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2289 Flipped = DAG.getBitcast(VT, Flipped);
2290 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2291}
2292
2293/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2294///
2295/// This routine breaks down the specific type of 256-bit shuffle and
2296/// dispatches to the lowering routines accordingly.
2297static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2298                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
2299 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2300 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2301 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2302 "Vector type is unsupported for lasx!");
2303  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2304         "Two operands have different types!");
2305 assert(VT.getVectorNumElements() == Mask.size() &&
2306 "Unexpected mask size for shuffle!");
2307 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2308 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2309
2310  // Canonicalize non-cross-lane shuffle vectors.
2311 SmallVector<int> NewMask(Mask);
2312 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
2313
2314 APInt KnownUndef, KnownZero;
2315 computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
2316 APInt Zeroable = KnownUndef | KnownZero;
2317
2318 SDValue Result;
2319 // TODO: Add more comparison patterns.
2320 if (V2.isUndef()) {
2321 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
2322 return Result;
2323 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
2324 return Result;
2325 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2326 V1, V2, DAG)))
2327 return Result;
2328
2329    // TODO: The commented-out assignment below may be enabled in the future
2330    // to better match the pattern for instruction selection.
2331 /* V2 = V1; */
2332 }
2333
2334  // For better performance, it is recommended not to change the order in
2335  // which the following patterns are compared.
2336 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
2337 return Result;
2338 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
2339 return Result;
2340 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
2341 return Result;
2342 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
2343 return Result;
2344 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
2345 return Result;
2346 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
2347 return Result;
2348 if ((Result =
2349 lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG, Zeroable)))
2350 return Result;
2351 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG)))
2352 return Result;
2353 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2354 return NewShuffle;
2355 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2356 return Result;
2357
2358 return SDValue();
2359}
2360
2361SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2362 SelectionDAG &DAG) const {
2363 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2364 ArrayRef<int> OrigMask = SVOp->getMask();
2365 SDValue V1 = Op.getOperand(0);
2366 SDValue V2 = Op.getOperand(1);
2367 MVT VT = Op.getSimpleValueType();
2368 int NumElements = VT.getVectorNumElements();
2369 SDLoc DL(Op);
2370
2371 bool V1IsUndef = V1.isUndef();
2372 bool V2IsUndef = V2.isUndef();
2373 if (V1IsUndef && V2IsUndef)
2374 return DAG.getUNDEF(VT);
2375
2376 // When we create a shuffle node we put the UNDEF node to second operand,
2377 // but in some cases the first operand may be transformed to UNDEF.
2378 // In this case we should just commute the node.
2379 if (V1IsUndef)
2380 return DAG.getCommutedVectorShuffle(*SVOp);
2381
2382 // Check for non-undef masks pointing at an undef vector and make the masks
2383 // undef as well. This makes it easier to match the shuffle based solely on
2384 // the mask.
2385 if (V2IsUndef &&
2386 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2387 SmallVector<int, 8> NewMask(OrigMask);
2388 for (int &M : NewMask)
2389 if (M >= NumElements)
2390 M = -1;
2391 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2392 }
2393
2394 // Check for illegal shuffle mask element index values.
2395 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2396 (void)MaskUpperLimit;
2397 assert(llvm::all_of(OrigMask,
2398 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2399 "Out of bounds shuffle index");
2400
2401 // For each vector width, delegate to a specialized lowering routine.
2402 if (VT.is128BitVector())
2403 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
2404
2405 if (VT.is256BitVector())
2406 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
2407
2408 return SDValue();
2409}
2410
2411SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2412 SelectionDAG &DAG) const {
2413 // Custom lower to ensure the libcall return is passed in an FPR on hard
2414 // float ABIs.
2415 SDLoc DL(Op);
2416 MakeLibCallOptions CallOptions;
2417 SDValue Op0 = Op.getOperand(0);
2418 SDValue Chain = SDValue();
2419 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2420 SDValue Res;
2421 std::tie(Res, Chain) =
2422 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2423 if (Subtarget.is64Bit())
2424 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2425 return DAG.getBitcast(MVT::i32, Res);
2426}
2427
2428SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2429 SelectionDAG &DAG) const {
2430 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2431 // float ABIs.
2432 SDLoc DL(Op);
2433 MakeLibCallOptions CallOptions;
2434 SDValue Op0 = Op.getOperand(0);
2435 SDValue Chain = SDValue();
2436 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2437 DL, MVT::f32, Op0)
2438 : DAG.getBitcast(MVT::f32, Op0);
2439 SDValue Res;
2440 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2441 CallOptions, DL, Chain);
2442 return Res;
2443}
2444
2445SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2446 SelectionDAG &DAG) const {
2447 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2448 SDLoc DL(Op);
2449 MakeLibCallOptions CallOptions;
2450 RTLIB::Libcall LC =
2451 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2452 SDValue Res =
2453 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2454 if (Subtarget.is64Bit())
2455 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2456 return DAG.getBitcast(MVT::i32, Res);
2457}
2458
2459SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2460 SelectionDAG &DAG) const {
2461 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2462 MVT VT = Op.getSimpleValueType();
2463 SDLoc DL(Op);
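  // Note added for clarity: a bf16 value is the upper 16 bits of the
  // corresponding f32 encoding, so extending it only requires shifting the
  // 16-bit payload left by 16 and reinterpreting the result as f32.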
2464 Op = DAG.getNode(
2465 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2466 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2467 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2468 DL, MVT::f32, Op)
2469 : DAG.getBitcast(MVT::f32, Op);
2470 if (VT != MVT::f32)
2471 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2472 return Res;
2473}
2474
2475// Lower BUILD_VECTOR as broadcast load (if possible).
2476// For example:
2477// %a = load i8, ptr %ptr
2478// %b = build_vector %a, %a, %a, %a
2479// is lowered to:
2480// (VLDREPL_B $a0, 0)
2481static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2482                                                const SDLoc &DL,
2483 SelectionDAG &DAG) {
2484 MVT VT = BVOp->getSimpleValueType(0);
2485 int NumOps = BVOp->getNumOperands();
2486
2487 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2488 "Unsupported vector type for broadcast.");
2489
2490 SDValue IdentitySrc;
2491  bool IsIdentity = true;
2492
2493 for (int i = 0; i != NumOps; i++) {
2494 SDValue Op = BVOp->getOperand(i);
2495 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2496      IsIdentity = false;
2497 break;
2498 }
2499 IdentitySrc = BVOp->getOperand(0);
2500 }
2501
2502  // Make sure that this load is valid and only has one user.
2503  if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2504    return SDValue();
2505
2506  if (IsIdentity) {
2507 auto *LN = cast<LoadSDNode>(IdentitySrc);
2508 SDVTList Tys =
2509 LN->isIndexed()
2510 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2511 : DAG.getVTList(VT, MVT::Other);
2512 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2513 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2514 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2515 return BCast;
2516 }
2517 return SDValue();
2518}
2519
2520SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2521 SelectionDAG &DAG) const {
2522 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2523 EVT ResTy = Op->getValueType(0);
2524 unsigned NumElts = ResTy.getVectorNumElements();
2525 SDLoc DL(Op);
2526 APInt SplatValue, SplatUndef;
2527 unsigned SplatBitSize;
2528 bool HasAnyUndefs;
2529 bool IsConstant = false;
2530 bool UseSameConstant = true;
2531 SDValue ConstantValue;
2532 bool Is128Vec = ResTy.is128BitVector();
2533 bool Is256Vec = ResTy.is256BitVector();
2534
2535 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2536 (!Subtarget.hasExtLASX() || !Is256Vec))
2537 return SDValue();
2538
2539 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2540 return Result;
2541
2542 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2543 /*MinSplatBits=*/8) &&
2544 SplatBitSize <= 64) {
2545 // We can only cope with 8, 16, 32, or 64-bit elements.
2546 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2547 SplatBitSize != 64)
2548 return SDValue();
2549
2550 EVT ViaVecTy;
2551
2552 switch (SplatBitSize) {
2553 default:
2554 return SDValue();
2555 case 8:
2556 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2557 break;
2558 case 16:
2559 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2560 break;
2561 case 32:
2562 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2563 break;
2564 case 64:
2565 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2566 break;
2567 }
2568
2569 // SelectionDAG::getConstant will promote SplatValue appropriately.
2570 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2571
2572 // Bitcast to the type we originally wanted.
2573 if (ViaVecTy != ResTy)
2574 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2575
2576 return Result;
2577 }
2578
2579 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2580 return Op;
2581
2582 for (unsigned i = 0; i < NumElts; ++i) {
2583 SDValue Opi = Node->getOperand(i);
2584 if (isIntOrFPConstant(Opi)) {
2585 IsConstant = true;
2586 if (!ConstantValue.getNode())
2587 ConstantValue = Opi;
2588 else if (ConstantValue != Opi)
2589 UseSameConstant = false;
2590 }
2591 }
2592
2593 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
2594 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
2595 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
2596 for (unsigned i = 0; i < NumElts; ++i) {
2597 SDValue Opi = Node->getOperand(i);
2598 if (!isIntOrFPConstant(Opi))
2599 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
2600 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2601 }
2602 return Result;
2603 }
2604
2605 if (!IsConstant) {
2606 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2607 // The resulting code is the same length as the expansion, but it doesn't
2608 // use memory operations.
2609 assert(ResTy.isVector());
2610
2611 SDValue Op0 = Node->getOperand(0);
2612 SDValue Vector = DAG.getUNDEF(ResTy);
2613
2614 if (!Op0.isUndef())
2615 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2616 for (unsigned i = 1; i < NumElts; ++i) {
2617 SDValue Opi = Node->getOperand(i);
2618 if (Opi.isUndef())
2619 continue;
2620 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2621 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2622 }
2623 return Vector;
2624 }
2625
2626 return SDValue();
2627}
2628
2629SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
2630 SelectionDAG &DAG) const {
2631 SDLoc DL(Op);
2632 MVT ResVT = Op.getSimpleValueType();
2633 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
2634
2635 unsigned NumOperands = Op.getNumOperands();
2636 unsigned NumFreezeUndef = 0;
2637 unsigned NumZero = 0;
2638 unsigned NumNonZero = 0;
2639 unsigned NonZeros = 0;
2640 SmallSet<SDValue, 4> Undefs;
2641 for (unsigned i = 0; i != NumOperands; ++i) {
2642 SDValue SubVec = Op.getOperand(i);
2643 if (SubVec.isUndef())
2644 continue;
2645 if (ISD::isFreezeUndef(SubVec.getNode())) {
2646 // If the freeze(undef) has multiple uses then we must fold to zero.
2647 if (SubVec.hasOneUse()) {
2648 ++NumFreezeUndef;
2649 } else {
2650 ++NumZero;
2651 Undefs.insert(SubVec);
2652 }
2653 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
2654 ++NumZero;
2655 else {
2656 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
2657 NonZeros |= 1 << i;
2658 ++NumNonZero;
2659 }
2660 }
2661
2662 // If we have more than 2 non-zeros, build each half separately.
2663 if (NumNonZero > 2) {
2664 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
2665 ArrayRef<SDUse> Ops = Op->ops();
2666 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2667 Ops.slice(0, NumOperands / 2));
2668 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2669 Ops.slice(NumOperands / 2));
2670 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
2671 }
2672
2673 // Otherwise, build it up through insert_subvectors.
2674 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
2675 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
2676 : DAG.getUNDEF(ResVT));
2677
2678 // Replace Undef operands with ZeroVector.
2679 for (SDValue U : Undefs)
2680 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
2681
2682 MVT SubVT = Op.getOperand(0).getSimpleValueType();
2683 unsigned NumSubElems = SubVT.getVectorNumElements();
2684 for (unsigned i = 0; i != NumOperands; ++i) {
2685 if ((NonZeros & (1 << i)) == 0)
2686 continue;
2687
2688 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
2689 DAG.getVectorIdxConstant(i * NumSubElems, DL));
2690 }
2691
2692 return Vec;
2693}
2694
2695SDValue
2696LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2697 SelectionDAG &DAG) const {
2698 EVT VecTy = Op->getOperand(0)->getValueType(0);
2699 SDValue Idx = Op->getOperand(1);
2700 unsigned NumElts = VecTy.getVectorNumElements();
2701
2702 if (isa<ConstantSDNode>(Idx) && Idx->getAsZExtVal() < NumElts)
2703 return Op;
2704
2705 return SDValue();
2706}
2707
2708SDValue
2709LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2710 SelectionDAG &DAG) const {
2711 MVT VT = Op.getSimpleValueType();
2712 MVT EltVT = VT.getVectorElementType();
2713 unsigned NumElts = VT.getVectorNumElements();
2714 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
2715 SDLoc DL(Op);
2716 SDValue Op0 = Op.getOperand(0);
2717 SDValue Op1 = Op.getOperand(1);
2718 SDValue Op2 = Op.getOperand(2);
2719
2720 if (isa<ConstantSDNode>(Op2))
2721 return Op;
2722
2723 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
2724 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
2725
2726 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
2727 return SDValue();
2728
2729 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
2730 SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
2731
2732 SmallVector<SDValue, 32> RawIndices;
2733 for (unsigned i = 0; i < NumElts; ++i)
2734 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2735 SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
2736
2737 // insert vec, elt, idx
2738 // =>
2739 // select (splatidx == {0,1,2...}) ? splatelt : vec
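  // Worked example (added for clarity): for a v4i32 insert whose index
  // register holds 2 at run time, SplatIdx is <2, 2, 2, 2> and Indices is
  // <0, 1, 2, 3>, so the comparison is true only in lane 2 and the vselect
  // replaces only that lane with SplatElt.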
2740 SDValue SelectCC =
2741 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
2742 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
2743}
2744
2745SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
2746 SelectionDAG &DAG) const {
2747 SDLoc DL(Op);
2748 SyncScope::ID FenceSSID =
2749 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
2750
2751 // singlethread fences only synchronize with signal handlers on the same
2752 // thread and thus only need to preserve instruction order, not actually
2753 // enforce memory ordering.
2754 if (FenceSSID == SyncScope::SingleThread)
2755 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
2756 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
2757
2758 return Op;
2759}
2760
2761SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
2762 SelectionDAG &DAG) const {
2763
2764 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
2765 DAG.getContext()->emitError(
2766 "On LA64, only 64-bit registers can be written.");
2767 return Op.getOperand(0);
2768 }
2769
2770 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
2771 DAG.getContext()->emitError(
2772 "On LA32, only 32-bit registers can be written.");
2773 return Op.getOperand(0);
2774 }
2775
2776 return Op;
2777}
2778
2779SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
2780 SelectionDAG &DAG) const {
2781 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
2782 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
2783 "be a constant integer");
2784 return SDValue();
2785 }
2786
2787  MachineFunction &MF = DAG.getMachineFunction();
2788  MF.getFrameInfo().setFrameAddressIsTaken(true);
2789  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
2790 EVT VT = Op.getValueType();
2791 SDLoc DL(Op);
2792 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
2793 unsigned Depth = Op.getConstantOperandVal(0);
2794 int GRLenInBytes = Subtarget.getGRLen() / 8;
2795
2796 while (Depth--) {
2797 int Offset = -(GRLenInBytes * 2);
2798 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
2799 DAG.getSignedConstant(Offset, DL, VT));
2800 FrameAddr =
2801 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2802 }
2803 return FrameAddr;
2804}
2805
2806SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
2807 SelectionDAG &DAG) const {
2808 // Currently only support lowering return address for current frame.
2809 if (Op.getConstantOperandVal(0) != 0) {
2810 DAG.getContext()->emitError(
2811 "return address can only be determined for the current frame");
2812 return SDValue();
2813 }
2814
2815  MachineFunction &MF = DAG.getMachineFunction();
2816  MF.getFrameInfo().setReturnAddressIsTaken(true);
2817  MVT GRLenVT = Subtarget.getGRLenVT();
2818
2819 // Return the value of the return address register, marking it an implicit
2820 // live-in.
2821 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
2822 getRegClassFor(GRLenVT));
2823 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
2824}
2825
2826SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
2827 SelectionDAG &DAG) const {
2828  MachineFunction &MF = DAG.getMachineFunction();
2829  auto Size = Subtarget.getGRLen() / 8;
2830 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
2831 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2832}
2833
2834SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
2835 SelectionDAG &DAG) const {
2836  MachineFunction &MF = DAG.getMachineFunction();
2837  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
2838
2839 SDLoc DL(Op);
2840 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2841                                 getPointerTy(MF.getDataLayout()));
2842
2843 // vastart just stores the address of the VarArgsFrameIndex slot into the
2844 // memory location argument.
2845 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2846 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
2847 MachinePointerInfo(SV));
2848}
2849
2850SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
2851 SelectionDAG &DAG) const {
2852 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
2853 !Subtarget.hasBasicD() && "unexpected target features");
2854
2855 SDLoc DL(Op);
2856 SDValue Op0 = Op.getOperand(0);
2857 if (Op0->getOpcode() == ISD::AND) {
2858 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
2859 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
2860 return Op;
2861 }
2862
2863 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
2864 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
2865 Op0.getConstantOperandVal(2) == UINT64_C(0))
2866 return Op;
2867
2868 if (Op0.getOpcode() == ISD::AssertZext &&
2869 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
2870 return Op;
2871
2872 EVT OpVT = Op0.getValueType();
2873 EVT RetVT = Op.getValueType();
2874 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
2875 MakeLibCallOptions CallOptions;
2876 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
2877 SDValue Chain = SDValue();
2878  SDValue Result;
2879  std::tie(Result, Chain) =
2880 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
2881 return Result;
2882}
2883
2884SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
2885 SelectionDAG &DAG) const {
2886 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
2887 !Subtarget.hasBasicD() && "unexpected target features");
2888
2889 SDLoc DL(Op);
2890 SDValue Op0 = Op.getOperand(0);
2891
2892 if ((Op0.getOpcode() == ISD::AssertSext ||
2893       Op0.getOpcode() == ISD::AssertZext) &&
2894      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
2895 return Op;
2896
2897 EVT OpVT = Op0.getValueType();
2898 EVT RetVT = Op.getValueType();
2899 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
2900 MakeLibCallOptions CallOptions;
2901 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
2902 SDValue Chain = SDValue();
2903  SDValue Result;
2904  std::tie(Result, Chain) =
2905 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
2906 return Result;
2907}
2908
2909SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
2910 SelectionDAG &DAG) const {
2911
2912 SDLoc DL(Op);
2913 EVT VT = Op.getValueType();
2914 SDValue Op0 = Op.getOperand(0);
2915 EVT Op0VT = Op0.getValueType();
2916
2917 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
2918 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
2919 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
2920 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
2921 }
2922 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
2923 SDValue Lo, Hi;
2924 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
2925 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
2926 }
2927 return Op;
2928}
2929
2930SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
2931 SelectionDAG &DAG) const {
2932
2933 SDLoc DL(Op);
2934 SDValue Op0 = Op.getOperand(0);
2935
2936 if (Op0.getValueType() == MVT::f16)
2937 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
2938
2939 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
2940 !Subtarget.hasBasicD()) {
2941 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
2942 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
2943 }
2944
2945 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
2946 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
2947 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
2948}
2949
2950static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
2951                             SelectionDAG &DAG, unsigned Flags) {
2952 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
2953}
2954
2955static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
2956                             SelectionDAG &DAG, unsigned Flags) {
2957 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
2958 Flags);
2959}
2960
2961static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
2962                             SelectionDAG &DAG, unsigned Flags) {
2963 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
2964 N->getOffset(), Flags);
2965}
2966
2967static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
2968                             SelectionDAG &DAG, unsigned Flags) {
2969 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
2970}
2971
2972template <class NodeTy>
2973SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
2974                                         CodeModel::Model M,
2975                                         bool IsLocal) const {
2976 SDLoc DL(N);
2977 EVT Ty = getPointerTy(DAG.getDataLayout());
2978 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2979 SDValue Load;
2980
2981 switch (M) {
2982 default:
2983 report_fatal_error("Unsupported code model");
2984
2985 case CodeModel::Large: {
2986 assert(Subtarget.is64Bit() && "Large code model requires LA64");
2987
2988 // This is not actually used, but is necessary for successfully matching
2989 // the PseudoLA_*_LARGE nodes.
2990 SDValue Tmp = DAG.getConstant(0, DL, Ty);
2991 if (IsLocal) {
2992 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
2993 // eventually becomes the desired 5-insn code sequence.
2994 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
2995 Tmp, Addr),
2996 0);
2997 } else {
2998 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
2999 // eventually becomes the desired 5-insn code sequence.
3000 Load = SDValue(
3001 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3002 0);
3003 }
3004 break;
3005 }
3006
3007 case CodeModel::Small:
3008 case CodeModel::Medium:
3009 if (IsLocal) {
3010 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3011 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3012 Load = SDValue(
3013 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3014 } else {
3015 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3016 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3017 Load =
3018 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3019 }
3020 }
3021
3022 if (!IsLocal) {
3023 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3024    MachineFunction &MF = DAG.getMachineFunction();
3025    MachineMemOperand *MemOp = MF.getMachineMemOperand(
3026        MachinePointerInfo::getGOT(MF),
3027        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3028            MachineMemOperand::MOInvariant,
3029        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3030 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3031 }
3032
3033 return Load;
3034}
3035
3036SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3037 SelectionDAG &DAG) const {
3038 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3039 DAG.getTarget().getCodeModel());
3040}
3041
3042SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3043 SelectionDAG &DAG) const {
3044 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3045 DAG.getTarget().getCodeModel());
3046}
3047
3048SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3049 SelectionDAG &DAG) const {
3050 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3051 DAG.getTarget().getCodeModel());
3052}
3053
3054SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3055 SelectionDAG &DAG) const {
3056 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3057 assert(N->getOffset() == 0 && "unexpected offset in global node");
3058 auto CM = DAG.getTarget().getCodeModel();
3059 const GlobalValue *GV = N->getGlobal();
3060
3061 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3062 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3063 CM = *GCM;
3064 }
3065
3066 return getAddr(N, DAG, CM, GV->isDSOLocal());
3067}
3068
3069SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3070 SelectionDAG &DAG,
3071 unsigned Opc, bool UseGOT,
3072 bool Large) const {
3073 SDLoc DL(N);
3074 EVT Ty = getPointerTy(DAG.getDataLayout());
3075 MVT GRLenVT = Subtarget.getGRLenVT();
3076
3077 // This is not actually used, but is necessary for successfully matching the
3078 // PseudoLA_*_LARGE nodes.
3079 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3080 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3081
3082 // Only IE needs an extra argument for large code model.
3083 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3084 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3085 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3086
3087 // If it is LE for normal/medium code model, the add tp operation will occur
3088 // during the pseudo-instruction expansion.
3089 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3090 return Offset;
3091
3092 if (UseGOT) {
3093 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3094    MachineFunction &MF = DAG.getMachineFunction();
3095    MachineMemOperand *MemOp = MF.getMachineMemOperand(
3096        MachinePointerInfo::getGOT(MF),
3097        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3098            MachineMemOperand::MOInvariant,
3099        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3100 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3101 }
3102
3103 // Add the thread pointer.
3104 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3105 DAG.getRegister(LoongArch::R2, GRLenVT));
3106}
3107
3108SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3109 SelectionDAG &DAG,
3110 unsigned Opc,
3111 bool Large) const {
3112 SDLoc DL(N);
3113 EVT Ty = getPointerTy(DAG.getDataLayout());
3114 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3115
3116 // This is not actually used, but is necessary for successfully matching the
3117 // PseudoLA_*_LARGE nodes.
3118 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3119
3120 // Use a PC-relative addressing mode to access the dynamic GOT address.
3121 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3122 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3123 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3124
3125 // Prepare argument list to generate call.
3126  TargetLowering::ArgListTy Args;
3127  Args.emplace_back(Load, CallTy);
3128
3129 // Setup call to __tls_get_addr.
3130  TargetLowering::CallLoweringInfo CLI(DAG);
3131  CLI.setDebugLoc(DL)
3132 .setChain(DAG.getEntryNode())
3133 .setLibCallee(CallingConv::C, CallTy,
3134 DAG.getExternalSymbol("__tls_get_addr", Ty),
3135 std::move(Args));
3136
3137 return LowerCallTo(CLI).first;
3138}
3139
3140SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3141 SelectionDAG &DAG, unsigned Opc,
3142 bool Large) const {
3143 SDLoc DL(N);
3144 EVT Ty = getPointerTy(DAG.getDataLayout());
3145 const GlobalValue *GV = N->getGlobal();
3146
3147 // This is not actually used, but is necessary for successfully matching the
3148 // PseudoLA_*_LARGE nodes.
3149 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3150
3151 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3152 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3153 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3154 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3155 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3156}
3157
3158SDValue
3159LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3160 SelectionDAG &DAG) const {
3161  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3162      CallingConv::GHC)
3163    report_fatal_error("In GHC calling convention TLS is not supported");
3164
3165 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3166 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3167
3168 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3169 assert(N->getOffset() == 0 && "unexpected offset in global node");
3170
3171 if (DAG.getTarget().useEmulatedTLS())
3172 reportFatalUsageError("the emulated TLS is prohibited");
3173
3174 bool IsDesc = DAG.getTarget().useTLSDESC();
3175
3176 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3177  case TLSModel::GeneralDynamic:
3178    // In this model, application code calls the dynamic linker function
3179 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3180 // runtime.
3181 if (!IsDesc)
3182 return getDynamicTLSAddr(N, DAG,
3183 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3184 : LoongArch::PseudoLA_TLS_GD,
3185 Large);
3186 break;
3187  case TLSModel::LocalDynamic:
3188    // Same as GeneralDynamic, except for assembly modifiers and relocation
3189 // records.
3190 if (!IsDesc)
3191 return getDynamicTLSAddr(N, DAG,
3192 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3193 : LoongArch::PseudoLA_TLS_LD,
3194 Large);
3195 break;
3196  case TLSModel::InitialExec:
3197    // This model uses the GOT to resolve TLS offsets.
3198 return getStaticTLSAddr(N, DAG,
3199 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3200 : LoongArch::PseudoLA_TLS_IE,
3201 /*UseGOT=*/true, Large);
3202  case TLSModel::LocalExec:
3203    // This model is used when static linking as the TLS offsets are resolved
3204 // during program linking.
3205 //
3206 // This node doesn't need an extra argument for the large code model.
3207 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3208 /*UseGOT=*/false, Large);
3209 }
3210
3211 return getTLSDescAddr(N, DAG,
3212 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3213 : LoongArch::PseudoLA_TLS_DESC,
3214 Large);
3215}
3216
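/// Check that operand ImmOp of Op is a constant that fits in an N-bit
/// immediate (signed when IsSigned is set). If it does not, emit a diagnostic
/// and return an UNDEF value; otherwise return an empty SDValue so lowering
/// can proceed. (Descriptive comment added for clarity.)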
3217template <unsigned N>
3218static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3219                                    SelectionDAG &DAG, bool IsSigned = false) {
3220 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3221 // Check the ImmArg.
3222 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3223 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3224 DAG.getContext()->emitError(Op->getOperationName(0) +
3225 ": argument out of range.");
3226 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3227 }
3228 return SDValue();
3229}
3230
3231SDValue
3232LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3233 SelectionDAG &DAG) const {
3234 switch (Op.getConstantOperandVal(0)) {
3235 default:
3236 return SDValue(); // Don't custom lower most intrinsics.
3237 case Intrinsic::thread_pointer: {
3238 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3239 return DAG.getRegister(LoongArch::R2, PtrVT);
3240 }
3241 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3242 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3243 case Intrinsic::loongarch_lsx_vreplvei_d:
3244 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3245 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3246 case Intrinsic::loongarch_lsx_vreplvei_w:
3247 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3248 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3249 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3250 case Intrinsic::loongarch_lasx_xvpickve_d:
3251 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3252 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3253 case Intrinsic::loongarch_lasx_xvinsve0_d:
3254 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3255 case Intrinsic::loongarch_lsx_vsat_b:
3256 case Intrinsic::loongarch_lsx_vsat_bu:
3257 case Intrinsic::loongarch_lsx_vrotri_b:
3258 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3259 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3260 case Intrinsic::loongarch_lsx_vsrlri_b:
3261 case Intrinsic::loongarch_lsx_vsrari_b:
3262 case Intrinsic::loongarch_lsx_vreplvei_h:
3263 case Intrinsic::loongarch_lasx_xvsat_b:
3264 case Intrinsic::loongarch_lasx_xvsat_bu:
3265 case Intrinsic::loongarch_lasx_xvrotri_b:
3266 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3267 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3268 case Intrinsic::loongarch_lasx_xvsrlri_b:
3269 case Intrinsic::loongarch_lasx_xvsrari_b:
3270 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3271 case Intrinsic::loongarch_lasx_xvpickve_w:
3272 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3273 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3274 case Intrinsic::loongarch_lasx_xvinsve0_w:
3275 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3276 case Intrinsic::loongarch_lsx_vsat_h:
3277 case Intrinsic::loongarch_lsx_vsat_hu:
3278 case Intrinsic::loongarch_lsx_vrotri_h:
3279 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3280 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3281 case Intrinsic::loongarch_lsx_vsrlri_h:
3282 case Intrinsic::loongarch_lsx_vsrari_h:
3283 case Intrinsic::loongarch_lsx_vreplvei_b:
3284 case Intrinsic::loongarch_lasx_xvsat_h:
3285 case Intrinsic::loongarch_lasx_xvsat_hu:
3286 case Intrinsic::loongarch_lasx_xvrotri_h:
3287 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3288 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3289 case Intrinsic::loongarch_lasx_xvsrlri_h:
3290 case Intrinsic::loongarch_lasx_xvsrari_h:
3291 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3292 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3293 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3294 case Intrinsic::loongarch_lsx_vsrani_b_h:
3295 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3296 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3297 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3298 case Intrinsic::loongarch_lsx_vssrani_b_h:
3299 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3300 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3301 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3302 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3303 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3304 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3305 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3306 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3307 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3308 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3309 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3310 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3311 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3312 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3313 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3314 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3315 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3316 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3317 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3318 case Intrinsic::loongarch_lsx_vsat_w:
3319 case Intrinsic::loongarch_lsx_vsat_wu:
3320 case Intrinsic::loongarch_lsx_vrotri_w:
3321 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3322 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3323 case Intrinsic::loongarch_lsx_vsrlri_w:
3324 case Intrinsic::loongarch_lsx_vsrari_w:
3325 case Intrinsic::loongarch_lsx_vslei_bu:
3326 case Intrinsic::loongarch_lsx_vslei_hu:
3327 case Intrinsic::loongarch_lsx_vslei_wu:
3328 case Intrinsic::loongarch_lsx_vslei_du:
3329 case Intrinsic::loongarch_lsx_vslti_bu:
3330 case Intrinsic::loongarch_lsx_vslti_hu:
3331 case Intrinsic::loongarch_lsx_vslti_wu:
3332 case Intrinsic::loongarch_lsx_vslti_du:
3333 case Intrinsic::loongarch_lsx_vbsll_v:
3334 case Intrinsic::loongarch_lsx_vbsrl_v:
3335 case Intrinsic::loongarch_lasx_xvsat_w:
3336 case Intrinsic::loongarch_lasx_xvsat_wu:
3337 case Intrinsic::loongarch_lasx_xvrotri_w:
3338 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3339 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3340 case Intrinsic::loongarch_lasx_xvsrlri_w:
3341 case Intrinsic::loongarch_lasx_xvsrari_w:
3342 case Intrinsic::loongarch_lasx_xvslei_bu:
3343 case Intrinsic::loongarch_lasx_xvslei_hu:
3344 case Intrinsic::loongarch_lasx_xvslei_wu:
3345 case Intrinsic::loongarch_lasx_xvslei_du:
3346 case Intrinsic::loongarch_lasx_xvslti_bu:
3347 case Intrinsic::loongarch_lasx_xvslti_hu:
3348 case Intrinsic::loongarch_lasx_xvslti_wu:
3349 case Intrinsic::loongarch_lasx_xvslti_du:
3350 case Intrinsic::loongarch_lasx_xvbsll_v:
3351 case Intrinsic::loongarch_lasx_xvbsrl_v:
3352 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3353 case Intrinsic::loongarch_lsx_vseqi_b:
3354 case Intrinsic::loongarch_lsx_vseqi_h:
3355 case Intrinsic::loongarch_lsx_vseqi_w:
3356 case Intrinsic::loongarch_lsx_vseqi_d:
3357 case Intrinsic::loongarch_lsx_vslei_b:
3358 case Intrinsic::loongarch_lsx_vslei_h:
3359 case Intrinsic::loongarch_lsx_vslei_w:
3360 case Intrinsic::loongarch_lsx_vslei_d:
3361 case Intrinsic::loongarch_lsx_vslti_b:
3362 case Intrinsic::loongarch_lsx_vslti_h:
3363 case Intrinsic::loongarch_lsx_vslti_w:
3364 case Intrinsic::loongarch_lsx_vslti_d:
3365 case Intrinsic::loongarch_lasx_xvseqi_b:
3366 case Intrinsic::loongarch_lasx_xvseqi_h:
3367 case Intrinsic::loongarch_lasx_xvseqi_w:
3368 case Intrinsic::loongarch_lasx_xvseqi_d:
3369 case Intrinsic::loongarch_lasx_xvslei_b:
3370 case Intrinsic::loongarch_lasx_xvslei_h:
3371 case Intrinsic::loongarch_lasx_xvslei_w:
3372 case Intrinsic::loongarch_lasx_xvslei_d:
3373 case Intrinsic::loongarch_lasx_xvslti_b:
3374 case Intrinsic::loongarch_lasx_xvslti_h:
3375 case Intrinsic::loongarch_lasx_xvslti_w:
3376 case Intrinsic::loongarch_lasx_xvslti_d:
3377 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3378 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3379 case Intrinsic::loongarch_lsx_vsrani_h_w:
3380 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3381 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3382 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3383 case Intrinsic::loongarch_lsx_vssrani_h_w:
3384 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3385 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3386 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3387 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3388 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3389 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3390 case Intrinsic::loongarch_lsx_vfrstpi_b:
3391 case Intrinsic::loongarch_lsx_vfrstpi_h:
3392 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3393 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3394 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3395 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3396 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3397 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3398 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3399 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3400 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3401 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3402 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3403 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3404 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3405 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3406 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3407 case Intrinsic::loongarch_lsx_vsat_d:
3408 case Intrinsic::loongarch_lsx_vsat_du:
3409 case Intrinsic::loongarch_lsx_vrotri_d:
3410 case Intrinsic::loongarch_lsx_vsrlri_d:
3411 case Intrinsic::loongarch_lsx_vsrari_d:
3412 case Intrinsic::loongarch_lasx_xvsat_d:
3413 case Intrinsic::loongarch_lasx_xvsat_du:
3414 case Intrinsic::loongarch_lasx_xvrotri_d:
3415 case Intrinsic::loongarch_lasx_xvsrlri_d:
3416 case Intrinsic::loongarch_lasx_xvsrari_d:
3417 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3418 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3419 case Intrinsic::loongarch_lsx_vsrani_w_d:
3420 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3421 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3422 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3423 case Intrinsic::loongarch_lsx_vssrani_w_d:
3424 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3425 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3426 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3427 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3428 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3429 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3430 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3431 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3432 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3433 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3434 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3435 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3436 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3437 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3438 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3439 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3440 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3441 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3442 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3443 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3444 case Intrinsic::loongarch_lsx_vsrani_d_q:
3445 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3446 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3447 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3448 case Intrinsic::loongarch_lsx_vssrani_d_q:
3449 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3450 case Intrinsic::loongarch_lsx_vssrani_du_q:
3451 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3452 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3453 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3454 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3455 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3456 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3457 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3458 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3459 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3460 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3461 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3462 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3463 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3464 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3465 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3466 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3467 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3468 case Intrinsic::loongarch_lsx_vnori_b:
3469 case Intrinsic::loongarch_lsx_vshuf4i_b:
3470 case Intrinsic::loongarch_lsx_vshuf4i_h:
3471 case Intrinsic::loongarch_lsx_vshuf4i_w:
3472 case Intrinsic::loongarch_lasx_xvnori_b:
3473 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3474 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3475 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3476 case Intrinsic::loongarch_lasx_xvpermi_d:
3477 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3478 case Intrinsic::loongarch_lsx_vshuf4i_d:
3479 case Intrinsic::loongarch_lsx_vpermi_w:
3480 case Intrinsic::loongarch_lsx_vbitseli_b:
3481 case Intrinsic::loongarch_lsx_vextrins_b:
3482 case Intrinsic::loongarch_lsx_vextrins_h:
3483 case Intrinsic::loongarch_lsx_vextrins_w:
3484 case Intrinsic::loongarch_lsx_vextrins_d:
3485 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3486 case Intrinsic::loongarch_lasx_xvpermi_w:
3487 case Intrinsic::loongarch_lasx_xvpermi_q:
3488 case Intrinsic::loongarch_lasx_xvbitseli_b:
3489 case Intrinsic::loongarch_lasx_xvextrins_b:
3490 case Intrinsic::loongarch_lasx_xvextrins_h:
3491 case Intrinsic::loongarch_lasx_xvextrins_w:
3492 case Intrinsic::loongarch_lasx_xvextrins_d:
3493 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3494 case Intrinsic::loongarch_lsx_vrepli_b:
3495 case Intrinsic::loongarch_lsx_vrepli_h:
3496 case Intrinsic::loongarch_lsx_vrepli_w:
3497 case Intrinsic::loongarch_lsx_vrepli_d:
3498 case Intrinsic::loongarch_lasx_xvrepli_b:
3499 case Intrinsic::loongarch_lasx_xvrepli_h:
3500 case Intrinsic::loongarch_lasx_xvrepli_w:
3501 case Intrinsic::loongarch_lasx_xvrepli_d:
3502 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3503 case Intrinsic::loongarch_lsx_vldi:
3504 case Intrinsic::loongarch_lasx_xvldi:
3505 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
3506 }
3507}
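// Note: the checkIntrinsicImmArg<N>(Op, OpNo, DAG) calls above validate that
// operand OpNo of the intrinsic is an immediate that fits in N bits (signed
// when IsSigned is passed), emitting an out-of-range diagnostic otherwise.
// For example, loongarch_lsx_vsrlni_w_d narrows 64-bit lanes, so its shift
// amount is a uimm6 (0..63) and is checked with checkIntrinsicImmArg<6>(Op, 3, DAG).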
3508
3509// Helper function that emits an error message for intrinsics with a chain and
3510// returns the merge values of an UNDEF and the chain.
3511static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3512                                                  StringRef ErrorMsg,
3513 SelectionDAG &DAG) {
3514 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3515 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
3516 SDLoc(Op));
3517}
3518
3519SDValue
3520LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3521 SelectionDAG &DAG) const {
3522 SDLoc DL(Op);
3523 MVT GRLenVT = Subtarget.getGRLenVT();
3524 EVT VT = Op.getValueType();
3525 SDValue Chain = Op.getOperand(0);
3526 const StringRef ErrorMsgOOR = "argument out of range";
3527 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3528 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3529
3530 switch (Op.getConstantOperandVal(1)) {
3531 default:
3532 return Op;
3533 case Intrinsic::loongarch_crc_w_b_w:
3534 case Intrinsic::loongarch_crc_w_h_w:
3535 case Intrinsic::loongarch_crc_w_w_w:
3536 case Intrinsic::loongarch_crc_w_d_w:
3537 case Intrinsic::loongarch_crcc_w_b_w:
3538 case Intrinsic::loongarch_crcc_w_h_w:
3539 case Intrinsic::loongarch_crcc_w_w_w:
3540 case Intrinsic::loongarch_crcc_w_d_w:
3541 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
3542 case Intrinsic::loongarch_csrrd_w:
3543 case Intrinsic::loongarch_csrrd_d: {
3544 unsigned Imm = Op.getConstantOperandVal(2);
3545 return !isUInt<14>(Imm)
3546 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3547 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3548 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3549 }
3550 case Intrinsic::loongarch_csrwr_w:
3551 case Intrinsic::loongarch_csrwr_d: {
3552 unsigned Imm = Op.getConstantOperandVal(3);
3553 return !isUInt<14>(Imm)
3554 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3555 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3556 {Chain, Op.getOperand(2),
3557 DAG.getConstant(Imm, DL, GRLenVT)});
3558 }
3559 case Intrinsic::loongarch_csrxchg_w:
3560 case Intrinsic::loongarch_csrxchg_d: {
3561 unsigned Imm = Op.getConstantOperandVal(4);
3562 return !isUInt<14>(Imm)
3563 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3564 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3565 {Chain, Op.getOperand(2), Op.getOperand(3),
3566 DAG.getConstant(Imm, DL, GRLenVT)});
3567 }
3568 case Intrinsic::loongarch_iocsrrd_d: {
3569 return DAG.getNode(
3570 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
3571 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
3572 }
3573#define IOCSRRD_CASE(NAME, NODE) \
3574 case Intrinsic::loongarch_##NAME: { \
3575 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
3576 {Chain, Op.getOperand(2)}); \
3577 }
3578 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3579 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3580 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3581#undef IOCSRRD_CASE
3582 case Intrinsic::loongarch_cpucfg: {
3583 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3584 {Chain, Op.getOperand(2)});
3585 }
3586 case Intrinsic::loongarch_lddir_d: {
3587 unsigned Imm = Op.getConstantOperandVal(3);
3588 return !isUInt<8>(Imm)
3589 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3590 : Op;
3591 }
3592 case Intrinsic::loongarch_movfcsr2gr: {
3593 if (!Subtarget.hasBasicF())
3594 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
3595 unsigned Imm = Op.getConstantOperandVal(2);
3596 return !isUInt<2>(Imm)
3597 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3598 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
3599 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3600 }
3601 case Intrinsic::loongarch_lsx_vld:
3602 case Intrinsic::loongarch_lsx_vldrepl_b:
3603 case Intrinsic::loongarch_lasx_xvld:
3604 case Intrinsic::loongarch_lasx_xvldrepl_b:
3605 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3606 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3607 : SDValue();
3608 case Intrinsic::loongarch_lsx_vldrepl_h:
3609 case Intrinsic::loongarch_lasx_xvldrepl_h:
3610 return !isShiftedInt<11, 1>(
3611 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3612               ? emitIntrinsicWithChainErrorMessage(
3613                     Op, "argument out of range or not a multiple of 2", DAG)
3614 : SDValue();
3615 case Intrinsic::loongarch_lsx_vldrepl_w:
3616 case Intrinsic::loongarch_lasx_xvldrepl_w:
3617 return !isShiftedInt<10, 2>(
3618 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3619               ? emitIntrinsicWithChainErrorMessage(
3620                     Op, "argument out of range or not a multiple of 4", DAG)
3621 : SDValue();
3622 case Intrinsic::loongarch_lsx_vldrepl_d:
3623 case Intrinsic::loongarch_lasx_xvldrepl_d:
3624 return !isShiftedInt<9, 3>(
3625 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3626               ? emitIntrinsicWithChainErrorMessage(
3627                     Op, "argument out of range or not a multiple of 8", DAG)
3628 : SDValue();
3629 }
3630}
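// Note: the checks above mirror the instruction encodings: csrrd/csrwr/csrxchg
// take a 14-bit unsigned CSR index, movfcsr2gr a 2-bit FCSR number, and the
// vldrepl/xvldrepl family a signed 12-bit byte offset that must also be a
// multiple of the element size, hence isShiftedInt<11,1>/<10,2>/<9,3> for the
// h/w/d variants.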
3631
3632// Helper function that emits an error message for intrinsics with a void
3633// return value and returns the chain.
3634static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
3635                                         SelectionDAG &DAG) {
3636
3637 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3638 return Op.getOperand(0);
3639}
3640
3641SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
3642 SelectionDAG &DAG) const {
3643 SDLoc DL(Op);
3644 MVT GRLenVT = Subtarget.getGRLenVT();
3645 SDValue Chain = Op.getOperand(0);
3646 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
3647 SDValue Op2 = Op.getOperand(2);
3648 const StringRef ErrorMsgOOR = "argument out of range";
3649 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3650 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
3651 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3652
3653 switch (IntrinsicEnum) {
3654 default:
3655 // TODO: Add more Intrinsics.
3656 return SDValue();
3657 case Intrinsic::loongarch_cacop_d:
3658 case Intrinsic::loongarch_cacop_w: {
3659 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
3660 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
3661 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
3662 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
3663 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
3664 unsigned Imm1 = Op2->getAsZExtVal();
3665 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
3666 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
3667 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
3668 return Op;
3669 }
3670 case Intrinsic::loongarch_dbar: {
3671 unsigned Imm = Op2->getAsZExtVal();
3672 return !isUInt<15>(Imm)
3673 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3674 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
3675 DAG.getConstant(Imm, DL, GRLenVT));
3676 }
3677 case Intrinsic::loongarch_ibar: {
3678 unsigned Imm = Op2->getAsZExtVal();
3679 return !isUInt<15>(Imm)
3680 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3681 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
3682 DAG.getConstant(Imm, DL, GRLenVT));
3683 }
3684 case Intrinsic::loongarch_break: {
3685 unsigned Imm = Op2->getAsZExtVal();
3686 return !isUInt<15>(Imm)
3687 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3688 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
3689 DAG.getConstant(Imm, DL, GRLenVT));
3690 }
3691 case Intrinsic::loongarch_movgr2fcsr: {
3692 if (!Subtarget.hasBasicF())
3693 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
3694 unsigned Imm = Op2->getAsZExtVal();
3695 return !isUInt<2>(Imm)
3696 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3697 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
3698 DAG.getConstant(Imm, DL, GRLenVT),
3699 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
3700 Op.getOperand(3)));
3701 }
3702 case Intrinsic::loongarch_syscall: {
3703 unsigned Imm = Op2->getAsZExtVal();
3704 return !isUInt<15>(Imm)
3705 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3706 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
3707 DAG.getConstant(Imm, DL, GRLenVT));
3708 }
3709#define IOCSRWR_CASE(NAME, NODE) \
3710 case Intrinsic::loongarch_##NAME: { \
3711 SDValue Op3 = Op.getOperand(3); \
3712 return Subtarget.is64Bit() \
3713 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
3714 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3715 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
3716 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
3717 Op3); \
3718 }
3719 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
3720 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
3721 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
3722#undef IOCSRWR_CASE
3723 case Intrinsic::loongarch_iocsrwr_d: {
3724 return !Subtarget.is64Bit()
3725 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3726 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
3727 Op2,
3728 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
3729 Op.getOperand(3)));
3730 }
3731#define ASRT_LE_GT_CASE(NAME) \
3732 case Intrinsic::loongarch_##NAME: { \
3733 return !Subtarget.is64Bit() \
3734 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
3735 : Op; \
3736 }
3737 ASRT_LE_GT_CASE(asrtle_d)
3738 ASRT_LE_GT_CASE(asrtgt_d)
3739#undef ASRT_LE_GT_CASE
3740 case Intrinsic::loongarch_ldpte_d: {
3741 unsigned Imm = Op.getConstantOperandVal(3);
3742 return !Subtarget.is64Bit()
3743 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3744 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3745 : Op;
3746 }
3747 case Intrinsic::loongarch_lsx_vst:
3748 case Intrinsic::loongarch_lasx_xvst:
3749 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
3750 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3751 : SDValue();
3752 case Intrinsic::loongarch_lasx_xvstelm_b:
3753 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3754 !isUInt<5>(Op.getConstantOperandVal(5)))
3755 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3756 : SDValue();
3757 case Intrinsic::loongarch_lsx_vstelm_b:
3758 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3759 !isUInt<4>(Op.getConstantOperandVal(5)))
3760 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3761 : SDValue();
3762 case Intrinsic::loongarch_lasx_xvstelm_h:
3763 return (!isShiftedInt<8, 1>(
3764 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3765 !isUInt<4>(Op.getConstantOperandVal(5)))
3766               ? emitIntrinsicErrorMessage(
3767                     Op, "argument out of range or not a multiple of 2", DAG)
3768 : SDValue();
3769 case Intrinsic::loongarch_lsx_vstelm_h:
3770 return (!isShiftedInt<8, 1>(
3771 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3772 !isUInt<3>(Op.getConstantOperandVal(5)))
3773               ? emitIntrinsicErrorMessage(
3774                     Op, "argument out of range or not a multiple of 2", DAG)
3775 : SDValue();
3776 case Intrinsic::loongarch_lasx_xvstelm_w:
3777 return (!isShiftedInt<8, 2>(
3778 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3779 !isUInt<3>(Op.getConstantOperandVal(5)))
3780               ? emitIntrinsicErrorMessage(
3781                     Op, "argument out of range or not a multiple of 4", DAG)
3782 : SDValue();
3783 case Intrinsic::loongarch_lsx_vstelm_w:
3784 return (!isShiftedInt<8, 2>(
3785 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3786 !isUInt<2>(Op.getConstantOperandVal(5)))
3787               ? emitIntrinsicErrorMessage(
3788                     Op, "argument out of range or not a multiple of 4", DAG)
3789 : SDValue();
3790 case Intrinsic::loongarch_lasx_xvstelm_d:
3791 return (!isShiftedInt<8, 3>(
3792 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3793 !isUInt<2>(Op.getConstantOperandVal(5)))
3794               ? emitIntrinsicErrorMessage(
3795                     Op, "argument out of range or not a multiple of 8", DAG)
3796 : SDValue();
3797 case Intrinsic::loongarch_lsx_vstelm_d:
3798 return (!isShiftedInt<8, 3>(
3799 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3800 !isUInt<1>(Op.getConstantOperandVal(5)))
3801               ? emitIntrinsicErrorMessage(
3802                     Op, "argument out of range or not a multiple of 8", DAG)
3803 : SDValue();
3804 }
3805}
3806
3807SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
3808 SelectionDAG &DAG) const {
3809 SDLoc DL(Op);
3810 SDValue Lo = Op.getOperand(0);
3811 SDValue Hi = Op.getOperand(1);
3812 SDValue Shamt = Op.getOperand(2);
3813 EVT VT = Lo.getValueType();
3814
3815 // if Shamt-GRLen < 0: // Shamt < GRLen
3816 // Lo = Lo << Shamt
3817 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
3818 // else:
3819 // Lo = 0
3820 // Hi = Lo << (Shamt-GRLen)
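  // Worked example with GRLen=32 and Shamt=8:
  //   GRLen-1 ^ Shamt = 31 ^ 8 = 23, so
  //   Lo = Lo << 8
  //   Hi = (Hi << 8) | ((Lo >>u 1) >>u 23) = (Hi << 8) | (Lo >>u 24)
  // With Shamt=40, Shamt-GRLen=8 >= 0, so Lo = 0 and Hi = Lo << 8.
  // XOR-ing with GRLen-1 instead of computing GRLen-Shamt keeps the shift
  // amount in range even when Shamt is 0.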
3821
3822 SDValue Zero = DAG.getConstant(0, DL, VT);
3823 SDValue One = DAG.getConstant(1, DL, VT);
3824 SDValue MinusGRLen =
3825 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
3826 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
3827 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
3828 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
3829
3830 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3831 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3832 SDValue ShiftRightLo =
3833 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
3834 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3835 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3836 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
3837
3838 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
3839
3840 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
3841 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3842
3843 SDValue Parts[2] = {Lo, Hi};
3844 return DAG.getMergeValues(Parts, DL);
3845}
3846
3847SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
3848 SelectionDAG &DAG,
3849 bool IsSRA) const {
3850 SDLoc DL(Op);
3851 SDValue Lo = Op.getOperand(0);
3852 SDValue Hi = Op.getOperand(1);
3853 SDValue Shamt = Op.getOperand(2);
3854 EVT VT = Lo.getValueType();
3855
3856 // SRA expansion:
3857 // if Shamt-GRLen < 0: // Shamt < GRLen
3858 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
3859 // Hi = Hi >>s Shamt
3860 // else:
3861 // Lo = Hi >>s (Shamt-GRLen);
3862 // Hi = Hi >>s (GRLen-1)
3863 //
3864 // SRL expansion:
3865 // if Shamt-GRLen < 0: // Shamt < GRLen
3866 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
3867 // Hi = Hi >>u Shamt
3868 // else:
3869 // Lo = Hi >>u (Shamt-GRLen);
3870 // Hi = 0;
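  // Worked example (SRL) with GRLen=32 and Shamt=8:
  //   Lo = (Lo >>u 8) | ((Hi << 1) << 23) = (Lo >>u 8) | (Hi << 24)
  //   Hi = Hi >>u 8
  // For SRA the only differences are Hi = Hi >>s Shamt and, when
  // Shamt >= GRLen, Hi = Hi >>s (GRLen-1) to replicate the sign bit.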
3871
3872 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
3873
3874 SDValue Zero = DAG.getConstant(0, DL, VT);
3875 SDValue One = DAG.getConstant(1, DL, VT);
3876 SDValue MinusGRLen =
3877 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
3878 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
3879 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
3880 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
3881
3882 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
3883 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
3884 SDValue ShiftLeftHi =
3885 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
3886 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
3887 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
3888 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
3889 SDValue HiFalse =
3890 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
3891
3892 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
3893
3894 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
3895 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3896
3897 SDValue Parts[2] = {Lo, Hi};
3898 return DAG.getMergeValues(Parts, DL);
3899}
3900
3901// Returns the opcode of the target-specific SDNode that implements the 32-bit
3902// form of the given Opcode.
3903static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
3904  switch (Opcode) {
3905 default:
3906 llvm_unreachable("Unexpected opcode");
3907 case ISD::SDIV:
3908 return LoongArchISD::DIV_W;
3909 case ISD::UDIV:
3910 return LoongArchISD::DIV_WU;
3911 case ISD::SREM:
3912 return LoongArchISD::MOD_W;
3913 case ISD::UREM:
3914 return LoongArchISD::MOD_WU;
3915 case ISD::SHL:
3916 return LoongArchISD::SLL_W;
3917 case ISD::SRA:
3918 return LoongArchISD::SRA_W;
3919 case ISD::SRL:
3920 return LoongArchISD::SRL_W;
3921 case ISD::ROTL:
3922 case ISD::ROTR:
3923 return LoongArchISD::ROTR_W;
3924 case ISD::CTTZ:
3925 return LoongArchISD::CTZ_W;
3926 case ISD::CTLZ:
3927 return LoongArchISD::CLZ_W;
3928 }
3929}
3930
3931// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
3932// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
3933// otherwise be promoted to i64, making it difficult to later select the
3934// SLL_W/.../*W node, because the fact that the operation was originally of
3935// type i8/i16/i32 is lost.
3936static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
3937                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
3938 SDLoc DL(N);
3939 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
3940 SDValue NewOp0, NewRes;
3941
3942 switch (NumOp) {
3943 default:
3944 llvm_unreachable("Unexpected NumOp");
3945 case 1: {
3946 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
3947 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
3948 break;
3949 }
3950 case 2: {
3951 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
3952 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
3953 if (N->getOpcode() == ISD::ROTL) {
3954 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
3955 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
3956 }
3957 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
3958 break;
3959 }
3960  // TODO: Handle more NumOp.
3961 }
3962
3963 // ReplaceNodeResults requires we maintain the same type for the return
3964 // value.
3965 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
3966}
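// For illustration, (i32 (shl x, y)) on LA64 is legalized by the helper above
// as:
//   (trunc i32 (LoongArchISD::SLL_W (any_extend i64 x), (any_extend i64 y)))
// and (i32 (rotl x, y)) is mapped onto ROTR_W by negating the amount:
//   (trunc i32 (LoongArchISD::ROTR_W (any_extend i64 x),
//                                    (sub i64 32, (any_extend i64 y))))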
3967
3968// Converts the given 32-bit operation to an i64 operation with sign-extension
3969// semantics, to reduce the number of sign-extension instructions.
3970static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
3971  SDLoc DL(N);
3972 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
3973 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
3974 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
3975 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
3976 DAG.getValueType(MVT::i32));
3977 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
3978}
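// For illustration, an i32 add on LA64 becomes
//   (trunc i32 (sext_inreg i64 (add (any_extend x), (any_extend y)), i32))
// so the 64-bit result is known to be sign-extended and redundant sign
// extensions of the value can later be folded away.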
3979
3980// Helper function that emits an error message for intrinsics with or without
3981// a chain, and returns an UNDEF and (if WithChain) the chain as the results.
3982static void emitErrorAndReplaceIntrinsicResults(
3983    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
3984    StringRef ErrorMsg, bool WithChain = true) {
3985 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
3986 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
3987 if (!WithChain)
3988 return;
3989 Results.push_back(N->getOperand(0));
3990}
3991
3992template <unsigned N>
3993static void
3994replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
3995                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
3996 unsigned ResOp) {
3997 const StringRef ErrorMsgOOR = "argument out of range";
3998 unsigned Imm = Node->getConstantOperandVal(2);
3999 if (!isUInt<N>(Imm)) {
4000    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4001                                        /*WithChain=*/false);
4002 return;
4003 }
4004 SDLoc DL(Node);
4005 SDValue Vec = Node->getOperand(1);
4006
4007 SDValue PickElt =
4008 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4009 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4010                  DAG.getValueType(Vec.getValueType().getVectorElementType()));
4011  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4012 PickElt.getValue(0)));
4013}
4014
4015static void replaceVecCondBranchResults(SDNode *N,
4016                                        SmallVectorImpl<SDValue> &Results,
4017                                        SelectionDAG &DAG,
4018 const LoongArchSubtarget &Subtarget,
4019 unsigned ResOp) {
4020 SDLoc DL(N);
4021 SDValue Vec = N->getOperand(1);
4022
4023 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4024 Results.push_back(
4025 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4026}
4027
4028static void
4029replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4030                                 SelectionDAG &DAG,
4031 const LoongArchSubtarget &Subtarget) {
4032 switch (N->getConstantOperandVal(0)) {
4033 default:
4034 llvm_unreachable("Unexpected Intrinsic.");
4035 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4036 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4037                                LoongArchISD::VPICK_SEXT_ELT);
4038    break;
4039 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4040 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4041 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4042                                LoongArchISD::VPICK_SEXT_ELT);
4043    break;
4044 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4045 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4046                                LoongArchISD::VPICK_SEXT_ELT);
4047    break;
4048 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4049 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4050                                LoongArchISD::VPICK_ZEXT_ELT);
4051    break;
4052 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4053 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4054 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4055                                LoongArchISD::VPICK_ZEXT_ELT);
4056    break;
4057 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4058 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4059                                LoongArchISD::VPICK_ZEXT_ELT);
4060    break;
4061 case Intrinsic::loongarch_lsx_bz_b:
4062 case Intrinsic::loongarch_lsx_bz_h:
4063 case Intrinsic::loongarch_lsx_bz_w:
4064 case Intrinsic::loongarch_lsx_bz_d:
4065 case Intrinsic::loongarch_lasx_xbz_b:
4066 case Intrinsic::loongarch_lasx_xbz_h:
4067 case Intrinsic::loongarch_lasx_xbz_w:
4068 case Intrinsic::loongarch_lasx_xbz_d:
4069 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4070                                LoongArchISD::VANY_ZERO);
4071    break;
4072 case Intrinsic::loongarch_lsx_bz_v:
4073 case Intrinsic::loongarch_lasx_xbz_v:
4074 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4075                                LoongArchISD::VALL_ZERO);
4076    break;
4077 case Intrinsic::loongarch_lsx_bnz_b:
4078 case Intrinsic::loongarch_lsx_bnz_h:
4079 case Intrinsic::loongarch_lsx_bnz_w:
4080 case Intrinsic::loongarch_lsx_bnz_d:
4081 case Intrinsic::loongarch_lasx_xbnz_b:
4082 case Intrinsic::loongarch_lasx_xbnz_h:
4083 case Intrinsic::loongarch_lasx_xbnz_w:
4084 case Intrinsic::loongarch_lasx_xbnz_d:
4085 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4086                                LoongArchISD::VALL_NONZERO);
4087    break;
4088 case Intrinsic::loongarch_lsx_bnz_v:
4089 case Intrinsic::loongarch_lasx_xbnz_v:
4090 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4091                                LoongArchISD::VANY_NONZERO);
4092    break;
4093 }
4094}
4095
4096static void replaceCMP_XCHG_128Results(SDNode *N,
4097                                       SmallVectorImpl<SDValue> &Results,
4098                                       SelectionDAG &DAG) {
4099 assert(N->getValueType(0) == MVT::i128 &&
4100 "AtomicCmpSwap on types less than 128 should be legal");
4101 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4102
4103 unsigned Opcode;
4104 switch (MemOp->getMergedOrdering()) {
4105  case AtomicOrdering::Acquire:
4106  case AtomicOrdering::AcquireRelease:
4107  case AtomicOrdering::SequentiallyConsistent:
4108    Opcode = LoongArch::PseudoCmpXchg128Acquire;
4109 break;
4110  case AtomicOrdering::Monotonic:
4111  case AtomicOrdering::Release:
4112    Opcode = LoongArch::PseudoCmpXchg128;
4113 break;
4114 default:
4115 llvm_unreachable("Unexpected ordering!");
4116 }
4117
4118 SDLoc DL(N);
4119 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4120 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4121 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4122 NewVal.first, NewVal.second, N->getOperand(0)};
4123
4124 SDNode *CmpSwap = DAG.getMachineNode(
4125 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4126 Ops);
4127 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4128 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4129 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4130 Results.push_back(SDValue(CmpSwap, 3));
4131}
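// Sketch of the expansion above for an acquire i128 cmpxchg:
//   (CmpLo, CmpHi) = SplitScalar(expected), (NewLo, NewHi) = SplitScalar(new)
//   {OldLo, OldHi, scratch, chain} =
//       PseudoCmpXchg128Acquire ptr, CmpLo, CmpHi, NewLo, NewHi, chain
//   result = (build_pair OldLo, OldHi) : i128
// Results 0/1 feed the BUILD_PAIR and result 3 is the output chain; result 2
// is not used here.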
4132
4133void LoongArchTargetLowering::ReplaceNodeResults(
4134    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4135  SDLoc DL(N);
4136 EVT VT = N->getValueType(0);
4137 switch (N->getOpcode()) {
4138 default:
4139 llvm_unreachable("Don't know how to legalize this operation");
4140 case ISD::ADD:
4141 case ISD::SUB:
4142 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4143 "Unexpected custom legalisation");
4144 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4145 break;
4146 case ISD::SDIV:
4147 case ISD::UDIV:
4148 case ISD::SREM:
4149 case ISD::UREM:
4150 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4151 "Unexpected custom legalisation");
4152 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4153 Subtarget.hasDiv32() && VT == MVT::i32
4154                                              ? ISD::ANY_EXTEND
4155                                              : ISD::SIGN_EXTEND));
4156 break;
4157 case ISD::SHL:
4158 case ISD::SRA:
4159 case ISD::SRL:
4160 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4161 "Unexpected custom legalisation");
4162 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4163 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4164 break;
4165 }
4166 break;
4167 case ISD::ROTL:
4168 case ISD::ROTR:
4169 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4170 "Unexpected custom legalisation");
4171 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4172 break;
4173 case ISD::FP_TO_SINT: {
4174 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4175 "Unexpected custom legalisation");
4176 SDValue Src = N->getOperand(0);
4177 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4178 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4179            TargetLowering::TypeSoftenFloat) {
4180      if (!isTypeLegal(Src.getValueType()))
4181 return;
4182 if (Src.getValueType() == MVT::f16)
4183 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4184 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4185 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4186 return;
4187 }
4188 // If the FP type needs to be softened, emit a library call using the 'si'
4189 // version. If we left it to default legalization we'd end up with 'di'.
4190 RTLIB::Libcall LC;
4191 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4192 MakeLibCallOptions CallOptions;
4193 EVT OpVT = Src.getValueType();
4194 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4195 SDValue Chain = SDValue();
4196 SDValue Result;
4197 std::tie(Result, Chain) =
4198 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4199 Results.push_back(Result);
4200 break;
4201 }
4202 case ISD::BITCAST: {
4203 SDValue Src = N->getOperand(0);
4204 EVT SrcVT = Src.getValueType();
4205 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4206 Subtarget.hasBasicF()) {
4207 SDValue Dst =
4208 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4209 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4210 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4212 DAG.getVTList(MVT::i32, MVT::i32), Src);
4213 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4214 NewReg.getValue(0), NewReg.getValue(1));
4215 Results.push_back(RetReg);
4216 }
4217 break;
4218 }
4219 case ISD::FP_TO_UINT: {
4220 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4221 "Unexpected custom legalisation");
4222 auto &TLI = DAG.getTargetLoweringInfo();
4223 SDValue Tmp1, Tmp2;
4224 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4225 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4226 break;
4227 }
4228 case ISD::BSWAP: {
4229 SDValue Src = N->getOperand(0);
4230 assert((VT == MVT::i16 || VT == MVT::i32) &&
4231 "Unexpected custom legalization");
4232 MVT GRLenVT = Subtarget.getGRLenVT();
4233 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4234 SDValue Tmp;
4235 switch (VT.getSizeInBits()) {
4236 default:
4237 llvm_unreachable("Unexpected operand width");
4238 case 16:
4239 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4240 break;
4241 case 32:
4242 // Only LA64 will get to here due to the size mismatch between VT and
4243 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
4244 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4245 break;
4246 }
4247 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4248 break;
4249 }
4250 case ISD::BITREVERSE: {
4251 SDValue Src = N->getOperand(0);
4252 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4253 "Unexpected custom legalization");
4254 MVT GRLenVT = Subtarget.getGRLenVT();
4255 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4256 SDValue Tmp;
4257 switch (VT.getSizeInBits()) {
4258 default:
4259 llvm_unreachable("Unexpected operand width");
4260 case 8:
4261 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4262 break;
4263 case 32:
4264 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4265 break;
4266 }
4267 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4268 break;
4269 }
4270 case ISD::CTLZ:
4271 case ISD::CTTZ: {
4272 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4273 "Unexpected custom legalisation");
4274 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4275 break;
4276 }
4277  case ISD::INTRINSIC_W_CHAIN: {
4278    SDValue Chain = N->getOperand(0);
4279 SDValue Op2 = N->getOperand(2);
4280 MVT GRLenVT = Subtarget.getGRLenVT();
4281 const StringRef ErrorMsgOOR = "argument out of range";
4282 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4283 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4284
4285 switch (N->getConstantOperandVal(1)) {
4286 default:
4287 llvm_unreachable("Unexpected Intrinsic.");
4288 case Intrinsic::loongarch_movfcsr2gr: {
4289 if (!Subtarget.hasBasicF()) {
4290 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4291 return;
4292 }
4293 unsigned Imm = Op2->getAsZExtVal();
4294 if (!isUInt<2>(Imm)) {
4295 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4296 return;
4297 }
4298 SDValue MOVFCSR2GRResults = DAG.getNode(
4299 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4300 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4301 Results.push_back(
4302 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4303 Results.push_back(MOVFCSR2GRResults.getValue(1));
4304 break;
4305 }
4306#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4307 case Intrinsic::loongarch_##NAME: { \
4308 SDValue NODE = DAG.getNode( \
4309 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4310 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4311 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4312 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4313 Results.push_back(NODE.getValue(1)); \
4314 break; \
4315 }
4316 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4317 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4318 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4319 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4320 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4321 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4322#undef CRC_CASE_EXT_BINARYOP
4323
4324#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4325 case Intrinsic::loongarch_##NAME: { \
4326 SDValue NODE = DAG.getNode( \
4327 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4328 {Chain, Op2, \
4329 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4330 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4331 Results.push_back(NODE.getValue(1)); \
4332 break; \
4333 }
4334 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4335 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4336#undef CRC_CASE_EXT_UNARYOP
4337#define CSR_CASE(ID) \
4338 case Intrinsic::loongarch_##ID: { \
4339 if (!Subtarget.is64Bit()) \
4340 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4341 break; \
4342 }
4343 CSR_CASE(csrrd_d);
4344 CSR_CASE(csrwr_d);
4345 CSR_CASE(csrxchg_d);
4346 CSR_CASE(iocsrrd_d);
4347#undef CSR_CASE
4348 case Intrinsic::loongarch_csrrd_w: {
4349 unsigned Imm = Op2->getAsZExtVal();
4350 if (!isUInt<14>(Imm)) {
4351 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4352 return;
4353 }
4354 SDValue CSRRDResults =
4355 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4356 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4357 Results.push_back(
4358 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4359 Results.push_back(CSRRDResults.getValue(1));
4360 break;
4361 }
4362 case Intrinsic::loongarch_csrwr_w: {
4363 unsigned Imm = N->getConstantOperandVal(3);
4364 if (!isUInt<14>(Imm)) {
4365 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4366 return;
4367 }
4368 SDValue CSRWRResults =
4369 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4370 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4371 DAG.getConstant(Imm, DL, GRLenVT)});
4372 Results.push_back(
4373 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4374 Results.push_back(CSRWRResults.getValue(1));
4375 break;
4376 }
4377 case Intrinsic::loongarch_csrxchg_w: {
4378 unsigned Imm = N->getConstantOperandVal(4);
4379 if (!isUInt<14>(Imm)) {
4380 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4381 return;
4382 }
4383 SDValue CSRXCHGResults = DAG.getNode(
4384 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4385 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4386 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4387 DAG.getConstant(Imm, DL, GRLenVT)});
4388 Results.push_back(
4389 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4390 Results.push_back(CSRXCHGResults.getValue(1));
4391 break;
4392 }
4393#define IOCSRRD_CASE(NAME, NODE) \
4394 case Intrinsic::loongarch_##NAME: { \
4395 SDValue IOCSRRDResults = \
4396 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4397 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4398 Results.push_back( \
4399 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4400 Results.push_back(IOCSRRDResults.getValue(1)); \
4401 break; \
4402 }
4403 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4404 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4405 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4406#undef IOCSRRD_CASE
4407 case Intrinsic::loongarch_cpucfg: {
4408 SDValue CPUCFGResults =
4409 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4410 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4411 Results.push_back(
4412 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4413 Results.push_back(CPUCFGResults.getValue(1));
4414 break;
4415 }
4416 case Intrinsic::loongarch_lddir_d: {
4417 if (!Subtarget.is64Bit()) {
4418 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4419 return;
4420 }
4421 break;
4422 }
4423 }
4424 break;
4425 }
4426 case ISD::READ_REGISTER: {
4427 if (Subtarget.is64Bit())
4428 DAG.getContext()->emitError(
4429 "On LA64, only 64-bit registers can be read.");
4430 else
4431 DAG.getContext()->emitError(
4432 "On LA32, only 32-bit registers can be read.");
4433 Results.push_back(DAG.getUNDEF(VT));
4434 Results.push_back(N->getOperand(0));
4435 break;
4436 }
4437  case ISD::INTRINSIC_WO_CHAIN: {
4438    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4439 break;
4440 }
4441 case ISD::LROUND: {
4442 SDValue Op0 = N->getOperand(0);
4443 EVT OpVT = Op0.getValueType();
4444 RTLIB::Libcall LC =
4445 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4446 MakeLibCallOptions CallOptions;
4447 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
4448 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4449 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4450 Results.push_back(Result);
4451 break;
4452 }
4453 case ISD::ATOMIC_CMP_SWAP: {
4454    replaceCMP_XCHG_128Results(N, Results, DAG);
4455    break;
4456 }
4457 case ISD::TRUNCATE: {
4458 MVT VT = N->getSimpleValueType(0);
4459 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4460 return;
4461
4462 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4463 SDValue In = N->getOperand(0);
4464 EVT InVT = In.getValueType();
4465 EVT InEltVT = InVT.getVectorElementType();
4466 EVT EltVT = VT.getVectorElementType();
4467 unsigned MinElts = VT.getVectorNumElements();
4468 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4469 unsigned InBits = InVT.getSizeInBits();
4470
4471 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4472 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4473 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4474 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4475 for (unsigned I = 0; I < MinElts; ++I)
4476 TruncMask[I] = Scale * I;
4477
4478 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4479 MVT SVT = In.getSimpleValueType().getScalarType();
4480 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4481 SDValue WidenIn =
4482 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4483 DAG.getVectorIdxConstant(0, DL));
4484 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4485 "Illegal vector type in truncation");
4486 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4487 Results.push_back(
4488 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4489 return;
4490 }
4491 }
4492
4493 break;
4494 }
4495 }
4496}
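// For illustration, in the ISD::TRUNCATE case above a v4i32 -> v4i8 truncate
// (with v4i8 widened to v16i8) is lowered by bitcasting the source to v16i8
// and shuffling bytes {0, 4, 8, 12} into the low elements, i.e. taking the
// low byte of each 32-bit lane instead of scalarizing the truncate.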
4497
4498static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4499                                 TargetLowering::DAGCombinerInfo &DCI,
4500                                 const LoongArchSubtarget &Subtarget) {
4501 if (DCI.isBeforeLegalizeOps())
4502 return SDValue();
4503
4504 SDValue FirstOperand = N->getOperand(0);
4505 SDValue SecondOperand = N->getOperand(1);
4506 unsigned FirstOperandOpc = FirstOperand.getOpcode();
4507 EVT ValTy = N->getValueType(0);
4508 SDLoc DL(N);
4509 uint64_t lsb, msb;
4510 unsigned SMIdx, SMLen;
4511 ConstantSDNode *CN;
4512 SDValue NewOperand;
4513 MVT GRLenVT = Subtarget.getGRLenVT();
4514
4515 // BSTRPICK requires the 32S feature.
4516 if (!Subtarget.has32S())
4517 return SDValue();
4518
4519 // Op's second operand must be a shifted mask.
4520 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
4521 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
4522 return SDValue();
4523
4524 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4525 // Pattern match BSTRPICK.
4526 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
4527 // => BSTRPICK $dst, $src, msb, lsb
4528 // where msb = lsb + len - 1
4529
4530 // The second operand of the shift must be an immediate.
4531 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
4532 return SDValue();
4533
4534 lsb = CN->getZExtValue();
4535
4536 // Return if the shifted mask does not start at bit 0 or the sum of its
4537 // length and lsb exceeds the word's size.
4538 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4539 return SDValue();
4540
4541 NewOperand = FirstOperand.getOperand(0);
4542 } else {
4543 // Pattern match BSTRPICK.
4544    // $dst = and $src, (2**len - 1), if len > 12
4545 // => BSTRPICK $dst, $src, msb, lsb
4546 // where lsb = 0 and msb = len - 1
4547
4548 // If the mask is <= 0xfff, andi can be used instead.
4549 if (CN->getZExtValue() <= 0xfff)
4550 return SDValue();
4551
4552    // Return if the MSB (SMIdx + SMLen - 1) would exceed the value's width.
4553 if (SMIdx + SMLen > ValTy.getSizeInBits())
4554 return SDValue();
4555
4556 if (SMIdx > 0) {
4557      // Omit if the constant has more than 2 uses. This is a conservative
4558 // decision. Whether it is a win depends on the HW microarchitecture.
4559 // However it should always be better for 1 and 2 uses.
4560 if (CN->use_size() > 2)
4561 return SDValue();
4562 // Return if the constant can be composed by a single LU12I.W.
4563 if ((CN->getZExtValue() & 0xfff) == 0)
4564 return SDValue();
4565      // Return if the constant can be composed by a single ADDI with
4566 // the zero register.
4567 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
4568 return SDValue();
4569 }
4570
4571 lsb = SMIdx;
4572 NewOperand = FirstOperand;
4573 }
4574
4575 msb = lsb + SMLen - 1;
4576 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
4577 DAG.getConstant(msb, DL, GRLenVT),
4578 DAG.getConstant(lsb, DL, GRLenVT));
4579 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
4580 return NR0;
4581 // Try to optimize to
4582 // bstrpick $Rd, $Rs, msb, lsb
4583 // slli $Rd, $Rd, lsb
4584 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
4585 DAG.getConstant(lsb, DL, GRLenVT));
4586}
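// Examples of the AND combine above (64-bit values):
//   (and (srl $src, 8), 255)   -> (BSTRPICK $src, 15, 8)
//   (and $src, 0xff000)        -> (shl (BSTRPICK $src, 19, 12), 12)
//   (and $src, 0xff0)          -> left alone; the mask fits in 12 bits, so a
//                                 plain andi is cheaper.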
4587
4588static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
4589                                 TargetLowering::DAGCombinerInfo &DCI,
4590                                 const LoongArchSubtarget &Subtarget) {
4591 // BSTRPICK requires the 32S feature.
4592 if (!Subtarget.has32S())
4593 return SDValue();
4594
4595 if (DCI.isBeforeLegalizeOps())
4596 return SDValue();
4597
4598 // $dst = srl (and $src, Mask), Shamt
4599 // =>
4600 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
4601 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
4602 //
4603
4604 SDValue FirstOperand = N->getOperand(0);
4605 ConstantSDNode *CN;
4606 EVT ValTy = N->getValueType(0);
4607 SDLoc DL(N);
4608 MVT GRLenVT = Subtarget.getGRLenVT();
4609 unsigned MaskIdx, MaskLen;
4610 uint64_t Shamt;
4611
4612 // The first operand must be an AND and the second operand of the AND must be
4613 // a shifted mask.
4614 if (FirstOperand.getOpcode() != ISD::AND ||
4615 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
4616 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
4617 return SDValue();
4618
4619 // The second operand (shift amount) must be an immediate.
4620 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
4621 return SDValue();
4622
4623 Shamt = CN->getZExtValue();
4624 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
4625 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
4626 FirstOperand->getOperand(0),
4627 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4628 DAG.getConstant(Shamt, DL, GRLenVT));
4629
4630 return SDValue();
4631}
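// Example of the SRL combine above: (srl (and $src, 0xff00), 8) has
// MaskIdx=8, MaskLen=8 and Shamt=8, so MaskIdx <= Shamt <= MaskIdx+MaskLen-1
// holds and the node becomes (BSTRPICK $src, 15, 8), extracting bits [15:8].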
4632
4633// Helper to peek through bitops/trunc/setcc to determine size of source vector.
4634// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
4635static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
4636 unsigned Depth) {
4637 // Limit recursion.
4638  if (Depth >= SelectionDAG::MaxRecursionDepth)
4639    return false;
4640 switch (Src.getOpcode()) {
4641 case ISD::SETCC:
4642 case ISD::TRUNCATE:
4643 return Src.getOperand(0).getValueSizeInBits() == Size;
4644 case ISD::FREEZE:
4645 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
4646 case ISD::AND:
4647 case ISD::XOR:
4648 case ISD::OR:
4649 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
4650 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
4651 case ISD::SELECT:
4652 case ISD::VSELECT:
4653 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
4654 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
4655 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
4656 case ISD::BUILD_VECTOR:
4657 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
4658 ISD::isBuildVectorAllOnes(Src.getNode());
4659 }
4660 return false;
4661}
4662
4663// Helper to push sign extension of vXi1 SETCC result through bitops.
4665 SDValue Src, const SDLoc &DL) {
4666 switch (Src.getOpcode()) {
4667 case ISD::SETCC:
4668 case ISD::FREEZE:
4669 case ISD::TRUNCATE:
4670 case ISD::BUILD_VECTOR:
4671 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4672 case ISD::AND:
4673 case ISD::XOR:
4674 case ISD::OR:
4675 return DAG.getNode(
4676 Src.getOpcode(), DL, SExtVT,
4677 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
4678 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
4679 case ISD::SELECT:
4680 case ISD::VSELECT:
4681 return DAG.getSelect(
4682 DL, SExtVT, Src.getOperand(0),
4683 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
4684 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
4685 }
4686 llvm_unreachable("Unexpected node type for vXi1 sign extension");
4687}
4688
4689static SDValue
4690performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
4691                            TargetLowering::DAGCombinerInfo &DCI,
4692                            const LoongArchSubtarget &Subtarget) {
4693 SDLoc DL(N);
4694 EVT VT = N->getValueType(0);
4695 SDValue Src = N->getOperand(0);
4696 EVT SrcVT = Src.getValueType();
4697
4698 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
4699 return SDValue();
4700
4701 bool UseLASX;
4702 unsigned Opc = ISD::DELETED_NODE;
4703 EVT CmpVT = Src.getOperand(0).getValueType();
4704 EVT EltVT = CmpVT.getVectorElementType();
4705
4706 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
4707 UseLASX = false;
4708 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
4709 CmpVT.getSizeInBits() == 256)
4710 UseLASX = true;
4711 else
4712 return SDValue();
4713
4714 SDValue SrcN1 = Src.getOperand(1);
4715 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
4716 default:
4717 break;
4718 case ISD::SETEQ:
4719 // x == 0 => not (vmsknez.b x)
4720 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4721      Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
4722    break;
4723 case ISD::SETGT:
4724 // x > -1 => vmskgez.b x
4725 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
4726      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4727    break;
4728 case ISD::SETGE:
4729 // x >= 0 => vmskgez.b x
4730 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4731      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4732    break;
4733 case ISD::SETLT:
4734 // x < 0 => vmskltz.{b,h,w,d} x
4735 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
4736 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4737 EltVT == MVT::i64))
4738      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4739    break;
4740 case ISD::SETLE:
4741 // x <= -1 => vmskltz.{b,h,w,d} x
4742 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
4743 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4744 EltVT == MVT::i64))
4745      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4746    break;
4747 case ISD::SETNE:
4748 // x != 0 => vmsknez.b x
4749 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4750      Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
4751    break;
4752 }
4753
4754 if (Opc == ISD::DELETED_NODE)
4755 return SDValue();
4756
4757 SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
4758  EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
4759  V = DAG.getZExtOrTrunc(V, DL, T);
4760 return DAG.getBitcast(VT, V);
4761}
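// For example, (i16 (bitcast (setcc v16i8:$x, zeroinitializer, setlt))) is
// turned into a single vmskltz.b: the sign bit of each byte is gathered into
// the low 16 bits of a GPR, which is then truncated to i16.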
4762
4763static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
4764                                     TargetLowering::DAGCombinerInfo &DCI,
4765                                     const LoongArchSubtarget &Subtarget) {
4766 SDLoc DL(N);
4767 EVT VT = N->getValueType(0);
4768 SDValue Src = N->getOperand(0);
4769 EVT SrcVT = Src.getValueType();
4770
4771 if (!DCI.isBeforeLegalizeOps())
4772 return SDValue();
4773
4774 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
4775 return SDValue();
4776
4777 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
4778 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
4779 if (Res)
4780 return Res;
4781
4782 // Generate vXi1 using [X]VMSKLTZ
4783 MVT SExtVT;
4784 unsigned Opc;
4785 bool UseLASX = false;
4786 bool PropagateSExt = false;
4787
4788 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
4789 EVT CmpVT = Src.getOperand(0).getValueType();
4790 if (CmpVT.getSizeInBits() > 256)
4791 return SDValue();
4792 }
4793
4794 switch (SrcVT.getSimpleVT().SimpleTy) {
4795 default:
4796 return SDValue();
4797 case MVT::v2i1:
4798 SExtVT = MVT::v2i64;
4799 break;
4800 case MVT::v4i1:
4801 SExtVT = MVT::v4i32;
4802 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4803 SExtVT = MVT::v4i64;
4804 UseLASX = true;
4805 PropagateSExt = true;
4806 }
4807 break;
4808 case MVT::v8i1:
4809 SExtVT = MVT::v8i16;
4810 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4811 SExtVT = MVT::v8i32;
4812 UseLASX = true;
4813 PropagateSExt = true;
4814 }
4815 break;
4816 case MVT::v16i1:
4817 SExtVT = MVT::v16i8;
4818 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4819 SExtVT = MVT::v16i16;
4820 UseLASX = true;
4821 PropagateSExt = true;
4822 }
4823 break;
4824 case MVT::v32i1:
4825 SExtVT = MVT::v32i8;
4826 UseLASX = true;
4827 break;
4828 };
4829 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
4830 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4831
4832 SDValue V;
4833 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
4834 if (Src.getSimpleValueType() == MVT::v32i8) {
4835 SDValue Lo, Hi;
4836 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
4837 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
4838 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
4839 Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
4840 DAG.getConstant(16, DL, MVT::i8));
4841 V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
4842 } else if (UseLASX) {
4843 return SDValue();
4844 }
4845 }
4846
4847 if (!V) {
4848    Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4849    V = DAG.getNode(Opc, DL, MVT::i64, Src);
4850 }
4851
4852  EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
4853  V = DAG.getZExtOrTrunc(V, DL, T);
4854 return DAG.getBitcast(VT, V);
4855}
4856
4857static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
4858                                TargetLowering::DAGCombinerInfo &DCI,
4859                                const LoongArchSubtarget &Subtarget) {
4860 MVT GRLenVT = Subtarget.getGRLenVT();
4861 EVT ValTy = N->getValueType(0);
4862 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4863 ConstantSDNode *CN0, *CN1;
4864 SDLoc DL(N);
4865 unsigned ValBits = ValTy.getSizeInBits();
4866 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
4867 unsigned Shamt;
4868 bool SwapAndRetried = false;
4869
4870 // BSTRPICK requires the 32S feature.
4871 if (!Subtarget.has32S())
4872 return SDValue();
4873
4874 if (DCI.isBeforeLegalizeOps())
4875 return SDValue();
4876
4877 if (ValBits != 32 && ValBits != 64)
4878 return SDValue();
4879
4880Retry:
4881 // 1st pattern to match BSTRINS:
4882 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
4883 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
4884 // =>
4885 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
4886 if (N0.getOpcode() == ISD::AND &&
4887 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4888 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4889 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
4890 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4891 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
4892 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
4893 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
4894 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
4895 (MaskIdx0 + MaskLen0 <= ValBits)) {
4896 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
4897 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4898 N1.getOperand(0).getOperand(0),
4899 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
4900 DAG.getConstant(MaskIdx0, DL, GRLenVT));
4901 }
4902
4903 // 2nd pattern to match BSTRINS:
4904 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
4905 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
4906 // =>
4907 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
4908 if (N0.getOpcode() == ISD::AND &&
4909 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4910 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4911 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
4912 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4913 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
4914 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
4915 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
4916 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
4917 (MaskIdx0 + MaskLen0 <= ValBits)) {
4918 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
4919 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4920 N1.getOperand(0).getOperand(0),
4921 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
4922 DAG.getConstant(MaskIdx0, DL, GRLenVT));
4923 }
4924
4925 // 3rd pattern to match BSTRINS:
4926 // R = or (and X, mask0), (and Y, mask1)
4927 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
4928 // =>
4929 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
4930 // where msb = lsb + size - 1
4931 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4932 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4933 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4934 (MaskIdx0 + MaskLen0 <= 64) &&
4935 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
4936 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
4937 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
4938 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4939 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
4940 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
4941 DAG.getConstant(ValBits == 32
4942 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
4943 : (MaskIdx0 + MaskLen0 - 1),
4944 DL, GRLenVT),
4945 DAG.getConstant(MaskIdx0, DL, GRLenVT));
4946 }
4947
4948 // 4th pattern to match BSTRINS:
4949 // R = or (and X, mask), (shl Y, shamt)
4950 // where mask = (2**shamt - 1)
4951 // =>
4952 // R = BSTRINS X, Y, ValBits - 1, shamt
4953 // where ValBits = 32 or 64
4954 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
4955 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4956 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
4957 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
4958 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
4959 (MaskIdx0 + MaskLen0 <= ValBits)) {
4960 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
4961 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4962 N1.getOperand(0),
4963 DAG.getConstant((ValBits - 1), DL, GRLenVT),
4964 DAG.getConstant(Shamt, DL, GRLenVT));
4965 }
4966
4967 // 5th pattern to match BSTRINS:
4968 // R = or (and X, mask), const
4969 // where ~mask = (2**size - 1) << lsb, mask & const = 0
4970 // =>
4971 // R = BSTRINS X, (const >> lsb), msb, lsb
4972 // where msb = lsb + size - 1
4973 if (N0.getOpcode() == ISD::AND &&
4974 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
4975 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
4976 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
4977 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
4978 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
4979 return DAG.getNode(
4980 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
4981 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
4982 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
4983 : (MaskIdx0 + MaskLen0 - 1),
4984 DL, GRLenVT),
4985 DAG.getConstant(MaskIdx0, DL, GRLenVT));
4986 }
4987
4988 // 6th pattern.
4989 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
4990 // by the incoming bits are known to be zero.
4991 // =>
4992 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
4993 //
4994 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
4995 // pattern is more common than the 1st. So we put the 1st before the 6th in
4996 // order to match as many nodes as possible.
4997 ConstantSDNode *CNMask, *CNShamt;
4998 unsigned MaskIdx, MaskLen;
4999 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5000 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5001 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5002 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5003 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5004 Shamt = CNShamt->getZExtValue();
5005 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5006 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5007 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5008 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5009 N1.getOperand(0).getOperand(0),
5010 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5011 DAG.getConstant(Shamt, DL, GRLenVT));
5012 }
5013 }
5014
5015 // 7th pattern.
5016 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5017 // overwritten by the incoming bits are known to be zero.
5018 // =>
5019 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5020 //
5021 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5022 // before the 7th in order to match as many nodes as possible.
5023 if (N1.getOpcode() == ISD::AND &&
5024 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5025 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5026 N1.getOperand(0).getOpcode() == ISD::SHL &&
5027 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5028 CNShamt->getZExtValue() == MaskIdx) {
5029 APInt ShMask(ValBits, CNMask->getZExtValue());
5030 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5031 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5032 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5033 N1.getOperand(0).getOperand(0),
5034 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5035 DAG.getConstant(MaskIdx, DL, GRLenVT));
5036 }
5037 }
5038
5039 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5040 if (!SwapAndRetried) {
5041 std::swap(N0, N1);
5042 SwapAndRetried = true;
5043 goto Retry;
5044 }
5045
5046 SwapAndRetried = false;
5047Retry2:
5048 // 8th pattern.
5049 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5050 // the incoming bits are known to be zero.
5051 // =>
5052 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5053 //
5054 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5055 // we put it here in order to match as many nodes as possible or generate
5056 // fewer instructions.
5057 if (N1.getOpcode() == ISD::AND &&
5058 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5059 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5060 APInt ShMask(ValBits, CNMask->getZExtValue());
5061 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5062 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5063 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5064 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5065 N1->getOperand(0),
5066 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5067 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5068 DAG.getConstant(MaskIdx, DL, GRLenVT));
5069 }
5070 }
5071 // Swap N0/N1 and retry.
5072 if (!SwapAndRetried) {
5073 std::swap(N0, N1);
5074 SwapAndRetried = true;
5075 goto Retry2;
5076 }
5077
5078 return SDValue();
5079}
5080
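// Return true if V carries only 8 or 16 meaningful bits, i.e. it is a narrow
// (i8/i16) load or an AssertSext/AssertZext to i8/i16, and record the
// corresponding load extension type in ExtType.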
5081static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5082 ExtType = ISD::NON_EXTLOAD;
5083
5084 switch (V.getNode()->getOpcode()) {
5085 case ISD::LOAD: {
5086 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5087 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5088 (LoadNode->getMemoryVT() == MVT::i16)) {
5089 ExtType = LoadNode->getExtensionType();
5090 return true;
5091 }
5092 return false;
5093 }
5094 case ISD::AssertSext: {
5095 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5096 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5097 ExtType = ISD::SEXTLOAD;
5098 return true;
5099 }
5100 return false;
5101 }
5102 case ISD::AssertZext: {
5103 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5104 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5105 ExtType = ISD::ZEXTLOAD;
5106 return true;
5107 }
5108 return false;
5109 }
5110 default:
5111 return false;
5112 }
5113
5114 return false;
5115}
5116
5117// Eliminate redundant truncation and zero-extension nodes.
5118// * Case 1:
5119// +------------+ +------------+ +------------+
5120// | Input1 | | Input2 | | CC |
5121// +------------+ +------------+ +------------+
5122// | | |
5123// V V +----+
5124// +------------+ +------------+ |
5125// | TRUNCATE | | TRUNCATE | |
5126// +------------+ +------------+ |
5127// | | |
5128// V V |
5129// +------------+ +------------+ |
5130// | ZERO_EXT | | ZERO_EXT | |
5131// +------------+ +------------+ |
5132// | | |
5133// | +-------------+ |
5134// V V | |
5135// +----------------+ | |
5136// | AND | | |
5137// +----------------+ | |
5138// | | |
5139// +---------------+ | |
5140// | | |
5141// V V V
5142// +-------------+
5143// | CMP |
5144// +-------------+
5145// * Case 2:
5146// +------------+ +------------+ +-------------+ +------------+ +------------+
5147// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5148// +------------+ +------------+ +-------------+ +------------+ +------------+
5149// | | | | |
5150// V | | | |
5151// +------------+ | | | |
5152// | XOR |<---------------------+ | |
5153// +------------+ | | |
5154// | | | |
5155// V V +---------------+ |
5156// +------------+ +------------+ | |
5157// | TRUNCATE | | TRUNCATE | | +-------------------------+
5158// +------------+ +------------+ | |
5159// | | | |
5160// V V | |
5161// +------------+ +------------+ | |
5162// | ZERO_EXT | | ZERO_EXT | | |
5163// +------------+ +------------+ | |
5164// | | | |
5165// V V | |
5166// +----------------+ | |
5167// | AND | | |
5168// +----------------+ | |
5169// | | |
5170// +---------------+ | |
5171// | | |
5172// V V V
5173// +-------------+
5174// | CMP |
5175// +-------------+
5176 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5177                                    TargetLowering::DAGCombinerInfo &DCI,
5178                                    const LoongArchSubtarget &Subtarget) {
5179 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5180
5181 SDNode *AndNode = N->getOperand(0).getNode();
5182 if (AndNode->getOpcode() != ISD::AND)
5183 return SDValue();
5184
5185 SDValue AndInputValue2 = AndNode->getOperand(1);
5186 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5187 return SDValue();
5188
5189 SDValue CmpInputValue = N->getOperand(1);
5190 SDValue AndInputValue1 = AndNode->getOperand(0);
5191 if (AndInputValue1.getOpcode() == ISD::XOR) {
5192 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5193 return SDValue();
5194 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5195 if (!CN || CN->getSExtValue() != -1)
5196 return SDValue();
5197 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5198 if (!CN || CN->getSExtValue() != 0)
5199 return SDValue();
5200 AndInputValue1 = AndInputValue1.getOperand(0);
5201 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5202 return SDValue();
5203 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5204 if (AndInputValue2 != CmpInputValue)
5205 return SDValue();
5206 } else {
5207 return SDValue();
5208 }
5209
5210 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5211 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5212 return SDValue();
5213
5214 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5215 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5216 return SDValue();
5217
5218 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5219 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5220 ISD::LoadExtType ExtType1;
5221 ISD::LoadExtType ExtType2;
5222
5223 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5224 !checkValueWidth(TruncInputValue2, ExtType2))
5225 return SDValue();
5226
5227 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5228 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5229 return SDValue();
5230
5231 if ((ExtType2 != ISD::ZEXTLOAD) &&
5232 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5233 return SDValue();
5234
5235   // These truncation and zero-extension nodes are not necessary; remove them.
5236 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5237 TruncInputValue1, TruncInputValue2);
5238 SDValue NewSetCC =
5239 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5240 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5241 return SDValue(N, 0);
5242}
5243
5244// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5245 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5246                                       TargetLowering::DAGCombinerInfo &DCI,
5247                                       const LoongArchSubtarget &Subtarget) {
5248 if (DCI.isBeforeLegalizeOps())
5249 return SDValue();
5250
5251 SDValue Src = N->getOperand(0);
5252 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5253 return SDValue();
5254
5255 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5256 Src.getOperand(0));
5257}
5258
5259// Perform combines for BR_CC conditions.
5260static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5261 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5262 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5263
5264   // Since an arithmetic right shift always preserves the sign bit, the
5265   // shift can be omitted.
5266 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5267 // setge (sra X, N), 0 -> setge X, 0
5268 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5269 LHS.getOpcode() == ISD::SRA) {
5270 LHS = LHS.getOperand(0);
5271 return true;
5272 }
5273
5274 if (!ISD::isIntEqualitySetCC(CCVal))
5275 return false;
5276
5277 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5278 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5279 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5280 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5281 // If we're looking for eq 0 instead of ne 0, we need to invert the
5282 // condition.
5283 bool Invert = CCVal == ISD::SETEQ;
5284 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5285 if (Invert)
5286 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5287
5288 RHS = LHS.getOperand(1);
5289 LHS = LHS.getOperand(0);
5290 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5291
5292 CC = DAG.getCondCode(CCVal);
5293 return true;
5294 }
5295
5296 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5297 // This can occur when legalizing some floating point comparisons.
5298 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5299 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5300 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5301 CC = DAG.getCondCode(CCVal);
5302 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5303 return true;
5304 }
5305
5306 return false;
5307}
5308
5309 static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
5310                                    TargetLowering::DAGCombinerInfo &DCI,
5311                                    const LoongArchSubtarget &Subtarget) {
5312 SDValue LHS = N->getOperand(1);
5313 SDValue RHS = N->getOperand(2);
5314 SDValue CC = N->getOperand(3);
5315 SDLoc DL(N);
5316
5317 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5318 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5319 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5320
5321 return SDValue();
5322}
5323
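// Check that the intrinsic's immediate operand ImmOp fits in N bits (signed
// or unsigned); if not, emit a diagnostic and return UNDEF, otherwise return
// the immediate as a GRLen-wide constant.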
5324template <unsigned N>
5325 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5326                                        SelectionDAG &DAG,
5327 const LoongArchSubtarget &Subtarget,
5328 bool IsSigned = false) {
5329 SDLoc DL(Node);
5330 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5331 // Check the ImmArg.
5332 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5333 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5334 DAG.getContext()->emitError(Node->getOperationName(0) +
5335 ": argument out of range.");
5336 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5337 }
5338 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5339}
5340
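// Splat the intrinsic's immediate operand ImmOp across the result vector
// type, after checking that it fits in N bits.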
5341template <unsigned N>
5342static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5343 SelectionDAG &DAG, bool IsSigned = false) {
5344 SDLoc DL(Node);
5345 EVT ResTy = Node->getValueType(0);
5346 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5347
5348 // Check the ImmArg.
5349 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5350 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5351 DAG.getContext()->emitError(Node->getOperationName(0) +
5352 ": argument out of range.");
5353 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5354 }
5355 return DAG.getConstant(
5356       APInt(ResTy.getScalarType().getSizeInBits(),
5357             IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5358 DL, ResTy);
5359}
5360
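// Clamp each element of the shift-amount vector (operand 2) to the element
// bit width by masking it with EltBits - 1.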
5361 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5362   SDLoc DL(Node);
5363 EVT ResTy = Node->getValueType(0);
5364 SDValue Vec = Node->getOperand(2);
5365 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5366 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5367}
5368
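// Lower the [x]vbitclr intrinsics: clear bit (Elt % EltBits) in each element
// of operand 1 by ANDing with the inverted one-hot mask.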
5369 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5370   SDLoc DL(Node);
5371 EVT ResTy = Node->getValueType(0);
5372 SDValue One = DAG.getConstant(1, DL, ResTy);
5373 SDValue Bit =
5374 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5375
5376 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5377 DAG.getNOT(DL, Bit, ResTy));
5378}
5379
5380template <unsigned N>
5381 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5382   SDLoc DL(Node);
5383 EVT ResTy = Node->getValueType(0);
5384 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5385 // Check the unsigned ImmArg.
5386 if (!isUInt<N>(CImm->getZExtValue())) {
5387 DAG.getContext()->emitError(Node->getOperationName(0) +
5388 ": argument out of range.");
5389 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5390 }
5391
5392 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5393 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5394
5395 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5396}
5397
5398template <unsigned N>
5399 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5400   SDLoc DL(Node);
5401 EVT ResTy = Node->getValueType(0);
5402 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5403 // Check the unsigned ImmArg.
5404 if (!isUInt<N>(CImm->getZExtValue())) {
5405 DAG.getContext()->emitError(Node->getOperationName(0) +
5406 ": argument out of range.");
5407 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5408 }
5409
5410 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5411 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5412 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5413}
5414
5415template <unsigned N>
5416 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5417   SDLoc DL(Node);
5418 EVT ResTy = Node->getValueType(0);
5419 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5420 // Check the unsigned ImmArg.
5421 if (!isUInt<N>(CImm->getZExtValue())) {
5422 DAG.getContext()->emitError(Node->getOperationName(0) +
5423 ": argument out of range.");
5424 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5425 }
5426
5427 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5428 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5429 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
5430}
5431
5432static SDValue
5433 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
5434                                  TargetLowering::DAGCombinerInfo &DCI,
5435                                  const LoongArchSubtarget &Subtarget) {
5436 SDLoc DL(N);
5437 switch (N->getConstantOperandVal(0)) {
5438 default:
5439 break;
5440 case Intrinsic::loongarch_lsx_vadd_b:
5441 case Intrinsic::loongarch_lsx_vadd_h:
5442 case Intrinsic::loongarch_lsx_vadd_w:
5443 case Intrinsic::loongarch_lsx_vadd_d:
5444 case Intrinsic::loongarch_lasx_xvadd_b:
5445 case Intrinsic::loongarch_lasx_xvadd_h:
5446 case Intrinsic::loongarch_lasx_xvadd_w:
5447 case Intrinsic::loongarch_lasx_xvadd_d:
5448 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5449 N->getOperand(2));
5450 case Intrinsic::loongarch_lsx_vaddi_bu:
5451 case Intrinsic::loongarch_lsx_vaddi_hu:
5452 case Intrinsic::loongarch_lsx_vaddi_wu:
5453 case Intrinsic::loongarch_lsx_vaddi_du:
5454 case Intrinsic::loongarch_lasx_xvaddi_bu:
5455 case Intrinsic::loongarch_lasx_xvaddi_hu:
5456 case Intrinsic::loongarch_lasx_xvaddi_wu:
5457 case Intrinsic::loongarch_lasx_xvaddi_du:
5458 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5459 lowerVectorSplatImm<5>(N, 2, DAG));
5460 case Intrinsic::loongarch_lsx_vsub_b:
5461 case Intrinsic::loongarch_lsx_vsub_h:
5462 case Intrinsic::loongarch_lsx_vsub_w:
5463 case Intrinsic::loongarch_lsx_vsub_d:
5464 case Intrinsic::loongarch_lasx_xvsub_b:
5465 case Intrinsic::loongarch_lasx_xvsub_h:
5466 case Intrinsic::loongarch_lasx_xvsub_w:
5467 case Intrinsic::loongarch_lasx_xvsub_d:
5468 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5469 N->getOperand(2));
5470 case Intrinsic::loongarch_lsx_vsubi_bu:
5471 case Intrinsic::loongarch_lsx_vsubi_hu:
5472 case Intrinsic::loongarch_lsx_vsubi_wu:
5473 case Intrinsic::loongarch_lsx_vsubi_du:
5474 case Intrinsic::loongarch_lasx_xvsubi_bu:
5475 case Intrinsic::loongarch_lasx_xvsubi_hu:
5476 case Intrinsic::loongarch_lasx_xvsubi_wu:
5477 case Intrinsic::loongarch_lasx_xvsubi_du:
5478 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5479 lowerVectorSplatImm<5>(N, 2, DAG));
5480 case Intrinsic::loongarch_lsx_vneg_b:
5481 case Intrinsic::loongarch_lsx_vneg_h:
5482 case Intrinsic::loongarch_lsx_vneg_w:
5483 case Intrinsic::loongarch_lsx_vneg_d:
5484 case Intrinsic::loongarch_lasx_xvneg_b:
5485 case Intrinsic::loongarch_lasx_xvneg_h:
5486 case Intrinsic::loongarch_lasx_xvneg_w:
5487 case Intrinsic::loongarch_lasx_xvneg_d:
5488 return DAG.getNode(
5489 ISD::SUB, DL, N->getValueType(0),
5490 DAG.getConstant(
5491 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
5492 /*isSigned=*/true),
5493 SDLoc(N), N->getValueType(0)),
5494 N->getOperand(1));
5495 case Intrinsic::loongarch_lsx_vmax_b:
5496 case Intrinsic::loongarch_lsx_vmax_h:
5497 case Intrinsic::loongarch_lsx_vmax_w:
5498 case Intrinsic::loongarch_lsx_vmax_d:
5499 case Intrinsic::loongarch_lasx_xvmax_b:
5500 case Intrinsic::loongarch_lasx_xvmax_h:
5501 case Intrinsic::loongarch_lasx_xvmax_w:
5502 case Intrinsic::loongarch_lasx_xvmax_d:
5503 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5504 N->getOperand(2));
5505 case Intrinsic::loongarch_lsx_vmax_bu:
5506 case Intrinsic::loongarch_lsx_vmax_hu:
5507 case Intrinsic::loongarch_lsx_vmax_wu:
5508 case Intrinsic::loongarch_lsx_vmax_du:
5509 case Intrinsic::loongarch_lasx_xvmax_bu:
5510 case Intrinsic::loongarch_lasx_xvmax_hu:
5511 case Intrinsic::loongarch_lasx_xvmax_wu:
5512 case Intrinsic::loongarch_lasx_xvmax_du:
5513 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5514 N->getOperand(2));
5515 case Intrinsic::loongarch_lsx_vmaxi_b:
5516 case Intrinsic::loongarch_lsx_vmaxi_h:
5517 case Intrinsic::loongarch_lsx_vmaxi_w:
5518 case Intrinsic::loongarch_lsx_vmaxi_d:
5519 case Intrinsic::loongarch_lasx_xvmaxi_b:
5520 case Intrinsic::loongarch_lasx_xvmaxi_h:
5521 case Intrinsic::loongarch_lasx_xvmaxi_w:
5522 case Intrinsic::loongarch_lasx_xvmaxi_d:
5523 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5524 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5525 case Intrinsic::loongarch_lsx_vmaxi_bu:
5526 case Intrinsic::loongarch_lsx_vmaxi_hu:
5527 case Intrinsic::loongarch_lsx_vmaxi_wu:
5528 case Intrinsic::loongarch_lsx_vmaxi_du:
5529 case Intrinsic::loongarch_lasx_xvmaxi_bu:
5530 case Intrinsic::loongarch_lasx_xvmaxi_hu:
5531 case Intrinsic::loongarch_lasx_xvmaxi_wu:
5532 case Intrinsic::loongarch_lasx_xvmaxi_du:
5533 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5534 lowerVectorSplatImm<5>(N, 2, DAG));
5535 case Intrinsic::loongarch_lsx_vmin_b:
5536 case Intrinsic::loongarch_lsx_vmin_h:
5537 case Intrinsic::loongarch_lsx_vmin_w:
5538 case Intrinsic::loongarch_lsx_vmin_d:
5539 case Intrinsic::loongarch_lasx_xvmin_b:
5540 case Intrinsic::loongarch_lasx_xvmin_h:
5541 case Intrinsic::loongarch_lasx_xvmin_w:
5542 case Intrinsic::loongarch_lasx_xvmin_d:
5543 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5544 N->getOperand(2));
5545 case Intrinsic::loongarch_lsx_vmin_bu:
5546 case Intrinsic::loongarch_lsx_vmin_hu:
5547 case Intrinsic::loongarch_lsx_vmin_wu:
5548 case Intrinsic::loongarch_lsx_vmin_du:
5549 case Intrinsic::loongarch_lasx_xvmin_bu:
5550 case Intrinsic::loongarch_lasx_xvmin_hu:
5551 case Intrinsic::loongarch_lasx_xvmin_wu:
5552 case Intrinsic::loongarch_lasx_xvmin_du:
5553 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5554 N->getOperand(2));
5555 case Intrinsic::loongarch_lsx_vmini_b:
5556 case Intrinsic::loongarch_lsx_vmini_h:
5557 case Intrinsic::loongarch_lsx_vmini_w:
5558 case Intrinsic::loongarch_lsx_vmini_d:
5559 case Intrinsic::loongarch_lasx_xvmini_b:
5560 case Intrinsic::loongarch_lasx_xvmini_h:
5561 case Intrinsic::loongarch_lasx_xvmini_w:
5562 case Intrinsic::loongarch_lasx_xvmini_d:
5563 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5564 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5565 case Intrinsic::loongarch_lsx_vmini_bu:
5566 case Intrinsic::loongarch_lsx_vmini_hu:
5567 case Intrinsic::loongarch_lsx_vmini_wu:
5568 case Intrinsic::loongarch_lsx_vmini_du:
5569 case Intrinsic::loongarch_lasx_xvmini_bu:
5570 case Intrinsic::loongarch_lasx_xvmini_hu:
5571 case Intrinsic::loongarch_lasx_xvmini_wu:
5572 case Intrinsic::loongarch_lasx_xvmini_du:
5573 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5574 lowerVectorSplatImm<5>(N, 2, DAG));
5575 case Intrinsic::loongarch_lsx_vmul_b:
5576 case Intrinsic::loongarch_lsx_vmul_h:
5577 case Intrinsic::loongarch_lsx_vmul_w:
5578 case Intrinsic::loongarch_lsx_vmul_d:
5579 case Intrinsic::loongarch_lasx_xvmul_b:
5580 case Intrinsic::loongarch_lasx_xvmul_h:
5581 case Intrinsic::loongarch_lasx_xvmul_w:
5582 case Intrinsic::loongarch_lasx_xvmul_d:
5583 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
5584 N->getOperand(2));
5585 case Intrinsic::loongarch_lsx_vmadd_b:
5586 case Intrinsic::loongarch_lsx_vmadd_h:
5587 case Intrinsic::loongarch_lsx_vmadd_w:
5588 case Intrinsic::loongarch_lsx_vmadd_d:
5589 case Intrinsic::loongarch_lasx_xvmadd_b:
5590 case Intrinsic::loongarch_lasx_xvmadd_h:
5591 case Intrinsic::loongarch_lasx_xvmadd_w:
5592 case Intrinsic::loongarch_lasx_xvmadd_d: {
5593 EVT ResTy = N->getValueType(0);
5594 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
5595 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5596 N->getOperand(3)));
5597 }
5598 case Intrinsic::loongarch_lsx_vmsub_b:
5599 case Intrinsic::loongarch_lsx_vmsub_h:
5600 case Intrinsic::loongarch_lsx_vmsub_w:
5601 case Intrinsic::loongarch_lsx_vmsub_d:
5602 case Intrinsic::loongarch_lasx_xvmsub_b:
5603 case Intrinsic::loongarch_lasx_xvmsub_h:
5604 case Intrinsic::loongarch_lasx_xvmsub_w:
5605 case Intrinsic::loongarch_lasx_xvmsub_d: {
5606 EVT ResTy = N->getValueType(0);
5607 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
5608 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5609 N->getOperand(3)));
5610 }
5611 case Intrinsic::loongarch_lsx_vdiv_b:
5612 case Intrinsic::loongarch_lsx_vdiv_h:
5613 case Intrinsic::loongarch_lsx_vdiv_w:
5614 case Intrinsic::loongarch_lsx_vdiv_d:
5615 case Intrinsic::loongarch_lasx_xvdiv_b:
5616 case Intrinsic::loongarch_lasx_xvdiv_h:
5617 case Intrinsic::loongarch_lasx_xvdiv_w:
5618 case Intrinsic::loongarch_lasx_xvdiv_d:
5619 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
5620 N->getOperand(2));
5621 case Intrinsic::loongarch_lsx_vdiv_bu:
5622 case Intrinsic::loongarch_lsx_vdiv_hu:
5623 case Intrinsic::loongarch_lsx_vdiv_wu:
5624 case Intrinsic::loongarch_lsx_vdiv_du:
5625 case Intrinsic::loongarch_lasx_xvdiv_bu:
5626 case Intrinsic::loongarch_lasx_xvdiv_hu:
5627 case Intrinsic::loongarch_lasx_xvdiv_wu:
5628 case Intrinsic::loongarch_lasx_xvdiv_du:
5629 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
5630 N->getOperand(2));
5631 case Intrinsic::loongarch_lsx_vmod_b:
5632 case Intrinsic::loongarch_lsx_vmod_h:
5633 case Intrinsic::loongarch_lsx_vmod_w:
5634 case Intrinsic::loongarch_lsx_vmod_d:
5635 case Intrinsic::loongarch_lasx_xvmod_b:
5636 case Intrinsic::loongarch_lasx_xvmod_h:
5637 case Intrinsic::loongarch_lasx_xvmod_w:
5638 case Intrinsic::loongarch_lasx_xvmod_d:
5639 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
5640 N->getOperand(2));
5641 case Intrinsic::loongarch_lsx_vmod_bu:
5642 case Intrinsic::loongarch_lsx_vmod_hu:
5643 case Intrinsic::loongarch_lsx_vmod_wu:
5644 case Intrinsic::loongarch_lsx_vmod_du:
5645 case Intrinsic::loongarch_lasx_xvmod_bu:
5646 case Intrinsic::loongarch_lasx_xvmod_hu:
5647 case Intrinsic::loongarch_lasx_xvmod_wu:
5648 case Intrinsic::loongarch_lasx_xvmod_du:
5649 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
5650 N->getOperand(2));
5651 case Intrinsic::loongarch_lsx_vand_v:
5652 case Intrinsic::loongarch_lasx_xvand_v:
5653 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5654 N->getOperand(2));
5655 case Intrinsic::loongarch_lsx_vor_v:
5656 case Intrinsic::loongarch_lasx_xvor_v:
5657 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5658 N->getOperand(2));
5659 case Intrinsic::loongarch_lsx_vxor_v:
5660 case Intrinsic::loongarch_lasx_xvxor_v:
5661 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5662 N->getOperand(2));
5663 case Intrinsic::loongarch_lsx_vnor_v:
5664 case Intrinsic::loongarch_lasx_xvnor_v: {
5665 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5666 N->getOperand(2));
5667 return DAG.getNOT(DL, Res, Res->getValueType(0));
5668 }
5669 case Intrinsic::loongarch_lsx_vandi_b:
5670 case Intrinsic::loongarch_lasx_xvandi_b:
5671 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5672 lowerVectorSplatImm<8>(N, 2, DAG));
5673 case Intrinsic::loongarch_lsx_vori_b:
5674 case Intrinsic::loongarch_lasx_xvori_b:
5675 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5676 lowerVectorSplatImm<8>(N, 2, DAG));
5677 case Intrinsic::loongarch_lsx_vxori_b:
5678 case Intrinsic::loongarch_lasx_xvxori_b:
5679 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5680 lowerVectorSplatImm<8>(N, 2, DAG));
5681 case Intrinsic::loongarch_lsx_vsll_b:
5682 case Intrinsic::loongarch_lsx_vsll_h:
5683 case Intrinsic::loongarch_lsx_vsll_w:
5684 case Intrinsic::loongarch_lsx_vsll_d:
5685 case Intrinsic::loongarch_lasx_xvsll_b:
5686 case Intrinsic::loongarch_lasx_xvsll_h:
5687 case Intrinsic::loongarch_lasx_xvsll_w:
5688 case Intrinsic::loongarch_lasx_xvsll_d:
5689 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5690 truncateVecElts(N, DAG));
5691 case Intrinsic::loongarch_lsx_vslli_b:
5692 case Intrinsic::loongarch_lasx_xvslli_b:
5693 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5694 lowerVectorSplatImm<3>(N, 2, DAG));
5695 case Intrinsic::loongarch_lsx_vslli_h:
5696 case Intrinsic::loongarch_lasx_xvslli_h:
5697 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5698 lowerVectorSplatImm<4>(N, 2, DAG));
5699 case Intrinsic::loongarch_lsx_vslli_w:
5700 case Intrinsic::loongarch_lasx_xvslli_w:
5701 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5702 lowerVectorSplatImm<5>(N, 2, DAG));
5703 case Intrinsic::loongarch_lsx_vslli_d:
5704 case Intrinsic::loongarch_lasx_xvslli_d:
5705 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5706 lowerVectorSplatImm<6>(N, 2, DAG));
5707 case Intrinsic::loongarch_lsx_vsrl_b:
5708 case Intrinsic::loongarch_lsx_vsrl_h:
5709 case Intrinsic::loongarch_lsx_vsrl_w:
5710 case Intrinsic::loongarch_lsx_vsrl_d:
5711 case Intrinsic::loongarch_lasx_xvsrl_b:
5712 case Intrinsic::loongarch_lasx_xvsrl_h:
5713 case Intrinsic::loongarch_lasx_xvsrl_w:
5714 case Intrinsic::loongarch_lasx_xvsrl_d:
5715 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5716 truncateVecElts(N, DAG));
5717 case Intrinsic::loongarch_lsx_vsrli_b:
5718 case Intrinsic::loongarch_lasx_xvsrli_b:
5719 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5720 lowerVectorSplatImm<3>(N, 2, DAG));
5721 case Intrinsic::loongarch_lsx_vsrli_h:
5722 case Intrinsic::loongarch_lasx_xvsrli_h:
5723 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5724 lowerVectorSplatImm<4>(N, 2, DAG));
5725 case Intrinsic::loongarch_lsx_vsrli_w:
5726 case Intrinsic::loongarch_lasx_xvsrli_w:
5727 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5728 lowerVectorSplatImm<5>(N, 2, DAG));
5729 case Intrinsic::loongarch_lsx_vsrli_d:
5730 case Intrinsic::loongarch_lasx_xvsrli_d:
5731 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5732 lowerVectorSplatImm<6>(N, 2, DAG));
5733 case Intrinsic::loongarch_lsx_vsra_b:
5734 case Intrinsic::loongarch_lsx_vsra_h:
5735 case Intrinsic::loongarch_lsx_vsra_w:
5736 case Intrinsic::loongarch_lsx_vsra_d:
5737 case Intrinsic::loongarch_lasx_xvsra_b:
5738 case Intrinsic::loongarch_lasx_xvsra_h:
5739 case Intrinsic::loongarch_lasx_xvsra_w:
5740 case Intrinsic::loongarch_lasx_xvsra_d:
5741 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5742 truncateVecElts(N, DAG));
5743 case Intrinsic::loongarch_lsx_vsrai_b:
5744 case Intrinsic::loongarch_lasx_xvsrai_b:
5745 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5746 lowerVectorSplatImm<3>(N, 2, DAG));
5747 case Intrinsic::loongarch_lsx_vsrai_h:
5748 case Intrinsic::loongarch_lasx_xvsrai_h:
5749 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5750 lowerVectorSplatImm<4>(N, 2, DAG));
5751 case Intrinsic::loongarch_lsx_vsrai_w:
5752 case Intrinsic::loongarch_lasx_xvsrai_w:
5753 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5754 lowerVectorSplatImm<5>(N, 2, DAG));
5755 case Intrinsic::loongarch_lsx_vsrai_d:
5756 case Intrinsic::loongarch_lasx_xvsrai_d:
5757 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5758 lowerVectorSplatImm<6>(N, 2, DAG));
5759 case Intrinsic::loongarch_lsx_vclz_b:
5760 case Intrinsic::loongarch_lsx_vclz_h:
5761 case Intrinsic::loongarch_lsx_vclz_w:
5762 case Intrinsic::loongarch_lsx_vclz_d:
5763 case Intrinsic::loongarch_lasx_xvclz_b:
5764 case Intrinsic::loongarch_lasx_xvclz_h:
5765 case Intrinsic::loongarch_lasx_xvclz_w:
5766 case Intrinsic::loongarch_lasx_xvclz_d:
5767 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
5768 case Intrinsic::loongarch_lsx_vpcnt_b:
5769 case Intrinsic::loongarch_lsx_vpcnt_h:
5770 case Intrinsic::loongarch_lsx_vpcnt_w:
5771 case Intrinsic::loongarch_lsx_vpcnt_d:
5772 case Intrinsic::loongarch_lasx_xvpcnt_b:
5773 case Intrinsic::loongarch_lasx_xvpcnt_h:
5774 case Intrinsic::loongarch_lasx_xvpcnt_w:
5775 case Intrinsic::loongarch_lasx_xvpcnt_d:
5776 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
5777 case Intrinsic::loongarch_lsx_vbitclr_b:
5778 case Intrinsic::loongarch_lsx_vbitclr_h:
5779 case Intrinsic::loongarch_lsx_vbitclr_w:
5780 case Intrinsic::loongarch_lsx_vbitclr_d:
5781 case Intrinsic::loongarch_lasx_xvbitclr_b:
5782 case Intrinsic::loongarch_lasx_xvbitclr_h:
5783 case Intrinsic::loongarch_lasx_xvbitclr_w:
5784 case Intrinsic::loongarch_lasx_xvbitclr_d:
5785 return lowerVectorBitClear(N, DAG);
5786 case Intrinsic::loongarch_lsx_vbitclri_b:
5787 case Intrinsic::loongarch_lasx_xvbitclri_b:
5788 return lowerVectorBitClearImm<3>(N, DAG);
5789 case Intrinsic::loongarch_lsx_vbitclri_h:
5790 case Intrinsic::loongarch_lasx_xvbitclri_h:
5791 return lowerVectorBitClearImm<4>(N, DAG);
5792 case Intrinsic::loongarch_lsx_vbitclri_w:
5793 case Intrinsic::loongarch_lasx_xvbitclri_w:
5794 return lowerVectorBitClearImm<5>(N, DAG);
5795 case Intrinsic::loongarch_lsx_vbitclri_d:
5796 case Intrinsic::loongarch_lasx_xvbitclri_d:
5797 return lowerVectorBitClearImm<6>(N, DAG);
5798 case Intrinsic::loongarch_lsx_vbitset_b:
5799 case Intrinsic::loongarch_lsx_vbitset_h:
5800 case Intrinsic::loongarch_lsx_vbitset_w:
5801 case Intrinsic::loongarch_lsx_vbitset_d:
5802 case Intrinsic::loongarch_lasx_xvbitset_b:
5803 case Intrinsic::loongarch_lasx_xvbitset_h:
5804 case Intrinsic::loongarch_lasx_xvbitset_w:
5805 case Intrinsic::loongarch_lasx_xvbitset_d: {
5806 EVT VecTy = N->getValueType(0);
5807 SDValue One = DAG.getConstant(1, DL, VecTy);
5808 return DAG.getNode(
5809 ISD::OR, DL, VecTy, N->getOperand(1),
5810 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
5811 }
5812 case Intrinsic::loongarch_lsx_vbitseti_b:
5813 case Intrinsic::loongarch_lasx_xvbitseti_b:
5814 return lowerVectorBitSetImm<3>(N, DAG);
5815 case Intrinsic::loongarch_lsx_vbitseti_h:
5816 case Intrinsic::loongarch_lasx_xvbitseti_h:
5817 return lowerVectorBitSetImm<4>(N, DAG);
5818 case Intrinsic::loongarch_lsx_vbitseti_w:
5819 case Intrinsic::loongarch_lasx_xvbitseti_w:
5820 return lowerVectorBitSetImm<5>(N, DAG);
5821 case Intrinsic::loongarch_lsx_vbitseti_d:
5822 case Intrinsic::loongarch_lasx_xvbitseti_d:
5823 return lowerVectorBitSetImm<6>(N, DAG);
5824 case Intrinsic::loongarch_lsx_vbitrev_b:
5825 case Intrinsic::loongarch_lsx_vbitrev_h:
5826 case Intrinsic::loongarch_lsx_vbitrev_w:
5827 case Intrinsic::loongarch_lsx_vbitrev_d:
5828 case Intrinsic::loongarch_lasx_xvbitrev_b:
5829 case Intrinsic::loongarch_lasx_xvbitrev_h:
5830 case Intrinsic::loongarch_lasx_xvbitrev_w:
5831 case Intrinsic::loongarch_lasx_xvbitrev_d: {
5832 EVT VecTy = N->getValueType(0);
5833 SDValue One = DAG.getConstant(1, DL, VecTy);
5834 return DAG.getNode(
5835 ISD::XOR, DL, VecTy, N->getOperand(1),
5836 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
5837 }
5838 case Intrinsic::loongarch_lsx_vbitrevi_b:
5839 case Intrinsic::loongarch_lasx_xvbitrevi_b:
5840 return lowerVectorBitRevImm<3>(N, DAG);
5841 case Intrinsic::loongarch_lsx_vbitrevi_h:
5842 case Intrinsic::loongarch_lasx_xvbitrevi_h:
5843 return lowerVectorBitRevImm<4>(N, DAG);
5844 case Intrinsic::loongarch_lsx_vbitrevi_w:
5845 case Intrinsic::loongarch_lasx_xvbitrevi_w:
5846 return lowerVectorBitRevImm<5>(N, DAG);
5847 case Intrinsic::loongarch_lsx_vbitrevi_d:
5848 case Intrinsic::loongarch_lasx_xvbitrevi_d:
5849 return lowerVectorBitRevImm<6>(N, DAG);
5850 case Intrinsic::loongarch_lsx_vfadd_s:
5851 case Intrinsic::loongarch_lsx_vfadd_d:
5852 case Intrinsic::loongarch_lasx_xvfadd_s:
5853 case Intrinsic::loongarch_lasx_xvfadd_d:
5854 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
5855 N->getOperand(2));
5856 case Intrinsic::loongarch_lsx_vfsub_s:
5857 case Intrinsic::loongarch_lsx_vfsub_d:
5858 case Intrinsic::loongarch_lasx_xvfsub_s:
5859 case Intrinsic::loongarch_lasx_xvfsub_d:
5860 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
5861 N->getOperand(2));
5862 case Intrinsic::loongarch_lsx_vfmul_s:
5863 case Intrinsic::loongarch_lsx_vfmul_d:
5864 case Intrinsic::loongarch_lasx_xvfmul_s:
5865 case Intrinsic::loongarch_lasx_xvfmul_d:
5866 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
5867 N->getOperand(2));
5868 case Intrinsic::loongarch_lsx_vfdiv_s:
5869 case Intrinsic::loongarch_lsx_vfdiv_d:
5870 case Intrinsic::loongarch_lasx_xvfdiv_s:
5871 case Intrinsic::loongarch_lasx_xvfdiv_d:
5872 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
5873 N->getOperand(2));
5874 case Intrinsic::loongarch_lsx_vfmadd_s:
5875 case Intrinsic::loongarch_lsx_vfmadd_d:
5876 case Intrinsic::loongarch_lasx_xvfmadd_s:
5877 case Intrinsic::loongarch_lasx_xvfmadd_d:
5878 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
5879 N->getOperand(2), N->getOperand(3));
5880 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
5881 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
5882 N->getOperand(1), N->getOperand(2),
5883 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
5884 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
5885 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
5886 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
5887 N->getOperand(1), N->getOperand(2),
5888 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
5889 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
5890 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
5891 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
5892 N->getOperand(1), N->getOperand(2),
5893 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
5894 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
5895 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
5896 N->getOperand(1), N->getOperand(2),
5897 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
5898 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
5899 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
5900 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
5901 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
5902 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
5903 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
5904 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
5905 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
5906 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
5907 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
5908 N->getOperand(1)));
5909 case Intrinsic::loongarch_lsx_vreplve_b:
5910 case Intrinsic::loongarch_lsx_vreplve_h:
5911 case Intrinsic::loongarch_lsx_vreplve_w:
5912 case Intrinsic::loongarch_lsx_vreplve_d:
5913 case Intrinsic::loongarch_lasx_xvreplve_b:
5914 case Intrinsic::loongarch_lasx_xvreplve_h:
5915 case Intrinsic::loongarch_lasx_xvreplve_w:
5916 case Intrinsic::loongarch_lasx_xvreplve_d:
5917 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
5918 N->getOperand(1),
5919 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
5920 N->getOperand(2)));
5921 }
5922 return SDValue();
5923}
5924
5925 static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
5926                                         TargetLowering::DAGCombinerInfo &DCI,
5927                                         const LoongArchSubtarget &Subtarget) {
5928   // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
5929 // conversion is unnecessary and can be replaced with the
5930 // MOVFR2GR_S_LA64 operand.
5931 SDValue Op0 = N->getOperand(0);
5932   if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
5933     return Op0.getOperand(0);
5934 return SDValue();
5935}
5936
5937 static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
5938                                         TargetLowering::DAGCombinerInfo &DCI,
5939                                         const LoongArchSubtarget &Subtarget) {
5940 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
5941 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
5942 // operand.
5943 SDValue Op0 = N->getOperand(0);
5944   if (Op0.getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
5945     assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
5946 "Unexpected value type!");
5947 return Op0.getOperand(0);
5948 }
5949 return SDValue();
5950}
5951
5952 static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
5953                                      TargetLowering::DAGCombinerInfo &DCI,
5954                                      const LoongArchSubtarget &Subtarget) {
5955 MVT VT = N->getSimpleValueType(0);
5956 unsigned NumBits = VT.getScalarSizeInBits();
5957
5958 // Simplify the inputs.
5959 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5960 APInt DemandedMask(APInt::getAllOnes(NumBits));
5961 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
5962 return SDValue(N, 0);
5963
5964 return SDValue();
5965}
5966
5967static SDValue
5968 performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
5969                              TargetLowering::DAGCombinerInfo &DCI,
5970                              const LoongArchSubtarget &Subtarget) {
5971 SDValue Op0 = N->getOperand(0);
5972 SDLoc DL(N);
5973
5974 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
5975 // redundant. Instead, use BuildPairF64's operands directly.
5976   if (Op0.getOpcode() == LoongArchISD::BUILD_PAIR_F64)
5977     return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
5978
5979 if (Op0->isUndef()) {
5980 SDValue Lo = DAG.getUNDEF(MVT::i32);
5981 SDValue Hi = DAG.getUNDEF(MVT::i32);
5982 return DCI.CombineTo(N, Lo, Hi);
5983 }
5984
5985 // It's cheaper to materialise two 32-bit integers than to load a double
5986 // from the constant pool and transfer it to integer registers through the
5987 // stack.
5988 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
5989 APInt V = C->getValueAPF().bitcastToAPInt();
5990 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
5991 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
5992 return DCI.CombineTo(N, Lo, Hi);
5993 }
5994
5995 return SDValue();
5996}
5997
5998 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
5999                                                    DAGCombinerInfo &DCI) const {
6000 SelectionDAG &DAG = DCI.DAG;
6001 switch (N->getOpcode()) {
6002 default:
6003 break;
6004 case ISD::AND:
6005 return performANDCombine(N, DAG, DCI, Subtarget);
6006 case ISD::OR:
6007 return performORCombine(N, DAG, DCI, Subtarget);
6008 case ISD::SETCC:
6009 return performSETCCCombine(N, DAG, DCI, Subtarget);
6010 case ISD::SRL:
6011 return performSRLCombine(N, DAG, DCI, Subtarget);
6012 case ISD::BITCAST:
6013 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6014   case LoongArchISD::BITREV_W:
6015     return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6016   case LoongArchISD::BR_CC:
6017     return performBR_CCCombine(N, DAG, DCI, Subtarget);
6018   case ISD::INTRINSIC_WO_CHAIN:
6019     return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6020   case LoongArchISD::MOVGR2FR_W_LA64:
6021     return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6022   case LoongArchISD::MOVFR2GR_S_LA64:
6023     return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6024   case LoongArchISD::VMSKLTZ:
6025   case LoongArchISD::XVMSKLTZ:
6026     return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6027   case LoongArchISD::SPLIT_PAIR_F64:
6028     return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6029 }
6030 return SDValue();
6031}
6032
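// When the loongarch-check-zero-division option is enabled, insert a
// 'break 7' (BRK_DIVZERO) trap that fires if the divisor of the div/mod
// instruction is zero.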
6033 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
6034                                               MachineBasicBlock *MBB) {
6035   if (!ZeroDivCheck)
6036 return MBB;
6037
6038 // Build instructions:
6039 // MBB:
6040 // div(or mod) $dst, $dividend, $divisor
6041 // bne $divisor, $zero, SinkMBB
6042 // BreakMBB:
6043 // break 7 // BRK_DIVZERO
6044 // SinkMBB:
6045 // fallthrough
6046 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6047   MachineFunction::iterator It = ++MBB->getIterator();
6048   MachineFunction *MF = MBB->getParent();
6049 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6050 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6051 MF->insert(It, BreakMBB);
6052 MF->insert(It, SinkMBB);
6053
6054 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6055 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6056 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6057
6058 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6059 DebugLoc DL = MI.getDebugLoc();
6060 MachineOperand &Divisor = MI.getOperand(2);
6061 Register DivisorReg = Divisor.getReg();
6062
6063 // MBB:
6064 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6065 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6066 .addReg(LoongArch::R0)
6067 .addMBB(SinkMBB);
6068 MBB->addSuccessor(BreakMBB);
6069 MBB->addSuccessor(SinkMBB);
6070
6071 // BreakMBB:
6072 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
6073 // definition of BRK_DIVZERO.
6074 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6075 BreakMBB->addSuccessor(SinkMBB);
6076
6077 // Clear Divisor's kill flag.
6078 Divisor.setIsKill(false);
6079
6080 return SinkMBB;
6081}
6082
6083static MachineBasicBlock *
6084 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6085                         const LoongArchSubtarget &Subtarget) {
6086 unsigned CondOpc;
6087 switch (MI.getOpcode()) {
6088 default:
6089 llvm_unreachable("Unexpected opcode");
6090 case LoongArch::PseudoVBZ:
6091 CondOpc = LoongArch::VSETEQZ_V;
6092 break;
6093 case LoongArch::PseudoVBZ_B:
6094 CondOpc = LoongArch::VSETANYEQZ_B;
6095 break;
6096 case LoongArch::PseudoVBZ_H:
6097 CondOpc = LoongArch::VSETANYEQZ_H;
6098 break;
6099 case LoongArch::PseudoVBZ_W:
6100 CondOpc = LoongArch::VSETANYEQZ_W;
6101 break;
6102 case LoongArch::PseudoVBZ_D:
6103 CondOpc = LoongArch::VSETANYEQZ_D;
6104 break;
6105 case LoongArch::PseudoVBNZ:
6106 CondOpc = LoongArch::VSETNEZ_V;
6107 break;
6108 case LoongArch::PseudoVBNZ_B:
6109 CondOpc = LoongArch::VSETALLNEZ_B;
6110 break;
6111 case LoongArch::PseudoVBNZ_H:
6112 CondOpc = LoongArch::VSETALLNEZ_H;
6113 break;
6114 case LoongArch::PseudoVBNZ_W:
6115 CondOpc = LoongArch::VSETALLNEZ_W;
6116 break;
6117 case LoongArch::PseudoVBNZ_D:
6118 CondOpc = LoongArch::VSETALLNEZ_D;
6119 break;
6120 case LoongArch::PseudoXVBZ:
6121 CondOpc = LoongArch::XVSETEQZ_V;
6122 break;
6123 case LoongArch::PseudoXVBZ_B:
6124 CondOpc = LoongArch::XVSETANYEQZ_B;
6125 break;
6126 case LoongArch::PseudoXVBZ_H:
6127 CondOpc = LoongArch::XVSETANYEQZ_H;
6128 break;
6129 case LoongArch::PseudoXVBZ_W:
6130 CondOpc = LoongArch::XVSETANYEQZ_W;
6131 break;
6132 case LoongArch::PseudoXVBZ_D:
6133 CondOpc = LoongArch::XVSETANYEQZ_D;
6134 break;
6135 case LoongArch::PseudoXVBNZ:
6136 CondOpc = LoongArch::XVSETNEZ_V;
6137 break;
6138 case LoongArch::PseudoXVBNZ_B:
6139 CondOpc = LoongArch::XVSETALLNEZ_B;
6140 break;
6141 case LoongArch::PseudoXVBNZ_H:
6142 CondOpc = LoongArch::XVSETALLNEZ_H;
6143 break;
6144 case LoongArch::PseudoXVBNZ_W:
6145 CondOpc = LoongArch::XVSETALLNEZ_W;
6146 break;
6147 case LoongArch::PseudoXVBNZ_D:
6148 CondOpc = LoongArch::XVSETALLNEZ_D;
6149 break;
6150 }
6151
6152 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6153 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6154 DebugLoc DL = MI.getDebugLoc();
6155   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6156   MachineFunction::iterator It = ++BB->getIterator();
6157
6158 MachineFunction *F = BB->getParent();
6159 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6160 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6161 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6162
6163 F->insert(It, FalseBB);
6164 F->insert(It, TrueBB);
6165 F->insert(It, SinkBB);
6166
6167 // Transfer the remainder of MBB and its successor edges to Sink.
6168 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6169   SinkBB->transferSuccessorsAndUpdatePHIs(BB);
6170
6171 // Insert the real instruction to BB.
6172 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6173 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6174
6175 // Insert branch.
6176 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6177 BB->addSuccessor(FalseBB);
6178 BB->addSuccessor(TrueBB);
6179
6180 // FalseBB.
6181 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6182 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6183 .addReg(LoongArch::R0)
6184 .addImm(0);
6185 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6186 FalseBB->addSuccessor(SinkBB);
6187
6188 // TrueBB.
6189 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6190 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6191 .addReg(LoongArch::R0)
6192 .addImm(1);
6193 TrueBB->addSuccessor(SinkBB);
6194
6195 // SinkBB: merge the results.
6196 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6197 MI.getOperand(0).getReg())
6198 .addReg(RD1)
6199 .addMBB(FalseBB)
6200 .addReg(RD2)
6201 .addMBB(TrueBB);
6202
6203 // The pseudo instruction is gone now.
6204 MI.eraseFromParent();
6205 return SinkBB;
6206}
6207
6208static MachineBasicBlock *
6209 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
6210                      const LoongArchSubtarget &Subtarget) {
6211 unsigned InsOp;
6212 unsigned BroadcastOp;
6213 unsigned HalfSize;
6214 switch (MI.getOpcode()) {
6215 default:
6216 llvm_unreachable("Unexpected opcode");
6217 case LoongArch::PseudoXVINSGR2VR_B:
6218 HalfSize = 16;
6219 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6220 InsOp = LoongArch::XVEXTRINS_B;
6221 break;
6222 case LoongArch::PseudoXVINSGR2VR_H:
6223 HalfSize = 8;
6224 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6225 InsOp = LoongArch::XVEXTRINS_H;
6226 break;
6227 }
6228 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6229 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6230 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6231 DebugLoc DL = MI.getDebugLoc();
6232   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6233   // XDst = vector_insert XSrc, Elt, Idx
6234 Register XDst = MI.getOperand(0).getReg();
6235 Register XSrc = MI.getOperand(1).getReg();
6236 Register Elt = MI.getOperand(2).getReg();
6237 unsigned Idx = MI.getOperand(3).getImm();
6238
6239 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6240 Idx < HalfSize) {
6241 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6242 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6243
6244 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6245 .addReg(XSrc, 0, LoongArch::sub_128);
6246 BuildMI(*BB, MI, DL,
6247 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
6248 : LoongArch::VINSGR2VR_B),
6249 ScratchSubReg2)
6250 .addReg(ScratchSubReg1)
6251 .addReg(Elt)
6252 .addImm(Idx);
6253
6254 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
6255 .addImm(0)
6256 .addReg(ScratchSubReg2)
6257 .addImm(LoongArch::sub_128);
6258 } else {
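    // Otherwise broadcast the scalar into a 256-bit register, combine it with
    // XSrc using XVPERMI_Q (the immediate selects the low or high 128-bit half
    // depending on Idx), and insert the element into XDst with XVEXTRINS.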
6259 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6260 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6261
6262 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
6263
6264 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
6265 .addReg(ScratchReg1)
6266 .addReg(XSrc)
6267 .addImm(Idx >= HalfSize ? 48 : 18);
6268
6269 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
6270 .addReg(XSrc)
6271 .addReg(ScratchReg2)
6272 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
6273 }
6274
6275 MI.eraseFromParent();
6276 return BB;
6277}
6278
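// Expand the scalar CTPOP pseudo with the LSX population-count instruction:
// insert the GPR into element 0 of a zeroed vector, run VPCNT on it, and move
// the result back to the destination GPR.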
6279 static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
6280                                           MachineBasicBlock *BB,
6281                                           const LoongArchSubtarget &Subtarget) {
6282 assert(Subtarget.hasExtLSX());
6283 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6284 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6285 DebugLoc DL = MI.getDebugLoc();
6286   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6287   Register Dst = MI.getOperand(0).getReg();
6288 Register Src = MI.getOperand(1).getReg();
6289 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6290 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6291 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6292
6293 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6294 BuildMI(*BB, MI, DL,
6295 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6296 : LoongArch::VINSGR2VR_W),
6297 ScratchReg2)
6298 .addReg(ScratchReg1)
6299 .addReg(Src)
6300 .addImm(0);
6301 BuildMI(
6302 *BB, MI, DL,
6303 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6304 ScratchReg3)
6305 .addReg(ScratchReg2);
6306 BuildMI(*BB, MI, DL,
6307 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6308 : LoongArch::VPICKVE2GR_W),
6309 Dst)
6310 .addReg(ScratchReg3)
6311 .addImm(0);
6312
6313 MI.eraseFromParent();
6314 return BB;
6315}
6316
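// Expand the [X]VMSK* pseudos: compute the per-element mask with the matching
// [x]vmsk instruction (inverted through [x]vnor.v for the EQZ variants), then
// copy the resulting mask bits into the destination GPR.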
6317static MachineBasicBlock *
6318 emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
6319                    const LoongArchSubtarget &Subtarget) {
6320 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6321 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6322 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
6323   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6324   Register Dst = MI.getOperand(0).getReg();
6325 Register Src = MI.getOperand(1).getReg();
6326 DebugLoc DL = MI.getDebugLoc();
6327 unsigned EleBits = 8;
6328 unsigned NotOpc = 0;
6329 unsigned MskOpc;
6330
6331 switch (MI.getOpcode()) {
6332 default:
6333 llvm_unreachable("Unexpected opcode");
6334 case LoongArch::PseudoVMSKLTZ_B:
6335 MskOpc = LoongArch::VMSKLTZ_B;
6336 break;
6337 case LoongArch::PseudoVMSKLTZ_H:
6338 MskOpc = LoongArch::VMSKLTZ_H;
6339 EleBits = 16;
6340 break;
6341 case LoongArch::PseudoVMSKLTZ_W:
6342 MskOpc = LoongArch::VMSKLTZ_W;
6343 EleBits = 32;
6344 break;
6345 case LoongArch::PseudoVMSKLTZ_D:
6346 MskOpc = LoongArch::VMSKLTZ_D;
6347 EleBits = 64;
6348 break;
6349 case LoongArch::PseudoVMSKGEZ_B:
6350 MskOpc = LoongArch::VMSKGEZ_B;
6351 break;
6352 case LoongArch::PseudoVMSKEQZ_B:
6353 MskOpc = LoongArch::VMSKNZ_B;
6354 NotOpc = LoongArch::VNOR_V;
6355 break;
6356 case LoongArch::PseudoVMSKNEZ_B:
6357 MskOpc = LoongArch::VMSKNZ_B;
6358 break;
6359 case LoongArch::PseudoXVMSKLTZ_B:
6360 MskOpc = LoongArch::XVMSKLTZ_B;
6361 RC = &LoongArch::LASX256RegClass;
6362 break;
6363 case LoongArch::PseudoXVMSKLTZ_H:
6364 MskOpc = LoongArch::XVMSKLTZ_H;
6365 RC = &LoongArch::LASX256RegClass;
6366 EleBits = 16;
6367 break;
6368 case LoongArch::PseudoXVMSKLTZ_W:
6369 MskOpc = LoongArch::XVMSKLTZ_W;
6370 RC = &LoongArch::LASX256RegClass;
6371 EleBits = 32;
6372 break;
6373 case LoongArch::PseudoXVMSKLTZ_D:
6374 MskOpc = LoongArch::XVMSKLTZ_D;
6375 RC = &LoongArch::LASX256RegClass;
6376 EleBits = 64;
6377 break;
6378 case LoongArch::PseudoXVMSKGEZ_B:
6379 MskOpc = LoongArch::XVMSKGEZ_B;
6380 RC = &LoongArch::LASX256RegClass;
6381 break;
6382 case LoongArch::PseudoXVMSKEQZ_B:
6383 MskOpc = LoongArch::XVMSKNZ_B;
6384 NotOpc = LoongArch::XVNOR_V;
6385 RC = &LoongArch::LASX256RegClass;
6386 break;
6387 case LoongArch::PseudoXVMSKNEZ_B:
6388 MskOpc = LoongArch::XVMSKNZ_B;
6389 RC = &LoongArch::LASX256RegClass;
6390 break;
6391 }
6392
6393 Register Msk = MRI.createVirtualRegister(RC);
6394 if (NotOpc) {
6395 Register Tmp = MRI.createVirtualRegister(RC);
6396 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
6397 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
6398 .addReg(Tmp, RegState::Kill)
6399 .addReg(Tmp, RegState::Kill);
6400 } else {
6401 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
6402 }
6403
6404 if (TRI->getRegSizeInBits(*RC) > 128) {
6405 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6406 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6407 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
6408 .addReg(Msk)
6409 .addImm(0);
6410 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
6411 .addReg(Msk, RegState::Kill)
6412 .addImm(4);
6413 BuildMI(*BB, MI, DL,
6414 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
6415 : LoongArch::BSTRINS_W),
6416 Dst)
6417         .addReg(Lo, RegState::Kill)
6418         .addReg(Hi, RegState::Kill)
6419         .addImm(256 / EleBits - 1)
6420 .addImm(128 / EleBits);
6421 } else {
6422 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
6423 .addReg(Msk, RegState::Kill)
6424 .addImm(0);
6425 }
6426
6427 MI.eraseFromParent();
6428 return BB;
6429}
6430
6431static MachineBasicBlock *
6433 const LoongArchSubtarget &Subtarget) {
6434 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
6435 "Unexpected instruction");
6436
6437 MachineFunction &MF = *BB->getParent();
6438 DebugLoc DL = MI.getDebugLoc();
6439   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6440   Register LoReg = MI.getOperand(0).getReg();
6441 Register HiReg = MI.getOperand(1).getReg();
6442 Register SrcReg = MI.getOperand(2).getReg();
6443
6444 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
6445 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
6446 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
6447 MI.eraseFromParent(); // The pseudo instruction is gone now.
6448 return BB;
6449}
6450
6451static MachineBasicBlock *
6453 const LoongArchSubtarget &Subtarget) {
6454 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
6455 "Unexpected instruction");
6456
6457 MachineFunction &MF = *BB->getParent();
6458 DebugLoc DL = MI.getDebugLoc();
6459   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6460   MachineRegisterInfo &MRI = MF.getRegInfo();
6461   Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
6462 Register DstReg = MI.getOperand(0).getReg();
6463 Register LoReg = MI.getOperand(1).getReg();
6464 Register HiReg = MI.getOperand(2).getReg();
6465
6466 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
6467 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
6468 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
6469 .addReg(TmpReg, RegState::Kill)
6470 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
6471 MI.eraseFromParent(); // The pseudo instruction is gone now.
6472 return BB;
6473}
6474
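// Return true if MI is one of the select pseudo-instructions that
// emitSelectPseudo below expands.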
6475 static bool isSelectPseudo(MachineInstr &MI) {
6476   switch (MI.getOpcode()) {
6477 default:
6478 return false;
6479 case LoongArch::Select_GPR_Using_CC_GPR:
6480 return true;
6481 }
6482}
6483
6484static MachineBasicBlock *
6485 emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6486                  const LoongArchSubtarget &Subtarget) {
6487 // To "insert" Select_* instructions, we actually have to insert the triangle
6488 // control-flow pattern. The incoming instructions know the destination vreg
6489 // to set, the condition code register to branch on, the true/false values to
6490 // select between, and the condcode to use to select the appropriate branch.
6491 //
6492 // We produce the following control flow:
6493 // HeadMBB
6494 // | \
6495 // | IfFalseMBB
6496 // | /
6497 // TailMBB
6498 //
6499 // When we find a sequence of selects we attempt to optimize their emission
6500 // by sharing the control flow. Currently we only handle cases where we have
6501 // multiple selects with the exact same condition (same LHS, RHS and CC).
6502 // The selects may be interleaved with other instructions if the other
6503 // instructions meet some requirements we deem safe:
6504 // - They are not pseudo instructions.
6505   // - They are debug instructions, or otherwise:
6506 // - They do not have side-effects, do not access memory and their inputs do
6507 // not depend on the results of the select pseudo-instructions.
6508 // The TrueV/FalseV operands of the selects cannot depend on the result of
6509 // previous selects in the sequence.
6510 // These conditions could be further relaxed. See the X86 target for a
6511 // related approach and more information.
6512
6513 Register LHS = MI.getOperand(1).getReg();
6514 Register RHS;
6515 if (MI.getOperand(2).isReg())
6516 RHS = MI.getOperand(2).getReg();
6517 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
6518
6519 SmallVector<MachineInstr *, 4> SelectDebugValues;
6520 SmallSet<Register, 4> SelectDests;
6521 SelectDests.insert(MI.getOperand(0).getReg());
6522
6523 MachineInstr *LastSelectPseudo = &MI;
6524 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
6525 SequenceMBBI != E; ++SequenceMBBI) {
6526 if (SequenceMBBI->isDebugInstr())
6527 continue;
6528 if (isSelectPseudo(*SequenceMBBI)) {
6529 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
6530 !SequenceMBBI->getOperand(2).isReg() ||
6531 SequenceMBBI->getOperand(2).getReg() != RHS ||
6532 SequenceMBBI->getOperand(3).getImm() != CC ||
6533 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
6534 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
6535 break;
6536 LastSelectPseudo = &*SequenceMBBI;
6537 SequenceMBBI->collectDebugValues(SelectDebugValues);
6538 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
6539 continue;
6540 }
6541 if (SequenceMBBI->hasUnmodeledSideEffects() ||
6542 SequenceMBBI->mayLoadOrStore() ||
6543 SequenceMBBI->usesCustomInsertionHook())
6544 break;
6545 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
6546 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
6547 }))
6548 break;
6549 }
6550
6551 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
6552 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6553 DebugLoc DL = MI.getDebugLoc();
6554   MachineFunction::iterator I = ++BB->getIterator();
6555
6556 MachineBasicBlock *HeadMBB = BB;
6557 MachineFunction *F = BB->getParent();
6558 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
6559 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
6560
6561 F->insert(I, IfFalseMBB);
6562 F->insert(I, TailMBB);
6563
6564 // Set the call frame size on entry to the new basic blocks.
6565 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
6566 IfFalseMBB->setCallFrameSize(CallFrameSize);
6567 TailMBB->setCallFrameSize(CallFrameSize);
6568
6569 // Transfer debug instructions associated with the selects to TailMBB.
6570 for (MachineInstr *DebugInstr : SelectDebugValues) {
6571 TailMBB->push_back(DebugInstr->removeFromParent());
6572 }
6573
6574 // Move all instructions after the sequence to TailMBB.
6575 TailMBB->splice(TailMBB->end(), HeadMBB,
6576 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
6577 // Update machine-CFG edges by transferring all successors of the current
6578 // block to the new block which will contain the Phi nodes for the selects.
6579 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
6580 // Set the successors for HeadMBB.
6581 HeadMBB->addSuccessor(IfFalseMBB);
6582 HeadMBB->addSuccessor(TailMBB);
6583
6584 // Insert appropriate branch.
6585 if (MI.getOperand(2).isImm())
6586 BuildMI(HeadMBB, DL, TII.get(CC))
6587 .addReg(LHS)
6588 .addImm(MI.getOperand(2).getImm())
6589 .addMBB(TailMBB);
6590 else
6591 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
6592
6593 // IfFalseMBB just falls through to TailMBB.
6594 IfFalseMBB->addSuccessor(TailMBB);
6595
6596 // Create PHIs for all of the select pseudo-instructions.
6597 auto SelectMBBI = MI.getIterator();
6598 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
6599 auto InsertionPoint = TailMBB->begin();
6600 while (SelectMBBI != SelectEnd) {
6601 auto Next = std::next(SelectMBBI);
6602 if (isSelectPseudo(*SelectMBBI)) {
6603 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
6604 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
6605 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
6606 .addReg(SelectMBBI->getOperand(4).getReg())
6607 .addMBB(HeadMBB)
6608 .addReg(SelectMBBI->getOperand(5).getReg())
6609 .addMBB(IfFalseMBB);
6610 SelectMBBI->eraseFromParent();
6611 }
6612 SelectMBBI = Next;
6613 }
6614
6615 F->getProperties().resetNoPHIs();
6616 return TailMBB;
6617}
6618
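// Expand pseudo instructions flagged with usesCustomInserter by dispatching on
// the opcode to the dedicated emission helpers defined above.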
6619MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
6620 MachineInstr &MI, MachineBasicBlock *BB) const {
6621 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6622 DebugLoc DL = MI.getDebugLoc();
6623
6624 switch (MI.getOpcode()) {
6625 default:
6626 llvm_unreachable("Unexpected instr type to insert");
6627 case LoongArch::DIV_W:
6628 case LoongArch::DIV_WU:
6629 case LoongArch::MOD_W:
6630 case LoongArch::MOD_WU:
6631 case LoongArch::DIV_D:
6632 case LoongArch::DIV_DU:
6633 case LoongArch::MOD_D:
6634 case LoongArch::MOD_DU:
6635 return insertDivByZeroTrap(MI, BB);
6636 break;
6637 case LoongArch::WRFCSR: {
6638 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
6639 LoongArch::FCSR0 + MI.getOperand(0).getImm())
6640 .addReg(MI.getOperand(1).getReg());
6641 MI.eraseFromParent();
6642 return BB;
6643 }
6644 case LoongArch::RDFCSR: {
6645 MachineInstr *ReadFCSR =
6646 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
6647 MI.getOperand(0).getReg())
6648 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
6649 ReadFCSR->getOperand(1).setIsUndef();
6650 MI.eraseFromParent();
6651 return BB;
6652 }
6653 case LoongArch::Select_GPR_Using_CC_GPR:
6654 return emitSelectPseudo(MI, BB, Subtarget);
6655 case LoongArch::BuildPairF64Pseudo:
6656 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
6657 case LoongArch::SplitPairF64Pseudo:
6658 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
6659 case LoongArch::PseudoVBZ:
6660 case LoongArch::PseudoVBZ_B:
6661 case LoongArch::PseudoVBZ_H:
6662 case LoongArch::PseudoVBZ_W:
6663 case LoongArch::PseudoVBZ_D:
6664 case LoongArch::PseudoVBNZ:
6665 case LoongArch::PseudoVBNZ_B:
6666 case LoongArch::PseudoVBNZ_H:
6667 case LoongArch::PseudoVBNZ_W:
6668 case LoongArch::PseudoVBNZ_D:
6669 case LoongArch::PseudoXVBZ:
6670 case LoongArch::PseudoXVBZ_B:
6671 case LoongArch::PseudoXVBZ_H:
6672 case LoongArch::PseudoXVBZ_W:
6673 case LoongArch::PseudoXVBZ_D:
6674 case LoongArch::PseudoXVBNZ:
6675 case LoongArch::PseudoXVBNZ_B:
6676 case LoongArch::PseudoXVBNZ_H:
6677 case LoongArch::PseudoXVBNZ_W:
6678 case LoongArch::PseudoXVBNZ_D:
6679 return emitVecCondBranchPseudo(MI, BB, Subtarget);
6680 case LoongArch::PseudoXVINSGR2VR_B:
6681 case LoongArch::PseudoXVINSGR2VR_H:
6682 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
6683 case LoongArch::PseudoCTPOP:
6684 return emitPseudoCTPOP(MI, BB, Subtarget);
6685 case LoongArch::PseudoVMSKLTZ_B:
6686 case LoongArch::PseudoVMSKLTZ_H:
6687 case LoongArch::PseudoVMSKLTZ_W:
6688 case LoongArch::PseudoVMSKLTZ_D:
6689 case LoongArch::PseudoVMSKGEZ_B:
6690 case LoongArch::PseudoVMSKEQZ_B:
6691 case LoongArch::PseudoVMSKNEZ_B:
6692 case LoongArch::PseudoXVMSKLTZ_B:
6693 case LoongArch::PseudoXVMSKLTZ_H:
6694 case LoongArch::PseudoXVMSKLTZ_W:
6695 case LoongArch::PseudoXVMSKLTZ_D:
6696 case LoongArch::PseudoXVMSKGEZ_B:
6697 case LoongArch::PseudoXVMSKEQZ_B:
6698 case LoongArch::PseudoXVMSKNEZ_B:
6699 return emitPseudoVMSKCOND(MI, BB, Subtarget);
6700 case TargetOpcode::STATEPOINT:
6701    // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
6702    // while the bl call instruction (to which the statepoint is lowered in the
6703    // end) has an implicit def. This def is early-clobber as it is set at the
6704    // moment of the call, earlier than any use is read.
6705    // Add this implicit dead def here as a workaround.
6706 MI.addOperand(*MI.getMF(),
6708 LoongArch::R1, /*isDef*/ true,
6709 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
6710 /*isUndef*/ false, /*isEarlyClobber*/ true));
6711 if (!Subtarget.is64Bit())
6712 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
6713 return emitPatchPoint(MI, BB);
6714 }
6715}
6716
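// Misaligned scalar memory accesses are only allowed when the subtarget
// implements the unaligned-access (UAL) feature.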
6718 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
6719 unsigned *Fast) const {
6720 if (!Subtarget.hasUAL())
6721 return false;
6722
6723 // TODO: set reasonable speed number.
6724 if (Fast)
6725 *Fast = 1;
6726 return true;
6727}
6728
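// Return a readable name for each LoongArch-specific SelectionDAG node; used
// when printing or dumping DAGs for debugging.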
6729const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
6730 switch ((LoongArchISD::NodeType)Opcode) {
6732 break;
6733
6734#define NODE_NAME_CASE(node) \
6735 case LoongArchISD::node: \
6736 return "LoongArchISD::" #node;
6737
6738 // TODO: Add more target-dependent nodes later.
6739 NODE_NAME_CASE(CALL)
6740 NODE_NAME_CASE(CALL_MEDIUM)
6741 NODE_NAME_CASE(CALL_LARGE)
6742 NODE_NAME_CASE(RET)
6743 NODE_NAME_CASE(TAIL)
6744 NODE_NAME_CASE(TAIL_MEDIUM)
6745 NODE_NAME_CASE(TAIL_LARGE)
6746 NODE_NAME_CASE(SELECT_CC)
6747 NODE_NAME_CASE(BR_CC)
6748 NODE_NAME_CASE(BRCOND)
6749 NODE_NAME_CASE(SLL_W)
6750 NODE_NAME_CASE(SRA_W)
6751 NODE_NAME_CASE(SRL_W)
6752 NODE_NAME_CASE(BSTRINS)
6753 NODE_NAME_CASE(BSTRPICK)
6754 NODE_NAME_CASE(MOVGR2FR_W_LA64)
6755 NODE_NAME_CASE(MOVFR2GR_S_LA64)
6756 NODE_NAME_CASE(FTINT)
6757 NODE_NAME_CASE(BUILD_PAIR_F64)
6758 NODE_NAME_CASE(SPLIT_PAIR_F64)
6759 NODE_NAME_CASE(REVB_2H)
6760 NODE_NAME_CASE(REVB_2W)
6761 NODE_NAME_CASE(BITREV_4B)
6762 NODE_NAME_CASE(BITREV_8B)
6763 NODE_NAME_CASE(BITREV_W)
6764 NODE_NAME_CASE(ROTR_W)
6765 NODE_NAME_CASE(ROTL_W)
6766 NODE_NAME_CASE(DIV_W)
6767 NODE_NAME_CASE(DIV_WU)
6768 NODE_NAME_CASE(MOD_W)
6769 NODE_NAME_CASE(MOD_WU)
6770 NODE_NAME_CASE(CLZ_W)
6771 NODE_NAME_CASE(CTZ_W)
6772 NODE_NAME_CASE(DBAR)
6773 NODE_NAME_CASE(IBAR)
6774 NODE_NAME_CASE(BREAK)
6775 NODE_NAME_CASE(SYSCALL)
6776 NODE_NAME_CASE(CRC_W_B_W)
6777 NODE_NAME_CASE(CRC_W_H_W)
6778 NODE_NAME_CASE(CRC_W_W_W)
6779 NODE_NAME_CASE(CRC_W_D_W)
6780 NODE_NAME_CASE(CRCC_W_B_W)
6781 NODE_NAME_CASE(CRCC_W_H_W)
6782 NODE_NAME_CASE(CRCC_W_W_W)
6783 NODE_NAME_CASE(CRCC_W_D_W)
6784 NODE_NAME_CASE(CSRRD)
6785 NODE_NAME_CASE(CSRWR)
6786 NODE_NAME_CASE(CSRXCHG)
6787 NODE_NAME_CASE(IOCSRRD_B)
6788 NODE_NAME_CASE(IOCSRRD_H)
6789 NODE_NAME_CASE(IOCSRRD_W)
6790 NODE_NAME_CASE(IOCSRRD_D)
6791 NODE_NAME_CASE(IOCSRWR_B)
6792 NODE_NAME_CASE(IOCSRWR_H)
6793 NODE_NAME_CASE(IOCSRWR_W)
6794 NODE_NAME_CASE(IOCSRWR_D)
6795 NODE_NAME_CASE(CPUCFG)
6796 NODE_NAME_CASE(MOVGR2FCSR)
6797 NODE_NAME_CASE(MOVFCSR2GR)
6798 NODE_NAME_CASE(CACOP_D)
6799 NODE_NAME_CASE(CACOP_W)
6800 NODE_NAME_CASE(VSHUF)
6801 NODE_NAME_CASE(VPICKEV)
6802 NODE_NAME_CASE(VPICKOD)
6803 NODE_NAME_CASE(VPACKEV)
6804 NODE_NAME_CASE(VPACKOD)
6805 NODE_NAME_CASE(VILVL)
6806 NODE_NAME_CASE(VILVH)
6807 NODE_NAME_CASE(VSHUF4I)
6808 NODE_NAME_CASE(VREPLVEI)
6809 NODE_NAME_CASE(VREPLGR2VR)
6810 NODE_NAME_CASE(XVPERMI)
6811 NODE_NAME_CASE(VPICK_SEXT_ELT)
6812 NODE_NAME_CASE(VPICK_ZEXT_ELT)
6813 NODE_NAME_CASE(VREPLVE)
6814 NODE_NAME_CASE(VALL_ZERO)
6815 NODE_NAME_CASE(VANY_ZERO)
6816 NODE_NAME_CASE(VALL_NONZERO)
6817 NODE_NAME_CASE(VANY_NONZERO)
6818 NODE_NAME_CASE(FRECIPE)
6819 NODE_NAME_CASE(FRSQRTE)
6820 NODE_NAME_CASE(VSLLI)
6821 NODE_NAME_CASE(VSRLI)
6822 NODE_NAME_CASE(VBSLL)
6823 NODE_NAME_CASE(VBSRL)
6824 NODE_NAME_CASE(VLDREPL)
6825 NODE_NAME_CASE(VMSKLTZ)
6826 NODE_NAME_CASE(VMSKGEZ)
6827 NODE_NAME_CASE(VMSKEQZ)
6828 NODE_NAME_CASE(VMSKNEZ)
6829 NODE_NAME_CASE(XVMSKLTZ)
6830 NODE_NAME_CASE(XVMSKGEZ)
6831 NODE_NAME_CASE(XVMSKEQZ)
6832 NODE_NAME_CASE(XVMSKNEZ)
6833 NODE_NAME_CASE(VHADDW)
6834 }
6835#undef NODE_NAME_CASE
6836 return nullptr;
6837}
6838
6839//===----------------------------------------------------------------------===//
6840// Calling Convention Implementation
6841//===----------------------------------------------------------------------===//
6842
6843// Eight general-purpose registers a0-a7 are used for passing integer
6844// arguments, with a0-a1 reused to return values. Generally, the GPRs pass
6845// fixed-point arguments, and also floating-point arguments when no FPR is
6846// available or when the soft-float ABI is in use.
6847const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
6848 LoongArch::R7, LoongArch::R8, LoongArch::R9,
6849 LoongArch::R10, LoongArch::R11};
6850// Eight floating-point registers fa0-fa7 are used for passing floating-point
6851// arguments, and fa0-fa1 are also used to return values.
6852const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
6853 LoongArch::F3, LoongArch::F4, LoongArch::F5,
6854 LoongArch::F6, LoongArch::F7};
6855// FPR32 and FPR64 alias each other.
6857 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
6858 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
6859
6860const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
6861 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
6862 LoongArch::VR6, LoongArch::VR7};
6863
6864const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
6865 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
6866 LoongArch::XR6, LoongArch::XR7};
6867
6868// Pass a 2*GRLen argument that has been split into two GRLen values through
6869// registers or the stack as necessary.
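// For example, when only one argument GPR remains, the first half is passed in
// that register and the second half is passed on the stack.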
6870static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
6871 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
6872 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
6873 ISD::ArgFlagsTy ArgFlags2) {
6874 unsigned GRLenInBytes = GRLen / 8;
6875 if (Register Reg = State.AllocateReg(ArgGPRs)) {
6876 // At least one half can be passed via register.
6877 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
6878 VA1.getLocVT(), CCValAssign::Full));
6879 } else {
6880 // Both halves must be passed on the stack, with proper alignment.
6881 Align StackAlign =
6882 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
6883 State.addLoc(
6885 State.AllocateStack(GRLenInBytes, StackAlign),
6886 VA1.getLocVT(), CCValAssign::Full));
6888 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
6889 LocVT2, CCValAssign::Full));
6890 return false;
6891 }
6892 if (Register Reg = State.AllocateReg(ArgGPRs)) {
6893 // The second half can also be passed via register.
6894 State.addLoc(
6895 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
6896 } else {
6897 // The second half is passed via the stack, without additional alignment.
6899 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
6900 LocVT2, CCValAssign::Full));
6901 }
6902 return false;
6903}
6904
6905// Implements the LoongArch calling convention. Returns true upon failure.
6907 unsigned ValNo, MVT ValVT,
6908 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6909 CCState &State, bool IsRet, Type *OrigTy) {
6910 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
6911  assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
6912 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
6913 MVT LocVT = ValVT;
6914
6915 // Any return value split into more than two values can't be returned
6916 // directly.
6917 if (IsRet && ValNo > 1)
6918 return true;
6919
6920  // Pass floating-point values in GPRs when handling a variadic argument or
      // when no FPR is available.
6921 bool UseGPRForFloat = true;
6922
6923 switch (ABI) {
6924 default:
6925 llvm_unreachable("Unexpected ABI");
6926 break;
6931 UseGPRForFloat = ArgFlags.isVarArg();
6932 break;
6935 break;
6936 }
6937
6938 // If this is a variadic argument, the LoongArch calling convention requires
6939 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
6940 // byte alignment. An aligned register should be used regardless of whether
6941 // the original argument was split during legalisation or not. The argument
6942 // will not be passed by registers if the original type is larger than
6943 // 2*GRLen, so the register alignment rule does not apply.
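// For example, if the next free register is a5, it is skipped so that the two
// GRLen halves can start at the aligned pair a6/a7.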
6944 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
6945 if (ArgFlags.isVarArg() &&
6946 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
6947 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
6948 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
6949 // Skip 'odd' register if necessary.
6950 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
6951 State.AllocateReg(ArgGPRs);
6952 }
6953
6954 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
6955 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
6956 State.getPendingArgFlags();
6957
6958 assert(PendingLocs.size() == PendingArgFlags.size() &&
6959 "PendingLocs and PendingArgFlags out of sync");
6960
6961 // FPR32 and FPR64 alias each other.
6962 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
6963 UseGPRForFloat = true;
6964
6965 if (UseGPRForFloat && ValVT == MVT::f32) {
6966 LocVT = GRLenVT;
6967 LocInfo = CCValAssign::BCvt;
6968 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
6969 LocVT = MVT::i64;
6970 LocInfo = CCValAssign::BCvt;
6971 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
6972 // Handle passing f64 on LA32D with a soft float ABI or when floating point
6973 // registers are exhausted.
6974 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
6975    // Depending on the available argument GPRs, f64 may be passed in a pair of
6976 // GPRs, split between a GPR and the stack, or passed completely on the
6977 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
6978 // cases.
6979 MCRegister Reg = State.AllocateReg(ArgGPRs);
6980 if (!Reg) {
6981 int64_t StackOffset = State.AllocateStack(8, Align(8));
6982 State.addLoc(
6983 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
6984 return false;
6985 }
6986 LocVT = MVT::i32;
6987 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
6988 MCRegister HiReg = State.AllocateReg(ArgGPRs);
6989 if (HiReg) {
6990 State.addLoc(
6991 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
6992 } else {
6993 int64_t StackOffset = State.AllocateStack(4, Align(4));
6994 State.addLoc(
6995 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
6996 }
6997 return false;
6998 }
6999
7000 // Split arguments might be passed indirectly, so keep track of the pending
7001 // values.
7002 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7003 LocVT = GRLenVT;
7004 LocInfo = CCValAssign::Indirect;
7005 PendingLocs.push_back(
7006 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7007 PendingArgFlags.push_back(ArgFlags);
7008 if (!ArgFlags.isSplitEnd()) {
7009 return false;
7010 }
7011 }
7012
7013 // If the split argument only had two elements, it should be passed directly
7014 // in registers or on the stack.
7015 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7016 PendingLocs.size() <= 2) {
7017 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7018 // Apply the normal calling convention rules to the first half of the
7019 // split argument.
7020 CCValAssign VA = PendingLocs[0];
7021 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7022 PendingLocs.clear();
7023 PendingArgFlags.clear();
7024 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7025 ArgFlags);
7026 }
7027
7028 // Allocate to a register if possible, or else a stack slot.
7029 Register Reg;
7030 unsigned StoreSizeBytes = GRLen / 8;
7031 Align StackAlign = Align(GRLen / 8);
7032
7033 if (ValVT == MVT::f32 && !UseGPRForFloat)
7034 Reg = State.AllocateReg(ArgFPR32s);
7035 else if (ValVT == MVT::f64 && !UseGPRForFloat)
7036 Reg = State.AllocateReg(ArgFPR64s);
7037 else if (ValVT.is128BitVector())
7038 Reg = State.AllocateReg(ArgVRs);
7039 else if (ValVT.is256BitVector())
7040 Reg = State.AllocateReg(ArgXRs);
7041 else
7042 Reg = State.AllocateReg(ArgGPRs);
7043
7044 unsigned StackOffset =
7045 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7046
7047 // If we reach this point and PendingLocs is non-empty, we must be at the
7048 // end of a split argument that must be passed indirectly.
7049 if (!PendingLocs.empty()) {
7050 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7051 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7052 for (auto &It : PendingLocs) {
7053 if (Reg)
7054 It.convertToReg(Reg);
7055 else
7056 It.convertToMem(StackOffset);
7057 State.addLoc(It);
7058 }
7059 PendingLocs.clear();
7060 PendingArgFlags.clear();
7061 return false;
7062 }
7063 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7064         "Expected a GRLenVT at this stage");
7065
7066 if (Reg) {
7067 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7068 return false;
7069 }
7070
7071 // When a floating-point value is passed on the stack, no bit-cast is needed.
7072 if (ValVT.isFloatingPoint()) {
7073 LocVT = ValVT;
7074 LocInfo = CCValAssign::Full;
7075 }
7076
7077 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7078 return false;
7079}
7080
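// Run the calling-convention assignment function over every incoming argument
// (or returned value when IsRet is true) and abort on any unhandled type.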
7081void LoongArchTargetLowering::analyzeInputArgs(
7082 MachineFunction &MF, CCState &CCInfo,
7083 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7084 LoongArchCCAssignFn Fn) const {
7086 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7087 MVT ArgVT = Ins[i].VT;
7088 Type *ArgTy = nullptr;
7089 if (IsRet)
7090 ArgTy = FType->getReturnType();
7091 else if (Ins[i].isOrigArg())
7092 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7095 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7096 CCInfo, IsRet, ArgTy)) {
7097 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7098 << '\n');
7099 llvm_unreachable("");
7100 }
7101 }
7102}
7103
7104void LoongArchTargetLowering::analyzeOutputArgs(
7105 MachineFunction &MF, CCState &CCInfo,
7106 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7107 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7108 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7109 MVT ArgVT = Outs[i].VT;
7110 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7113 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7114 CCInfo, IsRet, OrigTy)) {
7115 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7116 << "\n");
7117 llvm_unreachable("");
7118 }
7119 }
7120}
7121
7122// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7123// values.
7125 const CCValAssign &VA, const SDLoc &DL) {
7126 switch (VA.getLocInfo()) {
7127 default:
7128 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7129 case CCValAssign::Full:
7131 break;
7132 case CCValAssign::BCvt:
7133 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7134 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7135 else
7136 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7137 break;
7138 }
7139 return Val;
7140}
7141
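// Unpack an argument that was assigned to a register: create a virtual
// register of the appropriate class, mark the physical register live-in, and
// convert the copied value back to its original value type.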
7143 const CCValAssign &VA, const SDLoc &DL,
7144 const ISD::InputArg &In,
7145 const LoongArchTargetLowering &TLI) {
7148 EVT LocVT = VA.getLocVT();
7149 SDValue Val;
7150 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7151 Register VReg = RegInfo.createVirtualRegister(RC);
7152 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7153 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7154
7155 // If input is sign extended from 32 bits, note it for the OptW pass.
7156 if (In.isOrigArg()) {
7157 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7158 if (OrigArg->getType()->isIntegerTy()) {
7159 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7160      // An input zero-extended from a type narrower than i32 can also be
          // considered sign-extended.
7161 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7162 (BitWidth < 32 && In.Flags.isZExt())) {
7165 LAFI->addSExt32Register(VReg);
7166 }
7167 }
7168 }
7169
7170 return convertLocVTToValVT(DAG, Val, VA, DL);
7171}
7172
7173// The caller is responsible for loading the full value if the argument is
7174// passed with CCValAssign::Indirect.
7176 const CCValAssign &VA, const SDLoc &DL) {
7178 MachineFrameInfo &MFI = MF.getFrameInfo();
7179 EVT ValVT = VA.getValVT();
7180 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7181 /*IsImmutable=*/true);
7182 SDValue FIN = DAG.getFrameIndex(
7184
7185 ISD::LoadExtType ExtType;
7186 switch (VA.getLocInfo()) {
7187 default:
7188 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7189 case CCValAssign::Full:
7191 case CCValAssign::BCvt:
7192 ExtType = ISD::NON_EXTLOAD;
7193 break;
7194 }
7195 return DAG.getExtLoad(
7196 ExtType, DL, VA.getLocVT(), Chain, FIN,
7198}
7199
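// Reassemble an f64 argument on LA32 with a soft-float ABI: the first half
// arrives in a GPR and the second half arrives in either another GPR or a
// stack slot; the two halves are joined with BUILD_PAIR_F64.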
7201 const CCValAssign &VA,
7202 const CCValAssign &HiVA,
7203 const SDLoc &DL) {
7204 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7205 "Unexpected VA");
7207 MachineFrameInfo &MFI = MF.getFrameInfo();
7209
7210 assert(VA.isRegLoc() && "Expected register VA assignment");
7211
7212 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7213 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7214 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7215 SDValue Hi;
7216 if (HiVA.isMemLoc()) {
7217 // Second half of f64 is passed on the stack.
7218 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7219 /*IsImmutable=*/true);
7220 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7221 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7223 } else {
7224 // Second half of f64 is passed in another GPR.
7225 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7226 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7227 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7228 }
7229 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7230}
7231
7233 const CCValAssign &VA, const SDLoc &DL) {
7234 EVT LocVT = VA.getLocVT();
7235
7236 switch (VA.getLocInfo()) {
7237 default:
7238 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7239 case CCValAssign::Full:
7240 break;
7241 case CCValAssign::BCvt:
7242 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7243 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7244 else
7245 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7246 break;
7247 }
7248 return Val;
7249}
7250
7251static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7252 CCValAssign::LocInfo LocInfo,
7253 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
7254 CCState &State) {
7255 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7256 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7257 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7258 static const MCPhysReg GPRList[] = {
7259 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7260 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7261 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7262 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7263 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7264 return false;
7265 }
7266 }
7267
7268 if (LocVT == MVT::f32) {
7269 // Pass in STG registers: F1, F2, F3, F4
7270 // fs0,fs1,fs2,fs3
7271 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7272 LoongArch::F26, LoongArch::F27};
7273 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7274 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7275 return false;
7276 }
7277 }
7278
7279 if (LocVT == MVT::f64) {
7280 // Pass in STG registers: D1, D2, D3, D4
7281 // fs4,fs5,fs6,fs7
7282 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7283 LoongArch::F30_64, LoongArch::F31_64};
7284 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7285 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7286 return false;
7287 }
7288 }
7289
7290 report_fatal_error("No registers left in GHC calling convention");
7291 return true;
7292}
7293
7294// Transform physical registers into virtual registers.
7296 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7297 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
7298 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7299
7301
7302 switch (CallConv) {
7303 default:
7304 llvm_unreachable("Unsupported calling convention");
7305 case CallingConv::C:
7306 case CallingConv::Fast:
7308 break;
7309 case CallingConv::GHC:
7310 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
7311 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
7313 "GHC calling convention requires the F and D extensions");
7314 }
7315
7316 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7317 MVT GRLenVT = Subtarget.getGRLenVT();
7318 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
7319  // Used with varargs to accumulate store chains.
7320 std::vector<SDValue> OutChains;
7321
7322 // Assign locations to all of the incoming arguments.
7324 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7325
7326 if (CallConv == CallingConv::GHC)
7328 else
7329 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
7330
7331 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
7332 CCValAssign &VA = ArgLocs[i];
7333 SDValue ArgValue;
7334 // Passing f64 on LA32D with a soft float ABI must be handled as a special
7335 // case.
7336 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7337 assert(VA.needsCustom());
7338 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
7339 } else if (VA.isRegLoc())
7340 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
7341 else
7342 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
7343 if (VA.getLocInfo() == CCValAssign::Indirect) {
7344 // If the original argument was split and passed by reference, we need to
7345 // load all parts of it here (using the same address).
7346 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
7348 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
7349 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
7350 assert(ArgPartOffset == 0);
7351 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
7352 CCValAssign &PartVA = ArgLocs[i + 1];
7353 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
7354 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7355 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
7356 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
7358 ++i;
7359 ++InsIdx;
7360 }
7361 continue;
7362 }
7363 InVals.push_back(ArgValue);
7364 }
7365
7366 if (IsVarArg) {
7368 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
7369 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
7370 MachineFrameInfo &MFI = MF.getFrameInfo();
7371 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7372 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
7373
7374 // Offset of the first variable argument from stack pointer, and size of
7375 // the vararg save area. For now, the varargs save area is either zero or
7376 // large enough to hold a0-a7.
7377 int VaArgOffset, VarArgsSaveSize;
7378
7379 // If all registers are allocated, then all varargs must be passed on the
7380 // stack and we don't need to save any argregs.
7381 if (ArgRegs.size() == Idx) {
7382 VaArgOffset = CCInfo.getStackSize();
7383 VarArgsSaveSize = 0;
7384 } else {
7385 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
7386 VaArgOffset = -VarArgsSaveSize;
7387 }
7388
7389    // Record the frame index of the first variable argument,
7390    // which is needed by VASTART.
7391 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7392 LoongArchFI->setVarArgsFrameIndex(FI);
7393
7394 // If saving an odd number of registers then create an extra stack slot to
7395 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
7396    // offsets to even-numbered registers remain 2*GRLen-aligned.
7397 if (Idx % 2) {
7398 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
7399 true);
7400 VarArgsSaveSize += GRLenInBytes;
7401 }
7402
7403 // Copy the integer registers that may have been used for passing varargs
7404 // to the vararg save area.
7405 for (unsigned I = Idx; I < ArgRegs.size();
7406 ++I, VaArgOffset += GRLenInBytes) {
7407 const Register Reg = RegInfo.createVirtualRegister(RC);
7408 RegInfo.addLiveIn(ArgRegs[I], Reg);
7409 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
7410 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7411 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7412 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
7414 cast<StoreSDNode>(Store.getNode())
7415 ->getMemOperand()
7416 ->setValue((Value *)nullptr);
7417 OutChains.push_back(Store);
7418 }
7419 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
7420 }
7421
7422 // All stores are grouped in one node to allow the matching between
7423 // the size of Ins and InVals. This only happens for vararg functions.
7424 if (!OutChains.empty()) {
7425 OutChains.push_back(Chain);
7426 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
7427 }
7428
7429 return Chain;
7430}
7431
7433 return CI->isTailCall();
7434}
7435
7436// Check that the return value is used only as a return value, as otherwise
7437// we can't perform a tail call.
7439 SDValue &Chain) const {
7440 if (N->getNumValues() != 1)
7441 return false;
7442 if (!N->hasNUsesOfValue(1, 0))
7443 return false;
7444
7445 SDNode *Copy = *N->user_begin();
7446 if (Copy->getOpcode() != ISD::CopyToReg)
7447 return false;
7448
7449 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
7450 // isn't safe to perform a tail call.
7451 if (Copy->getGluedNode())
7452 return false;
7453
7454 // The copy must be used by a LoongArchISD::RET, and nothing else.
7455 bool HasRet = false;
7456 for (SDNode *Node : Copy->users()) {
7457 if (Node->getOpcode() != LoongArchISD::RET)
7458 return false;
7459 HasRet = true;
7460 }
7461
7462 if (!HasRet)
7463 return false;
7464
7465 Chain = Copy->getOperand(0);
7466 return true;
7467}
7468
7469// Check whether the call is eligible for tail call optimization.
7470bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
7471 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7472 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
7473
7474 auto CalleeCC = CLI.CallConv;
7475 auto &Outs = CLI.Outs;
7476 auto &Caller = MF.getFunction();
7477 auto CallerCC = Caller.getCallingConv();
7478
7479 // Do not tail call opt if the stack is used to pass parameters.
7480 if (CCInfo.getStackSize() != 0)
7481 return false;
7482
7483 // Do not tail call opt if any parameters need to be passed indirectly.
7484 for (auto &VA : ArgLocs)
7485 if (VA.getLocInfo() == CCValAssign::Indirect)
7486 return false;
7487
7488 // Do not tail call opt if either caller or callee uses struct return
7489 // semantics.
7490 auto IsCallerStructRet = Caller.hasStructRetAttr();
7491 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7492 if (IsCallerStructRet || IsCalleeStructRet)
7493 return false;
7494
7495 // Do not tail call opt if either the callee or caller has a byval argument.
7496 for (auto &Arg : Outs)
7497 if (Arg.Flags.isByVal())
7498 return false;
7499
7500 // The callee has to preserve all registers the caller needs to preserve.
7501 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7502 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
7503 if (CalleeCC != CallerCC) {
7504 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
7505 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
7506 return false;
7507 }
7508 return true;
7509}
7510
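// Query the preferred alignment of the IR type corresponding to VT from the
// data layout.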
7512 return DAG.getDataLayout().getPrefTypeAlign(
7513 VT.getTypeForEVT(*DAG.getContext()));
7514}
7515
7516// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
7517// and output parameter nodes.
7518SDValue
7520 SmallVectorImpl<SDValue> &InVals) const {
7521 SelectionDAG &DAG = CLI.DAG;
7522 SDLoc &DL = CLI.DL;
7524 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
7526 SDValue Chain = CLI.Chain;
7527 SDValue Callee = CLI.Callee;
7528 CallingConv::ID CallConv = CLI.CallConv;
7529 bool IsVarArg = CLI.IsVarArg;
7530 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7531 MVT GRLenVT = Subtarget.getGRLenVT();
7532 bool &IsTailCall = CLI.IsTailCall;
7533
7535
7536 // Analyze the operands of the call, assigning locations to each operand.
7538 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7539
7540 if (CallConv == CallingConv::GHC)
7541 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
7542 else
7543 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
7544
7545 // Check if it's really possible to do a tail call.
7546 if (IsTailCall)
7547 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
7548
7549 if (IsTailCall)
7550 ++NumTailCalls;
7551 else if (CLI.CB && CLI.CB->isMustTailCall())
7552 report_fatal_error("failed to perform tail call elimination on a call "
7553 "site marked musttail");
7554
7555 // Get a count of how many bytes are to be pushed on the stack.
7556 unsigned NumBytes = ArgCCInfo.getStackSize();
7557
7558 // Create local copies for byval args.
7559 SmallVector<SDValue> ByValArgs;
7560 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7561 ISD::ArgFlagsTy Flags = Outs[i].Flags;
7562 if (!Flags.isByVal())
7563 continue;
7564
7565 SDValue Arg = OutVals[i];
7566 unsigned Size = Flags.getByValSize();
7567 Align Alignment = Flags.getNonZeroByValAlign();
7568
7569 int FI =
7570 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
7571 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7572 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
7573
7574 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
7575 /*IsVolatile=*/false,
7576 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
7578 ByValArgs.push_back(FIPtr);
7579 }
7580
7581 if (!IsTailCall)
7582 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
7583
7584 // Copy argument values to their designated locations.
7586 SmallVector<SDValue> MemOpChains;
7587 SDValue StackPtr;
7588 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
7589 ++i, ++OutIdx) {
7590 CCValAssign &VA = ArgLocs[i];
7591 SDValue ArgValue = OutVals[OutIdx];
7592 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
7593
7594 // Handle passing f64 on LA32D with a soft float ABI as a special case.
7595 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7596 assert(VA.isRegLoc() && "Expected register VA assignment");
7597 assert(VA.needsCustom());
7598 SDValue SplitF64 =
7600 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
7601 SDValue Lo = SplitF64.getValue(0);
7602 SDValue Hi = SplitF64.getValue(1);
7603
7604 Register RegLo = VA.getLocReg();
7605 RegsToPass.push_back(std::make_pair(RegLo, Lo));
7606
7607 // Get the CCValAssign for the Hi part.
7608 CCValAssign &HiVA = ArgLocs[++i];
7609
7610 if (HiVA.isMemLoc()) {
7611 // Second half of f64 is passed on the stack.
7612 if (!StackPtr.getNode())
7613 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7615 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7616 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
7617 // Emit the store.
7618 MemOpChains.push_back(DAG.getStore(
7619 Chain, DL, Hi, Address,
7621 } else {
7622 // Second half of f64 is passed in another GPR.
7623 Register RegHigh = HiVA.getLocReg();
7624 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
7625 }
7626 continue;
7627 }
7628
7629 // Promote the value if needed.
7630 // For now, only handle fully promoted and indirect arguments.
7631 if (VA.getLocInfo() == CCValAssign::Indirect) {
7632 // Store the argument in a stack slot and pass its address.
7633 Align StackAlign =
7634 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
7635 getPrefTypeAlign(ArgValue.getValueType(), DAG));
7636 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
7637 // If the original argument was split and passed by reference, we need to
7638 // store the required parts of it here (and pass just one address).
7639 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
7640 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
7641 assert(ArgPartOffset == 0);
7642 // Calculate the total size to store. We don't have access to what we're
7643 // actually storing other than performing the loop and collecting the
7644 // info.
7646 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
7647 SDValue PartValue = OutVals[OutIdx + 1];
7648 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
7649 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7650 EVT PartVT = PartValue.getValueType();
7651
7652 StoredSize += PartVT.getStoreSize();
7653 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
7654 Parts.push_back(std::make_pair(PartValue, Offset));
7655 ++i;
7656 ++OutIdx;
7657 }
7658 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
7659 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
7660 MemOpChains.push_back(
7661 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
7663 for (const auto &Part : Parts) {
7664 SDValue PartValue = Part.first;
7665 SDValue PartOffset = Part.second;
7667 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
7668 MemOpChains.push_back(
7669 DAG.getStore(Chain, DL, PartValue, Address,
7671 }
7672 ArgValue = SpillSlot;
7673 } else {
7674 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
7675 }
7676
7677 // Use local copy if it is a byval arg.
7678 if (Flags.isByVal())
7679 ArgValue = ByValArgs[j++];
7680
7681 if (VA.isRegLoc()) {
7682 // Queue up the argument copies and emit them at the end.
7683 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
7684 } else {
7685 assert(VA.isMemLoc() && "Argument not register or memory");
7686 assert(!IsTailCall && "Tail call not allowed if stack is used "
7687 "for passing parameters");
7688
7689 // Work out the address of the stack slot.
7690 if (!StackPtr.getNode())
7691 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7693 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7695
7696 // Emit the store.
7697 MemOpChains.push_back(
7698 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
7699 }
7700 }
7701
7702 // Join the stores, which are independent of one another.
7703 if (!MemOpChains.empty())
7704 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
7705
7706 SDValue Glue;
7707
7708 // Build a sequence of copy-to-reg nodes, chained and glued together.
7709 for (auto &Reg : RegsToPass) {
7710 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
7711 Glue = Chain.getValue(1);
7712 }
7713
7714 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
7715 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
7716 // split it and then direct call can be matched by PseudoCALL.
7717 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
7718 const GlobalValue *GV = S->getGlobal();
7719 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
7722 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
7723 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
7724 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
7727 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
7728 }
7729
7730 // The first call operand is the chain and the second is the target address.
7732 Ops.push_back(Chain);
7733 Ops.push_back(Callee);
7734
7735 // Add argument registers to the end of the list so that they are
7736 // known live into the call.
7737 for (auto &Reg : RegsToPass)
7738 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
7739
7740 if (!IsTailCall) {
7741 // Add a register mask operand representing the call-preserved registers.
7742 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
7743 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
7744 assert(Mask && "Missing call preserved mask for calling convention");
7745 Ops.push_back(DAG.getRegisterMask(Mask));
7746 }
7747
7748 // Glue the call to the argument copies, if any.
7749 if (Glue.getNode())
7750 Ops.push_back(Glue);
7751
7752 // Emit the call.
7753 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
7754 unsigned Op;
7755 switch (DAG.getTarget().getCodeModel()) {
7756 default:
7757 report_fatal_error("Unsupported code model");
7758 case CodeModel::Small:
7759 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
7760 break;
7761 case CodeModel::Medium:
7762 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
7764 break;
7765 case CodeModel::Large:
7766 assert(Subtarget.is64Bit() && "Large code model requires LA64");
7768 break;
7769 }
7770
7771 if (IsTailCall) {
7773 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
7774 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
7775 return Ret;
7776 }
7777
7778 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
7779 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
7780 Glue = Chain.getValue(1);
7781
7782 // Mark the end of the call, which is glued to the call itself.
7783 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
7784 Glue = Chain.getValue(1);
7785
7786 // Assign locations to each value returned by this call.
7788 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
7789 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
7790
7791 // Copy all of the result registers out of their specified physreg.
7792 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
7793 auto &VA = RVLocs[i];
7794 // Copy the value out.
7795 SDValue RetValue =
7796 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
7797 // Glue the RetValue to the end of the call sequence.
7798 Chain = RetValue.getValue(1);
7799 Glue = RetValue.getValue(2);
7800
7801 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7802 assert(VA.needsCustom());
7803 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
7804 MVT::i32, Glue);
7805 Chain = RetValue2.getValue(1);
7806 Glue = RetValue2.getValue(2);
7807 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
7808 RetValue, RetValue2);
7809 } else
7810 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
7811
7812 InVals.push_back(RetValue);
7813 }
7814
7815 return Chain;
7816}
7817
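// Return true if every value to be returned can be assigned a location by
// CC_LoongArch, i.e. it fits in the available return registers.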
7819 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
7820 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
7821 const Type *RetTy) const {
7823 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
7824
7825 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7826 LoongArchABI::ABI ABI =
7827 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7828 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
7829 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
7830 return false;
7831 }
7832 return true;
7833}
7834
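// Lower the return of a function: copy each return value into its assigned
// register (splitting f64 across a GPR pair on LA32 with a soft-float ABI) and
// emit a LoongArchISD::RET node.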
7836 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7838 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
7839 SelectionDAG &DAG) const {
7840 // Stores the assignment of the return value to a location.
7842
7843 // Info about the registers and stack slot.
7844 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
7845 *DAG.getContext());
7846
7847 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
7848 nullptr, CC_LoongArch);
7849 if (CallConv == CallingConv::GHC && !RVLocs.empty())
7850 report_fatal_error("GHC functions return void only");
7851 SDValue Glue;
7852 SmallVector<SDValue, 4> RetOps(1, Chain);
7853
7854 // Copy the result values into the output registers.
7855 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
7856 SDValue Val = OutVals[OutIdx];
7857 CCValAssign &VA = RVLocs[i];
7858 assert(VA.isRegLoc() && "Can only return in registers!");
7859
7860 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7861 // Handle returning f64 on LA32D with a soft float ABI.
7862 assert(VA.isRegLoc() && "Expected return via registers");
7863 assert(VA.needsCustom());
7865 DAG.getVTList(MVT::i32, MVT::i32), Val);
7866 SDValue Lo = SplitF64.getValue(0);
7867 SDValue Hi = SplitF64.getValue(1);
7868 Register RegLo = VA.getLocReg();
7869 Register RegHi = RVLocs[++i].getLocReg();
7870
7871 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
7872 Glue = Chain.getValue(1);
7873 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
7874 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
7875 Glue = Chain.getValue(1);
7876 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
7877 } else {
7878 // Handle a 'normal' return.
7879 Val = convertValVTToLocVT(DAG, Val, VA, DL);
7880 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
7881
7882 // Guarantee that all emitted copies are stuck together.
7883 Glue = Chain.getValue(1);
7884 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7885 }
7886 }
7887
7888 RetOps[0] = Chain; // Update chain.
7889
7890 // Add the glue node if we have it.
7891 if (Glue.getNode())
7892 RetOps.push_back(Glue);
7893
7894 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
7895}
7896
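// Return true if the FP immediate can be materialized with a single vldi
// instruction; this requires LSX and only a limited set of bit patterns
// qualifies.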
7898 EVT VT) const {
7899 if (!Subtarget.hasExtLSX())
7900 return false;
7901
7902 if (VT == MVT::f32) {
7903 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
7904 return (masked == 0x3e000000 || masked == 0x40000000);
7905 }
7906
7907 if (VT == MVT::f64) {
7908 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
7909 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
7910 }
7911
7912 return false;
7913}
7914
7915bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
7916 bool ForCodeSize) const {
7917 // TODO: Maybe need more checks here after vector extension is supported.
7918 if (VT == MVT::f32 && !Subtarget.hasBasicF())
7919 return false;
7920 if (VT == MVT::f64 && !Subtarget.hasBasicD())
7921 return false;
7922 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
7923}
7924
7926 return true;
7927}
7928
7930 return true;
7931}
7932
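// Decide whether AtomicExpand should surround an atomic access with explicit
// fence instructions.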
7933bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
7934 const Instruction *I) const {
7935 if (!Subtarget.is64Bit())
7936 return isa<LoadInst>(I) || isa<StoreInst>(I);
7937
7938 if (isa<LoadInst>(I))
7939 return true;
7940
7941 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
7942  // require fences because we can use amswap_db.[w/d].
7943 Type *Ty = I->getOperand(0)->getType();
7944 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
7945 unsigned Size = Ty->getIntegerBitWidth();
7946 return (Size == 8 || Size == 16);
7947 }
7948
7949 return false;
7950}
7951
7953 LLVMContext &Context,
7954 EVT VT) const {
7955 if (!VT.isVector())
7956 return getPointerTy(DL);
7958}
7959
7961 // TODO: Support vectors.
7962 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
7963}
7964
7966 const CallInst &I,
7967 MachineFunction &MF,
7968 unsigned Intrinsic) const {
7969 switch (Intrinsic) {
7970 default:
7971 return false;
7972 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
7973 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
7974 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
7975 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
7977 Info.memVT = MVT::i32;
7978 Info.ptrVal = I.getArgOperand(0);
7979 Info.offset = 0;
7980 Info.align = Align(4);
7983 return true;
7984 // TODO: Add more Intrinsics later.
7985 }
7986}
7987
7988// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8, so
7989// atomicrmw and/or/xor operations with operands narrower than 32 bits cannot
7990// be expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a
7991// regression, implement the expansion manually here.
7994
7996 Op == AtomicRMWInst::And) &&
7997 "Unable to expand");
7998 unsigned MinWordSize = 4;
7999
8000 IRBuilder<> Builder(AI);
8001 LLVMContext &Ctx = Builder.getContext();
8002 const DataLayout &DL = AI->getDataLayout();
8003 Type *ValueType = AI->getType();
8004 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8005
8006 Value *Addr = AI->getPointerOperand();
8007 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8008 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8009
8010 Value *AlignedAddr = Builder.CreateIntrinsic(
8011 Intrinsic::ptrmask, {PtrTy, IntTy},
8012 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8013 "AlignedAddr");
8014
8015 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8016 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8017 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8018 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8019 Value *Mask = Builder.CreateShl(
8020 ConstantInt::get(WordType,
8021 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8022 ShiftAmt, "Mask");
8023 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8024 Value *ValOperand_Shifted =
8025 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8026 ShiftAmt, "ValOperand_Shifted");
8027 Value *NewOperand;
8028 if (Op == AtomicRMWInst::And)
8029 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8030 else
8031 NewOperand = ValOperand_Shifted;
8032
8033 AtomicRMWInst *NewAI =
8034 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8035 AI->getOrdering(), AI->getSyncScopeID());
8036
8037 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8038 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8039 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8040 AI->replaceAllUsesWith(FinalOldResult);
8041 AI->eraseFromParent();
8042}
8043
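// Choose how AtomicExpand should lower each atomicrmw: through a cmpxchg loop,
// through the masked LL/SC intrinsics, or not at all when a native instruction
// can handle it.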
8046 // TODO: Add more AtomicRMWInst that needs to be extended.
8047
8048  // Since a floating-point operation requires a non-trivial set of data
8049  // operations, expand it via CmpXChg.
8050 if (AI->isFloatingPointOperation() ||
8056
8057 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8060 AI->getOperation() == AtomicRMWInst::Sub)) {
8062 }
8063
8064 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8065 if (Subtarget.hasLAMCAS()) {
8066 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8070 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8072 }
8073
8074 if (Size == 8 || Size == 16)
8077}
8078
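// Map an atomicrmw binary operation to the corresponding masked LoongArch
// intrinsic for the given GRLen.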
8079static Intrinsic::ID
8081 AtomicRMWInst::BinOp BinOp) {
8082 if (GRLen == 64) {
8083 switch (BinOp) {
8084 default:
8085 llvm_unreachable("Unexpected AtomicRMW BinOp");
8087 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8088 case AtomicRMWInst::Add:
8089 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8090 case AtomicRMWInst::Sub:
8091 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8093 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8095 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8097 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8098 case AtomicRMWInst::Max:
8099 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8100 case AtomicRMWInst::Min:
8101 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8102 // TODO: support other AtomicRMWInst.
8103 }
8104 }
8105
8106 if (GRLen == 32) {
8107 switch (BinOp) {
8108 default:
8109 llvm_unreachable("Unexpected AtomicRMW BinOp");
8111 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8112 case AtomicRMWInst::Add:
8113 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8114 case AtomicRMWInst::Sub:
8115 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8117 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8119 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8121 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8122 case AtomicRMWInst::Max:
8123 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8124 case AtomicRMWInst::Min:
8125 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8126 // TODO: support other AtomicRMWInst.
8127 }
8128 }
8129
8130 llvm_unreachable("Unexpected GRLen\n");
8131}
8132
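// With LAMCAS no IR-level expansion is needed; otherwise 8- and 16-bit
// cmpxchg is expanded through the masked cmpxchg intrinsic.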
8135 AtomicCmpXchgInst *CI) const {
8136
8137 if (Subtarget.hasLAMCAS())
8139
8141 if (Size == 8 || Size == 16)
8144}
8145
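// Emit a call to the masked cmpxchg intrinsic, sign-extending the operands to
// GRLen on LA64 and truncating the result back to i32.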
8147 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8148 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8149 unsigned GRLen = Subtarget.getGRLen();
8150 AtomicOrdering FailOrd = CI->getFailureOrdering();
8151 Value *FailureOrdering =
8152 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
8153 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
8154 if (GRLen == 64) {
8155 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
8156 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8157 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8158 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8159 }
8160 Type *Tys[] = {AlignedAddr->getType()};
8161 Value *Result = Builder.CreateIntrinsic(
8162 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
8163 if (GRLen == 64)
8164 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8165 return Result;
8166}
8167
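// Emit the masked atomicrmw intrinsic for a sub-word atomic operation, with a
// cheaper AND/OR rewrite for xchg of constant 0 or -1.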
8169 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
8170 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
8171 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
8172 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
8173 // mask, as this produces better code than the LL/SC loop emitted by
8174 // int_loongarch_masked_atomicrmw_xchg.
8175 if (AI->getOperation() == AtomicRMWInst::Xchg &&
8176 isa<ConstantInt>(AI->getValOperand())) {
8177 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
8178 if (CVal->isZero())
8179 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
8180 Builder.CreateNot(Mask, "Inv_Mask"),
8181 AI->getAlign(), Ord);
8182 if (CVal->isMinusOne())
8183 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
8184 AI->getAlign(), Ord);
8185 }
8186
8187 unsigned GRLen = Subtarget.getGRLen();
8188 Value *Ordering =
8189 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
8190 Type *Tys[] = {AlignedAddr->getType()};
8192 AI->getModule(),
8194
8195 if (GRLen == 64) {
8196 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
8197 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8198 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
8199 }
8200
8201 Value *Result;
8202
8203 // Must pass the shift amount needed to sign extend the loaded value prior
8204 // to performing a signed comparison for min/max. ShiftAmt is the number of
8205 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
8206 // is the number of bits to left+right shift the value in order to
8207 // sign-extend.
8208 if (AI->getOperation() == AtomicRMWInst::Min ||
8210 const DataLayout &DL = AI->getDataLayout();
8211 unsigned ValWidth =
8212 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
8213 Value *SextShamt =
8214 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
8215 Result = Builder.CreateCall(LlwOpScwLoop,
8216 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
8217 } else {
8218 Result =
8219 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
8220 }
8221
8222 if (GRLen == 64)
8223 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8224 return Result;
8225}
8226
8228 const MachineFunction &MF, EVT VT) const {
8229 VT = VT.getScalarType();
8230
8231 if (!VT.isSimple())
8232 return false;
8233
8234 switch (VT.getSimpleVT().SimpleTy) {
8235 case MVT::f32:
8236 case MVT::f64:
8237 return true;
8238 default:
8239 break;
8240 }
8241
8242 return false;
8243}
8244
8245Register LoongArchTargetLowering::getExceptionPointerRegister(
8246 const Constant *PersonalityFn) const {
8247 return LoongArch::R4;
8248}
8249
8250Register LoongArchTargetLowering::getExceptionSelectorRegister(
8251 const Constant *PersonalityFn) const {
8252 return LoongArch::R5;
8253}
8254
8255//===----------------------------------------------------------------------===//
8256// Target Optimization Hooks
8257//===----------------------------------------------------------------------===//
8258
8259static int getEstimateRefinementSteps(EVT VT,
8260 const LoongArchSubtarget &Subtarget) {
8261 // Instructions from the FRECIPE feature have a relative accuracy of 2^-14.
8262 // An IEEE float has a 23-bit mantissa and a double has a 52-bit mantissa.
8263 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
8264 return RefinementSteps;
8265}
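A quick sanity check of the step counts chosen above (a sketch based only on the comment's 2^-14 figure; each Newton-Raphson iteration roughly doubles the number of accurate bits):

// Illustrative only: steps needed to refine a 2^-14-accurate estimate.
constexpr int stepsNeeded(int TargetBits, int EstimateBits = 14) {
  int Steps = 0;
  for (int Bits = EstimateBits; Bits < TargetBits; Bits *= 2)
    ++Steps;
  return Steps;
}
static_assert(stepsNeeded(24) == 1, "f32: 23 mantissa bits + hidden bit");
static_assert(stepsNeeded(53) == 2, "f64: 52 mantissa bits + hidden bit");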
8266
8267SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
8268 SelectionDAG &DAG, int Enabled,
8269 int &RefinementSteps,
8270 bool &UseOneConstNR,
8271 bool Reciprocal) const {
8272 if (Subtarget.hasFrecipe()) {
8273 SDLoc DL(Operand);
8274 EVT VT = Operand.getValueType();
8275
8276 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8277 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8278 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8279 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8280 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8281
8282 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8283 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8284
8285 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
8286 if (Reciprocal)
8287 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
8288
8289 return Estimate;
8290 }
8291 }
8292
8293 return SDValue();
8294}
8295
8296SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
8297 SelectionDAG &DAG,
8298 int Enabled,
8299 int &RefinementSteps) const {
8300 if (Subtarget.hasFrecipe()) {
8301 SDLoc DL(Operand);
8302 EVT VT = Operand.getValueType();
8303
8304 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8305 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8306 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8307 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8308 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8309
8310 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8311 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8312
8313 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
8314 }
8315 }
8316
8317 return SDValue();
8318}
8319
8320//===----------------------------------------------------------------------===//
8321// LoongArch Inline Assembly Support
8322//===----------------------------------------------------------------------===//
8323
8324LoongArchTargetLowering::ConstraintType
8325LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
8326 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
8327 //
8328 // 'f': A floating-point register (if available).
8329 // 'k': A memory operand whose address is formed by a base register and
8330 // (optionally scaled) index register.
8331 // 'l': A signed 16-bit constant.
8332 // 'm': A memory operand whose address is formed by a base register and
8333 // offset that is suitable for use in instructions with the same
8334 // addressing mode as st.w and ld.w.
8335 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
8336 // instruction)
8337 // 'I': A signed 12-bit constant (for arithmetic instructions).
8338 // 'J': Integer zero.
8339 // 'K': An unsigned 12-bit constant (for logic instructions).
8340 // "ZB": An address that is held in a general-purpose register. The offset is
8341 // zero.
8342 // "ZC": A memory operand whose address is formed by a base register and
8343 // offset that is suitable for use in instructions with the same
8344 // addressing mode as ll.w and sc.w.
8345 if (Constraint.size() == 1) {
8346 switch (Constraint[0]) {
8347 default:
8348 break;
8349 case 'f':
8350 case 'q':
8351 return C_RegisterClass;
8352 case 'l':
8353 case 'I':
8354 case 'J':
8355 case 'K':
8356 return C_Immediate;
8357 case 'k':
8358 return C_Memory;
8359 }
8360 }
8361
8362 if (Constraint == "ZC" || Constraint == "ZB")
8363 return C_Memory;
8364
8365 // 'm' is handled here.
8366 return TargetLowering::getConstraintType(Constraint);
8367}
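For illustration, GNU inline asm that exercises a few of the constraints documented above (a sketch; the particular instructions and values are assumptions, not taken from this file):

// 'f' selects a floating-point register, 'r' a general-purpose register and
// 'I' a signed 12-bit immediate.
double squareIt(double X) {
  double Res;
  asm("fmul.d %0, %1, %1" : "=f"(Res) : "f"(X));
  return Res;
}

long addSmallImm(long X) {
  long Res;
  asm("addi.d %0, %1, %2" : "=r"(Res) : "r"(X), "I"(100));
  return Res;
}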
8368
8369InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
8370 StringRef ConstraintCode) const {
8371 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
8372 .Case("k", InlineAsm::ConstraintCode::k)
8373 .Case("ZB", InlineAsm::ConstraintCode::ZB)
8374 .Case("ZC", InlineAsm::ConstraintCode::ZC)
8375 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
8376}
8377
8378std::pair<unsigned, const TargetRegisterClass *>
8379LoongArchTargetLowering::getRegForInlineAsmConstraint(
8380 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8381 // First, see if this is a constraint that directly corresponds to a LoongArch
8382 // register class.
8383 if (Constraint.size() == 1) {
8384 switch (Constraint[0]) {
8385 case 'r':
8386 // TODO: Support fixed vectors up to GRLen?
8387 if (VT.isVector())
8388 break;
8389 return std::make_pair(0U, &LoongArch::GPRRegClass);
8390 case 'q':
8391 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
8392 case 'f':
8393 if (Subtarget.hasBasicF() && VT == MVT::f32)
8394 return std::make_pair(0U, &LoongArch::FPR32RegClass);
8395 if (Subtarget.hasBasicD() && VT == MVT::f64)
8396 return std::make_pair(0U, &LoongArch::FPR64RegClass);
8397 if (Subtarget.hasExtLSX() &&
8398 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
8399 return std::make_pair(0U, &LoongArch::LSX128RegClass);
8400 if (Subtarget.hasExtLASX() &&
8401 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
8402 return std::make_pair(0U, &LoongArch::LASX256RegClass);
8403 break;
8404 default:
8405 break;
8406 }
8407 }
8408
8409 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
8410 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
8411 // constraints while the official register name is prefixed with a '$'. So we
8412 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
8413 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
8414 // case insensitive, so no need to convert the constraint to upper case here.
8415 //
8416 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
8417 // decode the usage of register name aliases into their official names. And
8418 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
8419 // official register names.
8420 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
8421 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
8422 bool IsFP = Constraint[2] == 'f';
8423 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
8424 std::pair<unsigned, const TargetRegisterClass *> R;
8425 R = TargetLowering::getRegForInlineAsmConstraint(
8426 TRI, join_items("", Temp.first, Temp.second), VT);
8427 // Match those names to the widest floating point register type available.
8428 if (IsFP) {
8429 unsigned RegNo = R.first;
8430 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
8431 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
8432 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
8433 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
8434 }
8435 }
8436 }
8437 return R;
8438 }
8439
8440 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8441}
8442
8443void LoongArchTargetLowering::LowerAsmOperandForConstraint(
8444 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
8445 SelectionDAG &DAG) const {
8446 // Currently only support length 1 constraints.
8447 if (Constraint.size() == 1) {
8448 switch (Constraint[0]) {
8449 case 'l':
8450 // Validate & create a 16-bit signed immediate operand.
8451 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8452 uint64_t CVal = C->getSExtValue();
8453 if (isInt<16>(CVal))
8454 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8455 Subtarget.getGRLenVT()));
8456 }
8457 return;
8458 case 'I':
8459 // Validate & create a 12-bit signed immediate operand.
8460 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8461 uint64_t CVal = C->getSExtValue();
8462 if (isInt<12>(CVal))
8463 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8464 Subtarget.getGRLenVT()));
8465 }
8466 return;
8467 case 'J':
8468 // Validate & create an integer zero operand.
8469 if (auto *C = dyn_cast<ConstantSDNode>(Op))
8470 if (C->getZExtValue() == 0)
8471 Ops.push_back(
8472 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
8473 return;
8474 case 'K':
8475 // Validate & create a 12-bit unsigned immediate operand.
8476 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8477 uint64_t CVal = C->getZExtValue();
8478 if (isUInt<12>(CVal))
8479 Ops.push_back(
8480 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
8481 }
8482 return;
8483 default:
8484 break;
8485 }
8486 }
8487 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8488}
8489
8490#define GET_REGISTER_MATCHER
8491#include "LoongArchGenAsmMatcher.inc"
8492
8493Register
8494LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
8495 const MachineFunction &MF) const {
8496 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
8497 std::string NewRegName = Name.second.str();
8498 Register Reg = MatchRegisterAltName(NewRegName);
8499 if (!Reg)
8500 Reg = MatchRegisterName(NewRegName);
8501 if (!Reg)
8502 return Reg;
8503 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
8504 if (!ReservedRegs.test(Reg))
8505 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
8506 StringRef(RegName) + "\"."));
8507 return Reg;
8508}
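The lookup above only succeeds for reserved registers. One way such a query is usually reached from source (an assumption about the front end, not something this file defines) is a global named-register variable, which is lowered to llvm.read_register and ends up consulting this hook:

// Hypothetical sketch, assuming the compiler accepts a global register variable
// bound to the reserved stack pointer on this target; a non-reserved name would
// hit the report_fatal_error path above.
register char *StackPointer __asm__("$sp");

char *currentStackPointer() { return StackPointer; }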
8509
8510bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
8511 EVT VT, SDValue C) const {
8512 // TODO: Support vectors.
8513 if (!VT.isScalarInteger())
8514 return false;
8515
8516 // Omit the optimization if the data size exceeds GRLen.
8517 if (VT.getSizeInBits() > Subtarget.getGRLen())
8518 return false;
8519
8520 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
8521 const APInt &Imm = ConstNode->getAPIntValue();
8522 // Break MUL into (SLLI + ADD/SUB) or ALSL.
8523 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
8524 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
8525 return true;
8526 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
8527 if (ConstNode->hasOneUse() &&
8528 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
8529 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
8530 return true;
8531 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
8532 // in which the immediate has two set bits. Or Break (MUL x, imm)
8533 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
8534 // equals (1 << s0) - (1 << s1).
8535 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
8536 unsigned Shifts = Imm.countr_zero();
8537 // Reject immediates which can be composed via a single LUI.
8538 if (Shifts >= 12)
8539 return false;
8540 // Reject multiplications that can be optimized to
8541 // (SLLI (ALSL x, x, 1/2/3/4), s).
8542 APInt ImmPop = Imm.ashr(Shifts);
8543 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
8544 return false;
8545 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
8546 // since it needs one more instruction than other 3 cases.
8547 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
8548 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
8549 (ImmSmall - Imm).isPowerOf2())
8550 return true;
8551 }
8552 }
8553
8554 return false;
8555}
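A couple of concrete instances of the decompositions this hook accepts (hand-expanded for illustration; the actual instruction selection is decided elsewhere):

#include <cstdint>

// x * 17: (Imm - 1) is a power of two, so one shift plus one add suffices.
constexpr int64_t mulBy17(int64_t X) { return (X << 4) + X; }
// x * 6: the immediate is 4 + 2, i.e. two set bits, so two shifts and one add.
constexpr int64_t mulBy6(int64_t X) { return (X << 2) + (X << 1); }

static_assert(mulBy17(3) == 51, "matches 3 * 17");
static_assert(mulBy6(7) == 42, "matches 7 * 6");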
8556
8557bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
8558 const AddrMode &AM,
8559 Type *Ty, unsigned AS,
8560 Instruction *I) const {
8561 // LoongArch has four basic addressing modes:
8562 // 1. reg
8563 // 2. reg + 12-bit signed offset
8564 // 3. reg + 14-bit signed offset left-shifted by 2
8565 // 4. reg1 + reg2
8566 // TODO: Add more checks after support vector extension.
8567
8568 // No global is ever allowed as a base.
8569 if (AM.BaseGV)
8570 return false;
8571
8572 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
8573 // with `UAL` feature.
8574 if (!isInt<12>(AM.BaseOffs) &&
8575 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
8576 return false;
8577
8578 switch (AM.Scale) {
8579 case 0:
8580 // "r+i" or just "i", depending on HasBaseReg.
8581 break;
8582 case 1:
8583 // "r+r+i" is not allowed.
8584 if (AM.HasBaseReg && AM.BaseOffs)
8585 return false;
8586 // Otherwise we have "r+r" or "r+i".
8587 break;
8588 case 2:
8589 // "2*r+r" or "2*r+i" is not allowed.
8590 if (AM.HasBaseReg || AM.BaseOffs)
8591 return false;
8592 // Allow "2*r" as "r+r".
8593 break;
8594 default:
8595 return false;
8596 }
8597
8598 return true;
8599}
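A small sketch of the two immediate-offset forms accepted above, re-implemented by hand for illustration (the helper names are made up):

#include <cstdint>

// reg + simm12: any offset in [-2048, 2047].
constexpr bool fitsSImm12(int64_t Off) { return Off >= -2048 && Off <= 2047; }
// reg + simm14 << 2: a multiple of 4 in [-32768, 32764] (the UAL-gated form).
constexpr bool fitsSImm14Lsl2(int64_t Off) {
  return (Off % 4) == 0 && Off >= -32768 && Off <= 32764;
}

static_assert(fitsSImm12(2047) && !fitsSImm12(2048), "");
static_assert(fitsSImm14Lsl2(32764) && !fitsSImm14Lsl2(2), "");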
8600
8601bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
8602 return isInt<12>(Imm);
8603}
8604
8605bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
8606 return isInt<12>(Imm);
8607}
8608
8609bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
8610 // Zexts are free if they can be combined with a load.
8611 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
8612 // poorly with type legalization of compares preferring sext.
8613 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
8614 EVT MemVT = LD->getMemoryVT();
8615 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
8616 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
8617 LD->getExtensionType() == ISD::ZEXTLOAD))
8618 return true;
8619 }
8620
8621 return TargetLowering::isZExtFree(Val, VT2);
8622}
8623
8624bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
8625 EVT DstVT) const {
8626 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
8627}
8628
8629bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
8630 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
8631}
8632
8633bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
8634 // TODO: Support vectors.
8635 if (Y.getValueType().isVector())
8636 return false;
8637
8638 return !isa<ConstantSDNode>(Y);
8639}
8640
8641ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
8642 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
8643 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
8644}
8645
8646bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
8647 Type *Ty, bool IsSigned) const {
8648 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
8649 return true;
8650
8651 return IsSigned;
8652}
8653
8654bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
8655 // Return false to suppress the unnecessary extensions if the LibCall
8656 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
8657 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
8658 Type.getSizeInBits() < Subtarget.getGRLen()))
8659 return false;
8660 return true;
8661}
8662
8663// memcpy, and other memory intrinsics, typically try to use a wider load/store
8664// if the source/dest is aligned and the copy size is large enough. We therefore
8665// want to align such objects passed to memory intrinsics.
8666bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
8667 unsigned &MinSize,
8668 Align &PrefAlign) const {
8669 if (!isa<MemIntrinsic>(CI))
8670 return false;
8671
8672 if (Subtarget.is64Bit()) {
8673 MinSize = 8;
8674 PrefAlign = Align(8);
8675 } else {
8676 MinSize = 4;
8677 PrefAlign = Align(4);
8678 }
8679
8680 return true;
8681}
8682
8683TargetLoweringBase::LegalizeTypeAction
8684LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
8685 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
8686 VT.getVectorElementType() != MVT::i1)
8687 return TypeWidenVector;
8688
8689 return TargetLoweringBase::getPreferredVectorAction(VT);
8690}
8691
8692bool LoongArchTargetLowering::splitValueIntoRegisterParts(
8693 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
8694 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
8695 bool IsABIRegCopy = CC.has_value();
8696 EVT ValueVT = Val.getValueType();
8697
8698 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
8699 PartVT == MVT::f32) {
8700 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
8701 // nan, and cast to f32.
8702 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
8703 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
8704 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
8705 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
8706 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
8707 Parts[0] = Val;
8708 return true;
8709 }
8710
8711 return false;
8712}
8713
8714SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
8715 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
8716 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
8717 bool IsABIRegCopy = CC.has_value();
8718
8719 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
8720 PartVT == MVT::f32) {
8721 SDValue Val = Parts[0];
8722
8723 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
8724 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
8725 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
8726 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
8727 return Val;
8728 }
8729
8730 return SDValue();
8731}
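A scalar model of the NaN-boxing used by the two hooks above (an illustration in plain bit manipulation rather than DAG nodes): the 16-bit [b]f16 payload occupies the low half of an f32 whose upper 16 bits are all ones, which is always a NaN encoding, and unboxing simply truncates the bits again.

#include <cstdint>
#include <cstring>

// Box: i16 payload -> f32 bit pattern with the high 16 bits set.
float boxHalfBits(uint16_t HalfBits) {
  uint32_t Bits = 0xFFFF0000u | HalfBits;
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

// Unbox: take the low 16 bits back out, mirroring the TRUNCATE above.
uint16_t unboxHalfBits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return static_cast<uint16_t>(Bits);
}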
8732
8733MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
8734 CallingConv::ID CC,
8735 EVT VT) const {
8736 // Use f32 to pass f16.
8737 if (VT == MVT::f16 && Subtarget.hasBasicF())
8738 return MVT::f32;
8739
8740 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
8741}
8742
8743unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
8744 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
8745 // Use f32 to pass f16.
8746 if (VT == MVT::f16 && Subtarget.hasBasicF())
8747 return 1;
8748
8749 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
8750}
8751
8752bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
8753 SDValue Op, const APInt &OriginalDemandedBits,
8754 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
8755 unsigned Depth) const {
8756 EVT VT = Op.getValueType();
8757 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
8758 unsigned Opc = Op.getOpcode();
8759 switch (Opc) {
8760 default:
8761 break;
8762 case LoongArchISD::VMSKLTZ:
8763 case LoongArchISD::XVMSKLTZ: {
8764 SDValue Src = Op.getOperand(0);
8765 MVT SrcVT = Src.getSimpleValueType();
8766 unsigned SrcBits = SrcVT.getScalarSizeInBits();
8767 unsigned NumElts = SrcVT.getVectorNumElements();
8768
8769 // If we don't need the sign bits at all just return zero.
8770 if (OriginalDemandedBits.countr_zero() >= NumElts)
8771 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
8772
8773 // Only demand the vector elements of the sign bits we need.
8774 APInt KnownUndef, KnownZero;
8775 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
8776 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
8777 TLO, Depth + 1))
8778 return true;
8779
8780 Known.Zero = KnownZero.zext(BitWidth);
8781 Known.Zero.setHighBits(BitWidth - NumElts);
8782
8783 // [X]VMSKLTZ only uses the MSB from each vector element.
8784 KnownBits KnownSrc;
8785 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
8786 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
8787 Depth + 1))
8788 return true;
8789
8790 if (KnownSrc.One[SrcBits - 1])
8791 Known.One.setLowBits(NumElts);
8792 else if (KnownSrc.Zero[SrcBits - 1])
8793 Known.Zero.setLowBits(NumElts);
8794
8795 // Attempt to avoid multi-use ops if we don't need anything from it.
8796 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
8797 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
8798 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
8799 return false;
8800 }
8801 }
8802
8803 return TargetLowering::SimplifyDemandedBitsForTargetNode(
8804 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
8805}
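To make the demanded-bits reasoning above concrete, a scalar model of what VMSKLTZ.B computes (illustrative only, not the backend's definition of the node): bit i of the result is the sign bit of element i, so only each element's MSB and the low NumElts bits of the result ever matter.

#include <cstdint>

// Scalar model of VMSKLTZ.B over a 16 x i8 vector.
uint32_t vmskltzByteModel(const int8_t (&Elts)[16]) {
  uint32_t Mask = 0;
  for (int I = 0; I < 16; ++I)
    Mask |= static_cast<uint32_t>(Elts[I] < 0) << I;
  return Mask;
}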
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
const MCPhysReg ArgFPR32s[]
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static void canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Register const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1391
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1033
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1639
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1388
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:191
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:506
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Definition: Instructions.h:599
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:709
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:843
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:721
@ Add
*p = old + v
Definition: Instructions.h:725
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:777
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:739
@ Or
*p = old | v
Definition: Instructions.h:733
@ Sub
*p = old - v
Definition: Instructions.h:727
@ And
*p = old & v
Definition: Instructions.h:729
@ Xor
*p = old ^ v
Definition: Instructions.h:735
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:781
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:769
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:737
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:743
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:741
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:773
@ Nand
*p = ~(old & v)
Definition: Instructions.h:731
Value * getPointerOperand()
Definition: Instructions.h:886
bool isFloatingPointOperation() const
Definition: Instructions.h:898
BinOp getOperation() const
Definition: Instructions.h:819
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
Definition: Instructions.h:877
Value * getValOperand()
Definition: Instructions.h:890
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:863
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
bool test(unsigned Idx) const
Definition: BitVector.h:461
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition: DataLayout.h:390
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:846
A debug info location.
Definition: DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:270
Argument * getArg(unsigned i) const
Definition: Function.h:884
bool isDSOLocal() const
Definition: GlobalValue.h:307
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2094
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1513
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:562
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:567
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:834
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1805
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1420
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2204
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:533
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1492
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2082
LLVMContext & getContext() const
Definition: IRBuilder.h:203
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1551
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2194
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2508
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1911
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2068
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition: IRBuilder.h:1573
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:78
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:86
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
unsigned getMaxBytesForAlignment() const
Align getPrefFunctionAlignment() const
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
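The MachineInstrBuilder entries above are the building blocks of the BuildMI pattern used when a pseudo instruction is expanded in a custom inserter. A hedged sketch; MBB, MI, DL, TII, MRI, SrcReg and the LoongArch opcode/register-class names are assumed to be in scope from the surrounding inserter, and ORI is chosen only as an illustrative opcode:

// Materialize SrcReg | 1 into a fresh virtual register, then drop the pseudo.
Register Dst = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
BuildMI(*MBB, MI, DL, TII->get(LoongArch::ORI), Dst)
    .addReg(SrcReg, getKillRegState(true)) // last use of SrcReg
    .addImm(1);
MI.eraseFromParent(); // the pseudo has been fully replaced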
Representation of each machine instruction.
Definition: MachineInstr.h:72
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
Class to represent pointers.
Definition: DerivedTypes.h:700
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:740
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
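The SDValue accessors above are what a DAG combine typically uses to guard a fold. A small illustrative snippet; Op is a hypothetical SDValue from the surrounding combine and the fold itself is left out:

// Only consider a shift-left by a constant amount whose result has one user.
if (Op.getOpcode() == ISD::SHL && Op.hasOneUse() &&
    isa<ConstantSDNode>(Op.getOperand(1))) {
  uint64_t ShAmt = Op.getConstantOperandVal(1);
  EVT VT = Op.getValueType();
  if (ShAmt < VT.getScalarSizeInBits()) {
    // ... build and return the replacement node here ...
  }
}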
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:229
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:813
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:504
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:459
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:868
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:839
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:498
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:719
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:499
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:707
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:808
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:493
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:885
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:511
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:777
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:581
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
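The SelectionDAG factory methods above are used to build replacement nodes inside lowering hooks. A minimal sketch in that style; DAG and Op are assumed from the hook's signature, and the (x << 1) + 1 computation is purely illustrative rather than anything the LoongArch lowering actually emits:

SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue X = Op.getOperand(0);
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
                          DAG.getShiftAmountConstant(1, VT, DL));
return DAG.getNode(ISD::ADD, DL, VT, Shl, DAG.getConstant(1, DL, VT));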
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:705
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:579
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
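SmallSet and SmallVector are usually combined to collect unique items without heap traffic for small counts. A self-contained sketch of that idiom (the helper name and inputs are invented for the example):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"

// Keep the first occurrence of each value, preserving order.
static llvm::SmallVector<unsigned, 8> uniqueInOrder(llvm::ArrayRef<unsigned> In) {
  llvm::SmallSet<unsigned, 8> Seen;
  llvm::SmallVector<unsigned, 8> Out;
  for (unsigned V : In)
    if (Seen.insert(V).second) // insert() reports whether V was newly added
      Out.push_back(V);
  return Out;                  // e.g. {3, 5, 3, 7} -> {3, 5, 7}
}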
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:34
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:710
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:154
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:68
R Default(T Value)
Definition: StringSwitch.h:177
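StringRef and StringSwitch are the usual pair for mapping option or constraint text onto an enum. A self-contained sketch; the option names and the VecExt enum are invented for the example and are not part of the backend:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum class VecExt { None, LSX, LASX };

// Map "lsx", "lasx" or "lasx=on" style text to an enum value.
static VecExt parseVecExt(llvm::StringRef Spec) {
  llvm::StringRef Name = Spec.split('=').first; // "lasx=on" -> "lasx"
  return llvm::StringSwitch<VecExt>(Name)
      .Case("lsx", VecExt::LSX)
      .Case("lasx", VecExt::LASX)
      .Default(VecExt::None);
}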
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...

virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
LLVM_ABI unsigned getIntegerBitWidth() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:546
self_iterator getIterator()
Definition: ilist_node.h:134
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:801
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1236
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1232
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:765
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1265
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1141
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:215
@ GlobalAddress
Definition: ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:410
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1338
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1343
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:505
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:985
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:975
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1568
@ GlobalTLSAddress
Definition: ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:656
@ WRITE_REGISTER
Definition: ISDOpcodes.h:135
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1331
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1090
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:1002
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1187
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1166
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:535
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:228
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1261
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1485
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:601
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1075
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:832
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1321
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:793
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1358
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
Definition: ISDOpcodes.h:1059
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1151
@ ConstantPool
Definition: ISDOpcodes.h:92
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:718
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:960
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:787
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:145
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:994
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:730
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1318
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:838
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1256
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1180
@ BlockAddress
Definition: ISDOpcodes.h:94
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:62
@ AssertZext
Definition: ISDOpcodes.h:63
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:543
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1685
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1665
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1730
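The ISD::CondCode helpers above let combines reason about comparisons symbolically. A small sketch under the assumption that these helpers are reachable through the usual SelectionDAG headers; the behaviour noted in the comments follows their documented semantics:

#include "llvm/CodeGen/ISDOpcodes.h" // CondCode and the helpers (assumed header)

// Swap the operand order of a comparison; equality predicates are symmetric.
static llvm::ISD::CondCode swapComparisonOperands(llvm::ISD::CondCode CC) {
  if (llvm::ISD::isIntEqualitySetCC(CC)) // SETEQ/SETNE are unchanged by a swap
    return CC;
  return llvm::ISD::getSetCCSwappedOperands(CC); // e.g. SETLT -> SETGT
}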
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:751
ABI getTargetABI(StringRef ABIName)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:55
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:270
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:282
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
@ None
Definition: CodeGenData.h:107
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:198
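The MathExtras predicates above are the standard immediate checks when matching mask- and shift-style patterns. A self-contained sketch of how they compose (the helper name is invented):

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// A contiguous run of ones, possibly shifted: 0x0ff0 qualifies, 0x0f0f does not.
static bool looksLikeBitFieldMask(uint64_t Imm) {
  return llvm::isShiftedMask_64(Imm);
}
// Related queries: isPowerOf2_64(16) == true, Log2_64(16) == 4,
// isMask_64(0xff) == true, isUInt<12>(4095) == true, isUInt<12>(4096) == false.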
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition: Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:216
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
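The EVT queries above mirror the MVT ones but also cover extended types. A short sketch in the shape of a helper that maps a fixed-width vector type to an integer type of the same total width; Context would normally come from *DAG.getContext(), and the helper itself is hypothetical:

#include "llvm/CodeGen/ValueTypes.h"
#include <cstdint>

static llvm::EVT bitcastableIntVT(llvm::LLVMContext &Context, llvm::EVT VT) {
  if (!VT.isSimple() || !VT.isVector())
    return VT;
  uint64_t Bits = VT.getFixedSizeInBits();       // e.g. v4i32 -> 128
  return llvm::EVT::getIntegerVT(Context, Bits); // e.g. v4i32 -> i128
}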
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Register getFrameRegister(const MachineFunction &MF) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
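MachinePointerInfo ties a DAG memory node back to its IR-level or frame-slot origin. A hedged snippet in the style of calling-convention code that spills an outgoing value to a fixed stack slot; MF, MFI, DAG, DL, Chain, ArgValue and PtrVT are all assumed from the surrounding lowering:

// Create an 8-byte slot at SP+0 and store the argument through it.
int FI = MFI.CreateFixedObject(/*Size=*/8, /*SPOffset=*/0, /*IsImmutable=*/false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
Chain = DAG.getStore(Chain, DL, ArgValue, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));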
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)